20
2020
09
IP Pool
import urllibfrom bs4 import BeautifulSoupimport requestsimport osimport timeimport lxmlimport jsonimport csvimport telnetlibrows =[]i=1for i in range(3597): url='https://www.kuaidaili.com/free/inha/'+str(i) html = r
作者:kerek | 分类:Programme | 浏览:106 | 评论:0
20
2020
09
Auto-add to cart
Download ChromeDriver first, based on the version of Chrome you are using: https://chromedriver.chromium.org/downloads # coding=utf-8import osfrom selenium import webdriverimport datetimeimport timefrom os import path#此处chromedriver改为自己下载的路径driver = webdriver.Chrom
作者:kerek | 分类:Programme | 浏览:112 | 评论:0
20
2020
09
Transforming to JSON data format
import lxmlfrom bs4 import BeautifulSoupimport timeimport randomimport csvimport codecsimport unicodecsv as csvimport jsonimport urllib.request as requrl = 'https://hk.appledaily.com/pf/api/v3/content/fetch/query-feed?query=%7B%22feedQuery%22%3A%
作者:kerek | 分类:Programme | 浏览:100 | 评论:0
20
2020
09
Crawling data and saving to CSV/Excel
# -*- coding: UTF-8 -*-import requestsimport pandas as pdimport lxmlfrom bs4 import BeautifulSoupimport timeimport randomimport csv#import codecs#import unicodecsv as csvname, score, comment = [], [], []URL = 'https://ithelp.ithome.com.tw/article
作者:kerek | 分类:Programme | 浏览:114 | 评论:0
20
2020
09
Google photos crawler
import urllibimport threadingfrom bs4 import BeautifulSoupimport requestsimport osimport timeimport lxml# 頁面鏈接的初始化列表page_links_list=['https://www.google.com.hk/search?q=emoji&hl=zh-HK&gbv=2&biw=1263&bih=625&tbm=isch&ei=sLn
作者:kerek | 分类:Programme | 浏览:131 | 评论:0