# 之后不再研究爬虫了,转个方向。 (Blog note: after this I'm done with crawlers and moving in a new direction.)
from urllib import request, parse
import json
from random import choice
class JDPassengerTicketSpider:
    """Fetch weekly low-price flight data from JD's ticket AJAX endpoint.

    Typical flow: ``urlencode()`` the city names, ``struct_url()`` the query
    URL, ``spider()`` the JSON payload with a ``random_header()``, then
    ``read_json_data()`` to get a ``{date: price}`` dict.
    """

    def __init__(self):
        # Endpoint base; struct_url() appends the remaining query parameters.
        self.base_url = 'https://jipiao.jd.com/ajaxTicket/weeklowprice.action?depCity='
        # Pool of User-Agent strings rotated per request to look less bot-like.
        self.headers = ['Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
                        'IE 9.0User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
                        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
                        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
                        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
                        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
                        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
                        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
                        'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko',
                        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
                        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ',
                        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
                        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.1']

    def random_header(self):
        """Return a headers dict carrying a randomly chosen User-Agent."""
        return {'User-Agent': choice(self.headers)}

    def spider(self, url, headers, timeout=10):
        """GET *url* with *headers*; return the body decoded as UTF-8.

        Fix: the original never closed the HTTP response, leaking the
        socket — the ``with`` block guarantees release even on error.
        *timeout* (seconds) is new but defaulted, so existing callers
        are unaffected.
        """
        req = request.Request(url, headers=headers)
        with request.urlopen(req, timeout=timeout) as rsp:
            return rsp.read().decode('utf-8')

    def struct_url(self, dep, arr, dat):
        """Build the full query URL from departure city, arrival city and date.

        *dep* and *arr* are expected to be percent-encoded already
        (see :meth:`urlencode`); *dat* is a ``YYYY-MM-DD`` string.
        """
        return '{}{}&arrCity={}&depDate={}'.format(self.base_url, dep, arr, dat)

    def urlencode(self, string):
        """Percent-encode *string* (UTF-8) for safe use as a query value."""
        return parse.quote(string)

    def read_json_data(self, jscode):
        """Parse the JSON payload and map each date to its low price.

        Raises KeyError if 'weekLowPriceInfoList' (or a per-entry
        'date'/'price' field) is missing from the response.
        """
        json_data = json.loads(jscode)
        return {info['date']: info['price']
                for info in json_data['weekLowPriceInfoList']}
def main():
    """Demo run: print the query URL, the raw JSON, and the parsed
    {date: price} dict for a Taiyuan -> Beijing search."""
    crawler = JDPassengerTicketSpider()
    arrival = crawler.urlencode('北京')
    departure = crawler.urlencode('太原')
    query_url = crawler.struct_url(departure, arrival, '2017-04-29')
    print(query_url)
    raw_json = crawler.spider(query_url, crawler.random_header())
    print(raw_json)
    print(crawler.read_json_data(raw_json))
# Run the demo only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
# 这是上周四给人讲课的时候现写的,没加注释,看不懂就评论,我会解答的。 (Blog note: written live during a lesson last Thursday, no comments added — leave a comment if anything is unclear and I'll explain.)