《爬取京东机票一周最低价格》

之后不再研究爬虫了，转个方向。

from urllib import request, parse
import json
from random import choice


class JDPassengerTicketSpider:
    """Small client for JD's "week low price" flight-ticket endpoint.

    Typical flow: percent-encode the city names with ``urlencode``, build
    the query URL with ``struct_url``, download it with ``spider`` using a
    header from ``random_header``, then turn the JSON text into a
    ``{date: price}`` mapping with ``read_json_data``.
    """

    def __init__(self):
        # The query string is completed by struct_url(); the departure city
        # value is appended directly after this prefix.
        self.base_url = 'https://jipiao.jd.com/ajaxTicket/weeklowprice.action?depCity='
        # Pool of User-Agent strings; random_header() picks one per request.
        self.headers = [
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
            # Fixed: this entry used to carry a pasted "IE 9.0User-Agent:"
            # label and was missing its closing parenthesis.
            'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
            'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
            'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ',
            'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.1',
        ]

    def random_header(self):
        """Return a headers dict whose User-Agent is picked at random."""
        return {'User-Agent': choice(self.headers)}

    def spider(self, url, headers, timeout=None):
        """Download ``url`` and return the response body as text.

        Args:
            url: Fully built request URL.
            headers: Mapping of HTTP request headers to send.
            timeout: Optional timeout in seconds. When ``None`` (the
                default) no timeout argument is passed to ``urlopen``,
                which keeps the original behaviour.

        Returns:
            The response body decoded as UTF-8.
        """
        req = request.Request(url, headers=headers)
        open_kwargs = {} if timeout is None else {'timeout': timeout}
        # The context manager closes the response even if read/decode fails.
        with request.urlopen(req, **open_kwargs) as rsp:
            return rsp.read().decode('utf-8')

    def struct_url(self, dep, arr, dat):
        """Build the query URL for one departure/arrival/date combination.

        The values are inserted verbatim, so ``dep`` and ``arr`` must already
        be percent-encoded (see ``urlencode``).

        Args:
            dep: Departure city, already URL-encoded.
            arr: Arrival city, already URL-encoded.
            dat: Departure date string (e.g. ``'2017-04-29'``).

        Returns:
            The complete request URL.
        """
        return ''.join((self.base_url, dep, '&arrCity=', arr, '&depDate=', dat))

    def urlencode(self, string):
        """Percent-encode ``string`` so it can be placed in the query URL."""
        return parse.quote(string)

    def read_json_data(self, jscode):
        """Parse the endpoint's JSON text into a ``{date: price}`` dict.

        Args:
            jscode: JSON document containing a ``weekLowPriceInfoList`` array
                whose items each have ``date`` and ``price`` keys.

        Returns:
            Dict mapping each item's ``date`` to its ``price``.

        Raises:
            KeyError: If ``weekLowPriceInfoList``, ``date`` or ``price`` is
                missing from the payload.
        """
        payload = json.loads(jscode)
        return {
            info['date']: info['price']
            for info in payload['weekLowPriceInfoList']
        }


def main():
    """Query the week's lowest prices for one hard-coded route and print them.

    Prints, in order: the request URL, the raw response text, and the parsed
    ``{date: price}`` dict.
    """
    crawler = JDPassengerTicketSpider()
    # City names must be percent-encoded before they go into the URL.
    departure = crawler.urlencode('太原')
    arrival = crawler.urlencode('北京')
    query_url = crawler.struct_url(departure, arrival, '2017-04-29')
    print(query_url)
    raw_json = crawler.spider(query_url, crawler.random_header())
    print(raw_json)
    print(crawler.read_json_data(raw_json))


if __name__ == '__main__':
    main()

这段代码是上周四给人讲课时现场写的，没有加注释；如果有看不懂的地方，欢迎在评论区留言，我会解答。
发布了15 篇原创文章 · 获赞 3 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/Christopher_L1n/article/details/70589779