Python 爬虫:爬取 12306 高铁余票

import requests
import pandas as pd
import arrow
import json
import os
from itertools import combinations
os.chdir(r'C:/Users/Windows/Desktop')

class HighSpeed(object):
    """Query the 12306 ticket endpoint for one route on one date and log the result to CSV.

    Attributes:
        date: Query date string, passed verbatim as the ``queryDate`` parameter
            and used as the prefix of the output file name.
        from_station: Departure station code sent as ``from_station``.
        to_station: Arrival station code sent as ``to_station``.
    """

    _QUERY_URL = 'https://www.12306.cn/kfzmpt/lcxxcx/query'
    _HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/80.0.3970.5 Safari/537.36'}
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the whole crawl forever.
    _TIMEOUT = 10

    def __init__(self, date, from_station, to_station):
        self.date = date
        self.from_station = from_station
        self.to_station = to_station

    def get_pd(self):
        """Fetch the raw response body for this route and date.

        Returns:
            The response body as text (expected to be JSON, but not validated here).

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        # A list of pairs keeps the query-string order fixed. The original code
        # sent the two station codes swapped, which queried the reverse route.
        params = [
            ('purpose_codes', 'ADULT'),
            ('queryDate', self.date),
            ('from_station', self.from_station),
            ('to_station', self.to_station),
        ]
        response = requests.get(
            self._QUERY_URL,
            params=params,
            headers=self._HEADERS,
            timeout=self._TIMEOUT,
        )
        return response.text

    def to_csv(self):
        """Append this route's ticket rows to ``<date>test.csv`` in the working directory.

        Each row gets an extra column holding the local time (HH:mm) at which it
        was recorded. Failures are reported on stdout and the route is skipped,
        so that one bad response does not stop a multi-route crawl.
        """
        try:
            body = self.get_pd()
        except requests.RequestException as exc:
            print('request failed: {}'.format(exc))
            return

        try:
            rows = json.loads(body)['data']['datas']
            frame = pd.DataFrame(rows)
        except (ValueError, KeyError, TypeError):
            # Not JSON, or JSON without the expected data/datas payload.
            print('empty')
            return

        if frame.empty:
            # Nothing to record; avoid appending a header-only line.
            print('empty')
            return

        frame['记录时间'] = arrow.now().format('HH:mm')

        path = self.date + 'test.csv'
        # In append mode pandas would repeat the header on every call; write it
        # only when the file is new or still empty.
        write_header = not os.path.exists(path) or os.path.getsize(path) == 0
        try:
            frame.to_csv(path, encoding='gbk', mode='a', index=False, header=write_header)
        except (OSError, UnicodeEncodeError) as exc:
            print('write failed: {}'.format(exc))
        
    
if __name__ == '__main__':
    # Station codes to crawl; every unordered pair of them is queried once.
    # NOTE(review): 'RQ' is the only two-letter code in this list -- possibly a
    # truncated code; confirm against the 12306 station table.
    cheng_yu = ['ICW','JOW','FYW','WZW','NKW','NWW','RQ','FQW','WMW','FZW','CYW','CQW','DYW','MYW','NIW','NCW','MSW','YBW','VJW','RXW']
    station_pairs = list(combinations(cheng_yu, 2))

    # Days of January 2020 to crawl -- remember to adjust this range.
    for day in range(12, 13):
        # Zero-pad the day so that single-digit days still give YYYY-MM-DD.
        query_date = '2020-01-{:02d}'.format(day)
        for origin, destination in station_pairs:
            HighSpeed(query_date, origin, destination).to_csv()

    print('finished!')

之前一直没有找到 12306 的这个接口:余票大于 20 张时通常只显示“有”,而这个网址居然可以显示完整的余票数量。更良心的是,爬取到的数据直接就是 JSON,不需要再进行清洗,非常方便!
        
        
       

发布了19 篇原创文章 · 获赞 0 · 访问量 883

猜你喜欢

转载自blog.csdn.net/weixin_44056948/article/details/103943048