我们打开12306官网,点击进入
输入相关信息
点击查询可得到这些信息
单击鼠标右键进行检查,可以发现这个网站运用了反爬虫:真正的信息并不在页面源码中,而是通过一个XHR(Ajax)请求返回的。
这个网址为:
https://kyfw.12306.cn/otn/leftTicket/queryO?leftTicketDTO.train_date=2020-02-21&leftTicketDTO.from_station=NCG&leftTicketDTO.to_station=CSQ&purpose_codes=ADULT
对这个网址进行分析可知,我们只需输入train_date、from_station、to_station这三个参数的值便可得到所需要的信息。第一个参数是出发日期,很容易输入;第二个和第三个参数好像是城市(车站)名对应的一个简称,例如上面网址中的NCG和CSQ。
来到js文件下,我们可以发现这个网址下面好像有我们所需要的信息
将这个网址上面的信息进行读取,再进行一些去除操作,最后我们便可以得到城市名称和城市简称了。
代码如下:
import requests
import urllib.parse as parse
import time
import json
def Citys():
    """Download the 12306 station table and map station names to station codes.

    The response body is treated as a JavaScript assignment: everything up to
    and including the character after the first ``=`` and the final two
    characters are stripped, the remainder is split on ``@`` into records,
    and each record is split on ``|``.  The second field of a record is used
    as the station name and the third as its code.

    Returns:
        dict: station name -> station code.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9141'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    text = response.content.decode('utf-8')
    payload = text[text.find('=') + 2:-2]
    dict_city = {}
    for record in payload.split('@'):
        fields = record.split('|')
        # The payload starts with '@', so the first record is empty; skip it
        # (and any other record too short to carry a name and a code).
        if len(fields) < 3:
            continue
        dict_city[fields[1]] = fields[2]
    return dict_city
def Time():
    """Return the current local date as a ``(year, month, day)`` tuple of strings.

    Month and day are left-padded with a zero to two characters.
    """
    now = time.localtime()
    return str(now.tm_year), '%02d' % now.tm_mon, '%02d' % now.tm_mday
def str_handle(str_1, str_2, num):
    """Return the index in ``str_1`` of the ``num``-th occurrence of ``str_2``.

    Occurrences are counted from the left, starting at 1, and the search for
    the next occurrence resumes one character after the previous hit.

    If ``str_1`` contains fewer than ``num`` occurrences, the index of the
    last occurrence that was found is returned; if there is none (or ``num``
    is not positive) the result is ``-1``.

    Args:
        str_1: the string to search in.
        str_2: the substring to look for.
        num: which occurrence to locate (1-based).

    Returns:
        int: the index described above.
    """
    position = -1
    count = 0
    while count < num:
        # Search in place from just past the previous hit instead of
        # re-slicing the string on every round.
        hit = str_1.find(str_2, position + 1)
        if hit == -1:
            break
        position = hit
        count += 1
    return position
class Citys_class(object):
    """Query 12306 left-ticket data between two stations and format it as rows.

    Typical use: ``Citys_class(from_name, to_name, 'YYYY-MM-DD').handle()``.
    """

    # Headers sent with every query.  The Cookie value is hard-coded.
    # NOTE(review): it looks like a captured browser session and has
    # presumably expired - confirm before relying on it.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
        'Cookie':'JSESSIONID=9A3A60B55A2ACC51B24B6742E68E6230; RAIL_EXPIRATION=1582469373862; RAIL_DEVICEID=ERLN34ss4QuQiVGSBZaJz35V5mfm37V7QotSqYowrxa7ljZeEnI-RQjWRUTV8qjMdb5w8sps-WX286eIS9RF7Y_TOr4Cj6wSa_4UIfjh8GwzQPfWOV6nz8EIIIEfX-3ciBnc11jpF14E5BBpRzAqtiV8gdANBiKr; BIGipServerpool_passport=267190794.50215.0000; route=495c805987d0f5c8c84b14f60212447d; _jc_save_toDate=2020-02-20; _jc_save_wfdc_flag=dc; _jc_save_fromDate=2020-02-20; BIGipServerotn=451936778.24610.0000; _jc_save_fromStation=%u5317%u4EAC%2CBJP; _jc_save_toStation=%u4E4C%u9C81%u6728%u9F50%2CWAR'
    }
    # Query URL template.  The three %s slots receive the url-encoded
    # train_date, from_station and to_station pairs, in that order.
    url = 'https://kyfw.12306.cn/otn/leftTicket/queryO?leftTicketDTO.%s&leftTicketDTO.%s&leftTicketDTO.%s&purpose_codes=ADULT'

    def __init__(self, from_station, to_station, train_date=None):
        """Resolve the station names to codes and url-encode the query parts.

        Args:
            from_station: departure station name, a key of ``Citys()``.
            to_station: arrival station name, a key of ``Citys()``.
            train_date: travel date as ``'YYYY-MM-DD'``; defaults to today's
                local date, evaluated when the object is created.

        Raises:
            KeyError: if a station name is not in the station table.
        """
        if train_date is None:
            # Computed per call (not once at definition time), and through a
            # local import so it does not depend on module-level names.
            import datetime
            train_date = datetime.date.today().isoformat()
        # Download the station table once and reuse it for both lookups.
        stations = Citys()
        self.from_station = parse.urlencode({"from_station": stations[from_station]})
        self.to_station = parse.urlencode({'to_station': stations[to_station]})
        self.train_data = parse.urlencode({'train_date': train_date})

    def get_url(self):
        """Return the full query URL for this date and station pair."""
        return Citys_class.url % (self.train_data, self.from_station, self.to_station)

    def spider_city(self):
        """Fetch the query result and split it into bookable and other trains.

        Returns:
            tuple: ``(bookable, other, station_map)``.

            * ``bookable`` - for every result entry containing '预订', the
              part starting one character before '预订' and running through
              the 12th '|' after that point.
            * ``other`` - for every remaining entry, the part preceding
              'IS_TIME_NOT_BUY'.
            * ``station_map`` - the ``data.map`` object of the response;
              ``handle`` uses it to turn station codes into names.

        Raises:
            requests.RequestException: on network failure, timeout or an
                HTTP error status.
        """
        response = requests.get(url=self.get_url(), headers=Citys_class.headers, timeout=10)
        response.raise_for_status()
        payload = json.loads(response.content.decode('utf-8'))
        results = payload['data']['result']
        station_map = payload['data']['map']
        bookable = []
        other = []
        for entry in results:
            if '预订' in entry:
                tail = entry[entry.find('预订') - 1:]
                # Keep the first 12 '|'-terminated fields and drop the rest.
                kept = tail.split('|', 12)[:-1]
                bookable.append(''.join(field + '|' for field in kept))
            else:
                other.append(entry[:entry.find('IS_TIME_NOT_BUY')])
        return bookable, other, station_map

    def handle(self):
        """Format the query result as printable table rows.

        Returns:
            tuple: ``(rows, stopped_rows)``.  ``rows`` has one line per
            bookable train: train code, departure station, arrival station,
            then the remaining fields with 'Y'/'N' replaced by
            '可预约'/'已无法预约'.  ``stopped_rows`` has one line per other
            train, with fixed placeholder times and the remark '列车停运'.

        Raises:
            KeyError: if a station code is missing from the station map.
            IndexError: if an entry has fewer '|'-separated fields than
                expected.
        """
        # One request only: repeated calls would hit the server again and
        # could return results that disagree with each other.
        bookable, other, station_map = self.spider_city()
        rows = []
        stopped_rows = []
        for entry in bookable:
            fields = entry.split('|')
            # Fields 6 and 7 are looked up in the station map as the
            # departure and arrival stations; field 3 is printed as the
            # train code.
            departure = station_map[fields[6]]
            arrival = station_map[fields[7]]
            # Everything from the 8th '|' onwards is appended to the row.
            tail = '|' + '|'.join(fields[8:])
            tail = tail.replace('N', '已无法预约').replace('Y', '可预约')
            tail = tail.replace('|', ' | ')
            rows.append('| ' + fields[3] + ' | ' + departure + ' | ' + arrival + tail)
        for entry in other:
            fields = entry.split('|')
            departure = station_map[fields[6]]
            arrival = station_map[fields[7]]
            stopped_rows.append(
                '| ' + fields[3] + ' | ' + departure + ' | ' + arrival
                + ' | ' + '24:00' + ' | ' + '24:00' + ' | ' + '99:59'
                + ' | ' + '列车停运' + ' |'
            )
        return rows, stopped_rows
if __name__ == '__main__':
    # Interactive entry point: ask for the two stations and a date, run the
    # query, then print bookable trains followed by the remaining ones.
    print('--------------------12306信息查询-----------------------')
    fs = input('请输入出发地:')
    ds = input('请输入目的地:')
    # Stored as train_date rather than `time`, so the imported time module
    # (used by Time()) is not rebound to a string.
    train_date = input('请输入出发时间(注意格式:2020-02-25,默认情况下为购票当日):')
    # Only a rough shape check (two dashes, ten characters); anything else
    # falls back to the class's default date.
    if train_date.count('-') == 2 and len(train_date) == 10:
        city_class = Citys_class(fs, ds, train_date)
    else:
        city_class = Citys_class(fs, ds)
    # Call handle() once: every call repeats the network query.
    list_city, list_city_1 = city_class.handle()
    print('_______________________________________________________')
    print('| 车次 | 出发站 | 到达站 | 出发时间 | 到达时间 | 历时 | 备注 |')
    for row in list_city:
        print(row)
    print('_______________________________________________________')
    for row in list_city_1:
        print(row)
    print('_______________________________________________________')
运行结果:
本篇博文已经出现问题,希望大家原谅,如果大家有什么好的改进措施,欢迎留言。