快过年了,Python大神给免费分享一个爬取12306火车票例子(附源码)

。。。

上面列出的是以前文章中用到的资源,有兴趣的可以到对应的文章中查看。

作为一种便捷地收集网上信息并从中抽取可用信息的方式,网络爬虫技术变得越来越有用。借助 Python 这样简单易学的编程语言,只需少量编程技能就可以爬取复杂的网站。

如果手机上显示代码错乱,请分享到QQ或者其他地方,用电脑查看!!!

python能干的东西有很多,这里不再过多叙述,直接重点干货。在这里相信有许多想要学习Python的同学,大家可以+下Python学习分享裙:叁零肆+零伍零+柒玖玖,即可免费领取一整套系统的 Python学习教程!

。。。

重要提醒:运行前请先确认 Python 版本。下面的代码导入了 http.client 并使用 print() 函数,属于 Python 3.x 的写法,请在 Python 3.x 环境下运行。

如果缺少某个包,请在命令行中执行 pip install 包名 进行安装,否则程序无法运行。

...

主要源码:

用户信息配置:

配置信息很重要:帐号、密码、电话、姓名、身份证号等一定不要填错,否则无法成功购票!

###################################CONFIG START##################################
# Account credentials used for login.
user='帐号'
passwd='密码'
# Train numbers to buy, tried in list order. If the list is empty the script
# enters an interactive stage where the train number must be typed by hand;
# setting it is recommended.
g_buy_list = ["K827", "K587", "K841", "K1224", "K836"]
# Train numbers to ignore.
g_ingnore_list = ["L74"]
# Ticket query conditions: travel date and stations. Station codes can be
# looked up in station_code.txt in the info directory.
g_query_data = [
    ("leftTicketDTO.train_date", "2017-12-27"),
    ("leftTicketDTO.from_station", "GZQ"),
    ("leftTicketDTO.to_station", "LZZ"),
    ("purpose_codes", "ADULT"),
]
# Passenger information.
g_passengers = [
    {
        "name": u"姓名",
        "id": "身份证",
        "tel": "电话",
    },
]
'''

g_str_seat_types = {

u"高级软卧":"gr_num",

u"软卧":"rw_num",

u"软座":"rz_num",

u"特等座":"tz_num",

u"无座":"wz_num",

u"硬卧":"yw_num",

u"硬座": "yz_num",

u"二等座":"ze_num",

u"一等座":"zy_num",

u"商务座":"swz_num",

}

'''
# Seat types of interest; the type names are the values listed in the
# g_str_seat_types table kept in the string above.
g_care_seat_types = ["rw_num", "yw_num"]
# Number of automatic captcha-recognition attempts (tesseract-based OCR).
# The recognition rate is high when the captcha has no overlapping characters
# and no background.
# Background interference has been seen only once so far, for a few hours,
# after a DNS update connected to a different server.
# One option is to find a server without interference and pin it in the hosts file,
# or to do more image processing to remove noise before running OCR.
g_max_auto_times = 0
# Refresh interval between ticket queries (passed to time.sleep).
g_query_sleep_time = 1
###################################End##################################

抢票代码:

请务必确认下面要导入的包都已安装,否则运行时可能会报错!

import ast
import cProfile
import datetime
import functools
import gzip
# import http.httplib
import http.client
import json
import logging
import os
import subprocess
import sys
import time
import traceback
import urllib
import urllib.parse
from io import StringIO

# Load your own configuration module here, for example:
#from conf_frankie_test import *
#from conf_neil import *
#from conf_example import *
# (The g_clean_temp flag defined below controls cleanup of temporary files
# such as captcha images.)
from shuapiao12306.conf_example import (
    g_passengers,
    g_max_auto_times,
    passwd,
    g_buy_list,
    g_ingnore_list,
    g_care_seat_types,
    g_query_data,
    g_query_sleep_time,
    user,
)

# Whether temporary files (the OCR output, the saved captcha image) are
# removed after use.
g_clean_temp = False
##########################internal###############################
# Train-number prefix letter -> train category name.
g_str_train_types = {
    "G": u"高铁",
    "L": u"临客",
    "D": u"动车",
    "Z": u"直达",
    "T": u"特快",
    "K": u"快速",
}
#g_seat_code
# Seat field name (as used in g_care_seat_types) -> code sent as "seatType".
g_seat_code_dict = {
    "yz_num":"1",
    "rz_num":"2",
    "yw_num":"3",
    "rw_num":"4",
    "gr_num":"6",
    "tz_num":"P",
    "wz_num":"WZ",
    "ze_num":"O",
    "zy_num":"M",
    "swz_num":"9",
}
logger = logging.getLogger('shuapiao')
# Single connection shared by every request in this module.
g_conn = http.client.HTTPConnection('kyfw.12306.cn', timeout=100)

# Restart the connection.
def restart_conn(conn):
    """Close ``conn`` and reopen it in place.

    The previous version bound a brand-new ``HTTPConnection`` to the local
    name ``conn``: that new object was connected and then thrown away, while
    the connection the caller still holds stayed closed. Reconnecting the
    same object keeps every holder of ``conn`` usable.

    :param conn: connection object providing ``close()`` and ``connect()``.
    """
    print("restart connection")
    conn.close()
    conn.connect()

# Decorator
def retries(max_tries):
    """Return a decorator that calls the wrapped function up to ``max_tries`` times.

    * ``http.client.HTTPException``: the connection is restarted and the call
      is retried; if every attempt fails this way the wrapper returns ``None``.
    * any other exception: logged and retried while attempts remain, and
      re-raised on the last attempt.

    The previous version iterated ``range(max_tries)`` in ascending order
    (the ``reverse()`` call was commented out), so ``tries_remaining`` was 0
    on the very first attempt and the first generic exception was re-raised
    without any retry. Counting down restores the intended behaviour.

    :param max_tries: maximum number of attempts.
    """
    def dec(func, conn=None):
        @functools.wraps(func)
        def f2(*args, **kwargs):
            # Same logger object as the module-level ``logger``.
            log = logging.getLogger('shuapiao')
            for tries_remaining in reversed(range(max_tries)):
                try:
                    return func(*args, **kwargs)
                except http.client.HTTPException:
                    print("conneciont error")
                    # Look the shared connection up only when it is needed.
                    restart_conn(g_conn if conn is None else conn)
                except Exception:
                    if tries_remaining > 0:
                        traceback.print_exc()
                        log.error("errror %d" % tries_remaining)
                        log.error(traceback.format_exc())
                    else:
                        raise
            return None
        return f2
    return dec

# Invoke the OCR engine.
def call_tesseract(in_file):
    """Run the ``tesseract`` command-line tool on ``in_file`` and return the text read.

    tesseract writes its result to ``o.txt`` in the current directory; that
    file is removed afterwards when ``g_clean_temp`` is set.

    :param in_file: path of the captcha image.
    :return: the recognised 4-character code, or ``''`` when tesseract cannot
        be started, exits with a non-zero status, or the recognised text does
        not have the expected length.
    """
    tesseract_exe_name = 'tesseract'
    expect_len = 4
    out_file = "o"
    args = [tesseract_exe_name, in_file, out_file]
    try:
        ret = subprocess.call(args)
    except OSError as e:
        # e.g. tesseract is not installed: report it and let the caller
        # fall back to manual input instead of crashing.
        print("call tesseract failed:%s" % e)
        return ''
    if ret != 0:
        print("call tesseract failed:%d" % ret)
        return ''
    out_full = out_file + '.txt'
    with open(out_full) as f:
        text = f.read()
    if g_clean_temp:
        os.remove(out_full)
    # Drop surrounding whitespace (e.g. a trailing newline or form feed)
    # and any spaces inside the code.
    text = text.strip().replace(" ", "")
    print("auto read rand_code:%s" % text)
    if len(text) != expect_len:
        print("auto read faild:%s, %d" % (text, len(text)))
        return ''
    return text

class HttpAuto:
    """Automate the 12306 web flow: captcha check, login, ticket query, order submission.

    Every request goes through the module-level connection ``g_conn``; account,
    passenger, train and seat settings come from the imported configuration.
    """

    def __init__(self):
        """Set up the request headers and the per-session state fields."""
        # Headers used for the login captcha requests.
        self.ext_header = {
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "http://kyfw.12306.cn/otn/login/init#",
            "Accept-Language": "zh-cn",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "Keep-Alive",
            "Cache-Control": "no-cache",
            "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        }
        # Headers used for every other request; "Referer" and "Cookie" are
        # updated as the flow advances.
        self.proxy_ext_header = {
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "http://kyfw.12306.cn/otn/login/init#",
            "Accept-Language": "zh-cn",
            "Accept-Encoding": "gzip, deflate",
            "Proxy-Connection": "Keep-Alive",
            "Pragma": "no-cache",
            "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        }
        # cookies
        self.sid = ''
        self.sip = ''
        # passenger info to be POSTed
        self.passengerTicketStr = ''
        self.oldPassengerStr = ''
        # values echoed back to the server in later POSTs
        self.globalRepeatSubmitToken = ''
        self.key_check_isChange = ''
        self.leftTicketStr = ''
        self.orderId = ''
        self.pass_code = 'abcd'
        self.rand_code = 'abcd'

    @staticmethod
    def _read_body(res):
        """Return the response body as bytes, gunzipping it when the server compressed it."""
        raw = res.read()
        if res.getheader('Content-Encoding') == 'gzip':
            return gzip.decompress(raw)
        return raw

    def _post(self, url, post_data, headers=None):
        """POST ``post_data`` to ``url`` on the shared connection and return the body bytes."""
        if headers is None:
            headers = self.proxy_ext_header
        g_conn.request('POST', url, body=post_data, headers=headers)
        return self._read_body(g_conn.getresponse())

    def construct_passengerTicketStr(self):
        """Build ``passengerTicketStr`` / ``oldPassengerStr`` from ``g_passengers``.

        Both are kept as ``str``; ``urlencode`` percent-encodes them as UTF-8
        when they are posted.
        """
        print("###construct_passengerTicketStr###")
        new_parts = []
        old_parts = []
        for p in g_passengers:
            new_parts.append('1,0,1,' + p['name'] + ',1,' + p['id'] + ',' + p['tel'] + ',N')
            old_parts.append(p['name'] + ',1,' + p['id'] + ',1_')
        # Entries of the new string are separated by '_' with no trailing '_';
        # every entry of the old string keeps its trailing '_'.
        self.passengerTicketStr = '_'.join(new_parts)
        self.oldPassengerStr = ''.join(old_parts)
        print("new:%s" % self.passengerTicketStr)
        print("old:%s" % self.oldPassengerStr)

    def logout(self):
        """Request the logout URL and consume the response.

        The previous version passed the literal ``'540'`` as the HTTP method,
        which is not a valid method, and never read the response.
        """
        url_logout = "http://kyfw.12306.cn/otn/login/loginOut"
        g_conn.request('GET', url_logout, headers=self.proxy_ext_header)
        g_conn.getresponse().read()
        return True

    def __del__(self):
        """Log out (best effort) and close the shared connection."""
        try:
            self.logout()
        except Exception:
            # A failed logout must not prevent the connection from closing.
            traceback.print_exc()
        finally:
            print("close connnection")
            g_conn.close()

    def update_session_info(self, res):
        """Store the JSESSIONID / BIGipServerotn cookies found in ``res``.

        Header names are compared case-insensitively, and every
        comma-separated part of each Set-Cookie value is examined, so a
        header carrying a single cookie no longer raises ``IndexError``.

        :param res: response object providing ``getheaders()``.
        :return: True when at least one of the two cookies was updated.
        """
        print("process header cookie")
        update = False
        for name, value in res.getheaders():
            if name.lower() != "set-cookie":
                continue
            for part in value.split(','):
                cookie = part.strip().split(';')[0].strip()
                if cookie.startswith('JSESSIONID'):
                    self.sid = cookie
                    update = True
                    print("Update sessionid " + self.sid)
                elif cookie.startswith('BIGipServerotn'):
                    self.sip = cookie
                    update = True
                    print("Update sip:" + self.sip)
        return update

    def check_pass_code_common(self, module, rand_method):
        """Fetch a captcha, read it (OCR or manual input) and verify it with the server.

        Loops until the server accepts the code. Typing ``no`` at the prompt
        fetches a new captcha; typing ``quit`` gives up.

        :param module: ``'login'`` or ``'passenger'``.
        :param rand_method: value of the ``rand`` request parameter.
        :return: True when the code was accepted, False when the user quit.
        """
        ret = False
        # Remaining automatic OCR attempts; afterwards the user is prompted.
        auto_times = g_max_auto_times
        header = self.ext_header if module == 'login' else self.proxy_ext_header
        while 1:
            url_pass_code = "http://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=%s&rand=%s" % (module, rand_method)
            print("send getPassCodeNew:%s" % datetime.datetime.now())
            g_conn.request('GET', url_pass_code, headers=header)
            res = g_conn.getresponse()
            print("recv getPassCodeNew=====>:%s" % datetime.datetime.now())
            if module == 'login':
                self.update_session_info(res)
                self.ext_header["Cookie"] = self.sid + ';' + self.sip
            # save the captcha image
            pic_type = res.getheader('Content-Type').split(';')[0].split('/')[1]
            data = res.read()
            file_name = "./pass_code.%s" % pic_type
            with open(file_name, 'wb') as f:
                f.write(data)
            # auto read or manual
            read_pass_code = ''
            if auto_times > 0:
                auto_times = auto_times - 1
                read_pass_code = call_tesseract(file_name)
            if read_pass_code == '':
                read_pass_code = input("input passcode(%s):" % file_name)
                if read_pass_code == "no":
                    print("Get A new PassCode")
                    continue
                elif read_pass_code == "quit":
                    print("Quit")
                    break
                print("input:%s" % read_pass_code)
            else:
                print("auto:%s" % read_pass_code)
            if g_clean_temp:
                os.remove(file_name)
            data = []
            if module == 'passenger':
                self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/confirmPassenger/initDc#nogo"
                self.rand_code = read_pass_code
                data = [
                    ("_json_att", ''),
                    ("rand", rand_method),
                    ("randCode", read_pass_code),
                    ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
                ]
            elif module == 'login':
                self.pass_code = read_pass_code
                data = [
                    ("randCode", read_pass_code),
                    ("rand", rand_method),
                ]
            post_data = urllib.parse.urlencode(data)
            print("send checkRandCodeAnsyn=====>:")
            url_check_rand = "http://kyfw.12306.cn/otn/passcodeNew/checkRandCodeAnsyn"
            data = self._post(url_check_rand, post_data, headers=header)
            print("recv checkRandCodeAnsyn")
            resp = json.loads(data)
            if resp['data'] != 'Y':
                print("status error:%s" % resp['data'])
                continue
            ret = True
            break
        return ret

    @retries(3)
    def check_pass_code(self):
        """Step 1: verify the login captcha."""
        print("#############################Step1:Passcode#########")
        return self.check_pass_code_common('login', 'sjrand')

    @retries(3)
    def check_rand_code(self):
        """Step 8: verify the order captcha."""
        print("#############################Step8:Randcode#########")
        return self.check_pass_code_common('passenger', 'randp')

    @retries(3)
    def loginAysnSuggest(self):
        """Step 2: log in with ``user`` / ``passwd`` after passing the captcha.

        :return: True on successful login, False otherwise.
        """
        if not self.check_pass_code():
            return False
        print("#############################Step2:Login#########")
        url_login = "http://kyfw.12306.cn/otn/login/loginAysnSuggest"
        data = [
            ("loginUserDTO.user_name", user),
            ("userDTO.password", passwd),
            ("randCode", self.pass_code),
        ]
        post_data = urllib.parse.urlencode(data)
        self.proxy_ext_header["Cookie"] = self.sid + ';' + self.sip
        print("send loginAysnSuggest=====>")
        data = self._post(url_login, post_data)
        print("recv loginAysnSuggest")
        res_json = json.loads(data)
        if res_json['status'] != True or 'loginCheck' not in res_json['data']:
            print(u"return error:%s" % ' '.join(res_json['messages']))
            return False
        if res_json['data']['loginCheck'] == 'Y':
            print(u"login success")
            return True
        print(u"login error %s" % res_json['data']['loginCheck'])
        return False

    def show_ticket(self, it):
        """Print one query result row.

        The hard-seat column (``yz_num``) is included so that the row lines
        up with the header printed by ``query``.
        """
        print(it['station_train_code'], it['from_station_name'], it['to_station_name'],
              it['start_time'], it['arrive_time'], it['lishi'],
              it['swz_num'], it['tz_num'], it['zy_num'], it['ze_num'], it['gr_num'],
              it['rw_num'], it['yw_num'], it['rz_num'], it.get('yz_num', '--'),
              it['wz_num'], it['canWebBuy'])

    ############
    # retcode: -2 for retry, -1 for error, 0 for success
    ############
    def do_ticket(self, json_data, result, want_special):
        """Pick a train from the query response and try to buy it.

        :param json_data: decoded query response; its ``data`` list is scanned.
        :param result: dict filled with ``train_code -> item`` for buyable trains.
        :param want_special: when True only trains in ``g_buy_list`` are
            considered and the first available one, in list order, is bought
            without prompting.
        :return: -2 to retry the query, -1 when buying failed, 0 on success
            or when the user quits.
        """
        for item in json_data['data']:
            info = item['queryLeftNewDTO']
            if info['canWebBuy'] == 'N':
                continue
            train_code = info['station_train_code']
            if want_special and train_code not in g_buy_list:
                continue
            if train_code in g_ingnore_list:
                continue
            if any(info[t] != "--" and info[t] != u"无" for t in g_care_seat_types):
                result[train_code] = item
        # query returned nothing usable, retry
        if not result:
            return -2
        # follow the priority given by the list order
        if want_special:
            for train_code in g_buy_list:
                if train_code not in result:
                    continue
                if not self.buy(result[train_code]):
                    print("Err during buy")
                    return -1
                return 0
        # show all
        for item in result.values():
            self.show_ticket(item['queryLeftNewDTO'])
        # prompt for a command
        cmd = input("input cmd[r|q|K101]:").strip()
        print("input:%s" % cmd)
        if cmd == "r":
            print("retry")
            return -2
        if cmd == "q":
            print("quit")
            return 0
        if cmd not in result:
            # An unknown train number used to raise KeyError; query again instead.
            print("unknown train:%s" % cmd)
            return -2
        print("buy ticket:%s" % cmd)
        if not self.buy(result[cmd]):
            print("Err during buy")
            return -1
        return 0

    @retries(3)
    def query(self):
        """Step 3: poll the left-ticket query until ``do_ticket`` reports success.

        :return: True once ``do_ticket`` returns 0.
        """
        print("#############################Step3:Query#########")
        self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/leftTicket/init"
        url_query = "http://kyfw.12306.cn/otn/leftTicket/query?" + urllib.parse.urlencode(g_query_data)
        print("start query======>%s" % url_query)
        want_special = len(g_buy_list) != 0
        if want_special:
            print("JUST For:%s" % (','.join(g_buy_list)))
        else:
            print(u"车次 出发->到达 时间:到达 历时 商务座 特等座 一等座 二等座 高级软卧 软卧 硬卧 软座 硬座 无座 其他备注")
        q_cnt = 0
        while 1:
            q_cnt = q_cnt + 1
            g_conn.request('GET', url_query, headers=self.proxy_ext_header)
            data = self._read_body(g_conn.getresponse())
            res_json = json.loads(data)
            if res_json['status'] != True:
                print("parse json failed! data %s" % data)
                # Pause before retrying instead of hammering the server.
                time.sleep(g_query_sleep_time)
                continue
            result = {}
            ret = self.do_ticket(res_json, result, want_special)
            if ret == 0:
                break
            elif ret == -2:
                print(u"no ticket, refresh %d times!" % q_cnt)
                time.sleep(g_query_sleep_time)
                continue
        return True

    @retries(3)
    def confirmPassenger_get_token(self):
        """Step 6: load the initDc page and extract the tokens embedded in its script.

        Sets ``globalRepeatSubmitToken``, ``key_check_isChange`` and
        ``leftTicketStr``.

        :return: True when both script lines were found, False otherwise.
        """
        print("#############################Step6:confirmPassenger_get_token #########")
        url_confirm_passenger = "http://kyfw.12306.cn/otn/confirmPassenger/initDc"
        g_conn.request('GET', url_confirm_passenger, headers=self.proxy_ext_header)
        page = self._read_body(g_conn.getresponse()).decode("utf8", "replace")
        line_token = ''
        line_request_info = ''
        # Split into text lines whether or not the body was gzipped (iterating
        # the raw bytes object yields integers, not lines).
        for line in page.splitlines():
            stripped = line.strip()
            if stripped.startswith('var globalRepeatSubmitToken = '):
                line_token = stripped
            elif stripped.startswith('var ticketInfoForPassengerForm'):
                line_request_info = stripped
                break
        if not line_token or not line_request_info:
            print("globalRepeatSubmitToken not found")
            return False
        # "var globalRepeatSubmitToken = 'xxx';" -> xxx
        self.globalRepeatSubmitToken = line_token.split('=', 1)[1].strip()[1:-2]
        print("Update globalRepeatSubmitToken=%s" % self.globalRepeatSubmitToken)
        # Keep everything after the first '=' (the value itself may contain
        # '=') and drop the trailing ';'.
        req_data = line_request_info.split('=', 1)[1].strip()[:-1]
        # Turn the JavaScript literals into Python ones. NOTE(review): this
        # textual replace also rewrites these words inside string values.
        req_data = req_data.replace("null", "''")
        req_data = req_data.replace("true", "True")
        req_data = req_data.replace("false", "False")
        print("line_request_info")
        # literal_eval only accepts literals, so server-supplied text cannot
        # execute code the way eval() would.
        req_json = ast.literal_eval(req_data)
        self.key_check_isChange = req_json['key_check_isChange']
        self.leftTicketStr = req_json['leftTicketStr']
        print("Update key_check_isChange=%s" % self.key_check_isChange)
        return True

    @retries(3)
    def getQueueCount(self, item):
        """Step 10: ask the server for the queue count of the chosen train and seat type.

        :param item: one entry of the query response ``data`` list.
        :return: True when the response status is true.
        """
        print("#############################Step:getQueueCount #########")
        url_queue_count = "http://kyfw.12306.cn/otn/confirmPassenger/getQueueCount"
        # buy_date looks like 'Sun Jan 5 00:00:00 UTC+0800 2014'
        # NOTE(review): built from the current date, not the queried train
        # date - confirm this is what the server expects.
        tlist = time.ctime().split()
        tlist[3] = '00:00:00'
        tlist.insert(4, 'UTC+0800')
        buy_date = ' '.join(tlist)
        info = item['queryLeftNewDTO']
        # First seat type of interest that has tickets; falls through to the
        # last configured type when none has.
        for t_type in g_care_seat_types:
            if info[t_type] != "--" and info[t_type] != u"无":
                break
        s_type = g_seat_code_dict[t_type]
        data = [
            ("train_date", buy_date),
            ("train_no", info['train_no']),
            ("stationTrainCode", info['station_train_code']),
            ("seatType", s_type),
            ("fromStationTelecode", info['from_station_telecode']),
            ("toStationTelecode", info['to_station_telecode']),
            ("leftTicket", info['yp_info']),
            ("purpose_codes", "00"),
            ("_json_att", ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        post_data = urllib.parse.urlencode(data)
        print("send getQueueCount=====>")
        res_json = json.loads(self._post(url_queue_count, post_data))
        print("recv getQueueCount:%s" % res_json)
        if res_json['status'] != True:
            print("getQueueCount error :%s" % res_json)
            return False
        return True

    @retries(3)
    def checkOrderInfo(self):
        """Step 9: submit passenger strings and captcha for validation.

        :return: True when both ``status`` and ``data.submitStatus`` are true.
        """
        print("#############################Step9:checkOrderInfo #########")
        url_check_order = "http://kyfw.12306.cn/otn/confirmPassenger/checkOrderInfo"
        data = [
            ("cancel_flag", "2"),
            ("bed_level_order_num", "000000000000000000000000000000"),
            ("passengerTicketStr", self.passengerTicketStr),
            ("oldPassengerStr", self.oldPassengerStr),
            ("tour_flag", "dc"),
            ("randCode", self.rand_code),
            ("_json_att", ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        post_data = urllib.parse.urlencode(data)
        print("send checkOrderInfo=====>")
        res_json = json.loads(self._post(url_check_order, post_data))
        print("recv checkOrderInfo:%s" % res_json)
        if res_json['status'] != True or res_json['data']['submitStatus'] != True:
            # 'errMsg' may be absent when the request itself was rejected.
            payload = res_json.get('data')
            if isinstance(payload, dict) and 'errMsg' in payload:
                err = payload['errMsg']
            else:
                err = res_json.get('messages')
            print("checkOrderInfo error :%s" % err)
            return False
        return True

    @retries(3)
    def checkUser(self):
        """Step 4: check that the login session is still valid.

        :return: True when ``data.flag`` is true.
        """
        print("#############################Step4:checkUser #########")
        url_check_info = "http://kyfw.12306.cn/otn/login/checkUser"
        post_data = urllib.parse.urlencode([('_json_att', '')])
        print(post_data)
        print("send checkUser=====>")
        res_json = json.loads(self._post(url_check_info, post_data))
        print("recv checkUser")
        if res_json['data'].get('flag') != True:
            print("check user failed, %s" % res_json)
            return False
        return True

    @retries(3)
    def submitOrderRequest(self, item):
        """Step 5: submit the order request for the chosen train.

        :param item: one entry of the query response ``data`` list.
        :return: True when the response status is true.
        """
        print("#############################Step5:submitOrderRequest #########")
        url_submit = "http://kyfw.12306.cn/otn/leftTicket/submitOrderRequest"
        info = item['queryLeftNewDTO']
        post_data = "secretStr=" + item['secretStr'] + "&train_date=" \
            + info['start_train_date'] \
            + "&back_train_date=" + info['start_train_date'] \
            + "&tour_flag=dc&purpose_codes=ADULT&query_from_station_name=" \
            + info['from_station_name'] \
            + "&query_to_station_name=" + info['to_station_name'] \
            + "&undefined"
        print(post_data)
        print("send submitOrderRequest=====>")
        data = self._post(url_submit, post_data.encode("utf8"))
        res_json = json.loads(data)
        if res_json['status'] != True:
            print(u"submit order failed")
            print(data)
            # Print the text itself; encoding to gb2312 printed a bytes repr
            # and could raise UnicodeEncodeError.
            print(''.join(res_json['messages']))
            return False
        return True

    @retries(3)
    def confirmSingleForQueue(self):
        """Step 11: confirm the order and enter the queue.

        :return: True when ``data.submitStatus`` is true.
        """
        print("#############################Step11:confirmSingleForQueue #########")
        url_check_info = "http://kyfw.12306.cn/otn/confirmPassenger/confirmSingleForQueue"
        data = [
            ('passengerTicketStr', self.passengerTicketStr),
            ("oldPassengerStr", self.oldPassengerStr),
            ('randCode', self.rand_code),
            ('purpose_codes', "00"),
            ('key_check_isChange', self.key_check_isChange),
            ('leftTicketStr', self.leftTicketStr),
            ('train_location', 'H2'),
            ('_json_att', ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        post_data = urllib.parse.urlencode(data)
        print("send confirmSingleForQueue=====>")
        res_json = json.loads(self._post(url_check_info, post_data))
        print("recv confirmSingleForQueue")
        if res_json['data'].get('submitStatus') != True:
            print(u"confirmSingleForQueue failed, %s" % res_json)
            return False
        return True

    @retries(5)
    def queryOrderWaitTime(self):
        """Step 12: poll the queue until the order id is available.

        Changes from the previous version: the URL is rebuilt from the base
        on every poll (it used to grow by one query string per iteration),
        the guard checks the ``queryOrderWaitTimeStatus`` key that is actually
        read, an error returns False instead of True, and there is a short
        pause between polls.

        :return: True when ``orderId`` was updated, False on error.
        """
        print("#############################Step12:queryOrderWaitTime #########")
        url_base = "http://kyfw.12306.cn/otn/confirmPassenger/queryOrderWaitTime?"
        cnt = 0
        while 1:
            data = [
                ('random', int(time.time())),
                ("tourFlag", "dc"),
                ('_json_att', ''),
                ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
            ]
            url_query_wait = url_base + urllib.parse.urlencode(data)
            print("send queryOrderWaitTime:%d=====>" % cnt)
            g_conn.request('GET', url_query_wait, headers=self.proxy_ext_header)
            res_json = json.loads(self._read_body(g_conn.getresponse()))
            print("recv queryOrderWaitTime:%s" % res_json)
            cnt = cnt + 1
            wait_info = res_json['data']
            if wait_info.get('queryOrderWaitTimeStatus') != True:
                print("queryOrderWaitTime error")
                print(res_json.get('messages'))
                return False
            if wait_info['waitCount'] == 0:
                self.orderId = wait_info['orderId']
                print("Update orderId:%s" % self.orderId)
                return True
            time.sleep(1)

    @retries(3)
    def resultOrderForDcQueue(self):
        """Step 13: fetch the final result of the queued order.

        :return: True when ``data.submitStatus`` is true.
        """
        print("#############################Step13:resultOrderForDcQueue #########")
        url_result = "http://kyfw.12306.cn/otn/confirmPassenger/resultOrderForDcQueue"
        data = [
            ('orderSequence_no', self.orderId),
            ('_json_att', ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        post_data = urllib.parse.urlencode(data)
        print("send resultOrderForDcQueue=====>")
        data = self._post(url_result, post_data)
        res_json = json.loads(data)
        print("recv resultOrderForDcQueue")
        if res_json['data'].get('submitStatus') != True:
            print("submit error")
            print(data)
            return False
        print("#############################Success check ticket in webbrowser #########")
        return True

    @retries(3)
    def get_passenger_info(self):
        """Step 7: request the passenger list (the response is only parsed, not used)."""
        print("#############################Step7:getPassengerDTOs #########")
        url_get_passager_info = "http://kyfw.12306.cn/otn/confirmPassenger/getPassengerDTOs"
        data = [
            ('_json_att', ''),
            ('REPEAT_SUBMIT_TOKEN', self.globalRepeatSubmitToken),
        ]
        post_data = urllib.parse.urlencode(data)
        print("send getPassengerDTOs=====>")
        json.loads(self._post(url_get_passager_info, post_data))
        print("recv getPassengerDTOs")
        return True

    def buy(self, item):
        """Run the order steps for ``item`` in sequence, stopping at the first failure.

        :param item: one entry of the query response ``data`` list.
        :return: True when every step succeeded, False otherwise.
        """
        # Step4
        if not self.checkUser():
            return False
        # Step5
        if not self.submitOrderRequest(item):
            return False
        # Step6
        if not self.confirmPassenger_get_token():
            return False
        self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/confirmPassenger/initDc#nogo"
        # Step7 (get_passenger_info) is intentionally skipped.
        # Step8
        if not self.check_rand_code():
            return False
        # Step9
        if not self.checkOrderInfo():
            return False
        # Step10
        if not self.getQueueCount(item):
            return False
        # Step11
        if not self.confirmSingleForQueue():
            return False
        # Step12
        if not self.queryOrderWaitTime():
            return False
        # Step13
        if not self.resultOrderForDcQueue():
            return False
        return True

def clean_temp_files():
    """Placeholder for temp-file cleanup; currently only announces itself."""
    print("clean_temp_files")

##############################################test#############################

@retries(3)
def test_retries():
    """Manual check of the ``retries`` decorator: print a marker, then always fail."""
    print("test")
    # Swap in http.client.HTTPException to exercise the reconnect path.
    raise NameError

def test_ocr():
    """Manual check: run OCR on ``pass_code.jpeg`` and print what was read."""
    recognised = call_tesseract("pass_code.jpeg")
    print("read:%s" % recognised)

@retries(3)
def test_reconnect():
    """Manual check: issue three GET requests over ``g_conn``, restarting the connection after each."""
    url = "http://www.baidu.com"
    header = {
        "Accept": "*/*",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "zh-cn",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "Keep-Alive",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    # NOTE(review): the original indentation was lost; this assumes every
    # statement after the ``for`` line belongs to the loop body - confirm.
    for _ in range(3):
        print("send")
        g_conn.request('GET', url, headers=header)
        response = g_conn.getresponse()
        response.read()
        print("send")
        restart_conn(g_conn)

def test_get_svr_ips():
    """Placeholder for a server-IP lookup check; currently only announces itself."""
    print("test_get_svr_ips")

##############################################test#############################

def show_conf():
    """Print the active configuration: trains, query data, passengers, timing and OCR settings."""
    wanted = ','.join(g_buy_list)
    ignored = ','.join(g_ingnore_list)
    print("########show conf##############")
    print("Buy:%s" % wanted)
    print("Ingnore:%s" % ignored)
    print("Query data:", g_query_data)
    print("Passengers:", g_passengers)
    print("Sleep time:%f" % g_query_sleep_time)
    print("Auto OCR: %d" % g_max_auto_times)
    print("\n")

def main():
    """Show the configuration, log in, then query until a purchase succeeds or the user quits.

    :return: False when login fails, True once ``query`` reports success.
    """
    show_conf()
    # set log; build the path portably instead of the Windows-only literal
    # '.\log.txt', which also contained an invalid escape sequence.
    hdlr = logging.FileHandler(os.path.join('.', 'log.txt'))
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.WARNING)
    #test_retries()
    print("connecting......")
    g_conn.connect()
    ha = HttpAuto()
    ha.construct_passengerTicketStr()
    if not ha.loginAysnSuggest():
        return False
    while 1:
        try:
            # query() returns True once a ticket was bought or the user quit;
            # without this break the loop started a fresh query forever.
            if ha.query():
                break
        except Exception:
            traceback.print_exc()
            # Pause before the next attempt instead of spinning on errors.
            time.sleep(g_query_sleep_time)
    return True

# Script entry point; the commented calls are manual checks that can be
# enabled in place of the normal run.
if __name__ == "__main__":
    # test_ocr()
    # test_reconnect()
    main()

参考项目完整源码:

真的放不了地址,请私信我,已经打包好了。

如有需要请尽早下载,链接可能会失效!

以上是全部内容,只是乐于分享,不足之处请多包涵!爬虫的基本原理就是先获取网页源码,再从中提取所需内容。一般来说,只要有一个入口,通过分析就可以找到大量其他相关的资源,进而继续爬取。

我也写了很多其他的非常简单的入门级的爬虫详细教程,关注后,点击我的头像,就可以查看到。

欢迎大家一起留言讨论和交流,谢谢!

猜你喜欢

转载自blog.csdn.net/qq_42712463/article/details/85050953
今日推荐