模拟登录之图片验证码处理

登录图片验证码处理

  借助超级鹰、云打码等打码平台提供的 API 接口识别验证码,获取识别结果。

案例:古诗文网登录,登录页地址:https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx

 1 import requests
 2 from lxml.html.clean import etree
 3 from fake_useragent import UserAgent
 4 UA = UserAgent()
 5 headers = {'User-Agent': UA.random, }
 6 session=requests.Session()#为避免cookie影响,一般直接采用session请求
 7 
 8 #验证码处理(机器识别,参考官方文档操作说明http://www.chaojiying.com/api-14.html---ABC123456,ABC123456)
 9 import requests
10 from hashlib import md5
11 class Chaojiying_Client(object):
12 
13     def __init__(self, username, password, soft_id):
14         self.username = username
15         password =  password.encode('utf8')
16         self.password = md5(password).hexdigest()
17         self.soft_id = soft_id
18         self.base_params = {
19             'user': self.username,
20             'pass2': self.password,
21             'softid': self.soft_id,
22         }
23         self.headers = {
24             'Connection': 'Keep-Alive',
25             'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
26         }
27 
28     def PostPic(self, im, codetype):
29         """
30         im: 图片字节
31         codetype: 题目类型 参考 http://www.chaojiying.com/price.html
32         """
33         params = {
34             'codetype': codetype,
35         }
36         params.update(self.base_params)
37         files = {'userfile': ('ccc.jpg', im)}
38         r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
39         return r.json()
40 
41     def ReportError(self, im_id):
42         """
43         im_id:报错题目的图片ID
44         """
45         params = {
46             'id': im_id,
47         }
48         params.update(self.base_params)
49         r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
50         return r.json()
def getCodeImgText(imgPath, imgType):
    """Recognise the captcha stored at *imgPath* and return its text.

    Args:
        imgPath: Path of the local captcha image file.
        imgType: Chaojiying captcha type code (see the price list on the
            official site).

    Returns:
        The ``'pic_str'`` field of the recognition response.

    Raises:
        OSError: If the image file cannot be read.
        KeyError: If the response contains no ``'pic_str'`` field.
    """
    # NOTE(review): account name, password and software ID are hard-coded;
    # replace them with your own values from the Chaojiying user centre.
    chaojiying = Chaojiying_Client('bobo328410948', 'bobo328410948', '899370')
    # Read inside a context manager so the file handle is always closed.
    with open(imgPath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.PostPic(im, imgType)['pic_str']
55 
56 
# Captcha recognition and login flow.
from urllib.parse import urljoin

# Step 1: fetch the login page; it carries the captcha image and hidden fields.
url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
response = session.get(url, headers=headers, timeout=30)
tree = etree.HTML(response.text)

# Step 2: locate and download the captcha image through the same session.
captcha_src = tree.xpath('//img[@id="imgCode"]/@src')
if not captcha_src:
    raise RuntimeError('captcha image (img#imgCode) not found on the login page')
# urljoin handles src values with or without a leading slash.
img_src = urljoin('https://so.gushiwen.org/', captcha_src[0])
print(img_src)
response_img = session.get(url=img_src, headers=headers, timeout=30)
with open('Code.jpg', 'wb') as f:
    f.write(response_img.content)

# Step 3: send the saved image to the recognition service.
code = getCodeImgText('Code.jpg', 1004)
print(code)

# Step 4: extra form parameters are hidden inside the page itself.
viewstate_values = tree.xpath('//input[@id="__VIEWSTATE"]/@value')
generator_values = tree.xpath('//input[@id="__VIEWSTATEGENERATOR"]/@value')
if not (viewstate_values and generator_values):
    raise RuntimeError('hidden __VIEWSTATE form fields not found on the login page')
__VIEWSTATE = viewstate_values[0]
__VIEWSTATEGENERATOR = generator_values[0]
print(__VIEWSTATE)
print(__VIEWSTATEGENERATOR)

# Step 5: build the login form.
# NOTE(review): the 'email' value below is an obfuscated placeholder left by
# the page this script was copied from; replace it and 'pwd' with a real
# account before running.
data = {
    '__VIEWSTATE': __VIEWSTATE,
    '__VIEWSTATEGENERATOR': __VIEWSTATEGENERATOR,
    'from': 'http://so.gushiwen.org/user/collect.aspx',
    'email': '[email protected]',
    'pwd': 'ABC123456',
    'code': code,
    'denglu': '登录',
}

# Step 6: submit the login form and save the resulting page for inspection.
url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
response = session.post(url=url, data=data, headers=headers, timeout=30)
with open('index.html', 'w', encoding='utf-8') as f:
    f.write(response.text)

# Step 7: print the text of the second <span> in each "mainreg2" block.
text = etree.HTML(response.text).xpath('//div[@class="mainreg2"]')
for i in text:
    print(i.xpath('./span[2]/text()'))

猜你喜欢

转载自www.cnblogs.com/open-yang/p/11306027.html