知乎模拟登录,支持验证码和保存 Cookies

  1 import requests
  2 import time
  3 import re
  4 import base64
  5 import hmac
  6 import hashlib
  7 import json
  8 import matplotlib.pyplot as plt
  9 from http import cookiejar
 10 from PIL import Image
 11 
 12 HEADERS = {
 13     'Connection': 'keep-alive',
 14     'Host': 'www.zhihu.com',
 15     'Referer': 'https://www.zhihu.com/',
 16     'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 '
 17                   '(KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36'
 18     }
 19 LOGIN_URL = 'https://www.zhihu.com/signup'
 20 LOGIN_API = 'https://www.zhihu.com/api/v3/oauth/sign_in'
 21 FORM_DATA = {
 22     'client_id': 'c3cef7c66a1843f8b3a9e6a1e3160e20',
 23     'grant_type': 'password',
 24     'source': 'com.zhihu.web',
 25     'username': '',
 26     'password': '',
 27     # 改为'cn'是倒立汉字验证码
 28     'lang': 'en',
 29     'ref_source': 'homepage'
 30 }
 31 
 32 
 33 class ZhihuAccount(object):
 34 
 35     def __init__(self):
 36         self.login_url = LOGIN_URL
 37         self.login_api = LOGIN_API
 38         self.login_data = FORM_DATA.copy()
 39         self.session = requests.session()
 40         self.session.headers = HEADERS.copy()
 41         self.session.cookies = cookiejar.LWPCookieJar(filename='./cookies.txt')
 42 
 43     def login(self, username=None, password=None, load_cookies=True):
 44         """
 45         模拟登录知乎
 46         :param username: 登录手机号
 47         :param password: 登录密码
 48         :param load_cookies: 是否读取上次保存的 Cookies
 49         :return: bool
 50         """
 51         if load_cookies and self.load_cookies():
 52             if self.check_login():
 53                 return True
 54 
 55         headers = self.session.headers.copy()
 56         headers.update({
 57             'authorization': 'oauth c3cef7c66a1843f8b3a9e6a1e3160e20',
 58             'X-Xsrftoken': self._get_token()
 59         })
 60         username, password = self._check_user_pass(username, password)
 61         self.login_data.update({
 62             'username': username,
 63             'password': password
 64         })
 65         timestamp = str(int(time.time()*1000))
 66         self.login_data.update({
 67             'captcha': self._get_captcha(self.login_data['lang'], headers),
 68             'timestamp': timestamp,
 69             'signature': self._get_signature(timestamp)
 70         })
 71 
 72         resp = self.session.post(self.login_api, data=self.login_data, headers=headers)
 73         if 'error' in resp.text:
 74             print(json.loads(resp.text)['error']['message'])
 75         elif self.check_login():
 76             return True
 77         print('登录失败')
 78         return False
 79 
 80     def load_cookies(self):
 81         """
 82         读取 Cookies 文件加载到 Session
 83         :return: bool
 84         """
 85         try:
 86             self.session.cookies.load(ignore_discard=True)
 87             return True
 88         except FileNotFoundError:
 89             return False
 90 
 91     def check_login(self):
 92         """
 93         检查登录状态,访问登录页面出现跳转则是已登录,
 94         如登录成功保存当前 Cookies
 95         :return: bool
 96         """
 97         resp = self.session.get(self.login_url, allow_redirects=False)
 98         if resp.status_code == 302:
 99             self.session.cookies.save()
100             print('登录成功')
101             return True
102         return False
103 
104     def _get_token(self):
105         """
106         从登录页面获取 token
107         :return:
108         """
109 
110         resp = requests.get("https://www.zhihu.com")
111         cookies = resp.cookies
112         token = cookies.items()[0][1]
113         return token
114 
115     def _get_captcha(self, lang, headers):
116         """
117         请求验证码的 API 接口,无论是否需要验证码都需要请求一次
118         如果需要验证码会返回图片的 base64 编码
119         根据 lang 参数匹配验证码,需要人工输入
120         :param lang: 返回验证码的语言(en/cn)
121         :param headers: 带授权信息的请求头部
122         :return: 验证码的 POST 参数
123         """
124         if lang == 'cn':
125             api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=cn'
126         else:
127             api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
128         resp = self.session.get(api, headers=headers)
129         show_captcha = re.search(r'true', resp.text)
130 
131         if show_captcha:
132             put_resp = self.session.put(api, headers=headers)
133             json_data = json.loads(put_resp.text)
134             img_base64 = json_data['img_base64'].replace(r'\n', '')
135             with open('./captcha.jpg', 'wb') as f:
136                 f.write(base64.b64decode(img_base64))
137             img = Image.open('./captcha.jpg')
138             if lang == 'cn':
139                 plt.imshow(img)
140                 print('点击所有倒立的汉字,按回车提交')
141                 points = plt.ginput(7)
142                 capt = json.dumps({'img_size': [200, 44],
143                                    'input_points': [[i[0]/2, i[1]/2] for i in points]})
144             else:
145                 img.show()
146                 capt = input('请输入图片里的验证码:')
147             # 这里必须先把参数 POST 验证码接口
148             self.session.post(api, data={'input_text': capt}, headers=headers)
149             return capt
150         return ''
151 
152     def _get_signature(self, timestamp):
153         """
154         通过 Hmac 算法计算返回签名
155         实际是几个固定字符串加时间戳
156         :param timestamp: 时间戳
157         :return: 签名
158         """
159         ha = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=hashlib.sha1)
160         grant_type = self.login_data['grant_type']
161         client_id = self.login_data['client_id']
162         source = self.login_data['source']
163         ha.update(bytes((grant_type + client_id + source + timestamp), 'utf-8'))
164         return ha.hexdigest()
165 
166     def _check_user_pass(self, username, password):
167         """
168         检查用户名和密码是否已输入,若无则手动输入
169         """
170         if username is None:
171             username = self.login_data.get('username')
172             if not username:
173                 username = input('请输入手机号:')
174         if len(username) == 11 and username.isdigit() and '+86' not in username:
175             username = '+86' + username
176 
177         if password is None:
178             password = self.login_data.get('password')
179             if not password:
180                 password = input('请输入密码:')
181         return username, password
182 
183 
# Script entry point: reuse saved cookies when possible, otherwise prompt
# for the phone number and password interactively.
if __name__ == '__main__':
    account = ZhihuAccount()
    account.login(username=None, password=None, load_cookies=True)

GitHub:https://github.com/liyunchen/Zhihu-Login/blob/master/zhihu_login.py

猜你喜欢

转载自www.cnblogs.com/chenlove/p/9371476.html