知乎登录(小知识点)

1. 关于cookie和session

2. 英文验证码登录知乎(零碎知识点...喔喔...自己怕忘随意整理一下, 有点乱)


---判断验证码是否存在时, 请求的网址相对路径为图中:path(captcha?lang=en 请求的是英文的验证码)

---图中的{"show_captcha": false} 表明本次登录不用输入验证码


---captcha?lang=en : 验证码是英文的

---captcha?lang=cn : 验证码是中文的

---图中的{"show_captcha": true} 表明本次登录需要输入验证码

---图中{"show_captcha": true}, 在判断是否含有验证码时需要拿到---(判断是否有验证码是get请求)

======================================================================


---图中圈出的"img_base64", 在获取验证码时需要拿到---(获取验证码是put请求)

---(索取验证码图片, 在保证有验证码的前提下才会发送put)

======================================================================


---图中圈出的"success", 在验证验证码是否正确时需要拿到---(验证验证码是否正确是post请求)

---需要传参数


====================================================================


---set-cookie: ... 的位置

=====================================================================

---登录时发送的post请求,且需要传参数:


---登录成功

---图中圈出的相对路径是在登录时请求的, 如:("https://www.zhihu.com/api/v3/oauth/sign_in")

====================================================================

登录成功之后---

session.get("https://www.zhihu.com/") 知乎首页的网址

====================================================================


3. 中文验证码登录知乎

---首先请求的:path(/api/v3/oauth/captcha?lang=cn)

---其次,要把所有点的坐标记录下来(通过抓包工具抓取)

---get_captcha时判断

---完成,大概过程就是这样了,嘿嘿...

再来补充一下, 附上英文验证码的栗子~

# 英文验证码的登录方式
# (中文验证码登录需要点击倒立文字, 下面的代码只演示英文验证码)
import requests,time,json

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# HTTP headers passed with every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # The token after "oauth" is the same value sent as "client_id" at login.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
}
# 获取的服务器的Set-Cookie用session直接自动解析并保存, 在后续的请求中, 会在请求头中自动携带这些cookie
# LWPCookieJar: 对cookie进行自动操作, load() save()

from http.cookiejar import LWPCookieJar

"""创建session管理cookie"""
# One shared session: Set-Cookie headers from the server are stored in the
# jar and sent back automatically on later requests.
session = requests.Session()
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Load cookies saved by a previous run; the jar already knows its file name,
# so load() falls back to it. A failure here only means starting without
# saved cookies.
try:
    session.cookies.load(ignore_discard=True, ignore_expires=True)
except Exception:
    print('加载失败')

# Request the Zhihu home page through the session before logging in.
res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
print(res)

"""定义知乎登录函数"""
def zhihu_login():
    """Log in to Zhihu with username/password, handling the English captcha.

    The server is first asked whether a captcha is required. If it is, the
    captcha image is fetched, the user types its text, and that text is
    validated before the credentials are submitted. If it is not, the
    credentials are submitted directly with an empty ``captcha`` field.

    On HTTP 201 the session cookies are saved to the cookie jar's file and
    the response body is printed; any other status prints a failure message.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    # Value of the "captcha" form field when the server does not ask for one.
    captcha = ''
    if is_captcha():
        captcha = get_captcha()
        if captcha is None:
            # No captcha text could be obtained, so there is nothing to submit.
            print('获取验证码失败')
            return
        # The captcha must be validated before the login request is sent.
        if not check_captcha(captcha):
            return

    # The login request is sent whether or not a captcha was required.
    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    # NOTE(review): "timestamp" and "signature" are hard-coded; presumably
    # they were captured together from a browser session -- confirm that the
    # server still accepts this pair.
    # NOTE(review): "lang" is "cn" although the captcha endpoints are called
    # with lang=en -- confirm which value the server expects.
    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": "1530194590142",
        "source": "com.zhihu.web",
        "signature": "9c16a7c48a9a74c3a5747a56125d9ab08a79f070",
        "username": "XXX",
        "password": "XXXXXX",
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "other_",
        "utm_source": "baidu",
    }

    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            # Persist the logged-in cookies so a later run can load them.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登录失败')
    except Exception as e:
        print('请求失败', e)



"""判断是否有英文验证码"""
def is_captcha():
    """Ask Zhihu whether the current login attempt requires a captcha.

    Sends a GET request to the English captcha endpoint and reads the
    ``show_captcha`` field of the JSON reply.

    Returns:
        bool: True when the reply says a captcha is required. False when it
        says none is required, and also when the request fails, the status
        is not 200, or the reply cannot be parsed.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            # Report the unexpected status instead of silently returning None.
            print('验证码检查失败, 状态码:', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as e:
        # Surface the cause; the original printed an empty line here.
        print('验证码检查失败', e)
        return False

    if show_captcha:
        print('有验证码')
        return True
    print('没有验证码')
    return False

import base64
from PIL import Image
from io import BytesIO

"""获取验证码"""
def get_captcha():
    """Fetch the captcha image, display it, and return the text typed by the user.

    Sends a PUT request to the English captcha endpoint; it should only be
    called once a captcha is known to be required. The ``img_base64`` field
    of the JSON reply is decoded and opened as an image for the user to read.

    Returns:
        str | None: The text entered by the user, or None when the request
        fails, the status is not 202, or the image cannot be decoded.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:  # the endpoint answers 202, not 200
            print('获取验证码失败, 状态码:', response.status_code)
            return None

        img_base64 = json.loads(response.text)['img_base64']

        # Decode the base64 payload and open it as an in-memory image.
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()

        return input('请输入验证码:')
    except Exception as e:
        # Surface the cause; the original printed an empty line here.
        print('获取验证码失败', e)
        return None


"""验证验证码是否输入正确(需要传入参数captcha)"""
def check_captcha(captcha):
    """Submit the typed captcha text to Zhihu and report whether it was accepted.

    Args:
        captcha: The captcha text, sent as the ``input_text`` form field.

    Returns:
        bool: True when the JSON reply contains a ``success`` key, else False.
    """
    # verify=False turns off certificate verification for the HTTPS request.
    reply = session.post(
        url='https://www.zhihu.com/api/v3/oauth/captcha?lang=en',
        data={'input_text': captcha},
        headers=headers,
        verify=False,
    )
    accepted = 'success' in json.loads(reply.text)
    print('输入验证码正确' if accepted else '输入验证码不正确')
    return accepted


# Run the login flow only when this file is executed as a script.
if __name__ == '__main__':
    zhihu_login()

猜你喜欢

转载自blog.csdn.net/qq_42336549/article/details/80629057