python爬虫Day1(Post请求自动登陆github)

'''
POST请求自动登陆github

    请求URL:
    https://github.com/session

    请求方式:
        POST

    请求头:
        Cookie
        User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36

    请求体:
        utf8: ✓
        authenticity_token: NVuVfOGJn7mujgE0t+rglSqXY8hpYaIjA4PpO2Vd+g9oTD0zdUAEEKQYjWaWX53w3olyb9H6RvJ8eKVLj8hSBQ==
        login: zxr997947
        password: zxr316794
        webauthn-support: supported
        commit: Sign in


    '''

# 1获取token随机字符串
'''
1.访问登陆页面获取token随机字符串
    请求URL:
           https://github.com/login
    请求方式:
           GET
    请求头:
          COOKIES
          User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
2.解析并提取token字符串
# 正则
<input type="hidden" name="authenticity_token" value="(.*?)" />
'''
import os
import re

import requests
# Step 1: fetch the GitHub login page and pull out what the login POST needs:
# the hidden authenticity_token form value and the cookies set by the page.
login_url = 'https://github.com/login'

# Request headers for the login page (a desktop Chrome User-Agent).
login_header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}

# The timeout keeps the script from blocking forever on a stalled connection;
# raise_for_status() turns an HTTP error response into an immediate exception
# instead of letting the token extraction below fail on an error page.
login_res = requests.get(url=login_url, headers=login_header, timeout=10)
login_res.raise_for_status()

# Extract the value of the hidden "authenticity_token" input from the HTML.
token_match = re.search(
    '<input type="hidden" name="authenticity_token" value="(.*?)" />',
    login_res.text,
    re.S,
)
if token_match is None:
    # Fail with an explicit message rather than an IndexError on an empty list.
    raise RuntimeError(
        'authenticity_token input not found in the response from ' + login_url
    )
authenticity_token = token_match.group(1)
print(authenticity_token)

# Cookies returned with the login page, as a plain dict.
login_cookies = login_res.cookies.get_dict()

# 2.开始登陆github
'''
请求URL:
    https://github.com/session

    请求方式:
        POST

    请求头:
        Cookie
        User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36

    请求体:
        utf8: ✓
        authenticity_token: NVuVfOGJn7mujgE0t+rglSqXY8hpYaIjA4PpO2Vd+g9oTD0zdUAEEKQYjWaWX53w3olyb9H6RvJ8eKVLj8hSBQ==
        login: zxr997947
        password: zxr316794
        commit: Sign in
        webauthn-support: supported
'''
# Step 2: submit the login form to GitHub and save the returned page.

# URL the login form is POSTed to.
session_url = 'https://github.com/session'

# Request headers for the login POST.
session_headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3704.400 QQBrowser/10.4.3587.400'}

# Form fields sent as the request body.
# NOTE(review): the account name and password are hard-coded in source. They
# can be overridden with the GITHUB_LOGIN / GITHUB_PASSWORD environment
# variables; the literals remain only as a backward-compatible fallback and
# should be removed (and the password rotated) if this file is shared.
form_data = {
    "commit": "Sign in",
    "utf8": "✓",
    "authenticity_token": authenticity_token,
    "login": os.environ.get("GITHUB_LOGIN", "zxr997947"),
    "password": os.environ.get("GITHUB_PASSWORD", "zxr316794"),
    "webauthn-support": "supported",
}

# Send the cookies obtained with the login page along with the form. The
# timeout prevents the script from hanging forever on a stalled connection.
session_res = requests.post(
    url=session_url,
    headers=session_headers,
    cookies=login_cookies,
    data=form_data,
    timeout=10,
)

# Save the response body first so it can be inspected even when the request
# failed, then surface any HTTP error status instead of ignoring it.
with open('github3.html', 'w', encoding='utf-8') as f:
    f.write(session_res.text)

session_res.raise_for_status()

  

猜你喜欢

转载自www.cnblogs.com/Auraro997/p/11115124.html