爬虫-----lagou2.py

import re
import requests
# Cookies collected from every response; sent back with each later request.
all_cookie_dict = {}


# ##################################### Step 1: visit the login page #####################################


def _extract_page_value(name, page_text):
    """Return the single-quoted value assigned to ``name`` in ``page_text``.

    Looks for text of the form ``<name> = '<value>';``.  The capture is
    non-greedy so it stops at the first closing ``';`` even when several
    assignments share one line.

    Raises:
        IndexError: if no such assignment is present.  IndexError is kept so
            the failure type matches a plain ``findall(...)[0]`` lookup, but
            it now carries a message naming the missing value.
    """
    found = re.search(r"%s = '(.*?)';" % re.escape(name), page_text)
    if found is None:
        raise IndexError('%s not found in the login page' % name)
    return found.group(1)


r1 = requests.get(
    url='https://passport.lagou.com/login/login.html',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    },
)

# The login page embeds two anti-forgery values; they are sent back as
# request headers when the login form is posted.
token = _extract_page_value('X_Anti_Forge_Token', r1.text)
code = _extract_page_value('X_Anti_Forge_Code', r1.text)
all_cookie_dict.update(r1.cookies.get_dict())

# ##################################### Step 2: log in #####################################

# Form fields posted to the login endpoint.
# NOTE(review): the account name and password are hard-coded here; consider
# loading them from the environment or a config file instead.
login_form = {
    'isValidate': 'true',
    'username': '15131255089',
    'password': '4565465',
    'request_form_verifyCode': '',
    'submit': '',
}

# AJAX-style headers.  The two X-Anit-Forge-* entries carry the ``code`` and
# ``token`` values extracted from the login page.
login_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Host': 'passport.lagou.com',
    'Origin': 'https://passport.lagou.com',
    'Referer': 'https://passport.lagou.com/login/login.html',
    'X-Anit-Forge-Code': code,
    'X-Anit-Forge-Token': token,
}

r2 = requests.post(
    url='https://passport.lagou.com/login/login.json',
    data=login_form,
    headers=login_headers,
    cookies=all_cookie_dict,
)

# Decode the JSON body of the login response and remember any new cookies.
r2_response_json = r2.json()
r2_cookie_dict = r2.cookies.get_dict()
all_cookie_dict.update(r2_cookie_dict)
# ##################################### Step 3: grant #####################################

grant_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Referer': 'https://passport.lagou.com/login/login.html',
    'Host': 'passport.lagou.com',
}

# Redirects are not followed automatically: the Location header of this
# response is read and requested explicitly by the next step.
r3 = requests.get(
    url='https://passport.lagou.com/grantServiceTicket/grant.html',
    headers=grant_headers,
    cookies=all_cookie_dict,
    allow_redirects=False,
)

r3_cookie_dict = r3.cookies.get_dict()
all_cookie_dict.update(r3_cookie_dict)

# ##################################### Steps 4-7: follow the redirect chain by hand #####################################


def _follow_redirect(url):
    """GET ``url`` without auto-following redirects and collect its cookies.

    Each hop is requested with the cookies gathered so far, and any cookies
    set by the response are merged into ``all_cookie_dict`` before returning,
    so the next hop sees them.

    Args:
        url: the address to request (the previous response's Location header).

    Returns:
        The ``requests`` response for this single hop.
    """
    # NOTE(review): the Host header is fixed to www.lagou.com regardless of
    # the host in ``url`` -- confirm every hop is served from that host.
    response = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Referer': 'https://passport.lagou.com/login/login.html',
            'Host': 'www.lagou.com',
            'Upgrade-Insecure-Requests': '1',
        },
        cookies=all_cookie_dict,
        allow_redirects=False,
    )
    all_cookie_dict.update(response.cookies.get_dict())
    return response


# Step 4: action
r4 = _follow_redirect(r3.headers['Location'])

# Step 5: fetch the authentication info
r5 = _follow_redirect(r4.headers['Location'])
print(r5.headers['Location'])

# Step 6
r6 = _follow_redirect(r5.headers['Location'])
print(r6.headers['Location'])

# Step 7
r7 = _follow_redirect(r6.headers['Location'])




# ##################################### Step 8: view the account profile #####################################

profile_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Host': 'gate.lagou.com',
    'Pragma': 'no-cache',
    'Referer': 'https://account.lagou.com/v2/account/userinfo.html',
    'X-L-REQ-HEADER': '{deviceType:1}',
}

r8 = requests.get(
    url='https://gate.lagou.com/v1/neirong/account/users/0/',
    headers=profile_headers,
    cookies=all_cookie_dict,
)

# The decoded body is kept because the profile update request reads its
# 'submitCode' and 'submitToken' entries.
r8_response_json = r8.json()
all_cookie_dict.update(r8.cookies.get_dict())


# ##################################### Step 9: update the account profile #####################################

# The anti-forgery header values come from the JSON body returned by the
# profile GET request (``r8_response_json``).
update_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Host': 'gate.lagou.com',
    'Origin': 'https://account.lagou.com',
    'Referer': 'https://account.lagou.com/v2/account/userinfo.html',
    'X-L-REQ-HEADER': '{deviceType:1}',
    'X-Anit-Forge-Code': r8_response_json.get('submitCode'),
    'X-Anit-Forge-Token': r8_response_json.get('submitToken'),
    'Content-Type': 'application/json;charset=UTF-8',
}

# New profile values, sent as the JSON body of the PUT request.
profile_payload = {
    "userName": "wupeiqi999",
    "sex": "MALE",
    "portrait": "images/myresume/default_headpic.png",
    "positionName": "...",
    "introduce": "....",
}

r9 = requests.put(
    url='https://gate.lagou.com/v1/neirong/account/users/0/',
    headers=update_headers,
    json=profile_payload,
    cookies=all_cookie_dict,
)

# Show the server's reply to the update.
print(r9.text)

lagou2

猜你喜欢

转载自blog.csdn.net/qq_43475097/article/details/83929804