1. Introduction
Unlike browsers, crawlers do not store and send cookies automatically, so we need to handle them manually.
2. Methods of handling cookies
1. Use requests.utils.dict_from_cookiejar() to convert the returned cookies into a dictionary
import requests
# Get the cookie
def login():
    """Log in and return the cookies of the login response as a plain dict.

    POSTs the credentials to the login URL and converts the response's cookie
    jar with ``requests.utils.dict_from_cookiejar``.

    Returns:
        A ``{name: value}`` dict of the cookies set by the login response, or
        ``None`` if anything in the request/conversion raised (the error is
        printed instead of being propagated).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01"
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456"
    }
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        return requests.utils.dict_from_cookiejar(res.cookies)
    except Exception as err:
        # Best-effort: report the failure ("failed to get cookie") and hand
        # None back to the caller instead of raising.
        print('获取cookie失败:\n{0}'.format(err))
        return None
# Use the cookie
import requests
def get_data():
    """Request the data URL with the cookies from ``login()`` and print the body.

    NOTE(review): ``get_data_url`` is not defined in this snippet; it must be
    provided at module level before this function is called.
    """
    login_cookies = login()
    response = requests.get(url=get_data_url, cookies=login_cookies)
    print(response.text)
2. Iterate over the cookie key/value pairs and join them into a Cookie header string
import requests
def login():
    """Log in and return the response cookies as a ``Cookie`` header string.

    POSTs the credentials to the login URL, then joins every cookie of the
    response into ``name=value;`` pairs (each pair, including the last one,
    is terminated by ``;``).

    Returns:
        The cookie string (empty if the response set no cookies), or ``None``
        if anything in the request/formatting raised (the error is printed
        instead of being propagated).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01"
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456"
    }
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        # Build the string in one pass instead of repeated `+=` concatenation.
        return ''.join(
            '{0}={1};'.format(name, value)
            for name, value in res.cookies.items()
        )
    except Exception as err:
        # Best-effort: report the failure ("failed to get cookie") and hand
        # None back to the caller instead of raising.
        print('获取cookie失败:\n{0}'.format(err))
        return None
# Use the cookie
def get_data():
    """Request the data URL, sending the ``login()`` cookie string as a header.

    NOTE(review): ``get_data_url`` is not defined in this snippet; it must be
    provided at module level before this function is called.
    """
    request_headers = {"cookie": login()}
    response = requests.get(url=get_data_url, headers=request_headers)
    print(response.text)
3. Build the cookie string by hand. This method is crude and only works if you already know the cookie keys
import requests
# Get the cookie
def login():
    """Log in and build a ``Cookie`` header string from a fixed set of cookies.

    POSTs the credentials to the login URL and reads the cookies named
    ``phpsessid``, ``env_orgcode``, ``acw_tc``, ``aliyungf_tc`` and
    ``last_env`` from the response, joining them as ``name=value`` pairs
    separated by ``;``.

    Returns:
        The cookie string, or ``None`` if the request failed or one of the
        expected cookies is missing (the error is printed instead of being
        propagated).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01"
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456"
    }
    # The cookie keys this method relies on, in the order they are sent.
    cookie_names = (
        'phpsessid', 'env_orgcode', 'acw_tc', 'aliyungf_tc', 'last_env',
    )
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        cookies = res.cookies
        # Every pair is rendered as name=value; the hand-written template
        # previously dropped the '=' after acw_tc.
        return ';'.join(
            '{0}={1}'.format(name, cookies[name]) for name in cookie_names
        )
    except Exception as err:
        # Best-effort: report the failure ("failed to get cookie") and hand
        # None back to the caller instead of raising.
        print('获取cookie失败:\n{0}'.format(err))
        return None
# Use the cookie
def get_data():
    """Request the data URL, sending the ``login()`` cookie string as a header.

    NOTE(review): ``get_data_url`` is not defined in this snippet; it must be
    provided at module level before this function is called.
    """
    cookie_header = login()
    response = requests.get(url=get_data_url, headers={"cookie": cookie_header})
    print(response.text)
4. Use a session — the proper way (the ultimate tool)
Use a Session to store cookies: there is no need to manage cookies yourself, just send your requests through the session.
# Initialize the session
session = requests.Session()
# From now on, send every request through the session.
# The calls below are schematic: pass the URL (and data/headers) as usual,
# e.g. session.post(url=..., data=...) followed by session.get(url=...).
session.get()
session.post()