04 Python crawler cookie processing

1. Introduction

Unlike browsers, crawlers do not automatically store and send cookies, so we need to handle them manually.

2. Methods of handling cookies

1. Use requests.utils.dict_from_cookiejar() to convert the returned cookies into a dictionary

import requests
# Obtain the cookie
def login():
    """Log in and return the response cookies as a plain dictionary.

    Posts the credentials to the login URL and converts the cookie jar of
    the response with ``requests.utils.dict_from_cookiejar``.

    Returns:
        dict | None: cookie names mapped to their values, or ``None`` when
        anything goes wrong (the error is printed instead of raised).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456",
    }
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        return requests.utils.dict_from_cookiejar(res.cookies)
    except Exception as err:
        # Best-effort: report the failure and signal it to the caller
        # with an explicit None.
        print('获取cookie失败:\n{0}'.format(err))
        return None

# Use the cookie
import requests
def get_data():
    """Fetch the data page with the cookies from ``login()`` and print it.

    ``get_data_url`` is not defined in this snippet; it is expected to be
    provided by the surrounding module.
    """
    jar = login()
    response = requests.get(url=get_data_url, cookies=jar)
    body_text = response.text
    print(body_text)

2. Iterate over the cookies' name/value pairs and join them into a Cookie header string

import requests

def login():
    """Log in and return the response cookies as a Cookie header string.

    Posts the credentials to the login URL, then joins every cookie of the
    response as ``name=value;`` (each pair is followed by a semicolon,
    including the last one).

    Returns:
        str | None: the joined cookie string (empty if the response set no
        cookies), or ``None`` when anything goes wrong (the error is
        printed instead of raised).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456",
    }
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        # Build the string in one pass instead of repeated concatenation.
        return ''.join(
            '{0}={1};'.format(name, value)
            for name, value in res.cookies.items()
        )
    except Exception as err:
        # Best-effort: report the failure and signal it to the caller
        # with an explicit None.
        print('获取cookie失败:\n{0}'.format(err))
        return None



# Use the cookie

def get_data():
    """Fetch the data page, sending the ``login()`` result as a header.

    The string returned by ``login()`` is passed in the ``cookie`` request
    header. ``get_data_url`` is not defined in this snippet; it is expected
    to be provided by the surrounding module.
    """
    request_headers = {"cookie": login()}
    response = requests.get(url=get_data_url, headers=request_headers)
    body_text = response.text
    print(body_text)

3. Concatenate the cookies directly. This method is clumsy and only works if you already know the cookie names.

import requests
# Obtain the cookie
def login():
    """Log in and build a Cookie header string from known cookie names.

    Posts the credentials to the login URL, reads five specific cookies
    from the response and joins them as ``name=value`` pairs separated by
    semicolons.

    Returns:
        str | None: the joined cookie string, or ``None`` when anything
        goes wrong, including a missing cookie (the error is printed
        instead of raised).
    """
    login_url = 'http://www.xxx.com/login'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
    }
    body = {
        "usercode": "liuzz05@****.com",
        "password": "123456",
    }
    try:
        res = requests.post(url=login_url, headers=headers, data=body)
        cookies = res.cookies

        phpsessid = cookies['phpsessid']
        env_orgcode = cookies['env_orgcode']
        acw_tc = cookies['acw_tc']
        aliyungf_tc = cookies['aliyungf_tc']
        last_env = cookies['last_env']

        # Every pair needs its '=' separator, acw_tc included.
        return (
            'phpsessid={0};env_orgcode={1};acw_tc={2};'
            'aliyungf_tc={3};last_env={4}'
        ).format(phpsessid, env_orgcode, acw_tc, aliyungf_tc, last_env)
    except Exception as err:
        # Best-effort: report the failure and signal it to the caller
        # with an explicit None.
        print('获取cookie失败:\n{0}'.format(err))
        return None
        
# Use the cookie
def get_data():
    """Fetch the data page, sending the ``login()`` result as a header.

    The string returned by ``login()`` is passed in the ``cookie`` request
    header. ``get_data_url`` is not defined in this snippet; it is expected
    to be provided by the surrounding module.
    """
    request_headers = {"cookie": login()}
    response = requests.get(url=get_data_url, headers=request_headers)
    body_text = response.text
    print(body_text)

4. Use a session — the proper way (a really handy tool)

Use a Session to store cookies: there is no need to handle cookies by hand; just send your requests through the session.

# Initialize a session
session = requests.Session()
# From now on, send every request through the session.
# NOTE: the calls below are a sketch; pass the URL (and any other
# arguments) exactly as you would to requests.get / requests.post.
session.get()
session.post()

Guess you like

Origin blog.csdn.net/qq_40837794/article/details/109666265