Common attributes of requests, common parameters of common functions of requests, common attributes of response

Table of Contents

Common parameters of commonly used requests functions

Common function attributes of requests

requests.get   # simulate sending a GET request
requests.post  # simulate sending a POST request
requests.session # works like requests, but submits cookies for us automatically

# e.g.:
url = "https://www.baidu.com"
response = requests.get(url=url)

Common parameters of commonly used requests functions

Common parameters of the GET request function: the params and headers parameters

# The params and headers parameters.
# NOTE: User_Agent is not defined in this snippet; it must already hold a
# browser User-Agent string before this call runs.
response = requests.get(
    url="https://www.baidu.com",
    headers={"User-Agent": User_Agent},  # data carried in the request headers
    params={"key": 'value'},  # data carried by the GET request
)

Common parameters of the POST request function: the data and json parameters

# The data and json parameters.
# Pass only one of them per request: requests ignores `json` whenever `data`
# is also supplied, so each is shown in its own call.
response = requests.post(
    url=url,
    data={"name": "lxx", "pwd": "123"},  # sent as the request body
)
response = requests.post(
    url=url,
    json={"name": "jerry", "pwd": "123"},  # serialized to a JSON string for the body
)

Proxy pool proxies parameters

# Proxy pool
import random

# A pool of proxy servers (host:port) to choose from.
ps = ["121.228.240.101:9999","121.228.240.101:9999","121.228.240.101:9999","121.228.240.101:9999"]
response = requests.post(
    url="http://news.baidu.com/?tn=news",
    # The key is the lower-case URL scheme ("HTTP" would never match the
    # request's scheme), and the proxy URL itself must include a scheme.
    proxies={"http": "http://" + random.choice(ps)},  # pick one proxy at random
)
with open("ttt.html", "wb") as f:
    f.write(response.content)
print(response.text)

Timeout parameter

response = requests.post(
    "https://www.baidu.com",
    timeout=(10, 10),
)

# timeout=(10, 10): the first value is the connect timeout, the second is the
# timeout for waiting on the response
# timeout=10: a single number is used as the timeout

Whether to allow redirection allow_redirects parameter

response = requests.post(
    url=url,
    allow_redirects = True, # whether redirects are followed; defaults to True
)

Upload file files parameter

# `files` takes a dict: the key is the form field name the server uses to
# extract the file, the value is the file object to upload.
# The `with` block closes the file once the upload has finished.
with open(r"D:\jerry\spiderDay2\ttt.html", "rb") as f:
    response = requests.post(
        url="http://httpbin.org/post",
        files={"img": f},  # "img" is the field name used in this example
    )
print(response.status_code)

Certificate verification (the verify and cert parameters): rarely needed explicitly; applies to HTTPS requests

import requests
response=requests.get(
    url='https://www.12306.cn',
    cert=('/path/server.crt','/path/key'),  # which certificate and key to use for verification (described here as the server-side pair)
    # cert=('/path/client.crt','/path/key'),  # which certificate and key to use for verification, as the client
    verify=True  # True means certificate verification is enabled
)
print(response.status_code)

# Improvement 1: the error goes away, but a warning is still emitted.
# CAUTION: verify=False turns off certificate checking for this request.
import requests
response = requests.get(
    'https://www.12306.cn',
    verify=False,  # skip certificate verification; warns, returns 200
)
print(response.status_code)
# Improvement 2: the error goes away and the warning is silenced as well.
# CAUTION: verify=False turns off certificate checking for this request.
import requests
from requests.packages import urllib3
urllib3.disable_warnings()  # turn the warnings off
response = requests.get(
    'https://www.12306.cn',
    verify=False,  # skip certificate verification; returns 200
)
print(response.status_code)

Exception handling

import requests
# Import only the exception types handled below; requests.exceptions lists
# every available type.
# NOTE: this ConnectionError is the requests class and shadows the builtin.
from requests.exceptions import ConnectionError, ReadTimeout, RequestException, Timeout

try:
    # The tiny timeout is there to force a failure for the demonstration.
    r = requests.get('http://www.baidu.com', timeout=0.00001)
except ReadTimeout:
    print('===:')
except ConnectionError:  # connection failed
    print('-----')
except Timeout:  # timed out
    print('aaaaa')
except RequestException:  # any other requests error
    print('Error')

Closing: context management, for file-like objects that do not support the use of the "with" statement, use contextlib.closing():

from contextlib import closing
# closing() calls r.close() when the with-block exits.
# The host name below is a placeholder.
with closing(requests.get('http://xxxxxxxxx.com', stream=True)) as r:
    # Iterate the response bound by the with-statement (r), not some other object.
    for chunk in r.iter_content():
        print(chunk)

Common attributes of response

url = "https://www.baidu.com/s"
response = requests.get(
    url=url,  # the URL to request
    params={"wd":"egon"},   # data carried by the GET request
    headers={"user-agent": user_agent}  # data carried in the request headers
)


# Commonly used response attributes
response.text  # response body as text (str)
response.content  # response body as bytes; typically used for binary downloads such as video
response.status_code  # status code of the response
response.url  # URL of the request
response.cookies  # cookies returned with the response
response.cookies.get_dict()  # the returned cookies as a plain dict
response.request  # the request that produced this response (e.g. response.request.method)


# When the result is JSON data
response.json()  # deserialize the result


# Fixing garbled text in fetched documents
response.apparent_encoding  # encoding detected from the document content
response.encoding  # encoding used for the response body
# e.g. decode the text with the detected encoding:
response.encoding = response.apparent_encoding


response.headers  # response headers
response.history  # redirect history, i.e. the earlier requests in the chain


# Raw stream data
response = requests.get(
    url=url,
    params={"wd": "egon"},
    headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3538.77 Safari/537.36"},
    stream=True,  # must be True for the raw stream to be readable
)
# Read the raw, unprocessed data as a stream; rarely needed.
print(response.raw.read(100))