Python crawler urllib.request vs. requests

GET request comparison: Compared with urllib.request, a GET request made with requests returns the response data without any special transcoding, which is very convenient. In requests, the .text attribute alone gives you the page source, whereas after urllib.request.urlopen() you have to call .read().decode('utf-8') to get the decoded source, which is much less friendly.

# requests version: GET a Baidu search results page, save it to disk and print it.
import requests

url = "https://www.baidu.com/s?"

# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
}

# Query-string parameters; passing them via `params` lets requests
# URL-encode them (including the non-ASCII value) itself.
datas = {
    'wd': '北京'
}

# Always pass a timeout: without one, requests may wait indefinitely on an
# unresponsive server.
response = requests.get(url=url, params=datas, headers=headers, timeout=10)

# .text is the response body already decoded to str, so no manual
# read()/decode() step is needed.
content = response.text
with open("北京.html", "w", encoding='utf-8') as html_file:
    html_file.write(content)
print(content)
# urllib.request version: GET a Baidu search page with several query parameters.
import urllib.request
import urllib.parse

base_url = "https://www.baidu.com/s?"

data = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '中国台湾省'
}
# The query parameters are percent-encoded by hand and appended to the URL
# before the request is built.
query_string = urllib.parse.urlencode(data)
base_url += query_string

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'}
# urlopen() takes no headers argument, so custom headers go on a Request object.
request = urllib.request.Request(url=base_url, headers=headers)
# The context manager closes the HTTP response when done; the timeout keeps
# the call from blocking forever on an unresponsive server.
with urllib.request.urlopen(request, timeout=10) as response:
    # read() returns bytes, so the body has to be decoded explicitly.
    content = response.read().decode('utf-8')
print(content)

Post request comparison:

# Compared with urllib.request, a POST made with requests:
#   1. needs no manual encoding/decoding of the form data or the response,
#   2. takes the form parameters through the `data` argument,
#   3. needs no hand-built request object.
import requests
import json

url = "https://fanyi.baidu.com/sug"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
}

# Form fields sent in the POST body.
datas = {
    'kw': 'ever'
}

# Always pass a timeout: without one, requests may wait indefinitely on an
# unresponsive server.
response = requests.post(url=url, headers=headers, data=datas, timeout=10)

# Print the raw body text first, then the parsed form below the separator.
content = response.text
print(content)
print("------")
# json.loads parses the body text into Python objects (the endpoint is
# expected to answer with JSON; a non-JSON body raises json.JSONDecodeError).
obj = json.loads(content)
print(obj)

Guess you like

Origin blog.csdn.net/weixin_46310452/article/details/126004939