Learning Python, Part 21 (Crawlers II: GET and POST requests, the requests library)

19.4 GET and POST requests

GET requests

from urllib import request
from urllib import parse

url = "https://www.baidu.com/s?"
wd = input("Enter the keyword to search for: ")
params = {
	"wd": wd
}
# URL-encode the query parameters
ps = parse.urlencode(params)
print(ps)

(Screenshot in the original post: the printed URL-encoded query string.)
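As a quick aside (my addition, not part of the original post), parse.urlencode percent-encodes non-ASCII values, and parse.parse_qs reverses the encoding:

from urllib import parse

# urlencode percent-encodes each key/value pair and joins them with "&"
qs = parse.urlencode({"wd": "python 爬虫", "pn": 10})
print(qs)  # wd=python+%E7%88%AC%E8%99%AB&pn=10
# parse_qs recovers the original values from the encoded string
print(parse.parse_qs(qs))  # {'wd': ['python 爬虫'], 'pn': ['10']}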

from urllib import request
from urllib import parse

headers = {
	"User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
}
url = "https://www.baidu.com/s?"
wd = input("Enter the keyword to search for: ")
params = {
	"wd": wd
}
# URL-encode the query parameters
ps = parse.urlencode(params)
print(ps)
url = url + ps
print(url)
# rsp = request.urlopen(url)
req = request.Request(url=url, headers=headers)
resp = request.urlopen(req)
data = resp.read()
print(data)
with open("get.html", "wb") as f:
	f.write(data)

POST requests

import urllib.request
import urllib.parse

url = "https://fanyi.youdao.com/translate?"
headers = {
	"User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
}
word = input("Enter the word to translate: ")
form_data = {
	"i": word,
	"from": "AUTO",
	"to": "AUTO",
	"smartresult": "dict",
	"client": "fanyideskweb",
	"doctype": "json",
	"version": "2.1",
	"keyfrom": "fanyi.web",
	"action": "FY_BY_REALTlME",
}
# POST data must be URL-encoded and then converted to bytes
data = urllib.parse.urlencode(form_data)
data = data.encode(encoding="utf-8")
req = urllib.request.Request(url, data=data, headers=headers)
response = urllib.request.urlopen(req)
html = response.read().decode(encoding="utf-8").strip()
print(html)
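Since the form sets doctype=json, the body that comes back is JSON. A minimal sketch (my addition) of extracting the translation, assuming the translateResult layout this endpoint is commonly reported to return:

import json

# hypothetical response body for illustration; the real one is the html variable above
html = '{"errorCode":0,"translateResult":[[{"src":"hello","tgt":"你好"}]]}'
result = json.loads(html)
if result.get("errorCode") == 0:
	# translateResult is a list of lists of {"src": ..., "tgt": ...} segments
	print(result["translateResult"][0][0]["tgt"])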

19.5 The requests library

The requests library wraps urllib under the hood.
Install it with:
pip install requests

import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
# the encoding requests guessed from the response headers
print(response.encoding)
# the encoding Sina actually uses, detected from the raw bytes
print(chardet.detect(response.content))

ISO-8859-1 is the latin-1 encoding; it is what requests falls back to when the response headers do not declare a charset.
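As an aside (my addition), requests exposes the same byte-level detection through response.apparent_encoding, so the explicit chardet call can be avoided:

import requests

response = requests.get("http://www.sina.com.cn")
print(response.encoding)  # ISO-8859-1, guessed from the headers
# apparent_encoding runs charset detection (chardet/charset_normalizer) on the body
response.encoding = response.apparent_encoding
print(response.encoding)  # the encoding detected from the bytes, e.g. utf-8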

import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
# print(response.encoding)
# print(chardet.detect(response.content))
# detect the real encoding from the raw bytes and apply it
charset = chardet.detect(response.content).get("encoding")
print(charset)
response.encoding = charset
print(response.text)
with open("sina.html", "w", encoding=charset) as f:
	f.write(response.text)

Task: scrape the images from the Sina homepage.

import os
import re
import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
charset = chardet.detect(response.content).get("encoding")
print(charset)
response.encoding = charset
html = response.text
# capture the src of every jpg/png/gif/jpeg image
images = re.findall(r"src=\"(.*?\.(?:jpg|png|gif|jpeg))\"", html)
print(images)
print(len(images))
os.makedirs("image", exist_ok=True)
for index, item in enumerate(images):
	# assumes every src is protocol-relative, e.g. //n.sinaimg.cn/...
	real_url = "http:" + item
	print("Downloading image from {}".format(real_url))
	resp = requests.get(real_url)
	with open("image/" + str(index) + ".jpg", "wb") as f:
		# resp.content holds the raw bytes of the image
		f.write(resp.content)
The version above blindly prepends "http:" to every src; the improved version below does so only when the URL does not already carry a scheme.

import os
import re
import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
charset = chardet.detect(response.content).get("encoding")
print(charset)
response.encoding = charset
html = response.text
images = re.findall(r"src=\"(.*?\.(?:jpg|png|gif|jpeg))\"", html)
print(images)
print(len(images))
os.makedirs("image", exist_ok=True)
for index, item in enumerate(images):
	# only prepend the scheme for protocol-relative URLs
	if not item.startswith("http"):
		real_url = "http:" + item
	else:
		real_url = item
	print("Downloading image from {}".format(real_url))
	resp = requests.get(real_url)
	with open("image/" + str(index) + ".jpg", "wb") as f:
		# resp.content holds the raw bytes of the image
		f.write(resp.content)
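One further refinement worth sketching (my addition, not from the original post): the loop saves every file as .jpg even when the source is a png or gif, so the extension can instead be taken from the matched URL:

import os
import requests

os.makedirs("image", exist_ok=True)
for index, item in enumerate(images):  # images from the re.findall above
	real_url = item if item.startswith("http") else "http:" + item
	# keep the real extension instead of hard-coding ".jpg"
	ext = os.path.splitext(real_url)[1] or ".jpg"
	resp = requests.get(real_url)
	with open("image/{}{}".format(index, ext), "wb") as f:
		f.write(resp.content)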

Faking the request headers
The fake-useragent library generates realistic User-Agent strings. Install it with either:
pip install fake-useragent
python -m pip install fake-useragent

import requests
from fake_useragent import UserAgent

# us = UserAgent()
# print(us.ie)      # an Internet Explorer User-Agent string
# print(us.chrome)  # a Chrome User-Agent string
# print(us.random)  # a random User-Agent string
headers = {
	"User-Agent": UserAgent().random
}
kw = input("Enter the text to search for: ")
params = {
	"wd": kw
}
url = "http://www.baidu.com/s?"
# requests URL-encodes the params dict by itself
response = requests.get(url=url, params=params, headers=headers)
print(response.text)

POST requests:

The following JavaScript from Youdao's page shows how the ts, bv, salt, and sign form fields are generated:

var t = n.md5(navigator.appVersion),
    r = "" + (new Date).getTime(),
    i = r + parseInt(10 * Math.random(), 10);
return {
    ts: r,
    bv: t,
    salt: i,
    sign: n.md5("fanyideskweb" + e + i + "Tbh5E8=q6U3EXe+&L[4c@")
};

Correspondence between the JavaScript variables and Python: salt corresponds to i, ts to r, and bv to t.

r = "" + (new Date).getTime() is the current timestamp in milliseconds, as a string. In Python:

lts = ts = r = str(int(time.time() * 1000))

i = r + parseInt(10 * Math.random(), 10) appends one random digit (0 to 9) to that timestamp string (string concatenation, not numeric addition). In Python:

salt = i = lts + str(random.randint(0, 9))

sign is n.md5("fanyideskweb" + e + i + "Tbh5E8=q6U3EXe+&L[4c@"), where e is the word being translated. In Python:

sign = hashlib.md5(("fanyideskweb" + word + salt + "Tbh5E8=q6U3EXe+&L[4c@").encode("utf-8")).hexdigest()

var t = n.md5(navigator.appVersion) hashes the browser's appVersion string. In Python:

bv = hashlib.md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36".encode("utf-8")).hexdigest()
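A tiny runnable sketch (my addition) to make the string-concatenation point concrete:

import time
import random

# JS: r = "" + (new Date).getTime() gives the millisecond timestamp as a string
lts = str(int(time.time() * 1000))  # e.g. "1620000000000"
# JS: i = r + parseInt(10 * Math.random(), 10) appends one digit to that string
salt = lts + str(random.randint(0, 9))  # e.g. "16200000000007"
print(lts, salt)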

import time
import hashlib
import random
import requests
from fake_useragent import UserAgent

url = "https://fanyi.youdao.com/translate?"
headers = {
	"User-Agent": UserAgent().random
}
word = input("Enter the word to translate: ")
# millisecond timestamp, kept as a string to mirror the JavaScript
lts = str(int(time.time() * 1000))
# the JavaScript appends one random digit to the timestamp string
salt = lts + str(random.randint(0, 9))
sign = hashlib.md5(("fanyideskweb" + word + salt + "Tbh5E8=q6U3EXe+&L[4c@").encode("utf-8")).hexdigest()
bv = hashlib.md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36".encode("utf-8")).hexdigest()
form_data = {
	"i": word,
	"from": "AUTO",
	"to": "AUTO",
	"smartresult": "dict",
	"client": "fanyideskweb",
	"salt": salt,
	"sign": sign,
	"lts": lts,
	"bv": bv,
	"doctype": "json",
	"version": "2.1",
	"keyfrom": "fanyi.web",
	"action": "FY_BY_REALTlME",
}
response = requests.post(url=url, data=form_data, headers=headers)
print(response.text)
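Since requests can decode JSON bodies itself, the result can also be read with response.json(), assuming the same translateResult layout as in the earlier sketch:

result = response.json()  # parse the JSON body into a dict
if result.get("errorCode") == 0:
	print(result["translateResult"][0][0]["tgt"])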

Reposted from: blog.csdn.net/weixin_53002381/article/details/116430700