一、Request对象
# Fetch a page through an explicit Request object and print its body.
from urllib.request import Request, urlopen

request = Request("http://www.baidu.com")
# The with-block closes the HTTP response (and its socket) once it is read.
with urlopen(request) as response:
    # decode() without arguments treats the body as UTF-8.
    print(response.read().decode())
二、GET请求
大部分被传输到浏览器的html,images,js,css, … 都是通过GET方法发出请求的。它是获取数据的主要方法
# Send a GET search request whose query keyword is percent-encoded.
from urllib.parse import quote
from urllib.request import Request, urlopen

# quote() percent-encodes the non-ASCII keyword so it can be placed in a URL.
url = 'https://www.baidu.com/s?wd={}'.format(quote("哔哩哔哩"))
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
}
request = Request(url, headers=headers)
# The with-block closes the HTTP response once the body has been read.
with urlopen(request) as response:
    print(response.read().decode())
这里没有添加绕过反爬机制的措施,所以请求被 ban 了,但大致流程就是这样。
三、POST请求
Request 请求对象里有 data 参数,它就是用于 POST 请求的,我们要传送的数据就通过这个参数传入。数据先写成由键值对组成的字典,再用 urlencode 编码并 encode 成字节后传给 data。一般地,POST 在我们登录的时候使用。
# Submit a login form with a POST request and print the response body.
from urllib.parse import urlencode
from urllib.request import Request, urlopen

url = "http://192.168.43.167/DVWA/login.php"
form = {
    "username": "admin",
    "password": "password",
}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
}
# Request expects the body as bytes: url-encode the form fields, then encode.
data = urlencode(form).encode()
# Supplying data= makes urllib send this request as a POST.
request = Request(url, data=data, headers=headers)
# The with-block closes the HTTP response once the body has been read.
with urlopen(request) as response:
    print(response.read().decode())
四、Ajax的请求获取数据
有些网页内容使用AJAX加载,而AJAX一般返回的是JSON,直接对AJAX地址进行post或get,就返回JSON数据了
# Page through an AJAX endpoint 20 records at a time, printing each JSON page.
from urllib.request import Request, urlopen

base_url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start={}&limit=20"
# The headers never change between pages, so build them once outside the loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
}

i = 0
while True:
    # "start" is a record offset, so page i begins at record i * 20.
    url = base_url.format(i * 20)
    request = Request(url, headers=headers)
    # Close each response as soon as its body is read; the loop opens many.
    with urlopen(request) as response:
        info = response.read().decode()
    print(info)
    # Stop when a page carries no data. An empty body ends the loop as before;
    # an empty JSON array is also treated as the end.
    # NOTE(review): presumably the endpoint answers an exhausted offset with
    # "[]" rather than an empty body - confirm against the live API.
    if info.strip() in ("", "[]"):
        break
    i += 1
五、SSL
https://www.12306.cn/mormhweb/
# Snippet: open a previously built Request without verifying the certificate.
import ssl
import urllib.request

# Skip SSL certificate verification. _create_unverified_context is an
# underscore-prefixed (non-public) helper of the ssl module.
context = ssl._create_unverified_context()
# Hand the context to urlopen through its context parameter.
# `request` is a urllib.request.Request object created beforehand.
response = urllib.request.urlopen(request, context=context)
# Fetch an HTTPS page while skipping certificate verification, then print it.
import ssl
from urllib.request import Request, urlopen

url = 'https://www.12306.cn/mormhweb/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
}
request = Request(url, headers=headers)
# Skip certificate verification. _create_unverified_context is an
# underscore-prefixed (non-public) helper of the ssl module.
context = ssl._create_unverified_context()
# The with-block closes the HTTP response once the body has been read.
with urlopen(request, context=context) as response:
    info = response.read().decode()
print(info)