1. Usage in urllib2:
# 1: Exception handling and a configurable number of retries
#    (a time.sleep() interval between retries could also be added)
import urllib2

def download(url, num_retries=2):
    print("Downloading:", url)
    try:
        html = urllib2.urlopen(url).read()
    except urllib2.URLError as e:
        print("Download error:", e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, "code") and 500 <= e.code < 600:
                # retry the request when the server responds with a 5XX error
                return download(url, num_retries - 1)
    return html


# 2: Same as 1, with a user agent added
import urllib2

def download(url, user_agent="wswp", num_retries=2):
    print("Downloading:", url)
    headers = {"User-agent": user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print("Download error:", e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, "code") and 500 <= e.code < 600:
                # retry the request when the server responds with a 5XX error
                return download(url, user_agent, num_retries - 1)
    return html


# 3: Proxy support
import urllib2
import urlparse

def download(url, user_agent="wswp", proxy=None, num_retries=2):
    print("Downloading:", url)
    headers = {"User-agent": user_agent}
    request = urllib2.Request(url, headers=headers)
    opener = urllib2.build_opener()
    if proxy:
        proxy_params = {urlparse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib2.ProxyHandler(proxy_params))
    try:
        html = opener.open(request).read()
    except urllib2.URLError as e:
        print("Download error:", e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, "code") and 500 <= e.code < 600:
                # retry the request when the server responds with a 5XX error
                return download(url, user_agent, proxy, num_retries - 1)
    return html
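Note that urllib2 only exists in Python 2; in Python 3 the same pieces live in urllib.request, urllib.error and urllib.parse. As a rough sketch (not from the original text), example 3 could be ported like this:

# Sketch: Python 3 port of example 3, assuming the standard-library split of
# urllib2 into urllib.request / urllib.error / urllib.parse.
import urllib.request
import urllib.error
import urllib.parse

def download(url, user_agent="wswp", proxy=None, num_retries=2):
    print("Downloading:", url)
    headers = {"User-agent": user_agent}
    request = urllib.request.Request(url, headers=headers)
    opener = urllib.request.build_opener()
    if proxy:
        # route requests for this URL's scheme through the given proxy
        proxy_params = {urllib.parse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib.request.ProxyHandler(proxy_params))
    try:
        html = opener.open(request).read()
    except urllib.error.URLError as e:
        print("Download error:", e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, "code") and 500 <= e.code < 600:
                # retry the request when the server responds with a 5XX error
                return download(url, user_agent, proxy, num_retries - 1)
    return html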
2. Usage in requests:
import requests

user_agent = "wswp"
headers = {"User-agent": user_agent}
proxies = {
    "http": "http://127.0.0.1:9999",
    "https": "http://127.0.0.1:8888"
}
response = requests.get("https://www.baidu.com", headers=headers, proxies=proxies)
print(response.text)
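For comparison with the urllib2 version, the same retry-on-5XX / user-agent / proxy logic could be sketched with requests roughly as follows; the download() helper, its parameters, and the timeout value are assumptions for illustration, not from the original text:

# Sketch: a requests-based download() mirroring the urllib2 examples above.
import requests

def download(url, user_agent="wswp", proxies=None, num_retries=2):
    print("Downloading:", url)
    headers = {"User-agent": user_agent}
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=10)
        html = response.text
        if 500 <= response.status_code < 600 and num_retries > 0:
            # retry the request when the server responds with a 5XX error
            return download(url, user_agent, proxies, num_retries - 1)
    except requests.exceptions.RequestException as e:
        print("Download error:", e)
        html = None
    return html

# Example call (proxy addresses are placeholders):
# html = download("https://www.baidu.com",
#                 proxies={"http": "http://127.0.0.1:9999",
#                          "https": "http://127.0.0.1:8888"})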
For more on how to use requests, see: https://www.cnblogs.com/zhaof/p/6915127.html