# 我们知道,即使把 header 换成某个浏览器的 User-Agent,但如果一直用同一个浏览器访问,也会被认定为爬虫,所以要及时更换浏览器(User-Agent)。
import urllib.request
import random
def load_baidu():
    """Build a request for the Baidu home page with a random User-Agent.

    On every call one User-Agent string is picked at random from a small
    pool, attached to a ``urllib.request.Request`` for
    ``http://www.baidu.com``, and the request's headers are printed.
    The request is only constructed here; it is not sent.

    Returns:
        None. The headers dictionary is written to standard output.
    """
    url = "http://www.baidu.com"
    # Pool of browser identities to rotate through, so that repeated calls
    # do not always present the same User-Agent.
    user_agent_list = [
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
        "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    ]
    random_user_agent = random.choice(user_agent_list)
    request = urllib.request.Request(url)
    # The header must be added to the `request` object created above
    # (the previous code referenced an undefined name `requests`).
    request.add_header("User-Agent", random_user_agent)
    # Note: Request.add_header stores the key capitalized, i.e. "User-agent".
    print(request.headers)
# Module-level call: runs the demo as soon as this file is executed (or imported).
load_baidu()