Python:爬取糗事百科段子

源码:

import random
import urllib
import urllib.request
def JokeSet(Url, UserAgent, path=r"D:\pythonItem\爬虫Practice\糗事百科.html"):
    """Download one page and save its decoded text to a file.

    Args:
        Url: Address of the page to request.
        UserAgent: Value sent as the ``User-Agent`` request header.
        path: Destination file. Defaults to the location that was previously
            hard-coded in this function. The file is opened in ``"w"`` mode,
            so any existing content is replaced on every call.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        OSError: If the destination file cannot be opened or written.
    """
    # Request headers: only the User-Agent is customised.
    Headers = {"User-Agent": UserAgent}

    # Build the request and read the whole body; the context manager closes
    # the response even when reading or decoding fails.
    req = urllib.request.Request(Url, headers=Headers)
    with urllib.request.urlopen(req) as response:
        data = response.read().decode("utf-8")

    # Write the fetched page to disk as UTF-8 text.
    with open(path, "w", encoding="utf-8") as f:
        f.write(data)
# Candidate User-Agent strings; one is picked at random per request to make
# the traffic look less uniform and reduce the chance of the IP being blocked.
# Built once, outside the loop, because the list never changes.
# NOTE(review): the second entry looks truncated ("Chrome/69.") — confirm the
# intended value before relying on it.
HeadersList = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.",
]

# Crawl pages 1..10; adjust the range to change how many pages are fetched.
# NOTE(review): JokeSet is called without a destination, so every page goes to
# the same output file opened in "w" mode and only the last successful page is
# kept — confirm whether one file per page is wanted.
for i in range(1, 11):
    # Build the URL for the current page number.
    url = r"https://www.qiushibaike.com/text/page/" + str(i) + "/"
    UserAgent = random.choice(HeadersList)
    try:
        JokeSet(url, UserAgent)
        print("第{}次爬取成功".format(i))
    except Exception:
        # Best-effort crawl: report the failure and move on to the next page.
        # Catching Exception (not a bare except) lets Ctrl-C still stop the run.
        print("爬取失败")

# TODO: HTML data processing to be added in a later update.

猜你喜欢

转载自blog.csdn.net/Mr_zhangbz/article/details/83753833