# Python爬取网页内容 (Scraping web page content with Python)

import requests
from bs4 import BeautifulSoup


def getHTMLText(url, timeout=10, out_path='web.txt'):
    """Download a page and save its visible text to a file.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.
        out_path: File that receives the extracted text; it is written as
            UTF-8 and overwritten if it already exists.

    Returns:
        "爬取成功" when the text was written, "爬取失败" if any step
        (request, HTTP status check, parsing, file write) failed.
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of saving an error page.
        response.raise_for_status()
        # Use the encoding detected from the body; the header-declared one
        # is often missing or wrong, which garbles non-ASCII text.
        response.encoding = response.apparent_encoding
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.text, 'html.parser')
        with open(out_path, 'w', encoding='utf8') as f:
            f.write(soup.get_text())
    except Exception:
        # The contract is a status string, so every ordinary failure is
        # reported here; KeyboardInterrupt/SystemExit still propagate.
        return "爬取失败"
    return "爬取成功"
		

if __name__ == "__main__":
    # Script entry point: scrape one sample page and print the status
    # string returned by getHTMLText.
    url = 'https://hitokoto.cn/?id=147'
    status = getHTMLText(url)
    print(status)



# 猜你喜欢 (You may also like)
#
# 转载自 (reposted from) blog.csdn.net/weixin_40775077/article/details/85058057