import requests
from bs4 import BeautifulSoup
def getHTMLText(url, timeout=10, path='web.txt'):
    """Download a web page and save its visible text to a file.

    The page is fetched with a browser-like ``user-agent`` header, decoded
    using the encoding detected from the response body, stripped of markup
    with BeautifulSoup, and written to ``path`` as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.
        path: File that receives the extracted text (overwritten).

    Returns:
        "爬取成功" when the page was fetched and saved, "爬取失败" when any
        step (request, HTTP status check, parsing, file write) failed.
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        # Without a timeout an unresponsive server would block forever.
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Use the encoding detected from the body rather than the header.
        r.encoding = r.apparent_encoding
        # Name the parser explicitly so the result does not depend on
        # which optional parsers happen to be installed.
        text = BeautifulSoup(r.text, 'html.parser').get_text()
        with open(path, 'w', encoding='utf8') as f:
            f.write(text)
        return "爬取成功"
    except Exception:
        # Best-effort contract: report failure as a status string instead
        # of raising, but let KeyboardInterrupt/SystemExit propagate.
        return "爬取失败"
if __name__ == "__main__":
    # Script entry point: scrape one sample page and print the status
    # string reported by getHTMLText.
    url = 'https://hitokoto.cn/?id=147'
    result = getHTMLText(url)
    print(result)
# Python爬取网页内容
# 猜你喜欢
# 转载自blog.csdn.net/weixin_40775077/article/details/85058057
# 今日推荐
# 周排行