# 爬取时代周报排行榜前十

 

 

 

import requests
from bs4 import BeautifulSoup  # the Beautiful Soup 4 package is named ``bs4``
import pandas as pd

# Page that the rest of the script downloads and parses.
url = 'https://tophub.today/n/aqeE1Gxe9R'
def getHTMLText(url, timeout=30):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text on success. If the request fails (connection
        problem, timeout, invalid URL, or an HTTP error status), the literal
        string '产生异常' is returned instead of raising.
    """
    try:
        # Honour the caller's timeout instead of a hard-coded value.
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Decode with the charset detected from the body rather than the
        # one declared in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are converted to the sentinel string, so
        # KeyboardInterrupt and programming errors are no longer hidden.
        return '产生异常'
# First pass: fetch the page through getHTMLText and print the parsed tree.
# NOTE(review): on a failed download getHTMLText returns an error string,
# which is then parsed and printed here as if it were the page.
html = getHTMLText(url)
soup = BeautifulSoup(html, 'html.parser')
print(soup.prettify())  # inspect the page structure

# Second pass: request the same page again, this time with an explicit
# User-Agent header, and rebuild ``soup`` / ``html`` from that response.
url = 'https://tophub.today/n/aqeE1Gxe9R'  # 时代周报 (Time Weekly) board
headers = {'User-Agent': '1234'}
# A timeout keeps the script from blocking forever on an unresponsive server.
hd_data = requests.get(url, headers=headers, timeout=30)
hd_data.encoding = "utf-8"  # affects the decoding used by hd_data.text
soup = BeautifulSoup(hd_data.content, 'lxml')
html = hd_data.text

# 猜你喜欢

# 转载自 www.cnblogs.com/hurt12/p/12541559.html