import requests
import re
import chardet
# Request headers sent with the page fetch: a desktop browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39'
}

# Page on tophub.today that the script scrapes.
TOP_LIST_URL = 'https://tophub.today/n/KqndgxeLl9'

# Captures the anchor text of a link that sits directly inside a classed <td>.
top_ten_regex = r'<td class=".*?"><a href=".*?">(.*?)</a>'
# Captures the content of a plain <td> whose text starts with a digit.
top_ten_heats = r'<td>(\d.*?)</td>'


def fetch_html(url=TOP_LIST_URL, timeout=10):
    """Download *url* and return its body decoded to text.

    Args:
        url: Page to fetch; defaults to the tophub.today board above.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    # chardet reports None when it cannot identify the encoding; fall back
    # to UTF-8 rather than crashing in decode(None).
    encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
    return response.content.decode(encoding, errors='replace')


def extract_top_entries(html_content, limit=10):
    """Return up to *limit* ``(title, heat)`` pairs found in *html_content*.

    Titles and heat values are matched independently with ``top_ten_regex``
    and ``top_ten_heats`` and paired by position. If either pattern yields
    fewer matches than *limit*, the result is simply shorter; no IndexError
    is raised.
    """
    titles = re.findall(top_ten_regex, html_content, re.DOTALL)
    heats = re.findall(top_ten_heats, html_content, re.DOTALL)
    # zip stops at the shorter list, so a partial page cannot overrun.
    return list(zip(titles[:limit], heats[:limit]))


def format_entries(entries):
    """Render ``(title, heat)`` pairs as ``"<rank>.<title>:<heat>"`` lines."""
    return [
        "{}.{}:{}".format(rank, title, heat)
        for rank, (title, heat) in enumerate(entries, start=1)
    ]


def main():
    """Fetch the board page and print its first ten entries."""
    entries = extract_top_entries(fetch_html())
    print("Top Ten List:")
    for line in format_entries(entries):
        print(line)


if __name__ == "__main__":
    main()
# 使用requests库和re库爬取微博热搜前十榜单
# 猜你喜欢
# 转载自blog.csdn.net/weixin_51395932/article/details/130179006
# 今日推荐
# 周排行