使用 requests、re 和 chardet 库，从今日热榜（tophub.today）爬取微博热搜前十榜单

import requests
import re
import chardet
# Fetch a hot-list page from tophub.today, pull the entry titles and heat
# values out of the HTML with regular expressions, and print the first ten.

# Send a desktop-browser User-Agent string with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39'
}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(
    'https://tophub.today/n/KqndgxeLl9',
    headers=headers,
    timeout=10,
)
# Fail loudly on 4xx/5xx instead of running the regexes over an error page.
response.raise_for_status()

# chardet reports None as the encoding when it cannot make a guess, and
# bytes.decode(None) raises TypeError, so fall back to UTF-8 in that case.
encoding = chardet.detect(response.content)['encoding'] or 'utf-8'

# The detected encoding is only a guess; replace undecodable bytes rather
# than aborting the whole run on a single bad byte.
html_content = response.content.decode(encoding, errors='replace')

# Title: link text of an <a> that directly follows an opening <td class="...">.
top_ten_regex = r'<td class=".*?"><a href=".*?">(.*?)</a>'
# Heat: content of an attribute-less <td> whose text starts with a digit.
top_ten_heats = r'<td>(\d.*?)</td>'

top_ten_matches = re.findall(top_ten_regex, html_content, re.DOTALL)
top_ten_heat = re.findall(top_ten_heats, html_content, re.DOTALL)

print("Top Ten List:")

# Slice + zip instead of indexing 0..9 so a page that yields fewer than ten
# titles or heat values prints what is available instead of raising IndexError.
for rank, (title, heat) in enumerate(
    zip(top_ten_matches[:10], top_ten_heat[:10]), start=1
):
    print("{}.{}:{}".format(rank, title, heat))

猜你喜欢

转载自blog.csdn.net/weixin_51395932/article/details/130179006