热门搜索排行榜

import requests
from bs4 import BeautifulSoup
import pandas as pd
# Baidu "top keyword" ranking page scraped by this script.
url = "http://top.baidu.com/buzz.php?p=top_keyword"
# Browser-like User-Agent sent with the request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/69.0.3497.100 Safari/537.36'}

# CSS classes that may carry the heat value of a row.  The original script
# only matched "icon-rise", which left it with fewer heat values than titles.
# NOTE(review): "icon-fall" / "icon-fair" are assumed to be the markers used
# for non-rising rows -- confirm against the live page markup.
HEAT_CLASSES = ["icon-rise", "icon-fall", "icon-fair"]

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def fetch_html(page_url=url, request_headers=headers, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its text decoded with the detected encoding.

    Raises ``requests.HTTPError`` on a non-2xx response so that an error page
    is never parsed as if it were the ranking.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    response.raise_for_status()
    # Let requests guess the encoding from the body, as the original did.
    response.encoding = response.apparent_encoding
    return response.text


def parse_ranking(html):
    """Extract ``(titles, heats)`` from the ranking page HTML.

    Both lists have one entry per element with class "keyword", so the i-th
    title always belongs to the i-th heat.  A heat is ``None`` when no heat
    element is found for that row.
    """
    soup = BeautifulSoup(html, 'lxml')
    titles = []
    heats = []
    for cell in soup.find_all(class_="keyword"):
        # The title text lives in the <a class="list-title"> element; reading
        # the whole cell also picks up the adjacent "search" link text.
        anchor = cell.find(class_="list-title")
        title_tag = anchor if anchor is not None else cell
        titles.append(title_tag.get_text().strip())

        # Look up the heat inside the same table row so title and heat stay
        # paired even when a row has no matching heat element.
        # NOTE(review): assumes each keyword cell sits inside a <tr> -- confirm.
        row = cell.find_parent("tr")
        heat_tag = row.find(class_=HEAT_CLASSES) if row is not None else None
        heats.append(heat_tag.get_text().strip() if heat_tag is not None else None)
    return titles, heats


def build_table(titles, heats):
    """Return a two-column DataFrame ("标题", "热度") with one row per entry."""
    return pd.DataFrame({"标题": titles, "热度": heats})


def main():
    """Fetch the ranking, then print the raw lists followed by the table."""
    titles, heats = parse_ranking(fetch_html())
    data = [titles, heats]
    print(data)
    print(build_table(titles, heats))


if __name__ == "__main__":
    main()

[['翻译\nsearch', '淘宝\nsearch', '斗鱼\nsearch', '百度翻译\nsearch', '美国确诊超2600例\nsearch', '哔哩哔哩\nsearch', '知乎\nsearch', 'bilibili\nsearch', '微信\nsearch', '京东\nsearch', 'qq邮箱\nsearch', '微博\nsearch', '意大利报纸讣告\n search', '优酷\nsearch', '学习通\nsearch', '163\nsearch', '黄书豪出家\n search', '百度网盘\nsearch', '央行1000亿mlf\n search', '汽车之家\nsearch', '疫情\nsearch', '谷歌翻译\nsearch', '印度聚众喝牛尿\n search', '天眼查\nsearch', '员工救火用嘴吹\n search', '爱奇艺\nsearch', '今日新鲜事\nsearch', '宁津生院士逝世\nsearch', 'qq\nsearch', '英国229名科学家\n search', '腾讯入股新希望\n search', '虎牙\nsearch', '企查查\nsearch', '古巨基当爸\n search', '英女王迁离伦敦\n search', '钉钉\nsearch', '美国疫苗临床试验\n search', '吉利icon\nsearch', 'b站\nsearch', '6类行为定罪处罚\n search', '智联招聘\nsearch', '地图\nsearch', '腾讯视频\nsearch', '360\nsearch', '意大利一市长病逝\nsearch', '武大樱花直播日程\nsearch', '日历\nsearch', '韩国再现集体感染\n search', '百度地图\nsearch', '安家\nsearch', '亚马逊禁上口罩\nsearch', '淘宝网\nsearch', '微信网页版\nsearch', '163邮箱登录\nsearch', '电影天堂\nsearch', '谁都渴望遇见你\nsearch', '58同城\nsearch', '蝙蝠侠停拍\nsearch', '知网\nsearch', '微信公众平台\nsearch', '巴菲特女儿自我隔离\n search', '百度\nsearch', '4399\nsearch', '热血同行\nsearch', '微信公众号\nsearch', '顺丰\nsearch', '环球音乐ceo确诊\n search', '新型冠状病毒肺炎\nsearch', '京东商城\nsearch', '塞尔维亚紧急状态\n search', '腾讯会议\nsearch', '湖北籍舰艇出镜\n search', '在线翻译\nsearch', '捷克宣布全国隔离\n search', '球迷支持赛季无效\n search', 'steam\nsearch', 'wps\nsearch', 'lv生产洗手液\n search', '腾讯课堂\nsearch', '阿里巴巴\nsearch', '美联储利率降至零\n search', '国外确诊已超国内\n search', '有道翻译\nsearch', '西班牙将封锁全国\nsearch', '超星学习通\nsearch', '豆瓣\nsearch', '阿里云\nsearch', '意大利新增2547例\nsearch', '全国入境日均12万\n search', '战网\nsearch', '梅西呼吁抗击疫情\nsearch', '巴西总统检测结果\nsearch', '学信网\nsearch', '美股\n search', 'nga\nsearch', '上证指数\nsearch', '中国知网\nsearch', '智慧树\nsearch', '天猫\nsearch', '腾讯\nsearch'], ['343170', '317407', '239262', '227638', '224707', '220729', '213478', '210275', '190168', '182806', '174386', '159213', '131647', '124552', '122762', '120185', '116333', '111649', '109508', '107822', '103459', '99477', '98568', '97438', '97412', '96411', '95920', '94947', '91643', 
'91509', '90140', '84974', '83853', '80200', '79510', '78917', '77942', '77135', '76084', '75011', '74323', '74323', '71190', '69908', '69785', '68019', '67702', '65267', '60425', '59770', '58456', '58401', '57826', '57745', '57702', '57086', '56938', '56716', '56466', '56325', '55937', '55863', '54822', '54572', '54035', '53214', '53115', '52961', '52752', '52196', '52168', '50985', '50774', '50341', '50309', '50291', '50110', '49443', '48700', '47279', '45589', '45497', '44289', '43831', '43693', '43405', '42952', '42911', '42104', '41539']]
                    标题      热度
0           翻译\nsearch  343170
1           淘宝\nsearch  317407
2           斗鱼\nsearch  239262
3         百度翻译\nsearch  227638
4   美国确诊超2600例\nsearch  224707
..                 ...     ...
95        上证指数\nsearch    None
96        中国知网\nsearch    None
97         智慧树\nsearch    None
98          天猫\nsearch    None
99          腾讯\nsearch    None

[100 rows x 2 columns]




网页源代码中所需要的部分（标题文本位于 class="list-title" 的 <a> 标签内）：
<a class="list-title" target="_blank" href="http://www.baidu.com/baidu?cl=3&amp;tn=SE_baiduhomet8_jmjb7mjw&amp;rsv_dl=fyb_top&amp;fr=top1000&amp;wd=%B7%AD%D2%EB" href_top="./detail?b=2&amp;c=12&amp;w=%B7%AD%D2%EB">翻译</a>

猜你喜欢

转载自www.cnblogs.com/a1959711087/p/12526114.html