# Scrape Bilibili comments together with each commenter's username, gender, level, and the comment's like count
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 2 01:38:24 2021
@author: kun
"""
import requests
# Request headers passed to the HTTP call made for each page of comments.
# The User-Agent imitates a desktop Firefox browser; the cookie is left blank.
headers = dict()
headers["User-Agent"] = (
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0"
)
headers['cookie'] = ''
import pandas as pd
# Column buffers, one entry per scraped reply. They must stay index-aligned
# because they become the columns of the DataFrame built at the end.
comments = []
users = []
genders = []
levels = []
likes = []
original_url = "https://api.bilibili.com/x/v2/reply?jsonp&type=1&oid=800760067&sort=2&pn="
# The page range is handled simplistically: a fixed upper bound of 59 pages,
# with an early exit as soon as a page comes back without replies.
for page in range(1, 60):
    url = original_url + str(page)
    print(url)
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        html = requests.get(url, headers=headers, timeout=10)
        html.raise_for_status()
        replies = html.json()['data']['replies']
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Network failure, non-JSON body, or an unexpected payload shape:
        # report it and move on to the next page (best effort).
        print(url)
        print(err)
        continue
    if not replies:
        # No replies on this page; stop instead of requesting the remaining pages.
        break
    for i in replies:
        try:
            # Read every field before appending anything, so a reply with a
            # missing key cannot leave the column lists with unequal lengths.
            message = i['content']['message']
            like = i['like']
            uname = i['member']['uname']
            sex = i['member']['sex']
            level = i["member"]["level_info"]["current_level"]
        except (KeyError, TypeError) as err:
            # Skip only the malformed reply, not the rest of the page.
            print(url)
            print(err)
            continue
        comments.append(message)
        likes.append(like)
        users.append(uname)
        genders.append(sex)
        levels.append(level)
# Assemble the aligned columns into a table and write it to an Excel file.
data = pd.DataFrame({
    "用户": users, "性别": genders, "等级": levels, "评论": comments, "点赞": likes})
data.to_excel("bingbing.xlsx")