爬取钉钉在B站卑微道歉视频弹幕,做成词云

爬取钉钉在B站卑微道歉视频弹幕,做成词云

先看用户老爷们给出的评价

在这里插入图片描述

然后是爬取代码:很简单

import requests
import jieba
import numpy as np
from lxml import etree
from wordcloud import WordCloud as wc
from PIL import Image
# Danmaku (bullet-comment) XML endpoint for the video, plus a browser-like
# User-Agent header.
# NOTE(review): neither `url` nor `headers` is used below -- the script parses
# a local copy of the XML. Presumably that file was saved from `url`
# beforehand; confirm.
url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=152796906'

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

# Parse the saved danmaku file and collect every text node found under a
# <d> element.
# NOTE(review): the file is read with lxml's HTML parser and no explicit
# encoding; if the extracted text comes out garbled, check the parser encoding.
html = etree.parse("./B站弹幕/bilibli.xml", etree.HTMLParser())
text = html.xpath("//d//text()")

# Write one danmaku per line. The file is opened with "w" (not "a+") so that
# re-running the script replaces the dump instead of appending a duplicate
# copy of every danmaku to it.
with open('./B站弹幕/a.txt', "w", encoding="utf-8") as f:
    f.writelines(t + '\n' for t in text)

最后是词云制作

import jieba.analyse
from PIL import Image, ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator

# Data source: the danmaku dump, one comment per line. Read it in a single
# call inside a `with` block so the handle is closed and no line is lost
# (iterating the file and calling read() inside the loop discards the first
# line).
with open('./B站弹幕/a.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# TextRank keyword extraction via jieba; with withWeight=True the result is a
# sequence of (word, weight) pairs, which maps directly onto a dict.
result = jieba.analyse.textrank(text, topK=250, withWeight=True)
keyworlds = dict(result)

# Mask / background image for the cloud; a more colourful picture gives a
# nicer-looking result.
image = Image.open('./B站弹幕/timg.jpg')
graph = np.array(image)

# simhei.ttf is a font with CJK glyphs, needed to render the Chinese keywords.
wc = WordCloud(font_path='simhei.ttf', background_color='White', max_font_size=170, mask=graph)
wc.generate_from_frequencies(keyworlds)

# Save before showing: plt.show() may block until the window is closed, and
# the image should be written even if the display step is interrupted.
wc.to_file('./B站弹幕/1.png')

# To colour the words from the mask image instead of the default palette,
# display wc.recolor(color_func=ImageColorGenerator(graph)) here.
plt.imshow(wc)
plt.axis('off')
plt.show()
发布了23 篇原创文章 · 获赞 22 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/qq_36389249/article/details/104364747