import requests
import jieba
import numpy as np
from lxml import etree
from wordcloud import WordCloud as wc
from PIL import Image
# Bilibili danmaku (bullet-comment) XML endpoint and a browser-like
# User-Agent. NOTE(review): nothing in this file performs the HTTP request;
# presumably the XML parsed below was downloaded from this URL beforehand.
url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=152796906'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
result = []

# Parse the saved danmaku XML with the lenient HTML parser and collect the
# text content of every <d> element.
html = etree.parse("./B站弹幕/bilibli.xml", etree.HTMLParser())
text = html.xpath("//d//text()")

# Write one danmaku per line. The file is opened in append mode, so running
# the script again adds to the existing corpus instead of replacing it.
with open('./B站弹幕/a.txt', "a+", encoding="utf-8") as f:
    f.writelines(t + '\n' for t in text)
# Finally, build the word cloud.
import jieba.analyse
from PIL import Image, ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
# Load the whole danmaku corpus (one comment per line) that serves as the
# data source. The context manager guarantees the file handle is closed.
with open('./B站弹幕/a.txt', 'r', encoding='utf-8') as f:
    l = f.read()

# Extract up to 250 keywords with jieba's TextRank; withWeight=True yields
# (keyword, weight) pairs, which become the frequency table for the cloud.
result = jieba.analyse.textrank(l, topK=250, withWeight=True)
keyworlds = {word: weight for word, weight in result}

# Background/mask image; a more colourful picture gives a nicer result when
# the cloud is recoloured from it.
image = Image.open('./B站弹幕/timg.jpg')
graph = np.array(image)

# simhei.ttf is passed as the font so that Chinese keywords can be rendered.
wc = WordCloud(font_path='simhei.ttf', background_color='White', max_font_size=170, mask=graph)
wc.generate_from_frequencies(keyworlds)

# Colour generator derived from the mask image. It is not applied by default;
# to colour the words from the image, display
# wc.recolor(color_func=image_color) instead of wc below.
image_color = ImageColorGenerator(graph)

plt.imshow(wc)
plt.axis('off')
plt.show()

# Save the rendered cloud next to the input data.
wc.to_file('./B站弹幕/1.png')