Reptile: requests, beautifulsoup
Word cloud: wordcloud, jieba
Code plus comments:
1 # -*- coding: utf-8 -*- 2 import xlrd#读取excel 3 import xlwt#写入excel 4 import requests 5 import linecache 6 import wordcloud 7 import jieba 8 import matplotlib.pyplot as plt 9 from bs4 import BeautifulSoup 10 11 if __name__=="__main__": 12 yun="" 13 14 n=0#ID编号 15 target='https://api.bilibili.com/x/v1/dm/list.so?oid=132084205'#b站oid页 16 user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' 17 headers = {'User-Agent':user_agent}#伪装浏览器 18 19 req=requests.get(url=target) 20 html=req.text 21 html=html.encode('ISO 8859-1') 22 #html=html.replace('<br>',' ').replace('<br/>',' ').replace('/>','>') 23 bf=BeautifulSoup(html,"html.parser") 24 25 texts=bf.find('i') 26 texts_div=texts.find_all('d') 27 #print(texts_div) 28 for item in texts_div: 29 n=n+1 30 item_name=item.text Yun yun.replace = (3534 isSTR (ITEM_NAME) Yun = +31 isheader# " " , "" ) 36 Yun = yun.replace ( " ha " , "" ) 37 [ Yun = yun.replace ( " ah " , "" ) 38 is Yun yun.replace = ( " a " , "" ) # removed meaningless barrage 39 # stutter word, generating a string, can not be generated directly wordcloud correct Chinese word cloud 40 cut_text = " " .join (jieba.cut (Yun)) 41 is WC = wordcloud. Wordcloud ( 42 # to set the font, otherwise there will be garbled word of mouth, a path is generally computer font path, can be replaced by another 43 font_path="C:/Windows/Fonts/simfang.ttf", 44 #设置了背景,宽高 45 background_color="white",width=1000,height=880).generate(cut_text) 46 47 plt.imshow(wc, interpolation="bilinear") 48 plt.axis("off") 49 plt.show() 50 print("Done!")
Figure operating results: