Python: scraping Bilibili video danmaku (bullet comments) and generating a word cloud

Scraper: requests, BeautifulSoup

Word cloud: wordcloud, jieba

Code plus comments:

 # -*- coding: utf-8 -*-
"""Download the danmaku (bullet comments) of one Bilibili video and plot a word cloud.

The script fetches the danmaku XML for a fixed ``oid``, concatenates the text
of every ``<d>`` element found under the ``<i>`` root, drops a few filler
tokens, segments the result with jieba and renders it with wordcloud.
"""
import xlrd  # read Excel files (not used below)
import xlwt  # write Excel files (not used below)
import requests
import linecache
import wordcloud
import jieba
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup

# Danmaku endpoint for one specific video (the "oid" page on Bilibili).
DANMAKU_URL = 'https://api.bilibili.com/x/v1/dm/list.so?oid=132084205'
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
# A font with CJK glyphs is required, otherwise the cloud shows empty boxes.
# This is a Windows path; point it at another font file on other systems.
FONT_PATH = "C:/Windows/Fonts/simfang.ttf"
# Substrings removed before segmentation because they carry no meaning.
# NOTE(review): the published listing was machine-translated and showed these
# as " ", " ha ", " ah " and " a "; the Chinese characters below are a
# reconstruction -- confirm against the original post.
NOISE_TOKENS = (" ", "哈", "啊", "一")


def _fetch_danmaku(url, headers):
    """Return the text of every ``<d>`` element under ``<i>`` at *url*.

    Returns an empty list when the document has no ``<i>`` element.
    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    # The original built the headers but never sent them; pass them along so
    # the request actually presents the browser User-Agent.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    # Give the parser the raw bytes. The original round-tripped
    # ``text.encode('ISO 8859-1')`` to recover them, which raises as soon as
    # requests decodes the body with any other encoding.
    soup = BeautifulSoup(response.content, "html.parser")
    root = soup.find('i')
    if root is None:
        return []
    return [node.text for node in root.find_all('d')]


def _strip_noise(text, tokens=NOISE_TOKENS):
    """Return *text* with every occurrence of each token in *tokens* removed."""
    for token in tokens:
        text = text.replace(token, "")
    return text


if __name__ == "__main__":
    headers = {'User-Agent': USER_AGENT}  # pose as a regular browser

    comments = _fetch_danmaku(DANMAKU_URL, headers)
    yun = _strip_noise("".join(comments))
    if not yun:
        # WordCloud.generate() cannot build a cloud from empty text; stop
        # with a readable message instead of a traceback.
        raise SystemExit("No danmaku text found at " + DANMAKU_URL)

    # wordcloud splits on whitespace, so segment the Chinese text with jieba
    # and re-join the words with spaces first.
    cut_text = " ".join(jieba.cut(yun))
    wc = wordcloud.WordCloud(
        font_path=FONT_PATH,
        # white background, fixed canvas size
        background_color="white",
        width=1000,
        height=880,
    ).generate(cut_text)

    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.show()
    print("Done!")

Screenshot of the result:

 

 

Guess you like

Origin www.cnblogs.com/ljy1227476113/p/12000264.html