Standing on the shoulders of giants to do data analysis

Code reference: the article "I crawled 100,000 comments on Eason Chan's new song 'We' and found that some people, it turns out, are only meant to be met."

1.  Find the comment URL and the request method:

Request headers and form data (how the encrypted parameters are decrypted is not covered here)

2.  Grab popular comments

3.  Hot Comments Word Cloud

4. wordcloud practice: generate a word cloud that follows the shape and color layout of a picture

from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
from scipy.misc import imread
import jieba
# wordcloud exercise: locations of the input files used by the script.
stopwords_path = 'D:/workspace/my exercises/netmusic/stopwords.txt'  # Stopwords storage path
back_coloring_path = 'D:/workspace/my exercises/netmusic/leslie.jpg'  # Background image storage path
# Raw string: '\F' and '\s' are not valid escape sequences, so a plain literal
# triggers an invalid-escape warning on current Python versions. The runtime
# value of the path is unchanged.
font_path = r'C:\Fonts\simkai.ttf'  # Chinese font file path
# NOTE(review): scipy.misc.imread was removed in SciPy 1.2; on a current SciPy
# the `from scipy.misc import imread` import fails. Consider switching to
# another image reader (e.g. imageio.imread) -- confirm it returns the same
# array layout before swapping.
back_coloring = imread(back_coloring_path)  # load the background image as an array

def jiebaclearText(text):
    """Segment ``text`` with jieba and remove stop words.

    Tokens are dropped when, after stripping whitespace, they appear in the
    stop-word file at the module-level ``stopwords_path`` (one entry per
    line) or are at most one character long.

    Args:
        text: The raw text to segment.

    Returns:
        A single string containing the kept tokens separated by spaces.
    """
    # Joining with '/ ' and splitting on '/' leaves every token after the
    # first with a leading space; that space is what separates the words in
    # the returned string, so the round trip is kept on purpose.
    segmented = '/ '.join(jieba.cut(text, cut_all=False))

    # NOTE(review): the file is read with the platform default encoding, as
    # before. Pass encoding=... explicitly once the encoding of the stop-word
    # file is confirmed.
    with open(stopwords_path) as f_stop:
        # A set makes each membership test O(1) instead of scanning a list.
        stop_words = set(f_stop.read().split('\n'))

    kept_tokens = []
    for token in segmented.split('/'):
        word = token.strip()
        if word not in stop_words and len(word) > 1:
            # Append the unstripped token to keep its separating space.
            kept_tokens.append(token)
    return ''.join(kept_tokens)

# Segment the collected comment text and strip stop words.
text = jiebaclearText(content_text)

# Configure the word cloud and build it from the cleaned text in one step
# (generate() returns the WordCloud instance itself).
wc = WordCloud(
    font_path=font_path,        # font used to render the words
    background_color="white",   # canvas color
    max_words=5000,             # upper bound on the number of words shown
    mask=back_coloring,         # background image used as the mask
    max_font_size=100,          # largest font size
    random_state=84,
    width=1000,
    height=860,
    margin=2,
).generate(text)

# First figure: the word cloud, recolored from the background image.
image_colors = ImageColorGenerator(back_coloring)
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Second figure: the background image itself.
plt.figure()
plt.imshow(back_coloring, cmap=plt.cm.gray)
plt.axis("off")

# Display both figures.
plt.show()

The original picture and the resulting word cloud. Haha, even true fans can't recognize him in it.

          

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324829530&siteId=291194637