Code reference: the article "I crawled 100,000 comments on Eason Chan's new song 'We' and found that, as it turns out, some people are only suited to being met".
1. Find the comment URL and the request method:
request headers and form data (ignoring how the information is decrypted)
2. Grab popular comments
3. Hot Comments Word Cloud
4. wordcloud practice: generate a word cloud that follows the shape and color layout of a picture
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; fall back to matplotlib's
    # reader, which is already a dependency of this script.
    from matplotlib.pyplot import imread
# wordcloud exercise: draw the comment text as a word cloud that takes its
# shape and colours from a background picture.
stopwords_path = 'D:/workspace/my exercises/netmusic/stopwords.txt'  # Stopwords storage path
back_coloring_path = 'D:/workspace/my exercises/netmusic/leslie.jpg'  # Background image storage path
# Raw string: same value as before, but the backslashes are no longer parsed
# as (invalid) escape sequences.
font_path = r'C:\Fonts\simkai.ttf'  # Chinese font file path
back_coloring = imread(back_coloring_path)  # image array used as mask and colour source


def _load_stopwords(path):
    """Return the set of stop words stored one per line in the file at *path*."""
    # NOTE(review): the file is read with the platform default encoding, as
    # before -- confirm the encoding of stopwords.txt and pass encoding=
    # explicitly once it is known.
    with open(path) as stop_file:
        # A set makes the per-token membership test O(1).
        return {line.strip() for line in stop_file}


def jiebaclearText(text):
    """Segment *text* with jieba and drop stop words and one-character tokens.

    Args:
        text: The raw string to segment.

    Returns:
        The kept words joined by single spaces; an empty string when no
        word survives the filtering.

    Raises:
        OSError: If the stop-word file at ``stopwords_path`` cannot be opened.
    """
    stopwords = _load_stopwords(stopwords_path)
    # Iterate the tokens directly instead of joining them with '/' and
    # splitting again, which would break on tokens that contain '/'.
    words = (token.strip() for token in jieba.cut(text, cut_all=False))
    return ' '.join(
        word for word in words if len(word) > 1 and word not in stopwords
    )


# content_text is defined outside this snippet (presumably the crawled
# comment text -- verify against the earlier steps).
text = jiebaclearText(content_text)

# NOTE(review): the wordcloud documentation says width/height are ignored
# when a mask is supplied -- confirm before relying on them.
wc = WordCloud(
    font_path=font_path,       # set font
    background_color="white",  # background color
    max_words=5000,            # The maximum number of words displayed in the word cloud
    mask=back_coloring,        # set the background image
    max_font_size=100,         # maximum font size
    random_state=84,
    width=1000,
    height=860,
    margin=2,
)
wc.generate(text)

# First figure: the word cloud, recoloured with the colours of the picture.
image_colors = ImageColorGenerator(back_coloring)
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Second figure: the background picture itself, for comparison.
plt.figure()
plt.imshow(back_coloring, cmap=plt.cm.gray)
plt.axis("off")
plt.show()
The original picture and the resulting word cloud. Haha, even true fans can't make it out.