# -*- coding: utf-8 -*-
"""
Created on Wed Aug 16 13:53:52 2017

@author: gzs10227

Render a word cloud from the text in ``ngc.txt``.

Steps performed, in order:

1. Read ``ngc.txt`` and remove bracketed markup tags such as ``[b]``,
   ``[quote]``, ``[img]`` and ``[url]``, then whitespace, ASCII punctuation
   and digits.
2. Segment the remaining text with jieba and count every token longer than
   one character.
3. Build a word cloud from those counts, using ``test.jpg`` as the mask and
   as the source of the recolouring palette.
4. Show three figures (default colours, image colours, the mask image) and
   save the cloud to ``tt.png``.
"""
from os import path
import re
from collections import Counter

import matplotlib.pyplot as plt
import jieba
import jieba.analyse
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; fall back to matplotlib's
    # reader, which this script already depends on.
    from matplotlib.pyplot import imread

# Use a font family that contains CJK glyphs for anything matplotlib draws.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Working directory holding the input text, the mask image and the output.
d = path.dirname(u'E:/Liao Qinghao/2017/201708/Text Analysis/')

# NOTE(review): no encoding is given, so the platform default is used;
# confirm the encoding of ngc.txt before pinning one here.
with open(path.join(d, 'ngc.txt')) as text_file:
    text = text_file.read()

# read the mask / color image
# taken from http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010
# set background image
alice_coloring = imread(path.join(d, "test.jpg"))

# set.add() returns None, so the stop-word set has to be built before it is
# handed to WordCloud. Copying also leaves the library-wide STOPWORDS intact.
stopwords = set(STOPWORDS)
stopwords.add("said")

wc = WordCloud(
    background_color="black",  # background color
    max_words=2000,  # the maximum number of words displayed by the word cloud
    font_path='E:/Anaconda2/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/Microsoft YaHei.ttf',
    mask=alice_coloring,  # set the background image
    stopwords=stopwords,
    max_font_size=40,  # font maximum value
    random_state=42,
)

# Markup removal patterns. Order matters: the paired-tag patterns must run
# before the final catch-all, which deletes any remaining "[...]" tag but
# keeps the text between tags.
_MARKUP_PATTERNS = (
    r'\[b\].*?\[\/b\]',
    r'\[quote\].*?\[\/pid\]',
    r'\[img\].*?\[\/img\]',
    r'\[align=center\].*?\[list\]',
    r'\[quote\].*?\[\/quote\]',
    r'\[url\].*?\[\/url\]',
    r'\[s:.*?\]',
    r'\[.*?\]',
)
for _pattern in _MARKUP_PATTERNS:
    text = re.sub(_pattern, '', text)

# Whitespace is stripped only after the tag pass, so "." in the patterns
# above never spans a line break.
text = text.replace('\t', '').replace('\n', '').replace(' ', '')

# Drop ASCII punctuation and digits. Inside the class, "+-\/" is a character
# range from "+" to "/", i.e. the five characters + , - . /
text = re.sub(r'[,:.!=?#%()#+-\/0-9]+', '', text)

# Keep only tokens longer than one character, then count them.
ci = [i for i in jieba.cut(text) if len(i) > 1]
data = dict(Counter(ci))

# The same counts as a list of (word, count) tuples,
# e.g. [('word a', 100), ('word b', 90), ('word c', 80)].
ci_list = list(data.items())

# The cloud is built from the jieba counts only. Calling wc.generate(text)
# first would compute a layout that this call replaces straight away.
wc.generate_from_frequencies(data)

# Generate color values from the background image.
image_colors = ImageColorGenerator(alice_coloring)

# Figure 1: the word cloud with its default colours.
plt.imshow(wc)
plt.axis("off")

# Figure 2: recolor wordcloud and show.
# We could also give color_func=image_colors directly in the constructor.
plt.figure()
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Figure 3: draw the background image as a color image.
plt.figure()
plt.imshow(alice_coloring, cmap=plt.cm.gray)
plt.axis("off")
plt.show()

# Save the picture. NOTE(review): this runs after recolor(), so presumably
# the file carries the image-derived colours; confirm against wordcloud's
# recolor documentation.
wc.to_file(path.join(d, "tt.png"))
wordcloud word cloud use
Guess you like
Origin http://43.154.161.224:23101/article/api/json?id=325704358&siteId=291194637
Recommended
Ranking