本文由本人原创,仅作为自己的学习记录
主要是用jieba进行分词,然后用wordcloud进行词云图绘制。发现jieba分词还是会出现一些问题,后续计划有时间可能会阅读jieba源码进行优化。下面是代码:
import wordcloud
import jieba
import matplotlib.pyplot as plt
from PIL import Image
from numpy import array
import io
def set_wc(word_file, bg_file):
    """Build and display a word cloud from a Chinese text file.

    The text is segmented with jieba, rendered by wordcloud using the
    background image as a shape mask, shown via matplotlib, and saved to
    a fixed result path.

    Parameters
    ----------
    word_file : str
        Path to the input text file to segment.
        (Bug fix: was misspelled ``gb_file`` in the signature while the
        body referenced ``bg_file`` — a guaranteed NameError.)
    bg_file : str
        Path to the image used as the word-cloud mask.
    """
    # Read the whole text; 'with' guarantees the file handle is closed.
    # NOTE(review): uses the platform default encoding — confirm the file
    # is not UTF-8 on a GBK-default Windows box, else pass encoding=.
    with open(word_file, 'r') as f:
        word_data = f.read()

    # jieba.cut yields segmented tokens lazily; join with spaces so
    # WordCloud can tokenize on whitespace. str.join replaces the original
    # quadratic `alldata = alldata + " " + i` loop.
    alldata = " ".join(jieba.cut(word_data))
    # Bug fix: Python-2 `print alldata` statement -> Python-3 call.
    print(alldata)

    # SimHei ships with Windows and covers CJK glyphs (the default font
    # would render Chinese as boxes).
    font = r'C:\Windows\Fonts\simhei.ttf'

    # Load the mask image as an array; white areas are left blank.
    cp_array = plt.imread(bg_file)

    # collocations=False avoids duplicated two-word phrases in the cloud.
    my_cp = wordcloud.WordCloud(collocations=False, font_path=font,
                                mask=cp_array, max_words=2000,
                                max_font_size=150,
                                background_color="white"
                                ).generate_from_text(alldata)

    plt.imshow(my_cp)
    plt.axis("off")
    plt.show()

    # Bug fix: to_file must be called on the generated instance, not on
    # the WordCloud class; also use a raw string so backslashes in the
    # Windows path are not interpreted as escape sequences.
    my_cp.to_file(r'D:\eclipse\eclipse_workplace\yuntu\src\word_result.jpg')
if __name__ == "__main__":
    # Input corpus and the mask image that shapes the cloud.
    # Raw strings carry the same byte values as the original
    # double-backslash literals.
    text_path = r'D:\eclipse\eclipse_workplace\yuntu\src\word.txt'
    mask_path = r'D:\eclipse\eclipse_workplace\yuntu\src\2_tmp.jpg'
    set_wc(text_path, mask_path)