运用jieba库统计词频及制作词云

一、对中国十九大报告做词频分析

 1 import jieba
 2 txt = open("中国十九大报告.txt.txt","r",encoding="utf-8").read()
 3 words = jieba.lcut(txt)
 4 counts = {}
 5 for word in words:
 6     if len(word)==1:
 7         continue
 8     else:
 9         counts[word] = counts.get(word,0)+1
10 items = list(counts.items())
11 items.sort(key=lambda x:x[1],reverse=True)
12 for i in range(15):
13     word,count = items[i]
14     print("{0:<10}{1:>5}".format(word,count))
15     

二、根据词频制作词云

 1 import jieba 
 2 import wordcloud
 3 from imageio import imread
 4 mask = imread("wujiaoxing.png")
 5 f = open("中共十九大报告词频.txt","r",encoding="utf-8")
 6 t = f.read()
 7 f.close()
 8 ls = jieba.lcut(t)
 9 txt = " ".join(ls)
10 w = wordcloud.WordCloud(font_path="msyh.ttc",mask = mask,width = 1000,height=800,background_color="black")
11 w.generate(txt)
12 w.to_file("grwordcloud.png")

猜你喜欢

转载自www.cnblogs.com/qinlai/p/12657196.html