一、对中国十九大报告做词频分析
# Word-frequency script: segment the report text with jieba and print the
# 15 most frequent words, skipping single-character tokens.
from collections import Counter

import jieba

# Use a context manager so the file handle is closed even if reading fails.
with open("中国十九大报告.txt.txt", "r", encoding="utf-8") as report:
    txt = report.read()

words = jieba.lcut(txt)

# Tokens of length 1 are excluded from the tally.
counts = Counter(word for word in words if len(word) != 1)

# most_common(15) yields at most 15 (word, count) pairs in descending count
# order, so a text with fewer than 15 distinct words no longer raises
# IndexError.
for word, count in counts.most_common(15):
    # Word left-aligned in 10 columns, count right-aligned in 5 columns.
    print("{0:<10}{1:>5}".format(word, count))
二、根据词频制作词云
# Word-cloud script: segment the text with jieba, join the tokens with spaces,
# and render them into a masked word-cloud image saved as a PNG.
import jieba
import wordcloud
from imageio import imread

# Image array passed to WordCloud as the mask.
mask = imread("wujiaoxing.png")

# Context manager guarantees the file is closed even if read() raises.
with open("中共十九大报告词频.txt", "r", encoding="utf-8") as f:
    t = f.read()

# WordCloud.generate() receives one space-separated string, so the jieba
# tokens are joined with single spaces.
ls = jieba.lcut(t)
txt = " ".join(ls)

# NOTE(review): the wordcloud documentation indicates width/height are ignored
# when a mask is supplied (the mask's shape is used) — confirm before relying
# on the 1000x800 size.
w = wordcloud.WordCloud(
    font_path="msyh.ttc",
    mask=mask,
    width=1000,
    height=800,
    background_color="black",
)
w.generate(txt)
w.to_file("grwordcloud.png")