词云效果
需要安装的依赖
plt(matplotlib.pyplot):图片处理包
jieba:分词包
wordcloud:词云生成包
要实现的功能
批量读取某个目录下所有文件的文件名,存入 txt 文件中,然后对该 txt 文件进行分词并生成词云
代码如下
import os

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Directory where the video files are stored.
PATH = 'D:\\videos\\analyser\\'
# Build the text file that the word-cloud analysis reads.
def getTitlesFromVideoPath(video_dir=None, output_path='./titles.txt'):
    """Write the names of the files in a directory to a text file.

    Every regular file directly inside ``video_dir`` contributes one title;
    sub-directories are skipped. A trailing ``.mp4`` extension (in any letter
    case) is stripped, all other names are kept unchanged. The titles are
    joined with single spaces and written to ``output_path``, replacing any
    previous content.

    Args:
        video_dir: Directory to scan. Defaults to the module-level ``PATH``.
        output_path: File to write. Defaults to ``./titles.txt``.

    Raises:
        OSError: If ``video_dir`` cannot be listed or ``output_path`` cannot
            be written.
    """
    if video_dir is None:
        video_dir = PATH

    titles = []
    for entry in os.listdir(video_dir):
        if not os.path.isfile(os.path.join(video_dir, entry)):
            continue
        # Strip only a real trailing extension: str.replace would also delete
        # ".mp4" from the middle of a name and would miss ".MP4".
        stem, ext = os.path.splitext(entry)
        titles.append(stem if ext.lower() == '.mp4' else entry)

    # The platform default encoding is kept on purpose so the output stays
    # readable by code that opens titles.txt without an explicit encoding.
    with open(output_path, 'w') as f:
        f.write(' '.join(titles))
# Read the title list from titles.txt in the current working directory
# (the file getTitlesFromVideoPath() writes). The file must already exist.
path_txt = "titles.txt"
# Use a context manager so the handle is closed as soon as the text is read.
with open(path_txt, 'r') as f:
    titles_text = f.read()
# Segment the text with jieba and re-join the words with spaces.
cut_text = " ".join(jieba.cut(titles_text))
wordcloud = WordCloud(
    # Required: without a font that covers Chinese, the characters are
    # rendered as empty boxes.
    font_path="./font/STKAITI.TTF",
    background_color="white",
    width=1000,
    height=800,
).generate(cut_text)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()