Python word segmentation and word cloud image generation

main.py

#coding: utf-8
'''
Before running this program, replace the wordcloud.py found at the path below
with the wordcloud.py from this project's word-cloud folder:
Anaconda3\Lib\site-packages\wordcloud
That is where "pip install wordcloud" places the package; if you are using a
virtual environment, use the following path instead:
Anaconda3\envs\***\Lib\site-packages\wordcloud
Reason for the replacement: the stock wordcloud.py cannot return the
generated words and their frequencies.
'''
import jieba.analyse
import jieba
import wordcloud
# import pandas as pd

def cut(text):
    """Segment *text* with jieba in full mode.

    Returns a single string in which the segmented tokens are joined
    by spaces, ready to feed into WordCloud.generate().
    """
    tokens = jieba.cut(text, cut_all=True)
    return " ".join(tokens)

if __name__ == '__main__':

    # Load the user-defined dictionary so domain-specific terms are
    # segmented as single words.
    jieba.load_userdict('dict.txt')

    src = 'text.txt'
    # Use a context manager so the file handle is closed even on error
    # (the original left the file open).
    with open(src, 'r', encoding='utf-8') as f:
        text = f.read()
    text = cut(text)
    # print(text)
    w = wordcloud.WordCloud(font_path='msyh.ttc', width=1000, height=700, background_color='white')
    w.generate(text)
    # NOTE(review): return_words exists only in the patched wordcloud.py
    # described in the module header; the stock package does not expose it.
    words_dict = w.return_words  # dict of {word: frequency}
    # for k, v in words_dict.items():
    #     print(k)

    w.to_file('grwordcloud.png')

wordcloud.py

# Illustrative excerpt of the patched wordcloud.py (the ellipses stand for
# the library's original, unchanged code). The patch records every word and
# its frequency in ``return_words`` so callers can read them back after
# generate() runs.
class WordCloud(object):
	def __init__(self,...)
		...
		# Patch: dictionary that will collect {word: frequency} pairs.
		self.return_words = {
    
    }
		...
	def generate_from_frequencies(self, frequencies, max_font_size=None):

		...
		for word, freq in frequencies:
			...
			# Patch: remember each placed word and its frequency.
			self.return_words[word] = freq
			...
		...
		return self

You may also like

Origin blog.csdn.net/tailonh/article/details/110849608