python生成词云图

import xlrd

import matplotlib.pyplot as plt
from wordcloud import WordCloud
import jieba
#from wordcloud import
import pandas as pd
class word_cloud():


    '''
    keylist = ['title','title','title','title','title','title','title','title','title']
    '''


    def read_excel(self):
        # 打开excel文件
        workbook = xlrd.open_workbook(r'G:\soft\soft_web\soft_app\19分词价格段14000.01-160000.xlsx')
        # 获取所有的sheet
        sheet1 = workbook.sheet_by_name('Sheet1')
        # 获取第二列row_title
        rows = sheet1.col_values(1)
        return rows
    # def word_count(self,data):
    def word_count(self):
        '''
        分词和词频统计
        :param data:
        :return:
        '''
        data = self.read_excel()
        title_s = []
        for line in data:
            title_s.append(jieba.lcut(line.encode('utf-8')))
        title_dist = []
        for line in title_s:
            line_dist = []
            for word in line:
                if word not in line_dist:
                    line_dist.append(word)
            title_dist.append(line_dist)
        allWords_clean_disk = []
        for line in title_dist:
            for word in line:
                allWords_clean_disk.append(word)
        print(allWords_clean_disk)

        df_allWords_clean_dist = pd.DataFrame({"allwords": allWords_clean_disk})
        word_count = df_allWords_clean_dist.allwords.value_counts().reset_index()
        word_count.columns = ["word", "count"]
        return word_count


    #def  create_wordcloud(self,keylist):
    def create_wordcloud(self):
        '''
        生成关键词云
        :param keylist:
        :return:
        '''
        keylist = self.word_count()
        plt.figure(figsize=(50, 30))
        # pic = plt.imread('111.png')
        w_c = WordCloud(font_path=r'C:\Windows\Fonts\STSONG.TTF', background_color='white',
                        max_words=1000, max_font_size=60, margin=1,width=600,height=400)
        #word_counts = word_count(keylist)
        word_counts = self.word_count()
        #print(word_counts.values)
        w_c.generate_from_frequencies({x[0]: x[1] for x in word_counts.values})

        plt.imshow(w_c, interpolation='bilinear')
        plt.axis('off')
        plt.show()
        w_c.to_file("../static/temp/19.png")

        # 生成关键词云图
        # keylist = []
        # for line in data.raw_title:
        #     keyword = line.replace('沙发', '')
        #     keylist.append(keyword)
        # word_cloud().create_wordcloud(keylist)

# a = word_cloud()
# a.word_count(read_excel())
# if __name__ == '__main__':
#     word = word_cloud()
#     word.run("沙发", totalPage=5)
if __name__ =='__main__':
    a = word_cloud()
    #a.word_count()
    a.create_wordcloud()

猜你喜欢

转载自blog.csdn.net/weixin_42341608/article/details/81868656