Python word cloud example

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 23 16:03:41 2019

@author: Administrator
"""


import os
import sys
import docx
import jieba
import jieba.posseg as pseg
import re
import collections
from PIL import Image
import numpy as np
from docx import Document
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import wordcloud

# Work from the project directory so the relative file names below resolve.
# Raw string: in a normal literal "\w" is an invalid escape sequence, which
# newer Python versions warn about.
os.chdir(r'E:\wordcloud')
# NOTE(review): n and wd_lists are not referenced anywhere in the visible
# script; kept in case other code relies on them.
n=0
wd_lists=''
# Read the whole document text
document = Document('bid_document.docx')
# Dump every paragraph into words.txt. split() drops all whitespace and
# writelines() adds no separators, so the file ends up as one unbroken string.
with open('words.txt', 'w', encoding='utf-8') as f:
    for paragraph in document.paragraphs:
        f.writelines(paragraph.text.split())

# Load the stop-word list (presumably one word per line -- the file content
# is split on newlines).
# The context manager closes the file even if reading fails.
with open('chineseStopWords.txt', 'r', encoding='utf-8') as f:
    # Only the dict keys matter: they are used for fast membership tests,
    # the values are all None.
    stopwords = dict.fromkeys(f.read().split('\n'))
# Register the custom dictionary with jieba before segmenting.
jieba.load_userdict('userdict.txt')
# Individual terms can also be added one at a time, e.g.
#   jieba.add_word('graphene')

# Read the exported text back as a list of whitespace-separated chunks.
with open('words.txt', 'r', encoding='utf-8') as f:
    text = f.read().split()

# Segment every chunk rather than only text[0]: an empty file (text == [])
# then simply yields no tokens instead of raising IndexError, and no chunk
# is silently ignored if the file does contain whitespace.
segs = (seg for chunk in text for seg in jieba.cut(chunk))

# Keep tokens longer than one character that are not stop words.
mytext_list = [
    seg.replace(" ", "")
    for seg in segs
    if len(seg) > 1 and seg not in stopwords
]
cloud_text = "/".join(mytext_list)

# Word frequency statistics
word_counts = collections.Counter(mytext_list)  # token -> occurrence count
word_counts_top10 = word_counts.most_common(10)  # ten most frequent tokens
print(word_counts_top10)  # quick sanity check of the result
       
# Render the word frequencies as a word cloud.
# The mask image is passed both as the cloud's mask and as the colour source.
mask = np.array(Image.open('mask.png'))
wc = WordCloud(
    background_color="Black",  # canvas colour
    max_words=30,  # maximum number of words drawn
    font_path="C:/Windows/Fonts/simfang.ttf",  # font used to draw the words
    min_font_size=15,
    max_font_size=100,
    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied; kept for parity with the original settings.
    width=400,
    mask=mask,
)
# Alternative: wc.generate(cloud_text) would build the cloud from raw text.
wc.generate_from_frequencies(word_counts)
# Recolour the words from the mask image's colours.
image_colors = wordcloud.ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)
plt.imshow(wc)
plt.axis('off')
plt.show()
wc.to_file("pic.png")

Guess you like

Origin blog.csdn.net/huobanjishijian/article/details/86621446