Count word frequencies in a document and output the top 5 high-frequency words

import jieba

# Sample document to analyse.
text = " China is a great country, is a good country "
print(' the original document as: ', text)

counts = {}  # maps each token to the number of times it occurs
words = jieba.lcut(text)  # segment the text into a list of tokens
print(' good points phrases are: ', words)

# Tally every token; dict.get supplies 0 the first time a token is seen.
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(' generated dictionary is: ', counts)
print(' dictionary elements: ', counts.items())

# Turn the dictionary's (token, count) pairs into a list so they can be sorted.
items = list(counts.items())
print(' Counts generate a new list of elements: ', items)

# Sort by the second tuple element (the count). reverse=True gives descending
# order; the default is ascending.
items.sort(key=lambda x: x[1], reverse=True)

print(' according to the list of tuples in the order of a second dimension: ', items)

# Print the five most frequent tokens. Slicing instead of indexing 0..4 avoids
# an IndexError when the text has fewer than five distinct tokens.
for word, count in items[:5]:
    # Token left-aligned in 10 columns, count right-aligned in 5 columns.
    print("{0:<10}{1:>5}".format(word, count))

# -----------------------
# Variant of the counting loop: single-character tokens are skipped, so only
# multi-character words are tallied into the frequency dictionary.
for word in words:
    if len(word) == 1:  # single-character token: do not count it
        continue
    counts[word] = counts.get(word, 0) + 1

 

You may also like

Source: www.cnblogs.com/huigebj/p/11433878.html