# Word-frequency demo: segment a sentence with jieba, count every token with
# a plain dict, then print the five most frequent tokens.
import jieba

LS = " China is a great country, is a good country "
print(' the original document as: ', LS)

counts = {}  # token -> number of occurrences
words = jieba.lcut(LS)  # segment the text into a list of tokens
print(' good points phrases are: ', words)

for word in words:
    # dict.get supplies 0 the first time a token is seen.
    counts[word] = counts.get(word, 0) + 1
print(' generated dictionary is: ', counts)
print(' dictionary elements: ', counts.items())

# Turn the dict's (token, count) pairs into a list so they can be sorted.
items = list(counts.items())
print(' Counts generate a new list of elements: ', items)

# Sort in place by the count (index 1 of each pair). reverse=True gives
# descending order; the default is ascending.
items.sort(key=lambda x: x[1], reverse=True)
print(' according to the list of tuples in the order of a second dimension: ', items)

# Print the five most frequent tokens. Slicing (rather than indexing with
# range(5)) avoids an IndexError when there are fewer than five distinct tokens.
for word, count in items[:5]:
    # Token left-aligned in 10 columns, count right-aligned in 5 columns.
    print("{0:<10}{1:>5}".format(word, count))

# -----------------------
# Variant of the counting loop that ignores single-character tokens.
# NOTE(review): as written this runs after the first pass and adds into the
# same `counts` dict, so multi-character tokens end up counted twice.
# Presumably it is meant to replace the first loop rather than follow it --
# confirm before relying on the resulting totals.
for word in words:
    if len(word) == 1:
        # Skip single-character tokens.
        continue
    counts[word] = counts.get(word, 0) + 1