Jieba word segmentation: a word-frequency count on an excerpt (one volume) of Romance of the Three Kingdoms

from collections import Counter

import jieba
# Word-frequency count for a text segmented with jieba.
#
# NOTE(review): the path and every word literal below appear to have been
# machine-translated along with the surrounding article. jieba tokens for a
# Chinese text will be Chinese strings, so these literals presumably need to
# be restored to the original Chinese before the script gives useful output.

# NOTE(review): reconstructed from a garbled path; point this at the real file.
path = 'C:\\Users\\Administrator\\Desktop\\3 Kingdoms.txt'

# Tokens that are dropped from the final tally. Duplicates are harmless
# because removal tolerates missing keys.
excludes = [
    'general', 'say', 'two', 'not', 'negotiate', 'world', 'sergeant', 'day',
    'Xuzhou', 'the emperor', 'how to', 'leading the arms', 'military cited',
    'cavalry', 'Prefecture', 'court', 'not', 'nobility', 'to run', 'Taishi',
]

# Maps alternative tokens onto the single name they are counted under.
ALIASES = {
    'Liang': 'Ming',
    'Ming said': 'Ming',
    'Supreme': 'Bei',
    'xuande said': 'Bei',
    'Meng': 'Cao',
    'prime minister': 'Cao',
    'Guan': 'Guan',
    'Kung': 'Guan',
}


def count_words(words, excludes=(), aliases=None):
    """Count token frequencies, merging aliases and dropping excluded words.

    Args:
        words: Iterable of string tokens (e.g. the list from ``jieba.lcut``).
        excludes: Iterable of words to remove from the result. Words that
            never occurred are ignored rather than raising ``KeyError``.
        aliases: Mapping from a token to the name it is counted under.
            Defaults to the module-level ``ALIASES``.

    Returns:
        A ``collections.Counter`` mapping each remaining word to its count.
    """
    if aliases is None:
        aliases = ALIASES
    # Tokens of length 1 are skipped before alias lookup, as in the original
    # if/elif chain.
    counts = Counter(
        aliases.get(word, word) for word in words if len(word) != 1
    )
    # Exclusion happens after aliasing, so it applies to the merged names.
    for word in excludes:
        counts.pop(word, None)
    return counts


def top_words(counts, limit=15):
    """Return up to ``limit`` ``(word, count)`` pairs, most frequent first.

    Ties keep the order in which the words were first counted (the sort is
    stable). Fewer than ``limit`` pairs are returned when the mapping is
    smaller, instead of raising ``IndexError``.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main():
    """Read the text at ``path``, segment it with jieba and print the top 15."""
    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8') as source:
        text = source.read()
    words = jieba.lcut(text)
    counts = count_words(words, excludes)
    for word, count in top_words(counts):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()

 

Guess you like

Source: www.cnblogs.com/Glzt/p/12651325.html