jieba 分词 hamlet

 1 def getTaxt():
 2     txt=open('hamlet.txt')
 3     txt = txt.lower()
 4     for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~':
 5         txt = txt.replace(ch, " ")   #将文本中特殊字符替换为空格
 6     return txt
 7 
 8 hamletTxt = getText()
 9 words  = hamletTxt.split()
10 counts = {}
11 for word in words:            
12     counts[word] = counts.get(word,0) + 1
13 items = list(counts.items())
14 items.sort(key=lambda x:x[1], reverse=True) 
15 for i in range(10):
16     word, count = items[i]
17     # print ("{0:<10}{1:>5}".format(word, count))  输出出现最多的10个单词和其出现次数
18     print (word,count)  #输出出现最多的10个单词

猜你喜欢

转载自www.cnblogs.com/ghh0/p/12642264.html