# Characters that are replaced by a space before the text is split into
# words, so punctuation glued to a word does not create a distinct token.
_SEPARATORS = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'


def getText(path="hamlet.txt"):
    """Return the lower-cased contents of a text file with punctuation blanked.

    Args:
        path: File to read. Defaults to ``hamlet.txt`` in the current
            working directory, which is the file the script was written for.

    Returns:
        The file's text in lower case, with every character listed in
        ``_SEPARATORS`` replaced by a single space.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if reading fails.
    with open(path) as source:
        txt = source.read().lower()
    # One translation pass does the same job as a chain of replace() calls.
    return txt.translate(str.maketrans(_SEPARATORS, " " * len(_SEPARATORS)))


# The function was originally defined under this misspelled name while being
# called as getText(); the alias keeps the old spelling usable.
getTaxt = getText


def _top_words(text, limit=10):
    """Return up to ``limit`` (word, count) pairs, most frequent first."""
    counts = {}
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    # sorted() is stable, so words with equal counts stay in the order in
    # which they first appear in the text.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # Slicing copes with texts that have fewer than ``limit`` distinct words.
    return ranked[:limit]


def main(path="hamlet.txt"):
    """Print the ten most frequent words of ``path`` with their counts."""
    for word, count in _top_words(getText(path)):
        print(word, count)


if __name__ == "__main__":
    main()
jieba 分词 hamlet
猜你喜欢
转载自www.cnblogs.com/ghh0/p/12642264.html
今日推荐
周排行