import jieba
txt = open(r"C:\Users\lenovo\Desktop\threekingdoms.txt","r",encoding="utf-8").read()
excludes = {"将军","却说","二人","不可","荆州","不能","如此"}
words = jieba.lcut(txt)
counts = {}
for word in words:
if len(word) == 1:
continue
elif word=="诸葛亮"or word =="孔明曰":
rword = "孔明"
else:
rword = word
counts[rword] = counts.get(rword,0) + 1
for word in excludes:
del(counts[word])
items = list(counts.items())
items.sort(key=lambda x:x[1],reverse=True)
for i in range(15):
word,count = items[i]
print("{0:<10}{1:>5}".format(word,count))
txt = open(r"C:\Users\lenovo\Desktop\threekingdoms.txt","r",encoding="utf-8").read()
excludes = {"将军","却说","二人","不可","荆州","不能","如此"}
words = jieba.lcut(txt)
counts = {}
for word in words:
if len(word) == 1:
continue
elif word=="诸葛亮"or word =="孔明曰":
rword = "孔明"
else:
rword = word
counts[rword] = counts.get(rword,0) + 1
for word in excludes:
del(counts[word])
items = list(counts.items())
items.sort(key=lambda x:x[1],reverse=True)
for i in range(15):
word,count = items[i]
print("{0:<10}{1:>5}".format(word,count))