Python学习之四大名著人物出场次数Python代码

《三国演义》,《水浒传》,《西游记》的人物出场次数Python代码:

  • 经过代码运行的结果可以看出三国作者对曹操和孔明比较喜爱;水浒作者对宋江和武松比较喜爱;西游作者对齐天大圣孙悟空比较喜爱
  • 通过这类代码,我们可以看出一篇文章中作者想表达的主要的一些东西
# Romance of the Three Kingdoms (三国演义)
# Segment the novel with jieba, merge aliases of the same character, drop
# frequent non-name words, and print the ten most frequent remaining words
# together with the elapsed run time.
print("三国演义人物出场次数:")
import jieba                                # third-party Chinese word segmentation
import time                                 # used to measure the run time
start = time.perf_counter()
# Use a context manager so the file handle is closed deterministically.
with open("三国演义.txt", "r", encoding="utf-8") as f:
    txt = f.read()
# High-frequency words that are not character names (found by repeated runs).
excludes = {"将军", "却说", "二人", "后主", "上马", "不知", "天子", "大叫", "众将", "不可",
            "主公", "蜀兵", "只见", "如何", "商议", "都督", "一人", "汉中", "不敢", "人马",
            "陛下", "魏兵", "天下", "今日", "左右", "东吴", "于是", "荆州", "不能", "如此",
            "大喜", "引兵", "次日", "军士", "军马"}
# Alternative names/titles mapped to one canonical name per character.
aliases = {
    "诸葛亮": "孔明", "孔明曰": "孔明",
    "关公": "关羽", "云长": "关羽",
    "玄德": "刘备", "玄德曰": "刘备",
    "孟德": "曹操", "丞相": "曹操",
}
words = jieba.lcut(txt)
counts = {}
for word in words:
    if len(word) == 1:
        # Single-character tokens are skipped entirely.
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word may not occur in the text at all.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing tolerates texts that yield fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))
print("-----------------------------------")


# Water Margin (水浒传)
# Segment the novel with jieba, merge aliases of the same character, drop
# frequent non-name words, and print the ten most frequent remaining words
# together with the elapsed run time.
print("水浒传人物出场次数:")
import jieba
import time
start = time.perf_counter()
# Use a context manager so the file handle is closed deterministically.
with open("水浒传.txt", "r", encoding="utf-8") as f:
    txt = f.read()
# High-frequency words that are not character names.
excludes = {"二人", "一个", "来到", "人马", "你们", "我们", "好汉",
            "知府", "什么", "他们", "银子", "梁山", "两个"}
# Forms of address mapped to the character they are credited to.
aliases = {"哥哥": "宋江", "头领": "林冲"}
words = jieba.lcut(txt)
counts = {}
for word in words:
    if len(word) == 1:
        # Single-character tokens are skipped entirely.
        continue
    rword = aliases.get(word, word)
    # Count under the canonical name for every token, so that alias
    # occurrences are credited to the character instead of being dropped.
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word may not occur in the text at all.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing tolerates texts that yield fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))
print("-----------------------------")

# Journey to the West (西游记)
# Segment the novel with jieba, merge aliases of the same character, drop
# frequent non-name words, and print the nine most frequent remaining words
# together with the elapsed run time.
print("西游记人物出场次数:")
import jieba
import time
start = time.perf_counter()
# Use a context manager so the file handle is closed deterministically.
with open("西游记.TXT", "r", encoding="utf-8") as f:
    txt = f.read()
# High-frequency words that are not character names.
excludes = {"一个", "那里", "怎么", "我们", "不知", "两个", "甚么", "只见", "不是",
            "原来", "不敢", "闻言", "如何"}
# Alternative names/titles mapped to one canonical name per character.
aliases = {
    "行者": "悟空", "大圣": "悟空", "老孙": "悟空",
    "师父": "唐僧", "三藏": "唐僧", "长老": "唐僧",
    # NOTE(review): "和尚" is a generic word for "monk"; crediting it to
    # 沙僧 is kept from the original mapping -- confirm this is intended.
    "和尚": "沙僧",
    # "呆子" is the nickname used for 猪八戒 in the novel, not for 沙僧.
    "呆子": "八戒",
}
words = jieba.lcut(txt)
counts = {}
for word in words:
    if len(word) == 1:
        # Single-character tokens are skipped entirely.
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default: an excluded word may not occur in the text at all.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing tolerates texts that yield fewer than nine distinct words.
for word, count in items[:9]:
    print("{0:<10}{1:>5}次".format(word, count))
dur = time.perf_counter() - start
print("运行时间为{:.2f}s".format(dur))

猜你喜欢

转载自blog.csdn.net/qq_42735631/article/details/81186685