"""Segment a UTF-8 Chinese text file with jieba.

Part 1: write every token of the input file to an output file, one per line.
Part 2: blank out punctuation, re-segment, count frequencies of words longer
than one character (minus an exclusion set) and print the 15 most common.
"""
import jieba

fp1 = r'D:/python/a.txt'      # input text file
outph = r'D:/python/out.txt'  # one-token-per-line dump

with open(fp1, 'r', encoding='utf-8') as f:
    txt = f.read().strip()

with open(outph, 'w', encoding='utf-8') as f:
    for word in jieba.lcut(txt):
        f.write(word)
        f.write('\n')

# --- Part 2: word-frequency statistics --------------------------------
# Punctuation to replace with spaces before re-segmenting.
# NOTE(review): the original literal was garbled by translation; this is
# the standard ASCII punctuation set — confirm whether full-width Chinese
# punctuation (，。？！；：etc.) should also be included.
bd = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

# Words to drop from the statistics.
# NOTE(review): original had ' author ' / ' after ' with stray spaces
# inside the quotes (could never match a jieba token) — trimmed here.
excludes = {'author', 'after'}

for ch in bd:
    txt = txt.replace(ch, ' ')

counts = {}
for word in jieba.lcut(txt):
    if len(word) == 1:  # skip single characters (mostly particles/noise)
        continue
    counts[word] = counts.get(word, 0) + 1

for word in excludes:
    # pop() with a default: no KeyError when an excluded word never occurred
    counts.pop(word, None)

# Sort by descending frequency; the slice guards against texts that
# produce fewer than 15 distinct words (the original indexed blindly).
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
for word, count in items[:15]:
    print("{0:>10}---{1:<5}".format(word, count))