Chinese word-frequency statistics for the "Central Document", with the results saved to a file

# Author: ZCB
#
# Chinese word frequency statistics: segment a text file into words with
# jieba, count how often each multi-character word occurs, and write the
# most frequent words to a result file, most frequent first.
import sys
from collections import Counter

# NOTE(review): the listing this script was recovered from had the input file
# name machine-translated ("government work report .txt"); the Chinese name
# below is a best-effort reconstruction -- confirm it against the real file.
INPUT_PATH = "D:/政府工作报告.txt"
OUTPUT_PATH = "d:/统计结果.txt"

# Words to leave out of the statistics; change this on demand.
# NOTE(review): these entries were also machine-translated in the recovered
# listing (' to ', ' and', ' A ', ...), so the values here are a best-effort
# reconstruction -- confirm them. As reconstructed they are all single
# characters, which the length filter in count_words() already drops; the set
# matters once longer stop words are added to it.
EXCLUDE = frozenset({"“", "”", "要", "和", ",", "一", "\n", "。", "、"})

# Only words that occur MORE than this many times are reported.
MIN_COUNT = 25


def count_words(words, exclude=EXCLUDE):
    """Count the occurrences of each word worth reporting.

    Args:
        words: Iterable of word strings (e.g. the list from ``jieba.lcut``).
        exclude: Container of words to ignore entirely.

    Returns:
        A ``Counter`` mapping word -> number of occurrences. Words of length
        one (single characters, punctuation, newlines) and words found in
        *exclude* are skipped. Keys keep first-seen order.
    """
    return Counter(w for w in words if len(w) != 1 and w not in exclude)


def format_report(counts, min_count=MIN_COUNT):
    """Build the report lines for the frequent words.

    Args:
        counts: ``Counter`` as returned by :func:`count_words`.
        min_count: Words occurring this many times or fewer are left out.

    Returns:
        A list of lines, highest count first. ``most_common()`` uses a stable
        descending sort, so words with equal counts stay in first-seen order.
    """
    return [
        '"{}"出现了{}次'.format(word, n)
        for word, n in counts.most_common()
        if n > min_count
    ]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, encoding=None):
    """Read *input_path*, count its words and write the report to *output_path*.

    Args:
        input_path: Text file to analyse.
        output_path: File the report is written to (overwritten if present).
        encoding: Encoding used for both files. ``None`` means the platform
            default; pass e.g. ``"utf-8"`` or ``"gbk"`` to be explicit.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Imported here rather than at module level so the pure counting helpers
    # stay importable on machines without the third-party tokenizer.
    import jieba

    # `with` guarantees the handles are closed even if reading/writing fails.
    with open(input_path, "r", encoding=encoding) as src:
        text = src.read()

    lines = format_report(count_words(jieba.lcut(text)))

    # Write straight to the file instead of temporarily swapping sys.stdout,
    # which would leave stdout redirected if anything raised mid-report.
    with open(output_path, "w", encoding=encoding) as dst:
        for line in lines:
            print(line, file=dst)


if __name__ == "__main__":
    sys.exit(main())

 

Guess you like

Origin www.cnblogs.com/zach0812/p/11258982.html