一、程序分析
1、读文件到缓冲区
def process_file(dst):
    """Read the whole text file at path ``dst`` into a string.

    Args:
        dst: Path of the file to read; it is opened in text mode ('r').

    Returns:
        The complete file contents, or None if the file could not be
        opened or read. In both failure cases a message is printed
        instead of an exception being raised.
    """
    # Open the file; report the OS error and give up if that fails.
    try:
        f = open(dst, 'r')
    except IOError as s:
        print(s)
        return None

    # The with-block closes the handle even when read() fails; the original
    # skipped f.close() on that path and leaked the descriptor.
    with f:
        try:
            # Read the file into the buffer.
            return f.read()
        except (IOError, UnicodeError):
            # I/O failures and undecodable text are reported the same way.
            print("Read File Error!")
            return None
2、缓冲区处理
def process_buffer(bvffer):
    """Count how often each word occurs in the text buffer ``bvffer``.

    The text is lowercased, every separator character listed below is
    replaced by a space, and the result is split on whitespace.

    Args:
        bvffer: The text to analyse (typically the return value of a file
            read); may be None or empty.

    Returns:
        A dict mapping each lowercase word to its number of occurrences,
        or None when ``bvffer`` is empty or None.
    """
    if not bvffer:
        return None

    # str.lower() returns a new string; the result must be kept, otherwise
    # "Word" and "word" are counted separately.
    bvffer = bvffer.lower()

    # Each separator is replaced individually. The curly quotes are kept as
    # whole strings (not iterated character by character) so that they are
    # also matched intact when the text is a UTF-8 byte string.
    separators = list("~@#$%^&*()_-+=<>?/,.:;{}[]|'") + ["“", "”"]
    for sep in separators:
        bvffer = bvffer.replace(sep, ' ')

    word_freq = {}
    for word in bvffer.split():
        word_freq[word] = word_freq.get(word, 0) + 1
    return word_freq