【NLP】词频统计的3种方法:时间复杂度、空间复杂度对比。

#第一种办法
import re 
from collections import Counter

def get_max_value_vl(text):
    """Return the most frequent ASCII letter or digit in *text*.

    The text is lower-cased first, so counting is case-insensitive.
    Every character that is not an ASCII letter or digit is ignored.
    When several characters share the highest count, the one that sorts
    first is returned (digits sort before letters).

    Raises:
        ValueError: if *text* contains no ASCII letter or digit.
    """
    # Keep only the alphanumeric characters, one list entry per character.
    kept_chars = re.findall('[a-zA-Z0-9]', text.lower())
    frequencies = Counter(kept_chars)

    # max() raises ValueError here when nothing was kept.
    highest = max(frequencies.values())

    # The smallest tied character is the same one a sort would put first.
    return min(ch for ch, seen in frequencies.items() if seen == highest)

#第二种方法
from collections import Counter

def get_max_value_v2(text):
    """Return the most frequent alphabetic character in *text*.

    The text is lower-cased first, so counting is case-insensitive.
    Only characters for which ``str.isalpha()`` is true are counted.
    When several characters share the highest count, the one that sorts
    first is returned.

    Raises:
        ValueError: if *text* contains no alphabetic character.
    """
    tally = Counter()
    for ch in text.lower():
        if ch.isalpha():
            tally[ch] += 1

    # max() raises ValueError here when no letter was counted.
    top = max(tally.values())

    tied = [ch for ch, seen in tally.items() if seen == top]
    tied.sort()
    return tied[0]


#第三种方法
import string

def get_max_value_v3(text):
    """Return the most frequent ASCII lowercase letter in *text*.

    The text is lower-cased first, so counting is case-insensitive.
    Each letter of ``string.ascii_lowercase`` is counted in turn with
    ``str.count``. When several letters share the highest count, the
    earliest one in the alphabet is returned. If no letter occurs at
    all, every count is zero and ``'a'`` is returned.
    """
    lowered = text.lower()
    winner = None
    winner_count = -1
    for letter in string.ascii_lowercase:
        occurrences = lowered.count(letter)
        # Strict comparison keeps the first letter that reaches the
        # maximum, matching how max() resolves ties.
        if occurrences > winner_count:
            winner = letter
            winner_count = occurrences
    return winner

猜你喜欢

转载自blog.csdn.net/qq_41664845/article/details/80059844
今日推荐