Estadísticas del vocabulario de alta frecuencia del TOEFL

# Characters (punctuation plus the newline) deleted from every line before it
# is split into words.
punc_list = [',', '.', '\n', '?', '\'', '\"', '(', ')', ':']
# A single translation table removes all of the characters above in one pass
# instead of one str.replace() call per character per line.
_punc_table = str.maketrans('', '', ''.join(punc_list))

with open('./tofel100.txt', encoding='utf-8') as f:
	p = f.readlines()
	all_words = []
	for s in p:
		# split() with no argument splits on any run of whitespace and never
		# yields '' -- so blank lines and repeated spaces no longer add an
		# empty "word" to the tally (split(' ') did).
		# extend() appends in place rather than rebuilding the list per line.
		all_words.extend(s.translate(_punc_table).split())

# Distinct words; the order of word_list is arbitrary because it comes from a set.
set_words = set(all_words)
word_list = list(set_words)

# NOTE(review): this value is not read anywhere in the visible code.
count_word = 'all'
print('The number of words in word_list is: ',len(word_list))

def get_word_counts(count_word):
	"""Return how many entries of the module-level ``all_words`` equal *count_word*.

	The comparison is exact (``==``), so it is case-sensitive for strings.
	Returns 0 when nothing matches.
	"""
	# Every matching token contributes 1 to the total.
	return sum(1 for token in all_words if count_word == token)

# Map each distinct word to its number of occurrences in all_words.
# Counter tallies everything in a single pass over all_words, instead of
# rescanning the whole list once per distinct word.
# Keys appear in first-occurrence order, which makes the order of equal
# counts deterministic from run to run.
from collections import Counter

# Converted back to a plain dict so the name keeps its original type.
word_counter_dict = dict(Counter(all_words))

def get_sorted_tuple_list_from_dict(count_dict):
	"""Return the ``(key, value)`` pairs of *count_dict*, largest value first.

	The sort is stable: pairs with equal values keep the relative order they
	have in *count_dict*. The input mapping is not modified.
	"""
	def pair_value(pair):
		# Sort on the value half of each (key, value) tuple.
		return pair[1]

	ranked_pairs = list(count_dict.items())
	ranked_pairs.sort(key=pair_value, reverse=True)
	return ranked_pairs

# Print one (word, count) tuple per line, most frequent word first.
sorted_counts = get_sorted_tuple_list_from_dict(word_counter_dict)
for element in sorted_counts:
	print(element)
273 artículos originales publicados · 40 elogios · más de 30 000 visitas

Supongo que te gusta

Origin blog.csdn.net/weixin_41855010/article/details/105235358
Recomendado
Clasificación