Python【词频统计】算法2个(待完成)

数据量较少时,直接输出

from collections import Counter
import jieba.posseg as jp
# Count every (word, flag) pair yielded by jieba.posseg.cut and export the
# full frequency table. `text` is the input string and must already be
# defined before this snippet runs.
counter = Counter()
posseg = jp.cut(text)
for p in posseg:
    # Custom filter conditions go here (e.g. `continue` to skip a pair).
    counter[(p.word, p.flag)] += 1
# With no argument, most_common() returns all pairs, most frequent first.
most = counter.most_common()
print(most)

# Write to Excel: one row per (word, flag) pair with its frequency.
import pandas as pd
rows = [(word, flag, frequency) for (word, flag), frequency in most]
# index=False is the documented boolean way to omit the row-index column
# (the previous index=None only worked because None is falsy).
pd.DataFrame(rows, columns=['word', 'flag', 'frequency'])\
    .to_excel('word_count.xlsx', index=False)

数据量较大时,分批保存(待完成:以下代码目前仅输出频率最高的前 99 项,尚未实现分批保存)

from collections import Counter
import jieba.posseg as jp
# Tally (flag, word) pairs yielded by jieba.posseg.cut and show the 99 most
# frequent ones. `text` is the input string and must be defined beforehand.
posseg = jp.cut(text)
counter = Counter(
    (token.flag, token.word)
    for token in posseg
    # Custom filter conditions go here, as an `if ...` clause.
)
# Only the top 99 entries are kept, most frequent first.
most = counter.most_common(99)
print(most)

猜你喜欢

转载自blog.csdn.net/Yellow_python/article/details/82913490