Proyecto práctico en Python: implementación manual del algoritmo LDA con Gensim para el análisis de sentimientos

#!/usr/bin/env python

# -*- encoding: utf-8 -*-

'''
@Author  :   {Jack Zhao}

@Time    :   2020/1/9 10:26

@Contact :   {[email protected]}

@Desc    :  测试新字段,加入TF-IDF,替换数据源为London,最终版代码
'''
import math

import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt

from gensim import corpora, models, similarities
import gensim

import pyLDAvis.gensim
from collections import defaultdict
import nltk
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob

#去空
def clean_none():
	# 这里修改为utf-8也没用,依旧报错UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 0-1: invali,需要将csv文件
	# 另存为utf-8格式,纯属智障操作,导致乱码
	# 使用London数据集
	df = pd.read_csv("../data/London.csv")
	# 去空字段
	print("原本数据集共%d条数据\n" % (len(df.values)))
	# 这里将衡量字段reviews_per_month放入去空
	df = df.dropna(subset = ['id', 'host_abou

Quizás también te interese

Fuente: blog.csdn.net/weixin_40539952/article/details/108537720
Recomendado
Clasificación