import jieba
import jieba.analyse as ana
# NOTE(review): `txt` and `cut_word_without_stopword` are not defined in this
# chunk — presumably provided earlier in the file; confirm before running.
# Extract jieba TF-IDF keywords with weights; the result is not bound to a
# name here, so this looks like a notebook display line.
ana.extract_tags(txt, withWeight=True)
# Tokenize each document (stopwords removed by the helper) and join tokens
# with spaces — the whitespace-delimited format CountVectorizer expects.
textlist = [" ".join(cut_word_without_stopword(x)) for x in txt]
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
# Bag-of-words term-frequency matrix: rows = documents, columns = vocabulary.
vectorizer = CountVectorizer()
x = vectorizer.fit_transform(textlist)
vectorizer.vocabulary_  # term -> column-index mapping (notebook display line)
# Re-weight raw counts into TF-IDF scores.
tftrans = TfidfTransformer()
tf_idf = tftrans.fit_transform(x)
# Dense ndarray copy of the sparse TF-IDF matrix.
tf_idf_array = tf_idf.toarray()
tf_idf_array  # notebook display line
# `todense()` returns a numpy matrix (vs. the ndarray from toarray() above).
tf_idf_dense = tf_idf.todense()
from gensim import corpora, models
# Build a gensim token->id dictionary and a bag-of-words corpus.
# NOTE(review): `data_list` is not defined in this chunk — presumably a list
# of token lists produced earlier in the file; confirm before running.
dic = corpora.Dictionary(data_list)
corpus = [dic.doc2bow(x) for x in data_list]
# Fit a TF-IDF model on the bag-of-words corpus.
tf_idf_models = models.TfidfModel(corpus)
# BUG FIX: the original referenced the undefined name `timodels`; the model
# fitted above is `tf_idf_models`. Applying the model to the corpus yields a
# lazily-transformed TF-IDF corpus.
corpus_tfidf = tf_idf_models[corpus]
# Inspect the TF-IDF weights of the n-th document (notebook display line).
# NOTE(review): `n` is not defined in this chunk — set it to a valid document
# index before running.
corpus_tfidf[n]