"""Sentiment analysis of Taoguba posts stored in MongoDB.

For each source collection (``List1`` .. ``List5``) the script strips
punctuation and ASCII alphanumerics from every document's ``Content`` field,
filters stop characters, segments the text with jieba, scores it with SnowNLP
and stores one score per document in ``<collection>_emotion``.
"""
import functools
import re
from collections import Counter

import jieba
from pymongo import MongoClient
from snownlp import SnowNLP

USER_DICT_PATH = "C:/Users/jieba/dict_lzf.txt"
STOPWORDS_PATH = "C:/Users/stopworld.txt"

# Compiled once instead of on every document.
_PUNCTUATION_RE = re.compile(
    '[’!@#~¥%……&*() ——+|}{“:”?》《,。、‘;’、】【!"#$%&\'()*+,-./:; <=>?@[\\]^_`{|}~]+'
)
_ALNUM_RE = re.compile('[a-zA-Z0-9]')


@functools.lru_cache(maxsize=None)
def _get_db():
    """Return the ``Taoguba`` database, creating the MongoDB client only once."""
    return MongoClient('localhost', 27017)['Taoguba']


@functools.lru_cache(maxsize=None)
def _load_stop_chars():
    """Read the stop-word file once and return the set of characters in it."""
    with open(STOPWORDS_PATH, 'r', encoding='utf-8') as handle:
        return frozenset(handle.read())


def load_file():
    """Score every document of ``List1`` .. ``List5`` and store the results.

    Loads the custom jieba dictionary, then for each collection: cleans and
    segments each document's ``Content``, computes the sentiment scores,
    prints them, writes them to ``<collection>_emotion`` with ids starting at
    1, and prints the classification summary.

    Raises:
        KeyError: if a document has no ``Content`` field.
    """
    jieba.load_userdict(USER_DICT_PATH)
    db = _get_db()
    name = 'List'
    for index in range(5):
        db_name = name + str(index + 1)
        e_name = db_name + str("_emotion")

        segmented = []
        for document in db[db_name].find():
            text = _PUNCTUATION_RE.sub('', document["Content"])
            text = _ALNUM_RE.sub('', text)
            segmented.append(cut_package(stop_dict(text)))

        scores = emotion(segmented)
        print("情感相似度如下:")
        print(scores)

        for record_id, score in enumerate(scores, start=1):
            write_to_DB(e_name, record_id, score)

        print("情感分类结果如下:")
        sum_number(scores)


def stop_dict(news):
    """Return ``news`` without any character that occurs in the stop-word file.

    NOTE(review): the filter works on single characters, not on whole words,
    so every character of a multi-character stop word is removed wherever it
    appears. This matches the previous behaviour; confirm it is intended.
    """
    stop_chars = _load_stop_chars()
    return ''.join(char for char in news if char not in stop_chars)


def cut_package(news):
    """Segment ``news`` with jieba's accurate mode and join tokens with spaces."""
    return ' '.join(jieba.cut(news, cut_all=False))


def emotion(text):
    """Return the SnowNLP ``sentiments`` score for each string in ``text``."""
    return [SnowNLP(item).sentiments for item in text]


def sum_number(summarry, threshold=0.6):
    """Print a positive/negative tally for a sequence of sentiment scores.

    Args:
        summarry: iterable of numeric sentiment scores.
        threshold: scores strictly greater than this are labelled 1,
            all others 0.

    Prints the label list, the count of each label present and the overall
    verdict. A tie, including an empty input, is reported as positive.
    """
    number = [1 if score > threshold else 0 for score in summarry]
    print(number)

    counts = Counter(number)
    print("情感统计结果如下:")
    for label in sorted(counts):
        print(" %i 一共出现了%a次!" % (label, counts[label]))

    if counts[0] <= counts[1]:
        print("文档偏积极型!")
    else:
        print("文档偏消极型!")


def write_to_DB(name, id, emotion):
    """Insert or overwrite the score document ``id`` in collection ``name``.

    Args:
        name: target collection name inside the ``Taoguba`` database.
        id: value stored as the document's ``_id``.
        emotion: sentiment score stored under the ``Emotion`` key.
    """
    # ``Collection.save`` no longer exists in PyMongo 4; an upserting
    # ``replace_one`` has the same insert-or-overwrite semantics.
    _get_db()[name].replace_one(
        {"_id": id},
        {"_id": id, "Emotion": emotion},
        upsert=True,
    )


def main():
    """Run the full sentiment-analysis pipeline."""
    load_file()


if __name__ == '__main__':
    main()
文本情感分析
猜你喜欢
转载自blog.csdn.net/luzaofa/article/details/79712309
今日推荐
周排行