# Text sentiment analysis (文本情感分析)

import jieba
import re
from pymongo import MongoClient
from snownlp import SnowNLP

def load_file():
    """Score the sentiment of every post in the Taoguba collections.

    Loads the custom jieba dictionary, then for each of the collections
    ``List1`` .. ``List5`` in the local ``Taoguba`` MongoDB database:

    1. reads the ``Content`` field of every document,
    2. strips punctuation, ASCII letters and digits,
    3. removes stop words (``stop_dict``) and segments the text
       (``cut_package``),
    4. scores all texts with ``emotion`` and prints the scores,
    5. stores the n-th score (1-based) under ``_id`` n in the collection
       ``<name>_emotion`` through ``write_to_DB``,
    6. prints the classification summary via ``sum_number``.

    Returns:
        None. Results are printed and written to the database.
    """
    jieba.load_userdict("C:/Users/jieba/dict_lzf.txt")  # custom user dictionary

    # Loop-invariant patterns: compiled once instead of once per document.
    punctuation = re.compile(
        '[’！@#~￥%……&*（） ——+|}{“：”？》《，。、‘；’、】【!"#$%&\'()*+,-./:; <=>?@[\\]^_`{|}~]+')
    alphanumeric = re.compile('[a-zA-Z0-9]')

    client = MongoClient('localhost', 27017)  # connect to the database
    try:
        db = client['Taoguba']
        for index in range(1, 6):
            db_name = 'List' + str(index)

            # Clean and segment every document of the current collection.
            segmented = []
            for document in db[db_name].find():
                text = punctuation.sub('', document["Content"])
                text = alphanumeric.sub('', text)
                segmented.append(cut_package(stop_dict(text)))

            scores = emotion(segmented)
            print("情感相似度如下：")
            print(scores)

            # Persist each score under a sequential, 1-based id.
            e_name = db_name + "_emotion"
            for id_, score in enumerate(scores, start=1):
                write_to_DB(e_name, id_, score)

            print("情感分类结果如下：")
            sum_number(scores)
    finally:
        # Release the connection even if processing fails part-way.
        client.close()

def stop_dict(news, stopwords_path="C:/Users/stopworld.txt"):
    """Remove stop-word characters from *news*.

    The stop-word file is read as a single UTF-8 string, and every element
    of *news* that occurs inside that string is dropped. When *news* is a
    plain string the filtering therefore happens character by character,
    and characters such as the file's line breaks are filtered out too.

    Args:
        news: Text to filter.
        stopwords_path: Path of the UTF-8 encoded stop-word file. Defaults
            to the location that was previously hard-coded.

    Returns:
        The elements of *news* that were kept, concatenated into one string.
    """
    # The context manager closes the file handle; it used to be leaked.
    with open(stopwords_path, 'r', encoding='utf-8') as handle:
        stopwords = handle.read()
    return ''.join(word for word in news if word not in stopwords)

def cut_package(news):
    """Segment *news* with jieba and return the tokens joined by spaces.

    Precise mode (``cut_all=False``) is used. Notes left by the original
    author listed two alternatives that are not used here: full mode,
    ``jieba.cut(news, cut_all=True)``, and search-engine mode,
    ``jieba.cut_for_search(news)``.
    """
    tokens = jieba.cut(news, cut_all=False)
    return ' '.join(tokens)

def emotion(text):
    """Return the SnowNLP ``sentiments`` value of each item in *text*.

    The result list has one entry per input item, in the same order.
    """
    return [SnowNLP(item).sentiments for item in text]

def sum_number(summarry):
    """Print a binary classification of the scores and an overall verdict.

    Every score strictly greater than 0.6 is labelled 1, all others 0.
    The label list, the count of each label that occurs, and a final
    verdict are printed. The verdict is "positive" when there are at
    least as many 1 labels as 0 labels (ties and empty input included),
    otherwise "negative". Nothing is returned.
    """
    labels = [1 if score > 0.6 else 0 for score in summarry]
    print(labels)

    print("情感统计结果如下：")
    for label in set(labels):
        print("  %i  一共出现了%a次！" % (label, labels.count(label)))

    negatives = labels.count(0)
    positives = labels.count(1)
    verdict = "文档偏积极型！" if negatives <= positives else "文档偏消极型！"
    print(verdict)

def write_to_DB(name, id, emotion):
    """Insert or overwrite one sentiment score in the ``Taoguba`` database.

    Args:
        name: Name of the target collection.
        id: Value stored as the document's ``_id``.
        emotion: Score stored under the ``Emotion`` key.
    """
    client = MongoClient('localhost', 27017)  # connect to the database
    try:
        collection = client['Taoguba'][name]
        # Collection.save() is deprecated in PyMongo 3 and removed in
        # PyMongo 4; an upserting replace_one() keeps the same
        # insert-or-overwrite behaviour for a document with a known _id.
        collection.replace_one(
            {"_id": id},
            {"_id": id, "Emotion": emotion},
            upsert=True,
        )
    finally:
        # A client is opened per call, so close it to avoid leaking
        # connections when many scores are written.
        client.close()

def main():
    """Script entry point: run the whole pipeline through ``load_file``."""
    load_file()


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 猜你喜欢 ("You may also like") -- leftover web-page text, not part of the script.