机器学习之情感分析

情感分析

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# sent.py
import re

import nltk.corpus as nc
import nltk.classify as cf
import nltk.classify.util as cu
def _load_labeled_reviews(category, label):
    """Build one labelled bag-of-words sample per review in *category*.

    Args:
        category: movie_reviews category passed to ``fileids`` ('pos'/'neg').
        label: class label attached to every sample of that category.

    Returns:
        A list of ``(features, label)`` pairs, where ``features`` maps each
        distinct token of a review to ``True`` (presence only, no counts).
    """
    return [
        ({word: True for word in nc.movie_reviews.words(fileid)}, label)
        for fileid in nc.movie_reviews.fileids(category)
    ]


# The two classes are kept in separate lists so each one can be split into
# train/test portions on its own further down.
pdata = _load_labeled_reviews('pos', 'POSITIVE')
ndata = _load_labeled_reviews('neg', 'NEGATIVE')
# Use the first 80% of each class for training and hold out the rest, so both
# labels appear in the same proportion in the train and test sets.
TRAIN_RATIO = 0.8
pnumb, nnumb = int(len(pdata) * TRAIN_RATIO), int(len(ndata) * TRAIN_RATIO)
train_data = pdata[:pnumb] + ndata[:nnumb]
test_data = pdata[pnumb:] + ndata[nnumb:]
# Train exactly once: fitting the same data a second time only repeats the
# work and yields the same classifier.
model = cf.NaiveBayesClassifier.train(train_data)
ac = cu.accuracy(model, test_data)
print(ac)
# Print the names of the ten most informative features (each entry's first
# element is the feature name).
tops = model.most_informative_features()
for top in tops[:10]:
    print(top[0])
reviews = [
    'It is an amazing movie.',
    'This is a dull movie. I would never recommend it to anyone.',
    'The cinematography is pretty great in this move.',
    'The direction was terrible and the story was all over the place.']


def _review_features(text):
    """Return the bag-of-words feature dict for one raw review string.

    The text is lower-cased and split into runs of word characters and runs
    of punctuation. A plain ``str.split()`` would leave tokens such as
    'movie.' or 'It', which do not match the lower-case, punctuation-free
    feature names the classifier was trained on and therefore contribute
    nothing to the prediction.
    NOTE(review): the pattern mirrors NLTK's WordPunctTokenizer, which is
    presumably what the movie_reviews reader uses - confirm against the
    installed NLTK version.
    """
    tokens = re.findall(r'\w+|[^\w\s]+', text.lower())
    return dict.fromkeys(tokens, True)


# Classify every sample review, remembering the winning label and its
# probability so they can be printed side by side afterwards.
sents, probs = [], []
for review in reviews:
    pcls = model.prob_classify(_review_features(review))
    sent = pcls.max()
    sents.append(sent)
    probs.append(pcls.prob(sent))
for review, sent, prob in zip(reviews, sents, probs):
    # '%.2f' already rounds to two decimals; no separate round() is needed.
    print(review, '->', sent, '%.2f%%' % (prob * 100))

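Sample output (from one run; exact numbers may vary with the NLTK version and how the input text is tokenized):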

0.735
outstanding
insulting
vulnerable
ludicrous
uninvolving
astounding
avoids
fascination
anna
animators
It is an amazing movie. -> POSITIVE 63.16%
This is a dull movie. I would never recommend it to anyone. -> NEGATIVE 76.52%
The cinematography is pretty great in this move. -> POSITIVE 68.67%
The direction was terrible and the story was all over the place. -> NEGATIVE 67.03%

猜你喜欢

转载自blog.csdn.net/lc574260570/article/details/82051798
今日推荐