import nltk.corpus as nc
import nltk.classify as cf
import nltk.classify.util as cu
pdata = []
fileids = nc.movie_reviews.fileids('pos')
for fileid in fileids:
feature = {}
words = nc.movie_reviews.words(fileid)
for word in words:
feature[word] = True
pdata.append((feature,'POSITIVE'))
ndata = []
fileids = nc.movie_reviews.fileids('neg')
for fileid in fileids:
feature = {}
words = nc.movie_reviews.words(fileid)
for word in words:
feature[word] = True
ndata.append((feature,'NEGATIVE'))
pnumb,nnumb = int(0.8*len(pdata)),int(0.8*len(ndata))
train_data = pdata[:pnumb] + ndata[:nnumb]
test_data = pdata[pnumb:] + ndata[nnumb:]
model = cf.NaiveBayesClassifier.train(train_data)
ac = cu.accuracy(model,test_data)
print('%.2f%%'%round(ac*100,2))
tops = model.most_informative_features()
for top in tops[:10]:
print(top[0])
reviews = ['It is an amazing movie.',
'This is a dull movie. I would never recommend it to anyone',
'The cinematography is pretty great in this movie.',
'This direction was terrible and the story was all over the place']
sents,probs = [],[]
for review in reviews:
feature = {}
words = review.split()
for word in words:
feature[word] = True
pcls = model.prob_classify(feature)
sent = pcls.max()
prob = pcls.prob(sent)
sents.append(sent)
probs.append(prob)
for review,sent,prob in zip(reviews,sents,probs):
print(review,'->',sent,'%.2f%%'%round(prob*100,2))
自然语言处理(NLP) 五:情感分析
猜你喜欢
转载自blog.csdn.net/weixin_38246633/article/details/80639680
今日推荐
周排行