电影评论分类:二分类问题

 1 ##数据预处理
 2 #加载IMDB数据集
 3 from keras.datasets import imdb
 4 (train_data, train_labels),(test_data, test_labels) = imdb.load_data(num_words = 10000)
 5 word_index = imdb.get_word_index()
 6 reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
 7 decoded_review = ''.join(
 8 [reverse_word_index.get(i - 3, '?') for i in train_data[0]]
 9 )
10 decoded_review  #将评论解码
import numpy as np


def vectorize_sequences(sequences, dimension=10000, dtype=np.float64):
    """Multi-hot encode integer sequences into a 2-D array.

    Args:
        sequences: Sized iterable of integer index sequences; every index
            must be valid for an axis of length ``dimension``.
        dimension: Number of columns in the output (default 10000).
        dtype: NumPy dtype of the output.  Defaults to ``np.float64``,
            which is what ``np.zeros`` produces when no dtype is given;
            pass ``'float32'`` to halve the memory footprint.

    Returns:
        Array of shape ``(len(sequences), dimension)`` in which
        ``result[i, j]`` is 1 if ``j`` occurs in ``sequences[i]`` and 0
        otherwise.

    Raises:
        IndexError: If a sequence contains an index outside the valid
            range for ``dimension``.
    """
    result = np.zeros((len(sequences), dimension), dtype=dtype)
    for i, sequence in enumerate(sequences):
        # Index-array assignment sets every listed column of row i at once;
        # a repeated index just writes the same 1 again.
        result[i, sequence] = 1
    return result
# Encode the training and test reviews with vectorize_sequences
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
## Vectorize the labels as float32 arrays
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics

# Define the model: two 16-unit ReLU layers followed by a single sigmoid
# output unit; the first layer takes 10000-dimensional input vectors.
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# The three compile() calls below show alternative ways of configuring the
# same model.
# NOTE(review): presumably each call replaces the previous configuration,
# so only the last compile() before fit() matters -- confirm.

# Compile the model using string identifiers
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Configure the optimizer explicitly
# NOTE(review): `lr` is the argument name this listing was written against;
# newer Keras releases call it `learning_rate` -- confirm against the
# installed Keras version before running.
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

## Use loss and metric function objects instead of string identifiers
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])
## Set aside a validation set: the first 10000 samples are held out for
## validation and the remaining samples are used for training.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

# Train the model for 20 epochs in mini-batches of 512 samples, evaluating
# on the held-out validation data passed via validation_data.
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))
## model.fit() returns a history object; its .history attribute is read
## below as a dict (the plotting code looks up 'loss' and 'val_loss' in it).
history_dict = history.history
history_dict.keys()  # notebook-style display of the recorded keys

64 ##绘制训练损失和验证损失
65 %matplotlib inline
66 import matplotlib.pyplot as plt
67 history_dict = history.history
68 loss_values = history_dict['loss']
69 val_loss_values = history_dict['val_loss']
70 epochs = range(1, len(loss_values) + 1)
71 plt.plot(epochs, loss_values, 'bo', label = 'Training loss')
72 plt.plot(epochs, val_loss_values, 'b', label = 'Validation loss')
73 plt.title('Training and Validation loss')
74 plt.xlabel('Epochs')
75 plt.ylabel('Loss')
76 plt.legend()

猜你喜欢

转载自www.cnblogs.com/wangmengzhu/p/10748343.html