from keras.datasets import imdb

# Load the IMDB reviews restricted to the 10,000 most frequently used words
# (num_words=10000); words outside that vocabulary are not kept as
# individual tokens in the loaded sequences.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

# Inspect the first training sample and its label.
print(train_data[0])
print(train_labels[0])
# word_index maps each word to an integer (its frequency rank in the corpus).
word_index = imdb.get_word_index()
# Invert the mapping so that the integer is the key and the word is the value.
reverse_word_index = {rank: word for word, rank in word_index.items()}

# The values stored in train_data are shifted by 3 relative to word_index:
# values of 3 or less do not stand for a real word (the dataset reserves the
# lowest indices for special markers such as padding, start of text and
# unknown word -- see the Keras imdb.load_data documentation), so they are
# rendered as "?". A value of 4 therefore maps to rank 1, the most frequent
# word.
pieces = []
for encoded in train_data[0]:
    if encoded > 3:
        # Fall back to "?" instead of crashing on None if the rank is missing.
        pieces.append(reverse_word_index.get(encoded - 3, "?") + " ")
    else:
        pieces.append("?")
text = "".join(pieces)
print(text)
import numpy as np


def oneHotVectorizeText(allText, dimension=10000):
    """Turn a collection of index sequences into a 0/1 matrix.

    Args:
        allText: sequence of samples; each sample is a list of integer
            positions (each expected to be smaller than ``dimension``).
        dimension: number of columns of the resulting matrix.

    Returns:
        A 2-D array with ``len(allText)`` rows and ``dimension`` columns.
        In row ``i`` every column listed in ``allText[i]`` is 1.0 and all
        other columns are 0.0.
    """
    shape = (len(allText), dimension)
    oneHotMatrix = np.zeros(shape)
    for row_number, positions in enumerate(allText):
        # Indexing with a list sets every listed column of this row at once.
        oneHotMatrix[row_number, positions] = 1.0
    return oneHotMatrix


# Vectorize the training and test samples, then show the first training row.
x_train, x_test = (oneHotVectorizeText(samples) for samples in (train_data, test_data))
print(x_train[0])

# Labels become float32 arrays.
y_train, y_test = (
    np.asarray(labels).astype('float32') for labels in (train_labels, test_labels)
)
from keras import models
from keras import layers

model = models.Sequential()
# First hidden layer: 16 units fed by the 10000-element input vector.
# Dense means every input is connected to every unit of the layer.
# relu(x) = max(0, x).
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
# Second hidden layer: 16 units, fully connected to the previous layer.
model.add(layers.Dense(16, activation='relu'))
# Output layer: a single sigmoid unit producing a probability between 0 and 1.
model.add(layers.Dense(1, activation='sigmoid'))
import matplotlib.pyplot as plt

# Plot relu(x) = max(0, x) over [-10, 10] to illustrate the activation.
x = np.linspace(-10, 10)
y_relu = np.maximum(x, 0)
plt.figure()
plt.plot(x, y_relu, label='relu')
plt.legend()
from keras import losses
from keras import metrics
from keras import optimizers

# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
# NOTE(review): newer Keras releases name this argument `learning_rate`
# instead of `lr` -- confirm against the installed version.
rmsprop = optimizers.RMSprop(lr=0.001)
model.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Hold out the first VALIDATION_SIZE training samples for validation and
# train on the remainder. Features and labels must be cut at the same index
# so that each sample stays paired with its label.
VALIDATION_SIZE = 10000

x_val = x_train[:VALIDATION_SIZE]
partial_x_train = x_train[VALIDATION_SIZE:]
y_val = y_train[:VALIDATION_SIZE]
partial_y_train = y_train[VALIDATION_SIZE:]

history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val),
)
import matplotlib.pyplot as plt

# history.history is a dict of per-epoch values recorded during fit().
train_result = history.history
print(train_result.keys())

# The accuracy entries are stored under 'acc'/'val_acc' by older Keras
# releases and under 'accuracy'/'val_accuracy' by newer ones; accept either.
acc = train_result['acc'] if 'acc' in train_result else train_result['accuracy']
val_acc = (
    train_result['val_acc']
    if 'val_acc' in train_result
    else train_result['val_accuracy']
)
loss = train_result['loss']
val_loss = train_result['val_loss']

# One x-axis tick per recorded epoch, numbered from 1.
epochs = range(1, len(loss) + 1)

# Training loss as blue dots.
plt.plot(epochs, loss, 'bo', label='Trainning loss')
# Validation loss as a solid blue line.
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Trainning and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
# Build a fresh network with the same 16-16-1 architecture, train it on the
# full training set for 4 epochs, then measure it on the test set.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
history = model.fit(x_train, y_train, epochs=4, batch_size=512)

# evaluate() reports the loss followed by the compiled metrics.
results = model.evaluate(x_test, y_test)
print(results)

# Model outputs for every test sample (the value is not stored here).
model.predict(x_test)