MNIST CNN实现
准备工作
mnist_loader.py
def load_mnist(path, kind='train'):
    """Load gzip-compressed MNIST images and labels from `path`.

    Args:
        path: Directory containing the files
            ``<kind>-labels-idx1-ubyte.gz`` and
            ``<kind>-images-idx3-ubyte.gz``.
        kind: File-name prefix selecting the split, e.g. ``'train'`` or
            ``'t10k'``.

    Returns:
        A tuple ``(images, labels)``. ``labels`` is a 1-D uint8 array and
        ``images`` is a uint8 array of shape ``(len(labels), 784)``.
    """
    # Kept function-local, as in the original listing, so the loader does
    # not rely on any module-level imports.
    import gzip
    import os

    import numpy as np

    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        # The first 8 bytes of the label file are header, not label data.
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        # The first 16 bytes of the image file are header; the rest is one
        # flat run of pixels, 784 (= 28 * 28) per image.
        pixels = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16)
        images = pixels.reshape(len(labels), 784)

    return images, labels
代码
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
import mnist_loader as ml
def load_data():
    """Load the MNIST train and test splits and prepare them for the CNN.

    Each split is read with ``ml.load_mnist`` from ``./MNIST_DATA``. Images
    are reshaped to ``(N, 28, 28, 1)`` and divided by 255.0; labels are
    converted with ``keras.utils.to_categorical``.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.
    """
    data_dir = './MNIST_DATA'

    def _prepare(kind):
        # Read one split and apply the image/label preprocessing to it.
        images, labels = ml.load_mnist(data_dir, kind)
        scaled = images.reshape(-1, 28, 28, 1) / 255.0
        return scaled, keras.utils.to_categorical(labels)

    x_train, y_train = _prepare('train')
    x_test, y_test = _prepare('t10k')
    return x_train, y_train, x_test, y_test
def rand_weights(shape):
    """Return a new tf.Variable of `shape` drawn from a truncated normal.

    The initial values come from ``tf.truncated_normal`` with stddev 0.05.
    """
    initial = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial)
def rand_biases(length):
    """Return a new 1-D tf.Variable with `length` entries.

    The initial values come from ``tf.truncated_normal`` with stddev 0.05.
    """
    initial = tf.truncated_normal([length], stddev=0.05)
    return tf.Variable(initial)
def new_conv_layer(input, nb_filters, nb_channels=1, filter_size=3, activation=tf.nn.relu):
    """Build a 2-D convolution layer with its own weights and biases.

    Args:
        input: Tensor to convolve (presumably NHWC, given the 4-element
            strides -- confirm against callers).
        nb_filters: Number of output filters.
        nb_channels: Number of channels in `input`.
        filter_size: Height and width of the square kernel.
        activation: Callable applied to the biased convolution output.

    Returns:
        The activated output of a stride-1, "SAME"-padded convolution.
    """
    # Weights are created before biases, matching the original variable order.
    kernel = rand_weights([filter_size, filter_size, nb_channels, nb_filters])
    bias = rand_biases(nb_filters)
    conv = tf.nn.conv2d(input=input, filter=kernel, strides=[1, 1, 1, 1], padding="SAME")
    return activation(conv + bias)
def new_max_pooling_layer(input, size=2):
    """Apply "SAME"-padded max pooling with a `size` x `size` window.

    The stride equals the window size in both spatial dimensions.
    """
    window = [1, size, size, 1]
    return tf.nn.max_pool(value=input, ksize=window, strides=window, padding="SAME")
def flatten_layer(input):
    """Reshape `input` to 2-D, keeping the first dimension.

    All dimensions after the first are collapsed into a single feature axis.
    """
    feature_shape = input.get_shape()[1:]
    return tf.reshape(input, [-1, feature_shape.num_elements()])
def new_fc_layer(input, nb_outputs, activation=None):
    """Build a fully connected layer with its own weights and biases.

    Args:
        input: Tensor whose non-leading dimensions give the input width
            (expected to be 2-D so that ``tf.matmul`` applies).
        nb_outputs: Number of output units.
        activation: Optional callable applied to the affine output. The
            default ``None`` keeps the layer purely linear, which is the
            original behaviour and what a logits layer needs.

    Returns:
        ``activation(input @ W + b)``, or ``input @ W + b`` when
        `activation` is ``None``.
    """
    nb_inputs = input.get_shape()[1:].num_elements()
    # Weights are created before biases, matching the original variable order.
    weights = rand_weights([nb_inputs, nb_outputs])
    biases = rand_biases(nb_outputs)
    layer = tf.matmul(input, weights) + biases
    # Without a nonlinearity, stacked FC layers collapse into one affine map;
    # hidden layers can now opt in, mirroring new_conv_layer.
    if activation is not None:
        layer = activation(layer)
    return layer
def main(arg=None):
    """Train a small CNN on MNIST, evaluate it, save it and plot the curves.

    Builds conv(32) -> max-pool -> conv(64) -> flatten -> fc(128) ->
    fc(10), trains it with plain gradient descent on random mini-batches,
    prints the test accuracy, writes a checkpoint to ``./cnn.ckpt`` and
    shows the recorded loss/accuracy history with matplotlib.

    Args:
        arg: Unused; accepted so the function can be invoked by
            ``tf.app.run()``.
    """
    x_train, y_train, x_test, y_test = load_data()

    nb_classes = 10
    learning_rate = 0.1
    # Each "epoch" here is ONE update on a single random mini-batch, not a
    # full pass over the training set.
    nb_epochs = 1000
    batch_size = 128
    # The test set is evaluated in chunks of this size so the convolution
    # activations for all test images never have to be in memory at once.
    eval_batch_size = 1000

    acc_history = []
    loss_history = []

    with tf.Graph().as_default():
        x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
        y_ = tf.placeholder(dtype=tf.float32, shape=[None, nb_classes])

        c1 = new_conv_layer(x, 32)
        p1 = new_max_pooling_layer(c1)
        c2 = new_conv_layer(p1, 64, nb_channels=32)
        flatten = flatten_layer(c2)
        # NOTE(review): no activation is applied between fc1 and the logits
        # layer, so the two dense layers compose into a single affine map.
        # Left unchanged to keep the published results reproducible.
        fc1 = new_fc_layer(flatten, 128)
        logits = new_fc_layer(fc1, nb_classes)

        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        predicted = tf.argmax(tf.nn.softmax(logits), axis=1)
        expected = tf.argmax(y_, axis=1)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, expected), tf.float32))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            indices = np.arange(x_train.shape[0])
            for epoch in range(nb_epochs):
                # Reshuffle and take the first batch_size indices: a random
                # mini-batch drawn without replacement.
                np.random.shuffle(indices)
                batch_idx = indices[:batch_size]
                feed_dict = {x: x_train[batch_idx], y_: y_train[batch_idx]}
                sess.run([train], feed_dict=feed_dict)

                if epoch % 10 == 0:
                    # Metrics are measured on the batch just trained on.
                    l, acc = sess.run([loss, accuracy], feed_dict=feed_dict)
                    print("epoch: %d loss: %f, accuracy: %f" % (epoch, l, acc))
                    loss_history.append(l)
                    acc_history.append(acc)

            # Chunked evaluation: weight each chunk's accuracy by its size so
            # the result equals the accuracy over the whole test set.
            nb_test = x_test.shape[0]
            nb_correct = 0.0
            for start in range(0, nb_test, eval_batch_size):
                x_chunk = x_test[start:start + eval_batch_size]
                y_chunk = y_test[start:start + eval_batch_size]
                chunk_acc = sess.run(accuracy, feed_dict={x: x_chunk, y_: y_chunk})
                nb_correct += float(chunk_acc) * x_chunk.shape[0]
            # An empty test set yields NaN, as the mean over no samples did.
            test_acc = nb_correct / nb_test if nb_test else float('nan')
            print("test accuracy: %f" % test_acc)

            saver = tf.train.Saver()
            saver.save(sess, './cnn.ckpt', global_step=nb_epochs)

    plt.plot(loss_history)
    plt.plot(acc_history)
    plt.legend(["loss", "accuracy"])
    plt.show()
if __name__ == '__main__':
    # Hand control to TensorFlow's app runner, naming the entry point
    # explicitly instead of relying on its default lookup of __main__.main.
    tf.app.run(main=main)
效果图
总结
对于如此大量的数据 (6 万张训练图片), 使用 Batch Gradient Descent 的计算量显然过于沉重, 所以我使用了 Mini-Batch Gradient Descent, 每次随机抽取 128 个样本为一组. 在训练了大概 1000 次迭代 (代码中的 epoch 实际上是一次 mini-batch 更新, 而不是遍历一遍完整的训练集) 之后模型收敛, 最终的 test accuracy 为 97.23%.
相比于 Keras, 直接用 TensorFlow 来构建神经网络确实十分繁琐, 但是一旦你熟悉了它的API之后, 又觉得还可以接受.