TensorFlow implementation of LeNet-5 for MNIST handwritten digit classification

# import modules
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import time
from datetime import timedelta
import math
from tensorflow.examples.tutorials.mnist import input_data
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Allocate GPU memory on demand instead of reserving it all up front;
# the session created at the bottom of the script uses this config
config = tf.ConfigProto()
config.gpu_options.allow_growth = True


# Weight initialization
def weight_variable(shape):
    # Initialize weights from a truncated normal distribution
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


# Bias initialization
def bias_variable(shape):
    # With the ReLU activation used in this example, a small positive initial bias works well
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# 'input' is the input tensor, 'filter' is the convolution kernel
def conv2d(input, filter):
    return tf.nn.conv2d(input, filter, strides=[1, 1, 1, 1], padding='SAME')

# 2x2 max-pooling layer
def max_pool(input):
    return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# Import data; one_hot=True means the label 2 is encoded as [0 0 1 0 0 0 0 0 0 0]
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

print("Size of:")
print("--Training-set:\t\t{}".format(len(mnist.train.labels)))
print("--Testing-set:\t\t{}".format(len(mnist.test.labels)))
print("--Validation-set:\t{}".format(len(mnist.validation.labels)))
mnist.test.cls = np.argmax(mnist.test.labels, axis=1)  # the true test labels as digits: [7 2 1 ... 4 5 6], 10,000 values

# input_layer
x_input = tf.placeholder(tf.float32, [None, 784], name="x_input")
input_image = tf.reshape(x_input, [-1, 28, 28, 1])

y_input = tf.placeholder(tf.float32, [None, 10], name="y_input")
y_true_cls = tf.argmax(y_input, axis=1)

# Conv 1
layer_conv1 = {"weights": weight_variable([5, 5, 1, 32]),
               "biases": bias_variable([32])}
h_conv1 = tf.nn.relu(conv2d(input_image, layer_conv1["weights"]) + layer_conv1["biases"])
h_pool1 = max_pool(h_conv1)

# Conv 2
layer_conv2 = {"weights": weight_variable([5, 5, 32, 64]),
               "biases": bias_variable([64])}
h_conv2 = tf.nn.relu(conv2d(h_pool1, layer_conv2["weights"]) + layer_conv2["biases"])
h_pool2 = max_pool(h_conv2)

# Flatten
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
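# A quick sanity check on the 7 * 7 * 64 above (added for clarity; uses the
# math module already imported): with 'SAME' padding, a stride-1 conv keeps
# the spatial size (ceil(n / 1) = n) and each 2x2 stride-2 max-pool halves it
# (ceil(n / 2)), so 28 -> 28 -> 14 -> 14 -> 7, with 64 channels after conv2.
assert math.ceil(28 / 2) == 14 and math.ceil(14 / 2) == 7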

# Full-connected layer 1
fc1_layer = {"weights": weight_variable([7 * 7 * 64, 1024]),
             "biases": bias_variable([1024])}
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, fc1_layer["weights"]) + fc1_layer["biases"])

# Dropout layer
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Full-connected layer 2
fc2_layer = {"weights": weight_variable([1024, 10]),
             "biases": bias_variable([10])}
# Predicted class
y_pred = tf.nn.softmax(
    tf.matmul(h_fc1_drop, fc2_layer["weights"]) + fc2_layer["biases"])  # a probability for each of the 10 classes

y_pred_cls = tf.argmax(y_pred, axis=1)  # the predicted digit, e.g. 2

# cost function to be optimized
# Group the loss ops under a "loss-model" name scope
with tf.name_scope("loss-model"):
    # Cross-entropy loss
    cross_entropy = -tf.reduce_mean(y_input * tf.log(y_pred))
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
    # Log the loss as a scalar summary to plot its convergence curve
    tf.summary.scalar("loss", cross_entropy)
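# A numerically safer alternative (a sketch, not what this script uses):
# tf.log() of a softmax output can produce log(0) = -inf, and reduce_mean
# above also averages over all 10 class entries per example rather than
# summing them. TensorFlow's fused op avoids both issues by working on the
# pre-softmax logits:
# logits = tf.matmul(h_fc1_drop, fc2_layer["weights"]) + fc2_layer["biases"]
# safe_loss = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_input, logits=logits))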

# Performance measures
with tf.name_scope("accuracy-model"):
    # Each row of y_pred / y_input corresponds to one example in the batch
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Log the accuracy as a scalar summary to plot its convergence curve
    tf.summary.scalar("test_acc", accuracy)


def optimize(num_iterations):
    total_iterations = 0
    start_time = time.time()

    # merge_all() collects every summary op defined above
    merged = tf.summary.merge_all()

    # Write everything the run produces to ./tensorboard for TensorBoard
    writer = tf.summary.FileWriter('./tensorboard', sess.graph, filename_suffix="_mnist")

    for i in range(total_iterations, total_iterations + num_iterations):
        x_batch, y_batch = mnist.train.next_batch(train_batch_size)

        feed_dict_train_op = {x_input: x_batch, y_input: y_batch, keep_prob: 0.5}
        feed_dict_test = {x_input: mnist.test.images, y_input: mnist.test.labels, keep_prob: 1.0}

        # Run the merged summaries, one optimization step, and the loss in a single call
        summary, _, train_loss = sess.run([merged, optimizer, cross_entropy],
                                          feed_dict=feed_dict_train_op)

        # Print status every 100 iterations.
        if i % 100 == 0:
            # Evaluate accuracy; note this feeds the test set, although the
            # printed message labels it "Training Accuracy"
            acc = sess.run(accuracy, feed_dict=feed_dict_test)
            msg = "Optimization Iteration:{0:>6}, Training Accuracy: {1:>6.1%}"
            print(msg.format(i, acc))

        writer.add_summary(summary, i)

    # Update the total number of iterations performed
    total_iterations += num_iterations
    # Ending time
    end_time = time.time()
    # Difference between start and end_times.
    time_dif = end_time - start_time
    # Print the time-usage
    print("Time usage:" + str(timedelta(seconds=int(round(time_dif)))))


def print_test_accuracy():
    # Number of images in the test-set.
    num_test = len(mnist.test.images)
    cls_pred = np.zeros(shape=num_test, dtype=int)

    i = 0

    while i < num_test:
        # The ending index for the next batch is denoted j.
        j = min(i + test_batch_size, num_test)

        # Get the images from the test-set between index i and j
        images = mnist.test.images[i:j, :]
        # Get the associated labels
        labels = mnist.test.labels[i:j, :]

        # Create a feed-dict with these images and labels.
        feed_dict = {x_input: images, y_input: labels, keep_prob: 1.0}

        # Calculate the predicted classes using TensorFlow.
        cls_pred[i:j] = sess.run(y_pred_cls, feed_dict=feed_dict)

        # Set the start-index for the next batch to the
        # end-index of the current batch
        i = j

    cls_true = mnist.test.cls
    correct = (cls_true == cls_pred)
    correct_sum = correct.sum()
    acc = float(correct_sum) / num_test

    # Print the accuracy
    msg = "Accuracy on Test-Set: {0:.1%} ({1}/{2})"
    print(msg.format(acc, correct_sum, num_test))


with tf.Session(config=config) as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    train_batch_size = 50
    test_batch_size = 256

    # Performance after 20000 optimization iterations
    optimize(20000)
    print_test_accuracy()

# Sample output:
# Optimization Iteration: 19800, Training Accuracy:  99.3%
# Optimization Iteration: 19900, Training Accuracy:  99.3%
# Time usage:0:01:28
# Accuracy on Test-Set: 99.3% (9933/10000)

The MNIST dataset can also be downloaded manually from http://yann.lecun.com/exdb/mnist/

The LeNet-5 model architecture:

(figure: LeNet-5 architecture diagram)
The structure of each layer of LeNet-5:

(1) Layer 1: convolutional layer

The input to this layer is the raw image pixels; LeNet-5 accepts a 32x32x1 input. The first convolutional layer uses 5x5 filters with depth 6, no zero-padding, and stride 1. Without zero-padding, the output size is 32 - 5 + 1 = 28, with depth 6. This layer has 5x5x1x6 + 6 = 156 parameters, 6 of which are biases. Since the next layer's node matrix has 28x28x6 = 4704 nodes, each connected to a 5x5 = 25-node window of the current layer plus one bias, this layer has 4704 x (25 + 1) = 122304 connections.
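These counts are easy to verify in a few lines of Python (a sketch added for illustration; the names are arbitrary):

params = 5 * 5 * 1 * 6 + 6                 # weights + biases = 156
output_nodes = 28 * 28 * 6                 # = 4704
connections = output_nodes * (5 * 5 + 1)   # each output node sees a 5x5 window + 1 bias
print(params, connections)                 # 156 122304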

(2) Layer 2: pooling layer

The input is the output of layer 1, a 28x28x6 node matrix. This layer uses a 2x2 filter with stride 2 in both dimensions, so the output matrix is 14x14x6; the size arithmetic is checked below.
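The output size follows the usual unpadded filter arithmetic, (input - filter) / stride + 1 (a hypothetical helper, added for illustration):

def pool_out(n, k=2, s=2):
    return (n - k) // s + 1   # unpadded pooling output size

assert pool_out(28) == 14     # 28x28x6 -> 14x14x6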

(3) Layer 3: convolutional layer

The input matrix is 14x14x6; the filters are 5x5 with depth 16, no zero-padding, and stride 1. The output matrix is 10x10x16. Treated as a standard convolutional layer, it has 5x5x6x16 + 16 = 2416 parameters and 10x10x16 x (25 + 1) = 41600 connections.

(4) Layer 4: pooling layer

The input matrix is 10x10x16; the filter is 2x2 with stride 2. The output matrix is 5x5x16.

(5) Layer 5: fully connected layer

The input matrix is 5x5x16. The LeNet-5 paper calls this a convolutional layer, but because the filter size equals the input size (5x5) it is indistinguishable from a fully connected layer, and TensorFlow implementations treat it as one; a short demonstration of the equivalence follows. The layer has 120 output nodes and 5x5x16x120 + 120 = 48120 parameters, which is also its number of connections.
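A convolution whose filter exactly covers its input is just a matrix multiplication; a NumPy sketch of the equivalence (hypothetical shapes, added for illustration):

import numpy as np
x = np.random.rand(5, 5, 16)        # the layer-5 input
w = np.random.rand(5, 5, 16, 120)   # one 5x5x16 filter per output node
# an unpadded 5x5 convolution collapses to a single dot product per filter ...
conv_out = np.tensordot(x, w, axes=([0, 1, 2], [0, 1, 2]))
# ... which is exactly a flatten followed by a matmul
fc_out = x.reshape(-1) @ w.reshape(-1, 120)
assert np.allclose(conv_out, fc_out)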

(6) Layer 6: fully connected layer

This layer has 120 input nodes and 84 output nodes, for 120x84 + 84 = 10164 parameters, which is also its number of connections.

(7) Layer 7: fully connected layer

This layer has 84 input nodes and 10 output nodes, for 84x10 + 10 = 850 parameters, which is also its number of connections.
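Only the convolutional and fully connected layers carry parameters (the pooling layers have none), so the classic LeNet-5 total is easy to tally (a short check of the arithmetic above, added for illustration):

layer_params = {
    "conv1": 5 * 5 * 1 * 6 + 6,       # 156
    "conv2": 5 * 5 * 6 * 16 + 16,     # 2416
    "fc1":   5 * 5 * 16 * 120 + 120,  # 48120
    "fc2":   120 * 84 + 84,           # 10164
    "fc3":   84 * 10 + 10,            # 850
}
print(sum(layer_params.values()))     # 61706 parameters in total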

This program implements handwritten digit classification based on LeNet-5, with the architecture shown in the figure above. Note that the code departs from the classic LeNet-5 described here in a few ways: it takes 28x28 inputs, uses 'SAME' padding, uses filter depths of 32 and 64 in its two convolutional layers, and has a 1024-node fully connected layer.

Reposted from blog.csdn.net/qq_36201400/article/details/108474967