DL Exercise 3: MNIST Handwritten Digit Recognition with RNN & LSTM

Copyright notice: this is an original article by 业余狙击手. You are welcome to read and repost it; please credit the source when reposting. https://blog.csdn.net/sxlsxl119/article/details/85157051

Click here to get the source code for MNIST handwritten digit recognition with RNN & LSTM (GitHub)

This post won't cover the theory behind RNNs and LSTMs; articles on that are everywhere online. Instead, we run the actual code, add some comments along the way, and look at the results, the goal being to deepen our understanding of RNNs and LSTMs through working code.
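Both examples below share one trick: each 28×28 image is read as a sequence of 28 rows, with one 28-pixel row fed to the network per time step. As a quick illustration of the reshape involved (a minimal NumPy sketch, not part of the original code):

import numpy as np

# a flat MNIST batch as returned by mnist.train.next_batch: (batch, 784)
flat_batch = np.random.rand(200, 784).astype(np.float32)

# reinterpret each image as 28 time steps of 28 features: (batch, steps, inputs)
sequences = flat_batch.reshape(200, 28, 28)
print(sequences.shape)  # (200, 28, 28)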

1. MNIST Handwritten Digit Recognition with an RNN

1.1 Code

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# set a random seed so runs are reproducible and comparable
tf.set_random_seed(1)

# load the data (raw string so the backslashes are not treated as escapes)
mnist = input_data.read_data_sets(r"E:\sxl_Programs\Python\MNIST_data\MNIST_data", one_hot=True)

# hyperparameters
lr = 0.0001  # learning rate
training_iters = 5000  # number of training steps
batch_size = 200

n_inputs = 28  # MNIST data input (img shape: 28*28), one image row per step
n_steps = 28  # time steps (one per image row)
n_hidden_units = 160  # neurons in hidden layer
n_classes = 10  # MNIST classes (0-9 digits)

# inputs and outputs
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# weights (shape comments match the hyperparameters above)
weights = {
    # (28, 160)
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    # (160, 10)
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    # (160, )
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    # (10, )
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

def RNN(X, weights, biases):
    # hidden layer for input to cell
    ########################################
    # flatten the time dimension:
    # X ==> (200 batch * 28 steps, 28 inputs)
    X = tf.reshape(X, [-1, n_inputs])

    # into hidden
    # X_in = (200 batch * 28 steps, 160 hidden)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # X_in ==> (200 batch, 28 steps, 160 hidden)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # cell
    ##########################################
    # basic LSTM cell; the API moved in TF 0.12, so pick the right module
    if int(tf.__version__.split('.')[0]) < 1 and int(tf.__version__.split('.')[1]) < 12:
        cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    else:
        cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # the LSTM state is a tuple of two parts (c_state, h_state)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)

    # Two options for the recurrent step:
    # 1: tf.nn.rnn(cell, inputs);
    # 2: tf.nn.dynamic_rnn(cell, inputs).
    # Option 1 requires reshaping X_in into a list of per-step tensors; see:
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
    # Here we go with option 2.
    # dynamic_rnn accepts a Tensor shaped (batch, steps, inputs) or (steps, batch, inputs);
    # make sure time_major is set accordingly.
    outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)

    # hidden layer that maps the final output to class scores
    #############################################
    # results = tf.matmul(final_state[1], weights['out']) + biases['out']

    # or: unpack outputs to a list [(batch, hidden), ...] * steps and take the last step
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']  # shape = (200, 10)

    return results


pred = RNN(x, weights, biases)  # predictions (logits)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))  # loss
train_op = tf.train.AdamOptimizer(lr).minimize(cost)  # optimizer

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))  # per-example correctness
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  # mean accuracy

with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated; use global_variables_initializer()
    init = tf.global_variables_initializer()
    sess.run(init)
    step = 0
    while step < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run([train_op], feed_dict={x: batch_xs, y: batch_ys})
        step += 1
        if step % 20 == 0:
            train_acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys})
            # evaluate on the test set in batch_size-sized chunks
            # (the initial state is built for a fixed batch_size)
            test_totalAccuracy = 0.0
            irange = int(mnist.test.labels.shape[0] / batch_size)  # number of full test batches
            for i in range(irange):
                test_batch_xs, test_batch_ys = mnist.test.next_batch(batch_size)
                test_batch_xs = test_batch_xs.reshape([batch_size, n_steps, n_inputs])
                feeds = {x: test_batch_xs, y: test_batch_ys}
                test_acc = sess.run(accuracy, feed_dict=feeds)
                test_totalAccuracy += test_acc
            test_totalAccuracy = test_totalAccuracy / irange
            print("  Iter%d, Train accuracy: %.3f, Test accuracy: %.3f" % (step, train_acc, test_totalAccuracy))

1.2 Results

The training iterations were not run all the way to completion...

2. MNIST Handwritten Digit Recognition with an LSTM

2.1 Code

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# reset the default graph and all of its nodes
tf.reset_default_graph()

# hyperparameters
learning_rate = 0.01    # learning rate
n_steps = 28            # number of unrolled LSTM steps (sequence length)
n_inputs = 28           # number of input nodes (features per step)
n_hiddens = 64          # number of hidden units
n_layers = 2            # number of stacked LSTM layers
n_classes = 10          # number of output nodes (classes)

# load the data (raw string so the backslashes are not treated as escapes)
mnist = input_data.read_data_sets(r"E:\sxl_Programs\Python\MNIST_data\MNIST_data", one_hot=True)
test_x = mnist.test.images
test_y = mnist.test.labels

# tensor placeholders
with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, [None, n_steps * n_inputs], name='x_input')   # input
    y = tf.placeholder(tf.float32, [None, n_classes], name='y_input')            # labels
    keep_prob = tf.placeholder(tf.float32, name='keep_prob_input')               # dropout keep probability
    batch_size = tf.placeholder(tf.int32, [], name='batch_size_input')           # batch size (fed at run time)

# weights and biases
with tf.name_scope('weights'):
    Weights = tf.Variable(tf.truncated_normal([n_hiddens, n_classes], stddev=0.1), dtype=tf.float32, name='W')
    tf.summary.histogram('output_layer_weights', Weights)
with tf.name_scope('biases'):
    biases = tf.Variable(tf.random_normal([n_classes]), name='b')
    tf.summary.histogram('output_layer_biases', biases)

# RNN structure
def RNN_LSTM(x, Weights, biases):
    # reshape the flat input into (batch, steps, inputs) for the RNN
    x = tf.reshape(x, [-1, n_steps, n_inputs])
    # define an LSTM cell with dropout applied to its output
    def attn_cell():
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hiddens)
        with tf.name_scope('lstm_dropout'):
            return tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
    # stack multiple LSTM layers; each layer needs its own cell instance
    enc_cells = []
    for i in range(0, n_layers):
        enc_cells.append(attn_cell())
    with tf.name_scope('lstm_cells_layers'):
        mlstm_cell = tf.contrib.rnn.MultiRNNCell(enc_cells, state_is_tuple=True)
    # all-zero initial state, sized by the batch_size placeholder
    _init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)
    # run the network with dynamic_rnn
    outputs, states = tf.nn.dynamic_rnn(mlstm_cell, x, initial_state=_init_state, dtype=tf.float32, time_major=False)
    # output: softmax over the last time step
    # (because softmax is applied here, the loss below must use the plain
    # cross-entropy form, not softmax_cross_entropy_with_logits)
    # return tf.matmul(outputs[:, -1, :], Weights) + biases
    return tf.nn.softmax(tf.matmul(outputs[:, -1, :], Weights) + biases)

with tf.name_scope('output_layer'):
    pred = RNN_LSTM(x, Weights, biases)
    tf.summary.histogram('outputs', pred)
# loss function: cross-entropy on the already-softmaxed predictions
with tf.name_scope('loss'):
    # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), axis=[1]))
    tf.summary.scalar('loss', cost)
# optimizer
with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# accuracy: per-batch accuracy rather than tf.metrics.accuracy, whose running
# average would mix together the train and test evaluations below
with tf.name_scope('accuracy'):
    correct_pred = tf.equal(tf.argmax(pred, axis=1), tf.argmax(y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    train_writer = tf.summary.FileWriter("E://logs//train", sess.graph)
    test_writer = tf.summary.FileWriter("E://logs//test", sess.graph)
    # training
    for i in range(10000):
        _batch_size = 128
        batch_x, batch_y = mnist.train.next_batch(_batch_size)

        sess.run(train_op, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5, batch_size: _batch_size})
        if (i + 1) % 100 == 0:
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0, batch_size: _batch_size})
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0, batch_size: _batch_size})
            print('Iter: %d' % (i + 1), '| train loss: %.6f' % loss, '| train accuracy: %.6f' % acc)
            train_result = sess.run(merged, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0, batch_size: _batch_size})
            test_result = sess.run(merged, feed_dict={x: test_x, y: test_y, keep_prob: 1.0, batch_size: test_x.shape[0]})
            train_writer.add_summary(train_result, i + 1)
            test_writer.add_summary(test_result, i + 1)

    print("Optimization Finished!")
    # final evaluation on the full test set
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_x, y: test_y, keep_prob: 1.0, batch_size: test_x.shape[0]}))

2.2 Results

