Implementing MNIST Backpropagation in TensorFlow

1. Code

# coding=utf-8
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


def sigmaprime(x):
    """
    Derivative of the sigmoid function: sigmoid(x) * (1 - sigmoid(x)).
    Used in the manual backpropagation step to scale the error signals.
    :param x: pre-activation tensor
    :return: element-wise sigmoid derivative
    """
    return tf.multiply(tf.sigmoid(x), tf.subtract(tf.constant(1.0), tf.sigmoid(x)))
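
# Note (illustrative, not part of the original script): for s = tf.sigmoid(x), the derivative
# is d/dx sigmoid(x) = s * (1 - s), which is exactly what sigmaprime returns; at x = 0,
# sigmoid(0) = 0.5, so the derivative there is 0.25.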


# Dataset
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # load MNIST with one-hot labels

# Define the model

# Constants
n_input = 784  # MNIST image size (28*28 pixels, flattened)
n_classes = 10  # MNIST classes (digits 0-9)

# Hyperparameters
max_epochs = 10000  # maximum number of training iterations
learning_rate = 0.5  # learning rate
batch_size = 10  # mini-batch size
seed = 0  # random seed
n_hidden = 30  # number of neurons in the hidden layer

# Placeholders
x_in = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])


# Build the model
def multilayer_perceptron(x, weights, biases):
    h_layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['h1'])  # hidden layer pre-activation
    out_layer_1 = tf.sigmoid(h_layer_1)  # hidden layer uses the sigmoid activation

    h_out = tf.matmul(out_layer_1, weights['out']) + biases['out']  # output layer pre-activation
    return tf.sigmoid(h_out), h_out, out_layer_1, h_layer_1  # output layer also uses sigmoid


weights = {  # Weights
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden], seed=seed)),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], seed=seed))}

biases = {  # Biases
    'h1': tf.Variable(tf.random_normal([1, n_hidden], seed=seed)),
    'out': tf.Variable(tf.random_normal([1, n_classes], seed=seed))}

# Forward pass
y_hat, h_2, o_1, h_1 = multilayer_perceptron(x_in, weights, biases)

# Loss function (mean squared error)
err = y - y_hat
loss = tf.reduce_mean(tf.square(err, name='loss'))

# Backpropagation: manually computed gradients (delta rule for squared error)
delta_2 = tf.multiply(err, sigmaprime(h_2))        # output-layer error signal
delta_w_2 = tf.matmul(tf.transpose(o_1), delta_2)  # gradient for the output-layer weights

wtd_error = tf.matmul(delta_2, tf.transpose(weights['out']))  # error propagated back through the output weights
delta_1 = tf.multiply(wtd_error, sigmaprime(h_1))             # hidden-layer error signal
delta_w_1 = tf.matmul(tf.transpose(x_in), delta_1)            # gradient for the hidden-layer weights

eta = tf.constant(learning_rate)

# Update weights and biases (err = y - y_hat, so the update is added rather than subtracted)
train = [
    tf.assign(weights['h1'], tf.add(weights['h1'], tf.multiply(eta, delta_w_1))),
    tf.assign(biases['h1'], tf.add(biases['h1'], tf.multiply(eta, tf.reduce_mean(delta_1, axis=[0])))),
    tf.assign(weights['out'], tf.add(weights['out'], tf.multiply(eta, delta_w_2))),
    tf.assign(biases['out'], tf.add(biases['out'], tf.multiply(eta, tf.reduce_mean(delta_2, axis=[0])))),
]

# Accuracy: count of correct predictions
acct_mat = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))
accuracy = tf.reduce_sum(tf.cast(acct_mat, tf.float32))

# Training
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(max_epochs):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        _, loss1 = sess.run([train, loss], feed_dict={x_in: batch_xs, y: batch_ys})
        if epoch % 1000 == 0:
            print('Epoch: {0}  loss: {1}'.format(epoch, loss1))

    acc_test = sess.run(accuracy, feed_dict={x_in: mnist.test.images, y: mnist.test.labels})
    acc_train = sess.run(accuracy, feed_dict={x_in: mnist.train.images, y: mnist.train.labels})
    # Evaluation: convert the raw counts of correct predictions to percentages
    # (test set: 10,000 images; note that the default training split has 55,000 images,
    # so dividing by 600 slightly understates the training accuracy)
    print('Accuracy Train%: {0}  Accuracy Test%: {1}'.format(acc_train / 600, acc_test / 100))
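
For comparison, the manual update rule above can be cross-checked against TensorFlow's built-in gradient descent. The snippet below is a minimal sketch, not part of the original post: it assumes the graph defined above (loss, accuracy, x_in, y, learning_rate, max_epochs, batch_size, mnist) is still in scope, and the name optimizer_step is invented here. Because minimize() differentiates the mean-squared loss exactly, its effective step sizes differ from the hand-written delta rule by constant scaling factors, so the numbers will not match exactly.

# Alternative training loop using the built-in optimizer (sketch; assumes the graph above is in scope)
optimizer_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(max_epochs):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(optimizer_step, feed_dict={x_in: batch_xs, y: batch_ys})
    print('Test accuracy (count of correct predictions):',
          sess.run(accuracy, feed_dict={x_in: mnist.test.images, y: mnist.test.labels}))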

Backpropagation (BP): compute the gradient of the loss at the output layer → propagate it back to compute the gradient at the hidden layer → use the gradients to update the weights (the delta_2 / delta_1 terms in the code above).
ReLU activation: the rectified linear unit, the most commonly used activation function for hidden layers; it does not activate the neuron when the input is negative. Note that the network above uses sigmoid in its hidden layer; a ReLU variant is sketched below.
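
As a rough illustration of the ReLU note (not part of the original code), the hidden layer could be switched to ReLU by changing the activation in the model and, in the manual backward pass, replacing sigmaprime(h_1) with the ReLU derivative. The helper names relu_prime and multilayer_perceptron_relu below are invented for this sketch, and with ReLU a smaller learning rate than 0.5 may be needed to keep training stable.

def relu_prime(x):
    """Derivative of ReLU: 1 where x > 0, 0 elsewhere."""
    return tf.cast(tf.greater(x, tf.constant(0.0)), tf.float32)


def multilayer_perceptron_relu(x, weights, biases):
    h_layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['h1'])  # hidden layer pre-activation
    out_layer_1 = tf.nn.relu(h_layer_1)  # hidden layer uses ReLU instead of sigmoid
    h_out = tf.matmul(out_layer_1, weights['out']) + biases['out']
    return tf.sigmoid(h_out), h_out, out_layer_1, h_layer_1

# In the backward pass, delta_1 would then become:
# delta_1 = tf.multiply(wtd_error, relu_prime(h_1))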

2. Results

Accuracy Train%: 84.91833333333334 Accuracy Test%: 92.53

Reposted from blog.csdn.net/lly1122334/article/details/88230088