初学tensorflow，参考了以下几篇博客：

BN层[待学习]

先解释一下 MNIST 数据集：训练集有 55,000 条样本，每张图片为 28 × 28 像素，读入时被拉平成长度为 784 的向量，因此 X 为 55,000 × 784 的矩阵；每张图片都带有标签，即该图片对应的真实数字，标签采用 one-hot 编码（长度为 10 的向量，其中只有一个位置为 1，1 所在的位置代表图片的类别），因此 Y 为 55,000 × 10 的矩阵。

Softmax模型

准确率约为 92.3%。读入时将每张图片拉平成一个向量，使用 Adam 优化器进行训练。


   
   
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       import tensorflow 
       
       as tf
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       import numpy 
       
       as np
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       from tensorflow.examples.tutorials.mnist 
       
       import input_data
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Softmax regression on MNIST (TensorFlow 1.x graph API).
       # The training split holds 55,000 examples, so X is 55,000 x 784 and Y is 55,000 x 10.
       # one_hot=True makes the loader return one-hot label rows of length 10; it does
       # not affect the images, which are fed below as 784-element vectors.
       mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

       learning_rate = 0.01
       batch_size = 128
       n_epochs = 1000  # number of mini-batch steps run by the training loop

       # The leading dimension is None because training batches and the test set
       # contain different numbers of examples.
       x = tf.placeholder(tf.float32, [None, 784])
       y = tf.placeholder(tf.float32, [None, 10])

       # A plain softmax classifier can start from all-zero parameters; variables are
       # trainable by default.
       W = tf.Variable(tf.zeros([784, 10]))
       b = tf.Variable(tf.zeros([10]))

       # Unnormalized class scores and their softmax: one row of 10 class
       # probabilities per example.
       logits = tf.matmul(x, W) + b
       y_hat = tf.nn.softmax(logits)

       # Cross-entropy loss. It is computed from the logits by the built-in op so that
       # a probability underflowing to 0 cannot produce log(0) = -inf / NaN, and it is
       # averaged over the examples of the batch (the previous reduce_sum without an
       # axis collapsed everything to a scalar, so the outer reduce_mean did nothing).
       entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
       loss = tf.reduce_mean(entropy)

       # Adam optimizer with a fixed learning rate of 0.01.
       train = tf.train.AdamOptimizer(learning_rate).minimize(loss)

       # Each label row has a single 1; a prediction is correct when the position of
       # that 1 equals the position of the largest predicted probability.
       correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
       # Cast booleans to 0.0 / 1.0 so that their mean is the accuracy.
       accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

       with tf.Session() as sess:
           sess.run(tf.global_variables_initializer())
           for i in range(n_epochs):
               # Fetch the next mini-batch and run one optimization step on it.
               batch_x, batch_y = mnist.train.next_batch(batch_size)
               sess.run(train, feed_dict={x: batch_x, y: batch_y})
           print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

全连接神经网络模型

2 层隐藏层，激活函数为 ReLU，输出层使用 softmax 进行分类，学习率采用指数衰减法，初始学习率为 0.01；如果初始学习率太大，准确率只有 9.8%（相当于随机猜测）。学习率衰减得太快也会使准确率下降（学习率过早趋近于 0，模型还没有收敛就停止了更新，属于欠拟合而不是过拟合）。dropout 正则化在这里不是很管用，会让准确率下降，只有 keep_prob = 0.99 时准确率才勉强高一点。如果只用一个隐藏层，准确率为 93.45%。无论是学习率过大，还是正则化过强，都会导致准确率只有 9.8%；学习率太低则准确率在 90% 左右。


   
   
    
    
     
     
      
      
     
     
     
     
      
      
       
       import tensorflow 
       
       as tf
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       import numpy 
       
       as np
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       from tensorflow.examples.tutorials.mnist 
       
       import input_data
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # The training split holds 55,000 examples, so X is 55,000 x 784 and Y is 55,000 x 10.
       # one_hot=True makes the loader return one-hot label rows of length 10; the
       # images are fed to the [None, 784] placeholder as-is.
       mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

       base_learning_rate = 0.01  # initial rate passed to tf.train.exponential_decay
       batch_size = 128  # examples fetched per training step
       n_epochs = 1000  # number of training steps run by the loop
       keep_prob = 1  # read by add_layer and passed to tf.nn.dropout; 1 keeps every unit
       decay_steps = 2  # passed to tf.train.exponential_decay as decay_steps
       decay_rate = 0.99  # passed to tf.train.exponential_decay as decay_rate
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       def add_layer(inputs, input_size, output_size, activation_function=None):
           """Build one fully connected layer and return its output tensor.

           Args:
               inputs: 2-D tensor that is multiplied by the layer's weight matrix
                   (assumed to be [batch, input_size]).
               input_size: Number of input features (rows of the weight matrix).
               output_size: Number of units in the layer (columns of the weight matrix).
               activation_function: Optional callable applied to the pre-activation.
                   When None, the pre-activation itself is returned.

           Returns:
               The layer output: activation_function(inputs @ W + b) after dropout,
               or the linear value when no activation is given.
           """
           # Scale N(0, 1) samples by sqrt(1 / fan_in). The 1.0 forces float division so
           # the factor cannot collapse to 0 under Python 2 integer division.
           W = tf.Variable(tf.random_normal([input_size, output_size]) * np.sqrt(1.0 / input_size))
           b = tf.Variable(tf.zeros([1, output_size]) + 0.1)
           y_hat = tf.matmul(inputs, W) + b

           # tf.nn.dropout already rescales the kept units by 1 / keep_prob.
           # NOTE: keep_prob is the module-level constant, so the same value applies
           # every time the graph is run, evaluation included.
           y_hat = tf.nn.dropout(y_hat, keep_prob=keep_prob)

           if activation_function is None:
               # Previously referenced the undefined name `y_hats` (NameError).
               return y_hat
           return activation_function(y_hat)
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Inputs: flattened images and their one-hot labels. The batch dimension is
       # left as None so that training batches and the test set can both be fed.
       x = tf.placeholder(tf.float32, [None, 784])
       y = tf.placeholder(tf.float32, [None, 10])

       # 784 -> 100 -> 10 ReLU layers followed by a 10-way softmax output layer.
       layer1 = add_layer(x, 784, 100, activation_function=tf.nn.relu)
       layer2 = add_layer(layer1, 100, 10, activation_function=tf.nn.relu)
       y_hat = add_layer(layer2, 10, 10, tf.nn.softmax)

       # Step counter for the learning-rate schedule; it is not trained.
       global_step = tf.Variable(0, trainable=False)

       # Exponentially decayed learning rate:
       #   base_learning_rate - initial learning rate
       #   global_step        - current training step
       #   decay_steps        - number of steps per decay period
       #   decay_rate         - factor applied per decay period
       learning_rate = tf.train.exponential_decay(base_learning_rate, global_step, decay_steps, decay_rate)

       # Cross-entropy loss: sum over the 10 classes of each example, then average
       # over the batch (without axis=1 the sum collapsed to a scalar and the mean did
       # nothing). Probabilities are clipped away from 0 so that log() cannot return
       # -inf and turn the loss into NaN.
       per_example_loss = -tf.reduce_sum(y * tf.log(tf.clip_by_value(y_hat, 1e-10, 1.0)), axis=1)
       loss = tf.reduce_mean(per_example_loss)

       # Passing global_step makes the optimizer increment it on every update.
       train = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

       # Each label row has a single 1; a prediction is correct when the position of
       # that 1 equals the position of the largest predicted probability.
       correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
       # Cast booleans to 0.0 / 1.0 so that their mean is the accuracy.
       accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

       with tf.Session() as sess:
           sess.run(tf.global_variables_initializer())
           for i in range(n_epochs):
               batch_x, batch_y = mnist.train.next_batch(batch_size)
               sess.run(train, feed_dict={x: batch_x, y: batch_y})
           print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

卷积网络模型（LENET）

思路

使用一个简单的CNN网络结构如下，括号里边表示tensor经过本层后的输出shape：

输入层（28 * 28 * 1）
卷积层1（28 * 28 * 32）
pooling层1（14 * 14 * 32）
卷积层2（14 * 14 * 64）
pooling层2（7 * 7 * 64）
全连接层（1 * 1024）
softmax层（10）

主要的函数说明：

卷积层：
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)

参数说明：

data_format：表示输入的格式，有两种分别为：“NHWC”和“NCHW”，默认为“NHWC”
input：输入是一个4维格式的（图像）数据，数据的 shape 由 data_format 决定：当 data_format 为“NHWC”时，输入数据的 shape 表示为 [batch, in_height, in_width, in_channels]，分别表示训练时一个 batch 的图片数量、图片高度、图片宽度、图像通道数；当 data_format 为“NCHW”时，输入数据的 shape 表示为 [batch, in_channels, in_height, in_width]。
filter：卷积核是一个4维格式的数据：shape表示为：[height,width,in_channels, out_channels]，分别表示卷积核的高、宽、深度（与输入的in_channels应相同）、输出 feature map的个数（即卷积核的个数）。
strides：表示步长：一个长度为4的一维列表，每个元素跟 data_format 互相对应，表示在 data_format 每一维上的移动步长。当输入为默认格式“NHWC”时，strides = [batch, in_height, in_width, in_channels]。其中 batch 和 in_channels 要求一定为 1，即只能在一个样本的一个通道的特征图上进行移动；in_height、in_width 表示卷积核在特征图的高度和宽度方向上移动的步长，即 strides[1] 和 strides[2]。
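padding：表示填充方式：“SAME”表示采用填充的方式，简单地理解为以 0 填充边缘，当 stride 为 1 时，输入和输出的维度相同；“VALID”表示采用不填充的方式，多余的部分直接丢弃。具体公式（$H_{in}, W_{in}$ 为输入的高和宽，$k_h, k_w$ 为卷积核的高和宽，$s_h, s_w$ 为高、宽方向的步长）：
“SAME”: $H_{out} = \lceil H_{in} / s_h \rceil,\quad W_{out} = \lceil W_{in} / s_w \rceil$
“VALID”: $H_{out} = \lceil (H_{in} - k_h + 1) / s_h \rceil,\quad W_{out} = \lceil (W_{in} - k_w + 1) / s_w \rceil$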

池化层：
tf.nn.max_pool( value, ksize,strides,padding,data_format=’NHWC’,name=None)
或者
tf.nn.avg_pool(…)

参数说明：

value：表示池化的输入：一个4维格式的数据，数据的 shape 由 data_format 决定，默认情况下shape 为[batch, height, width, channels]
其他参数与 tf.nn.conv2d 类似
ksize：表示池化窗口的大小：一个长度为4的一维列表，一般为[1, height, width, 1]，因不想在batch和channels上做池化，则将其值设为1。

Batch Normalization层：
tf.nn.batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name=None)

mean 和 variance 通过 tf.nn.moments 来进行计算：
batch_mean, batch_var = tf.nn.moments(x, axes = [0, 1, 2], keep_dims=True)，注意axes的输入。对于以feature map 为维度的全局归一化，若feature map 的shape 为[batch, height, width, depth]，则将axes赋值为[0, 1, 2]
x 为输入的feature map 四维数据，offset、scale为一维Tensor数据，shape 等于 feature map 的深度depth。

注意，计算准确率的时候，一定让keep_prob等于1


   
   
    
    
     
     
      
      
     
     
     
     
      
      
       
       import tensorflow 
       
       as tf
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       import numpy 
       
       as np
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       #导入input_data用于自动下载和安装MNIST数据集
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       from tensorflow.examples.tutorials.mnist 
       
       import input_data
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Load MNIST with one-hot encoded labels.
       mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

       # Placeholders fed at run time: x receives the flattened images and y the
       # matching one-hot labels; the batch dimension is left unspecified.
       x = tf.placeholder("float", [None, 784])
       y = tf.placeholder("float", [None, 10])
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Weight initializer.
       def weight_variable(shape):
           """Return a tf.Variable of the given shape initialized from a truncated normal (stddev 0.1)."""
           return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Bias initializer.
       def bias_variable(shape):
           """Return a tf.Variable of the given shape with every element set to 0.1."""
           return tf.Variable(tf.constant(0.1, shape=shape))
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Convolution op.
       def conv2d(x, W):
           """Convolve x with the kernel W using stride 1 and SAME padding.

           x is a 4-D tensor laid out as [batch, height, width, channels]. With a stride
           of 1 in every dimension and SAME (zero) padding, no pixel is dropped.
           """
           unit_strides = [1, 1, 1, 1]
           return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Pooling op.
       def max_pool_2x2(x):
           """Max-pool x over 2x2 windows with stride 2 and SAME padding.

           x is a 4-D tensor laid out as [batch, height, width, channels]. Each output
           value is the maximum of one window; the window is 2 high and 2 wide, and the
           stride is 2 along both height and width.
           """
           window = [1, 2, 2, 1]
           step = [1, 2, 2, 1]
           return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Layer 1: convolution + max pooling.
       # W_conv1 has shape [5, 5, 1, 32]: 5x5 kernels, 1 input channel, 32 output channels.
       W_conv1 = weight_variable([5, 5, 1, 32])
       # One bias per output channel.
       b_conv1 = bias_variable([32])

       # Turn the 2-D input x ([batch, 784]) into the 4-D image tensor x_image of
       # shape [batch, 28, 28, 1]; -1 lets TensorFlow infer the batch dimension.
       x_image = tf.reshape(x, [-1, 28, 28, 1])

       # Convolve x_image with the weights, add the bias, apply ReLU, then max-pool.
       # h_pool1 is the output of the first layer, with shape [batch, 14, 14, 32].
       h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
       h_pool1 = max_pool_2x2(h_conv1)

       # Layer 2: convolution + max pooling.
       # Kernels are still 5x5; this layer maps 32 input channels to 64 output channels.
       W_conv2 = weight_variable([5, 5, 32, 64])
       # Use bias_variable (constant 0.1) like every other layer; this bias was
       # previously created with weight_variable, i.e. from random values.
       b_conv2 = bias_variable([64])

       # h_pool2 is the output of the second layer, with shape [batch, 7, 7, 64].
       h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
       h_pool2 = max_pool_2x2(h_conv2)

       # Layer 3: fully connected layer with 1024 units.
       # The first dimension of W_fc1 is 7*7*64: 7x7 is the spatial size of h_pool2
       # and 64 is the number of channels produced by layer 2.
       W_fc1 = weight_variable([7 * 7 * 64, 1024])
       b_fc1 = bias_variable([1024])

       # Flatten the layer-2 output to [batch, 7*7*64] before the matrix multiply.
       h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
       h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

       # Dropout before the output layer to reduce overfitting. keep_prob is a
       # placeholder, so it must be fed on every run that depends on h_fc1_drop.
       keep_prob = tf.placeholder("float")
       h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
       
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       # Output layer: another fully connected layer whose scores are converted into
       # class probabilities by softmax.
       W_fc2 = weight_variable([1024, 10])
       b_fc2 = bias_variable([10])

       logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
       y_conv = tf.nn.softmax(logits)

       # Cross-entropy between predictions and labels, summed over the batch as
       # before. It is computed from the logits by the built-in op because
       # tf.log(y_conv) yields -inf (and then NaN) once a probability underflows to 0.
       cross_entropy = tf.reduce_sum(
           tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

       # Training op: Adam optimizer with a learning rate of 0.0001.
       train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

       # Evaluation: tf.argmax returns the index of the largest value along an axis.
       # The labels are one-hot vectors, so for y that index is the position of the 1.
       correct_predict = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))

       # tf.equal returns booleans; cast them to floats and average them to get the
       # fraction of correct predictions.
       accuracy = tf.reduce_mean(tf.cast(correct_predict, "float"))
      
      
     
     
    
    
     
     
      
      
     
     
     
     
      
      
       
       with tf.Session() as sess:
           # Initialize all variables.
           sess.run(tf.global_variables_initializer())

           # Train for 1000 steps, fetching a mini-batch of 50 images per step.
           for i in range(1000):
               batch = mnist.train.next_batch(50)
               if i % 100 == 0:
                   # Log every 100 steps. Accuracy must be measured with dropout
                   # disabled, i.e. keep_prob = 1.0.
                   train_accuracy = accuracy.eval(
                       feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
                   print("step %d, training accuracy %g" % (i, train_accuracy))
               # Training step with dropout enabled (half of the units kept).
               train_step.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})

           # Test accuracy. keep_prob is a placeholder without a default value, so it
           # has to be fed here as well; omitting it makes the run fail.
           print(sess.run(accuracy, feed_dict={
               x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}))

【TensorFlow/简单网络】MNIST数据集-softmax、全连接神经网络，卷积神经网络模型

Softmax模型

全连接神经网络模型

卷积网络模型（LENET）

思路

主要的函数说明：

参数说明：

参数说明：

猜你喜欢