CIFAR-10识别模型


import tensorflow as tf

# Demonstrates how tf.variable_scope controls whether tf.get_variable creates
# a new variable or hands back one that was already created.
with tf.variable_scope("zyy"):  # the "zyy" variable scope
    # First request for "v" inside "zyy": the variable is created here.
    v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.0))

# Requesting "v" again in the same scope WITHOUT reuse=True fails, because a
# variable named "v" already exists in "zyy". The failure is the point of this
# example, so it is caught and reported instead of aborting the whole script
# (which would prevent everything below from ever being defined).
try:
    with tf.variable_scope("zyy"):
        v = tf.get_variable("v", [1])
except ValueError as err:
    print("tf.get_variable without reuse=True failed as expected:", err)

# With reuse=True the existing "v" is looked up rather than created.
# NOTE(review): the initializer passed here is expected to be ignored for an
# already-existing variable - confirm against the TensorFlow documentation.
with tf.variable_scope("zyy", reuse=True):
    v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.5))

# -- * -- * -- * -- * -- * -- * -- * -- * -- * -- * -- * -- * -- * -- * --

def inference(images):
  """Build the CIFAR-10 model graph and return its final-layer output.

  Layers are assembled in this order:
  conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 -> local3 -> local4
  -> softmax_linear.

  Variables are obtained through the `_variable_with_weight_decay` and
  `_variable_on_cpu` helpers inside named variable scopes.
  NOTE(review): the original note says variables are created with
  tf.get_variable() rather than tf.Variable() so that they can be shared
  across multi-GPU training runs; the helpers are not visible here - confirm.

  Args:
    images: Batch of input images. The first convolution kernel has 3 input
      channels, so the last dimension is presumably 3 - TODO confirm the
      full shape against the caller.

  Returns:
    The output tensor of the `softmax_linear` layer, i.e. unscaled logits
    with NUM_CLASSES columns (no softmax is applied here).
  """
  # Stride / window settings shared by several layers below.
  unit_strides = [1, 1, 1, 1]   # step of 1 along every dimension
  pool_window = [1, 3, 3, 1]    # 3x3 pooling window
  pool_strides = [1, 2, 2, 1]   # step of 2 along the two middle dimensions
  lrn_alpha = 0.001 / 9.0

  # conv1: 5x5 kernels, 3 input channels, 64 output channels.
  with tf.variable_scope('conv1') as scope:
    filters = _variable_with_weight_decay(
        'weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)
    # padding='SAME' with unit strides leaves the spatial size unchanged.
    convolved = tf.nn.conv2d(images, filters, unit_strides, padding='SAME')
    offsets = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    conv1 = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name=scope.name)
    # NOTE(review): presumably reports the activation to TensorBoard.
    _activation_summary(conv1)

  # pool1: max pooling; the stride of 2 shrinks the spatial size.
  pool1 = tf.nn.max_pool(
      conv1, ksize=pool_window, strides=pool_strides,
      padding='SAME', name='pool1')

  # norm1: local response normalization.
  norm1 = tf.nn.lrn(
      pool1, 4, bias=1.0, alpha=lrn_alpha, beta=0.75, name='norm1')

  # conv2: 5x5 kernels, 64 input channels, 64 output channels.
  with tf.variable_scope('conv2') as scope:
    filters = _variable_with_weight_decay(
        'weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
    convolved = tf.nn.conv2d(norm1, filters, unit_strides, padding='SAME')
    offsets = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    conv2 = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name=scope.name)
    _activation_summary(conv2)

  # norm2: normalization comes before pooling in this second stage.
  norm2 = tf.nn.lrn(
      conv2, 4, bias=1.0, alpha=lrn_alpha, beta=0.75, name='norm2')

  # pool2
  pool2 = tf.nn.max_pool(
      norm2, ksize=pool_window, strides=pool_strides,
      padding='SAME', name='pool2')

  # local3: first fully connected layer.
  with tf.variable_scope('local3') as scope:
    # Flatten each example into one row so it can be fed to a matmul.
    flattened = tf.reshape(pool2, [FLAGS.batch_size, -1])
    # Per-example feature count (8*8*64 according to the original note,
    # i.e. for 32x32 inputs - TODO confirm against the real input size).
    flat_width = flattened.get_shape()[1].value
    # 384 is this layer's chosen output width, independent of earlier layers.
    weights = _variable_with_weight_decay(
        'weights', shape=[flat_width, 384], stddev=0.04, wd=0.004)
    offsets = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(flattened, weights) + offsets,
                        name=scope.name)
    _activation_summary(local3)

  # local4: second fully connected layer; its input width matches the 384
  # outputs of local3.
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay(
        'weights', shape=[384, 192], stddev=0.04, wd=0.004)
    offsets = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + offsets,
                        name=scope.name)
    _activation_summary(local4)

  # softmax_linear: plain linear layer (WX + b). Softmax is deliberately not
  # applied, because tf.nn.sparse_softmax_cross_entropy_with_logits takes the
  # unscaled logits and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay(
        'weights', shape=[192, NUM_CLASSES], stddev=1 / 192.0, wd=0.0)
    offsets = _variable_on_cpu(
        'biases', [NUM_CLASSES], tf.constant_initializer(0.0))
    logits = tf.add(tf.matmul(local4, weights), offsets, name=scope.name)
    _activation_summary(logits)

  return logits

整个模型的代码就如上所示：两个卷积层+两个全连接层+最后的一个线性（softmax_linear）层，最终得到每一幅图像的NUM_CLASSES个未归一化的logits（softmax在损失函数内部计算），再接一步处理（例如取argmax）就可以输出各个类别了。

下面从 [batch_size, 32, 32, 3] 的输入数据开始，详细计算经过每一层时的size大小

第一层卷积层：卷积层统一是使用SAME的方式，用全0去补全。所以padding会保证卷积操作不会影响图片大小。而我们的步长又设置成了1。所以在卷积层中，我们的图片size并没有减少。并且可以计算出来补0的圈数，padding的值(p) 必须要满足p = (f-1)/2，其中f是过滤器的大小，p = (5-1) / 2 = 2。

第一层池化层：上面卷积层输出为 [batch_size, 32, 32, 64]，第一维是batch，中间两维是height、width，第四维是channel（即filter个数）。池化层虽然也有padding操作，但是因为滑动步长不为1，所以会改变图像size

padding的值(p) 必须要满足p = (f-1)/2 。其中f是过滤器的大小。那么在我们这里padding的值就是p = (3-1) / 2 = 1。而图片的大小可以用这个公式计算：n' = ⌊(n + 2p - f)/s⌋ + 1，其中s是步长，n是输入图片大小，n'是输出图片大小。我们的原始图片是32*32，那么刚才的公式就是 ⌊(32 + 1*2 - 3)/2⌋ + 1 = ⌊15.5⌋ + 1 = 16（TensorFlow的SAME方式下输出大小为 ⌈n/s⌉ = ⌈32/2⌉ = 16，结果一致）。这样经过第一个卷积+池化的时候，图片就变成了16*16

第二层卷积层：因为padding操作+步长为1，所以未改变图像size

第二层池化层：（16 + 1*2 - 3）/2 +1 = 8.5，然后取整为8

第一层全连接层：图像的size为8*8，又因为第二层卷积层输出的filter为64，所以在reshape成一维的时候，dim就为8*8*64，384是自己指定的当层输出单元数（即384个）

第二层全连接层：上面的全连接层输出已经是1维384，那么这层的参数项的shape 应为[384, 当层的输出单元数目即192]

猜你喜欢