深度学习入门——利用卷积神经网络训练CIFAR—10数据集

CIFAR-10数据集简介

CIFAR-10是由Hinton的学生Alex Krizhevsky和Ilya Sutskever整理的一个用于普适物体的小型数据集。它一共包含10个类别的RGB彩色图片：飞机、汽车、鸟类、猫、鹿、狗、蛙类、马、船：

数据集包含50000张训练图片和10000张测试图片，与MNIST手写数字数据集的区别：

CIFAR-10	MNIST
3通道彩色RGB图像	灰度图像
尺寸32x32	尺寸28x28
比例、特征不同	特征较为明显

所以线性模型在CIFAR-10表现很差。

数据集及代码文件下载

数据集下载

官网地址：https://www.cs.toronto.edu/~kriz/cifar.html

代码下载

https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10

文件	用途
cifar10_input.py	在TensorFlow中读入CIFAR-10训练图片
cifar10_input_test.py	测试cifar10_input.py
cifar10.py	建立预测模型
cifar10_train.py	使用单个CPU或GPU训练
cifar10_eval.py	在测试集上测试模型的性能
cifar10_multi_gpu_train.py	使用多个GPU训练

也可以从我的网盘下载：

链接：https://pan.baidu.com/s/1GyiKrMeMpXALOxuQRn_zsg 密码：1a5y

从数据集提取图片

注意要先解压才能从.bin文件中提取

# coding: utf-8
# 导入当前目录的cifar10_input，这个模块负责读入cifar10数据
import cifar10_input
# 导入TensorFlow和其他一些可能用到的模块。
import tensorflow as tf
import os
import scipy.misc


def inputs_origin():
  # filenames一共5个，从data_batch_1.bin到data_batch_5.bin
  # 读入的都是训练图像
  # 改为自己数据集文件的地址
  filenames = [r'E:\Program\Python\Deep-Learning-21-Examples-master\chapter_2\cifar10_data\cifar-10-batches-bin\data_batch_%d.bin'
               % i for i in range(1, 6)]
  # 判断文件是否存在
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)
  # 将文件名的list包装成TensorFlow中queue的形式
  filename_queue = tf.train.string_input_producer(filenames)
  # cifar10_input.read_cifar10是事先写好的从queue中读取文件的函数
  # 返回的结果read_input的属性uint8image就是图像的Tensor
  read_input = cifar10_input.read_cifar10(filename_queue)
  # 将图片转换为实数形式
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)
  # 返回的reshaped_image是一张图片的tensor
  # 我们应当这样理解reshaped_image：每次使用sess.run(reshaped_image)，就会取出一张图片
  return reshaped_image


if __name__ == '__main__':
  # 创建一个会话sess
  with tf.Session() as sess:
    # 调用inputs_origin。cifar10_data/cifar-10-batches-bin是我们下载的数据的文件夹位置
    reshaped_image = inputs_origin()
    # 这一步start_queue_runner很重要。
    # 我们之前有filename_queue = tf.train.string_input_producer(filenames)
    # 这个queue必须通过start_queue_runners才能启动
    # 缺少start_queue_runners程序将不能执行
    threads = tf.train.start_queue_runners(sess=sess)
    # 变量初始化
    sess.run(tf.global_variables_initializer())
    # 创建文件夹cifar10_data/raw/
    if not os.path.exists('cifar10_data/raw/'):
      os.makedirs('cifar10_data/raw/')
    # 保存30张图片
    for i in range(30):
      # 每次sess.run(reshaped_image)，都会取出一张图片
      image_array = sess.run(reshaped_image)
      # 将图片保存
      scipy.misc.toimage(image_array).save('cifar10_data/raw/%d.jpg' % i)

结果

卷积神经网络模型注释

参考：https://blog.csdn.net/akadiao/article/details/79618342（这篇把所有文件都注释了，很全面）

#建立模型

def inference(images):
    # 第一层卷积
    with tf.variable_scope('conv1') as scope:
        # 卷积核
        kernel = _variable_with_weight_decay('weights',shape=[5, 5, 3, 64],stddev=5e-2, wd=None)
        # 卷积
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        # 偏置初始化为0
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        # ReLu激活
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        # 汇总
        _activation_summary(conv1)
    # 第一层池化
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    # lrn层
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    # 第二层卷积
    with tf.variable_scope('conv2') as scope:
        # 卷积核
        kernel = _variable_with_weight_decay('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=None)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        # 偏置初始化为0.1
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        # ReLu激活
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        # 汇总
        _activation_summary(conv2)
    # 第二lrn层
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
    # 第二层池化
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # 连接层
    with tf.variable_scope('local3') as scope:
        # 转换为一维向量
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        # 维数
        dim = reshape.get_shape()[1].value
        # 防止过拟合
        weights = _variable_with_weight_decay('weights', shape=[dim, 384], stddev=0.04, wd=0.004)
        # 偏置初始化为0.1
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        # ReLu激活
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
        _activation_summary(local3)

    # 连接层
    with tf.variable_scope('local4') as scope:
        # 防止过拟合
        weights = _variable_with_weight_decay('weights', shape=[384, 192], stddev=0.04, wd=0.004)
        # 偏置初始化为0.1
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        # ReLu激活
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
        _activation_summary(local4)

    # 线性层
    # (WX+b)不使用softmax,因为tf.nn.sparse_softmax_cross_entropy_with_logits接受未缩放的logits并在内部执行softmax以提高效率
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES], stddev=1/192.0, wd=None)
        # biases初始化为0
        biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        # (WX+b) 进行线性变换以输出 logits
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
        # 汇总
        _activation_summary(softmax_linear)
    return softmax_linear



# 模型训练

# 损失
def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    # 计算logits和labels之间的交叉熵
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name='cross_entropy_per_example')
    # 计算整个批次的平均交叉熵损失
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    # 把变量放入一个集合
    tf.add_to_collection('losses', cross_entropy_mean)
    # 总损失定义为交叉熵损失加上所有的权重衰减项(L2损失)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

# 总损失
def _add_loss_summaries(total_loss):
    # 指数移动平均
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    # 将指数移动平均应用于单个损失
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    # 单个损失损失和全部损失的标量summary
    for l in losses + [total_loss]:
        # 将每个损失命名为raw,并将损失的移动平均命名为原始损失
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op

# 训练CIFAR-10模型

def train(total_loss, global_step):
    # 影响学习率的变量
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    # 指数衰减
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    # 总损失
    loss_averages_op = _add_loss_summaries(total_loss)
    # 计算梯度
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]): train_op = tf.no_op(name='train')
    return train_op

训练的结果会保存在cifar10_train文件夹下

深度学习入门——利用卷积神经网络训练CIFAR—10数据集

CIFAR-10数据集简介

数据集及代码文件下载

从数据集提取图片

卷积神经网络模型注释

猜你喜欢