基于cifar10数据集的cnn图片分类模型

数据集下载地址（python版）：
https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz

该数据集被分成了若干部分／批次（batches）：CIFAR-10 的训练数据包含 5 个批次，文件名分别为 `data_batch_1`、`data_batch_2`，以此类推。每个批次中的每张图片都带有一个标签，属于以下 10 个类别之一：

* 飞机
* 汽车
* 鸟类
* 猫
* 鹿
* 狗
* 青蛙
* 马
* 船只
* 卡车

代码中 import 的 helper 是我自己写的一个工具模块。
它的内容在我的另一篇随笔《helper工具包——基于cifar10数据集的cnn分类模型的模块》里：把内容复制下来，用 python 编辑器保存为 py 文件，文件名为 helper（即 helper.py），放到与下载的数据集相同的路径下即可。
代码大部分我都仔仔细细的注释过了，希望大家认真看，一定可以看懂的。

import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib as mpl
import helper
import pickle
import tensorflow as tf
import random

# Configure the matplotlib font so that Chinese text in figures is not garbled.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False

# 0. Model hyper-parameters.
learning_rate = 0.01  # Learning rate: step size of each gradient-descent update.
batch_size = 256  # Mini-batch size: the data is fed to the network 256 samples at a time, one gradient step per mini-batch.
keep_probability = 0.7  # Dropout keep probability fed to tf.nn.dropout: each unit is KEPT with probability 0.7 (about 30% are zeroed).
image_shape = [32, 32, 3]  # Shape of one input image: [height, width, channels].
n_classes = 10    # Number of classes in the dataset.
epochs = 2000   # Number of full passes over the training data; more passes fit the data better but increase the risk of over-fitting.
# Example: with 1000 training samples and batch_size=10, one epoch takes 100 iterations.
every_save_model = 2 # Save a checkpoint every this many epochs.

'''
神经网络中常用的超参数
1. 学习率 η，2. 正则化参数 λ，3. 神经网络的层数 L，4. 每一个隐层中神经元的个数 j，5. 学习的回合数Epochs，6. 小批量数据 minibatch 的大小，7. 输出神经元的编码方式，8. 代价函数的选择，9. 权重初始化的方法，10. 神经元激活函数的种类，11.参加训练模型数据的规模 
'''

"""
网络结构图。
input_x              [-1, 32, 32, 3]    输入层，意为输入图片数据是32*32*3的尺寸，长、宽与通道数，-1意为batch_size(批量)大小，此处设置为自动
w1                   [5, 5, 3, 32]      权重，意为5*5*3的卷积核，32个，意为提取32张特征图，即32个特征
conv1                [-1, 32, 32, 32]   卷积层一，经过input_x与滤波器w1进行卷积运算后，得到：批量大小自动，长、宽、通道数为32*32*32的尺寸     
池化1(步幅为2)       [-1, 32/2, 32/2, 32]   池化层一：压缩数据，缩小图像，减小参数的数量和计算，意为，批量大小自动，执行步幅为2的平均池化或最大池化，此步完成后得到的是16*16*32尺寸的数据
w2                   [5, 5, 32, 128]    权重，意为5*5*32的卷积核，128个，卷积核数量通常都是上一次卷积核数量的倍数增加，意为提取128张特征图，即128个特征
conv2                [-1, 16, 16, 128]  卷积层二，经过池化一后的数据与滤波器w2进行卷积运算后，得到：批量大小自动，长、宽、通道数为16*16*128的尺寸，此次卷积相当于在第一次卷积提取出的特征的基础上，将第一次提取出来的特征的一些特征组合也提取出来，相较于第一次卷积结果，因为随着网络的加深，feature map的长宽尺寸缩小(池化)，本卷积层的每个map提取的特征越具有代表性（精华部分），所以后一层卷积层需要增加feature map的数量，才能更充分的提取出前一层的特征，一般是成倍增加
池化2(步幅为2)       [-1, 16/2, 16/2, 128] 
拉平层               [N, 8, 8, 128] --> [N, 8*8*128]
FC1(权重)            [8*8*128, 1024]
logits(权重)         [1024, 10]
预测概率值             -----> 使用softmax激活
"""
# Note on names such as VGG16 / ResNet50: the layer count only includes layers
# that have parameters; pooling, ReLU and loss layers are not counted.
# Build the model graph, step 1: create the trainable variables.
graph = tf.Graph() # Dedicated graph that holds the model variables created below.
with graph.as_default():
    # Convolution kernels are [height, width, in_channels, out_channels];
    # fully connected weights are [in_features, out_features].
    weights = {
        'conv1': tf.get_variable('w1', shape=[5, 5, 3, 32], initializer=tf.truncated_normal_initializer(stddev=0.1)),
    # tf.truncated_normal_initializer draws from a normal distribution and
    # re-draws any value more than two standard deviations from the mean.
        'conv2': tf.get_variable('w2', shape=[5, 5, 32, 128], initializer=tf.truncated_normal_initializer(stddev=0.1)),
        'fc1': tf.get_variable('w3', shape=[8*8*128, 1024], initializer=tf.truncated_normal_initializer(stddev=0.1)),
        'fc2': tf.get_variable('w4', shape=[1024, n_classes], initializer=tf.truncated_normal_initializer(stddev=0.1))
    }
    # One bias per output channel / output unit of the matching weight tensor.
    biases = {
        'conv1': tf.get_variable('b1', shape=[32], initializer=tf.zeros_initializer()),
        # tf.zeros_initializer initializes every element to 0.
        'conv2': tf.get_variable('b2', shape=[128], initializer=tf.zeros_initializer()),
        'fc1': tf.get_variable('b3', shape=[1024], initializer=tf.zeros_initializer()),
        'fc2': tf.get_variable('b4', shape=[n_classes], initializer=tf.zeros_initializer())
    }

# Location of the extracted CIFAR-10 python batches, relative to the working directory.
cifar10_dataset_folder_path = '../datas/cifar-10-batches-py'
if os.path.exists(cifar10_dataset_folder_path):
    # os.path.exists returns True when the given path exists and False otherwise;
    # 'yes' is printed only as a quick confirmation that the dataset folder was found.
    print('yes')

def explore_data():
    """Show statistics for one sample of the raw dataset.

    Asks ``helper.display_stats`` to describe sample 1001 of batch 5 in the
    dataset folder. Whatever the helper returns is discarded.
    """
    batch_number, sample_index = 5, 1001
    helper.display_stats(cifar10_dataset_folder_path, batch_number, sample_index)

def normalize(images, training=True):
    """Apply batch normalization to a batch of images or feature maps.

    Despite its name, this does not rescale values to the (0, 1) range; it
    adds a ``tf.layers.batch_normalization`` layer to the current graph.

    :param images: tensor to normalize (e.g. a batch of 32x32x3 images or a
        convolutional feature map).
    :param training: passed through to the batch-normalization layer. Use
        True while training and False for validation / test data. Previously
        this argument was ignored and True was always used.
    :return: the batch-normalized tensor.
    """
    # NOTE(review): this function is also handed to
    # helper.preprocess_and_save_data as the image pre-processing step, where a
    # NumPy result is presumably expected rather than a tensor - confirm.
    return tf.layers.batch_normalization(images, training=training)

def one_hot_encode(x, num_classes=10):
    """Convert integer class labels to one-hot rows.

    :param x: labels as a list or NumPy array of integers in
        ``[0, num_classes)``; any shape is accepted and flattened.
    :param num_classes: width of each one-hot row (10 for CIFAR-10).
    :return: float NumPy array of shape ``(len(labels), num_classes)`` with a
        single 1 per row, in the column given by the label.
    """
    # np.asarray lets plain lists through (the original called .reshape on the
    # argument directly, which fails for a list).
    labels = np.asarray(x, dtype=int).reshape(-1)
    # Row i of the identity matrix is the one-hot vector for class i. The
    # result is NOT transposed: one row per sample matches the
    # [None, n_classes] label placeholder used by the model.
    return np.eye(num_classes)[labels]

def preprocess_data_and_save():
    """Pre-process the training, validation and test sets and save them.

    The work is done by ``helper.preprocess_and_save_data``; this function
    only supplies the dataset folder and the two pre-processing callbacks.
    """
    preprocessors = (normalize, one_hot_encode)
    helper.preprocess_and_save_data(cifar10_dataset_folder_path, *preprocessors)


# Checkpoint: once the pre-processed data has been saved to disk, later runs
# can start from here (the pre-processing code above need not be re-run).
#
# pickle serializes Python objects: pickle.dump(obj, file) writes an object to
# an open file and pickle.load(file) reads it back. 'rb' opens the file for
# binary reading. Only unpickle files you created yourself - loading a pickle
# from an untrusted source can execute arbitrary code.
# The `with` block closes the file handle as soon as loading has finished.
with open('../datas/cifar10/preprocess_validation.p', mode='rb') as validation_file:
    valid_features, valid_labels = pickle.load(validation_file)
# print(len(valid_features))


def cnn_net_input(image_shape, n_classes):
    """Create the placeholders that feed the network.

    :param image_shape: shape of one raw input image; its first three entries
        are used as the trailing dimensions of the image placeholder.
    :param n_classes: number of classes, i.e. the width of the label rows.
    :return: tuple ``(input_x, input_y, change_learning_rate, keep_probab)``
        of float32 placeholders: image batch, label batch, learning rate and
        dropout keep probability.
    """
    # Leading None leaves the batch dimension unspecified.
    image_dims = [None, image_shape[0], image_shape[1], image_shape[2]]
    images = tf.placeholder(tf.float32, image_dims, name='input_x')
    labels = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    # The two placeholders below are created without a fixed shape.
    lr = tf.placeholder(tf.float32, shape=None, name='change_learning_rate')
    keep = tf.placeholder(tf.float32, shape=None, name='keep_probab')
    return images, labels, lr, keep


def conv2d(input_tensor, filter_w, filter_b, strides=None):
    """Convolution followed by bias addition and ReLU activation.

    :param input_tensor: 4-D input tensor.
    :param filter_w: 4-D convolution kernel.
    :param filter_b: bias vector added to the convolution output.
    :param strides: list of four stride values handed to ``tf.nn.conv2d``;
        defaults to ``[1, 1, 1, 1]``.
    :return: the activated feature map.
    """
    # A None sentinel replaces the former mutable list default so the default
    # value can never be shared or modified between calls.
    if strides is None:
        strides = [1, 1, 1, 1]
    # 1. Convolution. padding='SAME' zero-pads the border so that with stride 1
    #    the output keeps the spatial size of the input.
    conv = tf.nn.conv2d(
        input=input_tensor, filter=filter_w, strides=strides, padding='SAME'
    )
    # 2. Add the bias.
    conv = tf.nn.bias_add(conv, filter_b)
    # 3. Activation.
    return tf.nn.relu(conv)


def maxpool2d(input_tensor, k=2):
    """Max-pool a 4-D tensor with a k x k window and a stride of k.

    :param input_tensor: tensor to pool.
    :param k: used for both the window size and the stride along the two
        middle dimensions; the first and last dimensions use 1.
    :return: the pooled tensor (padding='SAME').
    """
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    return tf.nn.max_pool(value=input_tensor, ksize=window, strides=step, padding='SAME')


def flatten(input_tensor):
    """Reshape a 4-D feature map to 2-D ``[batch, features]``.

    :param input_tensor: 4-D tensor, e.g. [N, 8, 8, 128].
    :return: tensor reshaped to ``[-1, d1 * d2 * d3]`` where d1..d3 are the
        last three static dimensions of the input.
    """
    dims = input_tensor.get_shape()
    feature_count = dims[1] * dims[2] * dims[3]
    return tf.reshape(input_tensor, shape=[-1, feature_count])


def fully_connect(input_tensor, weights, biases, activation=tf.nn.relu):
    """Fully connected layer, also used as the output layer.

    :param input_tensor: 2-D input tensor.
    :param weights: weight matrix multiplied with the input.
    :param biases: bias added to the matrix product.
    :param activation: callable applied to the result; pass a falsy value
        (e.g. None) to get the raw values back, as needed for the logits.
    :return: ``activation(input_tensor @ weights + biases)``, or the
        un-activated sum when no activation is given.
    """
    pre_activation = tf.matmul(input_tensor, weights) + biases
    if not activation:
        # Output layer: hand back the logits untouched.
        return pre_activation
    return activation(pre_activation)


def model_net(input_x, weights, biases, keep_prob, istrain):
    """Build the CNN and return its logits.

    Layer layout (N is the batch size):

        input_x              [N, 32, 32, 3]
        conv1 (w1 5x5x3x32)  [N, 32, 32, 32]   + dropout + batch norm
        max-pool 1, stride 2 [N, 16, 16, 32]
        conv2 (w2 5x5x32x128)[N, 16, 16, 128]  + dropout + batch norm
        max-pool 2, stride 2 [N, 8, 8, 128]
        flatten              [N, 8*8*128]
        fc1                  [N, 1024]         + dropout
        logits               [N, 10]           (softmax is applied by the loss)

    :param input_x: placeholder holding the raw image batch.
    :param weights: dict with the keys 'conv1', 'conv2', 'fc1', 'fc2'.
    :param biases: dict with the same keys as ``weights``.
    :param keep_prob: dropout keep-probability placeholder; it now drives
        every dropout layer, so feeding 1.0 disables dropout everywhere.
    :param istrain: Python bool. True makes batch normalization use batch
        statistics; False makes it use the moving averages.
    :return: the un-activated logits tensor.
    """
    with tf.variable_scope('Network'):
        # Conv 1: [N, 32, 32, 3] -> [N, 32, 32, 32]
        conv1 = conv2d(
            input_tensor=input_x, filter_w=weights['conv1'], filter_b=biases['conv1'])
        # Use the placeholder, not the global constant, so dropout can be
        # switched off at evaluation time.
        conv1 = tf.nn.dropout(conv1, keep_prob=keep_prob)
        # Batch norm is always part of the graph so training and inference
        # graphs share the same variables; only its mode depends on istrain.
        conv1 = tf.layers.batch_normalization(conv1, training=istrain)
        # Pool 1: [N, 32, 32, 32] -> [N, 16, 16, 32]
        pool1 = maxpool2d(conv1)

        # Conv 2: [N, 16, 16, 32] -> [N, 16, 16, 128]
        conv2 = conv2d(
            input_tensor=pool1, filter_w=weights['conv2'], filter_b=biases['conv2'])
        conv2 = tf.nn.dropout(conv2, keep_prob=keep_prob)
        conv2 = tf.layers.batch_normalization(conv2, training=istrain)
        # Pool 2: [N, 16, 16, 128] -> [N, 8, 8, 128]
        pool2 = maxpool2d(conv2)

        # Flatten: [N, 8, 8, 128] -> [N, 8*8*128]
        flatted = flatten(pool2)

        # Fully connected 1: [N, 8*8*128] -> [N, 1024]
        fc1 = fully_connect(
            input_tensor=flatted, weights=weights['fc1'], biases=biases['fc1'])
        fc1 = tf.nn.dropout(fc1, keep_prob=keep_prob)

        # Output layer: [N, 1024] -> [N, 10]; no activation, these are logits.
        logits = fully_connect(
            input_tensor=fc1, weights=weights['fc2'], biases=biases['fc2'], activation=None
        )
        return logits


# todo 自己定义两个执行会话环节需要使用的辅助函数。
def train_session(sess, train_opt, input_x, input_y, batch_x, batch_y, keep_prob, keep_probability, change_learning_rate, learning_rate):
    """Run one optimization step on a single mini-batch.

    :param sess: session object used to run the step.
    :param train_opt: the training operation to execute.
    :param input_x: feed key for the image batch.
    :param input_y: feed key for the label batch.
    :param batch_x: image data of the current mini-batch.
    :param batch_y: label data of the current mini-batch.
    :param keep_prob: feed key for the dropout keep probability.
    :param keep_probability: keep-probability value to feed.
    :param change_learning_rate: feed key for the learning rate.
    :param learning_rate: learning-rate value to feed.
    :return: None; the optimizer is run only for its side effect.
    """
    feed_keys = (input_x, input_y, keep_prob, change_learning_rate)
    feed_values = (batch_x, batch_y, keep_probability, learning_rate)
    sess.run(train_opt, feed_dict=dict(zip(feed_keys, feed_values)))


def print_stats(sess, loss, accuracy, input_x, input_y,  batch_x, batch_y, keep_probab, keep_probability, change_learning_rate, learning_rate):
    """Evaluate loss and accuracy on one batch and print them.

    The printed label reads "Valid Accuracy", but the numbers describe
    whichever batch the caller passes in.

    :param sess: session object used for the evaluation.
    :param loss: loss tensor defined in the graph.
    :param accuracy: accuracy tensor defined in the graph.
    :param input_x: feed key for the image batch.
    :param input_y: feed key for the label batch.
    :param batch_x: image data to evaluate.
    :param batch_y: label data to evaluate.
    :param keep_probab: feed key for the dropout keep probability.
    :param keep_probability: keep-probability value to feed.
    :param change_learning_rate: feed key for the learning rate.
    :param learning_rate: learning-rate value to feed.
    :return: None; the metrics are only printed.
    """
    feed_keys = (input_x, input_y, keep_probab, change_learning_rate)
    feed_values = (batch_x, batch_y, keep_probability, learning_rate)
    batch_loss, batch_accuracy = sess.run([loss, accuracy], dict(zip(feed_keys, feed_values)))
    print('Loss:{:.5f} - Valid Accuracy:{:.4f}'.format(batch_loss, batch_accuracy))


def create_file_path(path):
    """Create the directory ``path`` (with parents) if it does not exist yet.

    A confirmation message is printed only when the directory is created;
    an already existing path is left untouched and nothing is printed.
    """
    if os.path.exists(path):
        return
    os.makedirs(path)
    print('成功创建路径:{}'.format(path))
# ____________________________________________________________________________________________________
def train_single_batch():
    """Train on the first pre-processed training batch only.

    Intended as a quick check that the model trains before running on all of
    the data. No checkpoint is written.

    Fixes relative to the earlier version:
    * the model is built in the module-level ``graph`` that owns ``weights``
      and ``biases`` (building it in a separate graph mixes tensors from two
      graphs, which TensorFlow rejects);
    * the dropout placeholder is bound to the name that is actually used
      afterwards (previously a NameError);
    * the optimizer reads the learning-rate placeholder that is being fed.

    NOTE(review): every call adds another copy of the network ops to
    ``graph``; presumably only one of the train/test entry points is meant to
    be run per process - confirm.

    :return: None.
    """
    single_batch_id = 1
    # I. Build the graph.
    with graph.as_default():
        # 1. Placeholders (images, labels, learning rate, dropout).
        input_x, input_y, change_learning_rate, keep_probab = cnn_net_input(image_shape, n_classes)
        # 2. Network: images in, logits out.
        logits = model_net(input_x, weights, biases, keep_probab, True)
        # 3. Loss.
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=input_y
        ))
        # 4. Optimizer. The control dependency makes every training step also
        #    run the batch-normalization update ops.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(learning_rate=change_learning_rate)
            train_opt = optimizer.minimize(loss)
        # 5. Accuracy.
        correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(input_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # II. Run the session.
        with tf.Session() as sess:
            # 1. Initialize all variables.
            sess.run(tf.global_variables_initializer())
            # 2. Epoch loop.
            for epoch in range(epochs):
                # 3. Mini-batch loop over the single training batch file.
                for batch_x, batch_y in helper.load_preprocess_training_batch(single_batch_id, batch_size):
                    # 4. One optimization step.
                    train_session(sess, train_opt, input_x, input_y, batch_x, batch_y, keep_probab,
                                  keep_probability, change_learning_rate, learning_rate)
                    print('Epoch {:>2}, CIFAR-10 Batch:{}'.format(epoch + 1, single_batch_id), end='')
                    # 5. Report loss / accuracy on the mini-batch just used.
                    #    Keep probability 1.0 so dropout does not distort the
                    #    reported numbers.
                    print_stats(sess, loss, accuracy, input_x, input_y, batch_x, batch_y, keep_probab,
                                1.0, change_learning_rate, learning_rate)
# ________________________________________________________________________________________________________

def train_all_batch():
    """Train on all five pre-processed training batches.

    Training resumes from the latest checkpoint in ``./models/checkpoints``
    when one exists, otherwise it starts from freshly initialized variables.
    A checkpoint is saved every ``every_save_model`` epochs.

    Fixes relative to the earlier version:
    * the training op now depends on the UPDATE_OPS collection, so the
      batch-normalization moving statistics are updated while training;
    * a batch file that yields no mini-batches is skipped instead of
      reporting statistics on an undefined or stale mini-batch;
    * statistics are computed with a keep probability of 1.0.

    :return: None.
    """
    n_batches = 5
    save_path = './models/checkpoints'
    # I. Build the graph.
    with graph.as_default():
        # 1. Placeholders (images, labels, learning rate, dropout).
        input_x, input_y, change_learning_rate, keep_probab = cnn_net_input(image_shape, n_classes)
        # 2. Network: images in, logits out.
        logits = model_net(input_x, weights, biases, keep_probab, True)
        # 3. Loss.
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                                         labels=input_y))
        # 4. Optimizer; run the batch-normalization update ops with each step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(learning_rate=change_learning_rate)
            train_opt = optimizer.minimize(loss)
        # 5. Accuracy.
        correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(input_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # 6. Saver and checkpoint directory.
        saver = tf.train.Saver(max_to_keep=2)
        create_file_path(save_path)

        # II. Run the session.
        with tf.Session() as sess:
            # 1. Restore from a checkpoint if there is one, else initialize.
            ckpt = tf.train.get_checkpoint_state(save_path)
            if ckpt is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)
                saver.recover_last_checkpoints(ckpt.all_model_checkpoint_paths)
                print('从持久化文件中恢复模型')
            else:
                sess.run(tf.global_variables_initializer())
                print('没有持久化文件，从头开始训练!')

            # 2. Epoch loop.
            print("epochs: {}".format(epochs))
            for epoch in range(epochs):
                # Visit every training batch file.
                for batch_i in range(1, n_batches + 1):
                    batch_x = batch_y = None
                    # 3. Mini-batch loop.
                    for batch_x, batch_y in helper.load_preprocess_training_batch(batch_i, batch_size):
                        # 4. One optimization step.
                        train_session(sess, train_opt, input_x, input_y, batch_x, batch_y, keep_probab,
                                      keep_probability, change_learning_rate, learning_rate)
                    if batch_x is None:
                        # The loader produced nothing for this batch file.
                        continue
                    print('Epoch {:>2}, CIFAR-10 Batch:{}'.format(epoch + 1, batch_i), end='')
                    # 5. Report loss / accuracy on the last mini-batch of this
                    #    batch file, with dropout disabled (keep prob 1.0).
                    print_stats(sess, loss, accuracy, input_x, input_y, batch_x, batch_y, keep_probab,
                                1.0, change_learning_rate, learning_rate)

                # Persist the model.
                if epoch % every_save_model == 0:
                    save_file = '_{}_model.ckpt'.format(epoch)
                    save_file = os.path.join(save_path, save_file)
                    saver.save(sess, save_path=save_file)
                    print('Model saved to {}'.format(save_file))

def gotest_model():
    """Evaluate the latest checkpoint on the pre-processed test set.

    Prints the overall test accuracy and a congratulation message when it
    exceeds 60%.

    Changes relative to the earlier version:
    * the test-data file is closed after loading;
    * the loss and the optimizer are no longer built, since evaluation only
      needs the accuracy tensor (the saver then restores only the variables
      the network itself uses);
    * the accuracy is weighted by batch size, so a smaller final batch does
      not skew the result.

    :return: None.
    """
    # Only unpickle files you created yourself.
    with open('../datas/cifar10/preprocess_test.p', mode='rb') as test_file:
        test_features, test_labels = pickle.load(test_file)

    save_path = './models/checkpoints'
    # I. Build the graph.
    with graph.as_default():
        # 1. Placeholders; the learning-rate placeholder is not needed here.
        input_x, input_y, _, keep_probab = cnn_net_input(image_shape, n_classes)
        # 2. Network: images in, logits out.
        # NOTE(review): istrain stays True so the graph matches the one built
        # for training; batch normalization therefore uses per-batch statistics
        # during testing. Switching to False is only correct once model_net
        # keeps batch norm in the inference graph and training updates the
        # moving averages - confirm before changing.
        logits = model_net(input_x, weights, biases, keep_probab, True)
        # 3. Accuracy.
        correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.argmax(input_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        # 4. Saver used to restore the trained variables.
        saver = tf.train.Saver(max_to_keep=2)

        # II. Run the session.
        with tf.Session() as sess:
            # 1. Restore the latest checkpoint if there is one.
            ckpt = tf.train.get_checkpoint_state(save_path)
            if ckpt is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('从持久化文件中恢复模型')
            else:
                # No checkpoint: the accuracy below is that of an untrained model.
                sess.run(tf.global_variables_initializer())
                print('没有持久化文件，从头开始训练!')

            # 2. Accumulate the number of correct predictions over all batches.
            correct_total = 0.0
            sample_total = 0
            for test_batch_x, test_batch_y in helper.batch_features_labels(test_features, test_labels, batch_size):
                test_dict = {input_x: test_batch_x,
                             input_y: test_batch_y,
                             keep_probab: 1.0}
                test_batch_acc = sess.run(accuracy, test_dict)
                # Assumes each yielded batch supports len() (list / NumPy array).
                batch_count = len(test_batch_x)
                correct_total += test_batch_acc * batch_count
                sample_total += batch_count

            # With no test data the accuracy is undefined (NaN), as before.
            test_accuracy = correct_total / sample_total if sample_total else float('nan')
            print('Test Accuracy:{:.5f}'.format(test_accuracy))
            if test_accuracy > 0.6:
                print('恭喜你，通过了Cifar10项目！你已经掌握了CNN网络的基础知识!')


if __name__=='__main__':
    # Script entry point: exactly one step is enabled at a time. Uncomment the
    # step you want to run; as written, only the test-set evaluation runs.
    # explore_data()
    # train_all_batch()
    gotest_model()
    # train_single_batch()

基于cifar10数据集的cnn图片分类模型

猜你喜欢