Training Your Own Dataset with Resnet_50_101_152

Preface:

The previous two posts covered the structures of AlexNet and VGG-19 and their concrete implementations; as noted there, the two are similar in structure. ResNet (the residual network) discussed next not only makes a huge leap in depth, but also differs architecturally from those earlier networks. It was first proposed by Kaiming He and his colleagues in the CVPR paper "Deep Residual Learning for Image Recognition". As an aside, he was the top gaokao scorer of Guangdong province and has won best-paper awards at top vision conferences more than once; truly someone to look up to. Back to the topic: let's look at the architecture of the residual network and then its implementation.

Model 1: ResNet (Residual Network)

I could not find an architecture diagram for the 152-layer network, so the structure of the 50-layer residual network will have to do. As in the previous two posts, that structure is what the code at the end is checked against.
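
Since no figure is reproduced here, the stage layout of the 50-layer network from the original paper is summarized below as plain comments, so it can be cross-checked against the code further down:

# ResNet-50 stage layout as described in "Deep Residual Learning for Image Recognition":
# conv1   : 7x7, 64 channels, stride 2
# pool1   : 3x3 max pooling, stride 2
# conv2_x : [1x1, 64 ; 3x3, 64 ; 1x1, 256 ] x 3
# conv3_x : [1x1, 128; 3x3, 128; 1x1, 512 ] x 4
# conv4_x : [1x1, 256; 3x3, 256; 1x1, 1024] x 6
# conv5_x : [1x1, 512; 3x3, 512; 1x1, 2048] x 3
# head    : global average pooling, fully connected layer, softmax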

As usual, here is the main training program first:

# -*- coding: utf-8 -*-
# @Time    : 2019/7/2 18:56
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Resnet_50_101_152_Train.py
import Resnet_50_101_152
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.utils import to_categorical

# Resnet_152 runs out of GPU memory when loaded, so in that case training has to fall back to the CPU
# With inception_V4 a batch_size of 8 already overflows memory, which shows how heavy that network is
# os.environ['CUDA_VISIBLE_DEVICES'] = "-1"
# Parameters used throughout the model
batch_size = 32
img_high = 100
img_width = 100
Channel = 3
label = 9

resnet_type = 'resnet_v2_50'

# Placeholders for the input images and labels
inputs = tf.placeholder(tf.float32, [batch_size, img_high, img_width, Channel], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, label], name='label')
keep_prob = tf.placeholder("float")
is_train = tf.placeholder(tf.bool)

# ResNet_50_101_152 ends with batch normalization, so is_train is needed to switch between training and inference
net = Resnet_50_101_152.resnet(inputs, resnet_type, is_train, label)
score = tf.squeeze(net, axis=(1, 2))
softmax_result = tf.nn.softmax(score)

# Loss function and the corresponding optimizer
# tf.layers.batch_normalization tracks its moving statistics via UPDATE_OPS, so run them together with the train step
cross_entropy = -tf.reduce_sum(y*tf.log(softmax_result))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Report the prediction accuracy
correct_prediction = tf.equal(tf.argmax(softmax_result, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


# Only the images and their labels need to be loaded here; the accompanying text files are not used
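# Assumed directory layout (one sub-folder per class), e.g. ../Dataset/baidu/train_image/train/<class_name>/*.jpg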
def load_satetile_image(batch_size=128, dataset='train'):
    img_list = []
    label_list = []
    dir_counter = 0

    if dataset == 'train':
        path = '../Dataset/baidu/train_image/train'

        # Read every jpg file in each sub-folder under the path (one sub-folder per class) and collect it in a list
        for child_dir in os.listdir(path):
            child_path = os.path.join(path, child_dir)
            for dir_image in os.listdir(child_path):
                img = cv2.imread(os.path.join(child_path, dir_image))
                # resize so every image matches the placeholder size, then scale pixel values to [0, 1]
                img = cv2.resize(img, (img_width, img_high))
                img = img/255.0
                img_list.append(img)
                label_list.append(dir_counter)

            dir_counter += 1
    else:
        path = '../Dataset/baidu/valid_image/valid'

        # Read every jpg file in each sub-folder under the path (one sub-folder per class) and collect it in a list
        for child_dir in os.listdir(path):
            child_path = os.path.join(path, child_dir)
            for dir_image in os.listdir(child_path):
                img = cv2.imread(os.path.join(child_path, dir_image))
                # resize so every image matches the placeholder size, then scale pixel values to [0, 1]
                img = cv2.resize(img, (img_width, img_high))
                img = img / 255.0
                img_list.append(img)
                label_list.append(dir_counter)

            dir_counter += 1

    # Convert img_list to an np.array and one-hot encode the labels
    X_train = np.array(img_list)
    Y_train = to_categorical(label_list, 9)

    # Shuffle the samples and draw a random batch of size batch_size
    # print('X_train.shape, Y_train.shape:', X_train.shape, Y_train.shape)
    data_index = np.arange(X_train.shape[0])
    np.random.shuffle(data_index)
    data_index = data_index[:batch_size]
    x_batch = X_train[data_index, :, :, :]
    y_batch = Y_train[data_index, :]

    return x_batch, y_batch


# Start feeding data and training the model
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500000//batch_size):
        # Load a batch from the training set and one from the validation set
        img, img_label = load_satetile_image(batch_size, dataset='train')
        img_valid, img_valid_label = load_satetile_image(batch_size, dataset='valid')
        # print('Shape of the loaded batch', img.shape, type(img))

        # The reference implementation adds a regularization term to the loss, but it is omitted here for now
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label, is_train: True})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label, is_train: True})

        if i % 50 == 0:
            valid_score = accuracy.eval(feed_dict={inputs: img_valid, y: img_valid_label, is_train: False})
            print("step %d, valid accuracy %g" % (i, valid_score))

Next comes the core code of this section: ResNet (the residual network).

The code supports residual networks with 50, 101, and 152 layers. To keep the analysis manageable, only the 50-layer architecture is walked through here.

First: in the residual network's structure, the numbers of repeated units in conv2 through conv5 are [3, 4, 6, 3]; in the code, the UNITS entry for resnet_v2_50 is also [3, 4, 6, 3] (a quick layer-count check follows right after this list).

Second: the first convolution layer of the residual network uses a 7 x 7 kernel with a 2 x 2 stride and 64 output channels. Verified in the code.

Third: the first convolution is followed by a 3 x 3 max pooling layer with a 2 x 2 stride. Verified in the code.

Fourth: then comes the repeated part, which in the code corresponds to the loop over the four blocks shown below. (This one is a guess and may not be exactly right; science sometimes needs a guess.)

        for i in range(4):
            net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
                        CHANNELS[i], is_train)

Fifth: a global pooling layer followed by the softmax output; the code additionally applies batch normalization and a ReLU activation before the pooling. Verified.
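
As a quick sanity check on those unit counts (plain Python, not part of the training code): each bottleneck unit contains three convolutions, and the stem convolution plus the final classifier layer add two more, which reproduces the advertised depths:

# 3 convolutions per bottleneck unit + conv1 + classifier = network depth
for name, units in {'resnet_v2_50': [3, 4, 6, 3],
                    'resnet_v2_101': [3, 4, 23, 3],
                    'resnet_v2_152': [3, 8, 36, 3]}.items():
    print(name, 3 * sum(units) + 2)   # -> 50, 101, 152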

So the reimplementation below stays quite faithful to the original paper, and overall the checks pass. Done!

# -*- coding: utf-8 -*-
# @Time    : 2019/7/2 8:36
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Resnet_50_101_152.py
# This file implements residual networks with 50, 101, and 152 layers; the reference code also covers training, validation and test sets
# The overall structure of this code can be trusted
import tensorflow as tf
UNITS = {'resnet_v2_50': [3, 4, 6, 3], 'resnet_v2_101': [3, 4, 23, 3],
         'resnet_v2_152': [3, 8, 36, 3]}
CHANNELS = [64, 128, 256, 512]


def bottleneck(net, channel, is_train, holes=1, c_name='pretrain', stride=1,
               shortcut_conv=False, key=tf.GraphKeys.GLOBAL_VARIABLES):
    with tf.variable_scope('bottleneck_v2', reuse=tf.AUTO_REUSE):
        # define initializer for weights and biases
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
        # batch normalization
        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='preact')
        net = tf.nn.relu(net)

        # shortcut
        if shortcut_conv:
            with tf.variable_scope('shortcut', reuse=tf.AUTO_REUSE):
                kernel = tf.get_variable(initializer=w_initializer,
                                         shape=[1, 1, net.shape[-1],
                                                channel*4],
                                         name='weights',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                # convolution for shortcut in order to output size
                shortcut = tf.nn.conv2d(input=net, filter=kernel,
                                        strides=[1, stride, stride, 1],
                                        padding='SAME')
                biases = tf.get_variable(initializer=b_initializer,
                                         shape=channel*4, name='biases',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                shortcut = tf.nn.bias_add(shortcut, biases)
        else:
            # shortcut
            shortcut = net

        # convolution 1
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, net.shape[-1], channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            # batch normalization
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # convolution 2
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[3, 3, channel, channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=net, filter=kernel,
                               strides=[1, stride, stride, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            # batch normalization
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # convolution 3
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, channel, channel*4],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel*4, name='biases',
                                     regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)

    return net, shortcut


def block(net, name, unit, channel, is_train):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        for i in range(unit):
            with tf.variable_scope('unit_'+str(i+1), reuse=tf.AUTO_REUSE):
                # block1 i=0 stride=1
                if i == 0:
                    if name != 'block1':
                        net, shortcut = bottleneck(net, channel, is_train,
                                                   stride=2,
                                                   shortcut_conv=True)
                    else:
                        net, shortcut = bottleneck(net, channel, is_train,
                                                   stride=1,
                                                   shortcut_conv=True)
                else:
                    net, shortcut = bottleneck(net, channel, is_train)
            net = tf.add(net, shortcut)

    return net


def resnet(input_, resnet_v2, is_train, classes):
    key = tf.GraphKeys.GLOBAL_VARIABLES
    with tf.variable_scope(resnet_v2, reuse=tf.AUTO_REUSE):
        # define initializer for weights and biases
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
        # convolution 1
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[7, 7, 3, 64],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=input_, filter=kernel,
                               strides=[1, 2, 2, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer, shape=64,
                                     name='biases', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)
            net = tf.nn.max_pool(value=net, ksize=[1, 3, 3, 1],
                                 strides=[1, 2, 2, 1], padding='SAME')

        for i in range(4):
            net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
                        CHANNELS[i], is_train)

        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='postnorm')
        net = tf.nn.relu(net)

        h, w = net.shape[1:3]
        net = tf.nn.avg_pool(value=net, ksize=[1, h, w, 1],
                             strides=[1, 1, 1, 1], padding='VALID')

    # logits is not in scope 'resnet_v2' in order to fine-tune
    with tf.variable_scope('logits', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=w_initializer,
                                 shape=[1, 1, 2048, classes], name='weights',
                                 regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.conv2d(input=net, filter=kernel,
                           strides=[1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable(initializer=b_initializer, shape=classes,
                                 name='biases', regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.bias_add(net, biases)
    return net
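
To sanity-check the forward graph before launching a long training run, a minimal sketch (the sizes match the training script above; for a 100 x 100 input the output should be [batch, 1, 1, 9] before the squeeze):

import tensorflow as tf
import Resnet_50_101_152

inputs = tf.placeholder(tf.float32, [32, 100, 100, 3], name='inputs')
is_train = tf.placeholder(tf.bool)
net = Resnet_50_101_152.resnet(inputs, 'resnet_v2_50', is_train, 9)
# 100 -> 50 (conv1) -> 25 (max pool) -> 25 -> 13 -> 7 -> 4 (blocks) -> 1 (global average pooling)
print(net.shape)   # expected: (32, 1, 1, 9)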


Final analysis of the experimental results:

Training runs far too slowly and I honestly do not want to wait for it to finish; the model converges very slowly. When the dedicated column on the Baidu Dianshi competition comes out, I will post the completed training results there.
