前言:
前面两节介绍了AlexNet和VGG-19模型的结构,以及具体的实现。正如前面所讲,两者在结构上是相似的。但是接下来要讲的ResNet(残差网络)不仅在深度上取得了巨大的进步,而且在架构上也与之前的网络不同。残差网络由何恺明博士等人提出,首次发表于CVPR的文章《Deep Residual Learning for Image Recognition》。值得注意的是他还是广东省的高考状元,两次获得ICCV 最佳论文奖。果然大佬都是用来膜拜的。言归正传,咱们就看一下残差网络的架构以及最后的实现吧。
模型一: Resnet(残差网络)
没有找到152的网络的架构图,凑合看一下50层的残差网络的结构吧。 看过上两篇的就知道了,这个图是为了和最后的代码进行对比的。
同样首先是程序的主程序:
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 18:56
# @Author : YYLin
# @Email : [email protected]
# @File : Resnet_50_101_152_Train.py
import Resnet_50_101_152
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.utils import to_categorical
# Loading resnet_v2_152 can exhaust GPU memory; uncomment the line below to
# fall back to CPU training in that case.
# With inception_V4 a batch size of 8 already runs out of memory, which shows
# how heavy that network is.
# os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

# Hyper-parameters and input geometry used by the model.
batch_size = 32
img_high = 100
img_width = 100
Channel = 3
label = 9
resnet_type = 'resnet_v2_50'

# Placeholders for the input images and their one-hot labels.
inputs = tf.placeholder(tf.float32, [batch_size, img_high, img_width, Channel], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, label], name='label')
# NOTE: keep_prob is not fed or consumed anywhere in this script; it is kept
# only so the module-level name stays available.
keep_prob = tf.placeholder("float")
is_train = tf.placeholder(tf.bool)

# The ResNet builder uses batch normalization, so it needs the is_train flag.
net = Resnet_50_101_152.resnet(inputs, resnet_type, is_train, label)
# The network returns logits shaped [batch, 1, 1, classes]; drop the two
# singleton spatial axes.
score = tf.squeeze(net, axis=(1, 2))
softmax_result = tf.nn.softmax(score)

# Loss and optimizer.
# The fused op computes the same summed cross-entropy as
# -sum(y * log(softmax(score))) but works on the logits directly, so it does
# not produce NaN when a predicted probability underflows to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=score))
# tf.layers.batch_normalization registers its moving mean/variance updates in
# UPDATE_OPS; they only run if the train op depends on them. Without this the
# is_train=False (validation) path would use the initial statistics forever.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy of the predicted class against the one-hot label.
correct_prediction = tf.equal(tf.argmax(softmax_result, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Only the images and their class labels are needed here; the text data that
# ships with the dataset is not loaded.
_DATASET_DIRS = {
    'train': '../Dataset/baidu/train_image/train',
    'valid': '../Dataset/baidu/valid_image/valid',
}
# (split, num_classes) -> (images, one-hot labels). Filled on first use so the
# image folders are decoded once per process instead of once per batch.
_DATASET_CACHE = {}


def _read_image_folder(path, num_classes):
    """Load every image below ``path/<class_dir>/`` and build one-hot labels.

    Class directories are visited in sorted order so that the same directory
    name always maps to the same label index, for every split and every run
    (``os.listdir`` alone returns entries in arbitrary order).

    Returns:
        A tuple ``(images, labels)``: ``images`` is ``np.array`` of the pixel
        data scaled by 1/255, ``labels`` is ``to_categorical`` of the class
        indices with ``num_classes`` columns.

    Raises:
        IOError: if ``cv2.imread`` cannot decode one of the files.
    """
    class_dirs = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    images = []
    labels = []
    for class_index, child_dir in enumerate(class_dirs):
        child_path = os.path.join(path, child_dir)
        for file_name in os.listdir(child_path):
            file_path = os.path.join(child_path, file_name)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise IOError('cannot read image file: %s' % file_path)
            images.append(img / 255.0)
            labels.append(class_index)
    return np.array(images), to_categorical(labels, num_classes)


def load_satetile_image(batch_size=128, dataset='train', num_classes=9):
    """Return one randomly sampled batch of images and one-hot labels.

    Args:
        batch_size: maximum number of samples to return; fewer are returned
            when the split holds fewer images.
        dataset: ``'train'`` selects the training folder; any other value
            selects the validation folder.
        num_classes: width of the one-hot label vectors.

    Returns:
        ``(x_batch, y_batch)`` drawn without replacement from the split.

    Note:
        The decoded split is kept in memory after the first call.
    """
    split = 'train' if dataset == 'train' else 'valid'
    cache_key = (split, num_classes)
    if cache_key not in _DATASET_CACHE:
        _DATASET_CACHE[cache_key] = _read_image_folder(
            _DATASET_DIRS[split], num_classes)
    x_all, y_all = _DATASET_CACHE[cache_key]

    # Shuffle the sample indices and keep the first batch_size of them.
    data_index = np.arange(x_all.shape[0])
    np.random.shuffle(data_index)
    data_index = data_index[:batch_size]
    return x_all[data_index], y_all[data_index]
# Feed batches and train the network.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500000//batch_size):
        img, img_label = load_satetile_image(batch_size, dataset='train')
        # The reference implementation adds a regularization term; it is left
        # out of the loss here for now.
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label, is_train: True})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label, is_train: True})
        if i % 50 == 0:
            # Fetch a validation batch only on the steps that report it.
            img_valid, img_valid_label = load_satetile_image(batch_size, dataset='valid')
            valid_score = accuracy.eval(feed_dict={inputs: img_valid, y: img_valid_label, is_train: False})
            print("step %d, valid accuracy %g" % (i, valid_score))
然后是本节的核心代码: Resnet (残差网络)
本代码中残差网络中的层数分别是50,101,152层。为了方便分析,在这里只是简单的分析一下层数为50层的残差网络的架构。
第一: 从残差网络的结构图中,我们可以看到conv2到conv5中重复单元数分别是[3、 4、 6、3 ],代码中resnet_v2_50对应的UNITS单元数也为[3、 4、 6、3 ]
第二: 残差网络中第一层的卷积核大小为7 * 7, 步长为 2 * 2,通道数为64 代码中验证通过
第三: 第一层卷积之后是最大池化层,池化窗口大小为3 * 3, 步长是 2 * 2。 代码中验证通过
第四: 开始验证重复部分,代码中也即是对应重复部分的卷积操作。 这个是猜的不确定是否正确。科学有时候是需要猜的
# Excerpt from resnet(): stack the four residual stages; stage i is built
# from UNITS[resnet_v2][i] units with CHANNELS[i] as its channel argument.
for i in range(4):
    net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
                CHANNELS[i], is_train)
第五: 全局池化层然后softmax输出, 代码中增加了batch normalize 以及relu激活函数 验证通过
所以说下面的代码复现还是很忠于原论文的。所以总体上验证通过。打完收工!!!!!!!!!!!!!!!
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 8:36
# @Author : YYLin
# @Email : [email protected]
# @File : Resnet_50_101_152.py
# 本代码实现的是残差网络 50 101 152 参考代码中是有训练集 验证集 测试集
# 首先这个代码的整体架构是让人相信的
import tensorflow as tf
# Number of bottleneck units in each of the four stages, keyed by the model
# name that is passed to resnet().
UNITS = {
    'resnet_v2_50': [3, 4, 6, 3],
    'resnet_v2_101': [3, 4, 23, 3],
    'resnet_v2_152': [3, 8, 36, 3],
}
# Channel argument handed to block() for each stage; a bottleneck unit
# outputs four times this many channels.
CHANNELS = [
    64,
    128,
    256,
    512,
]
def bottleneck(net, channel, is_train, holes=1, c_name='pretrain', stride=1,
               shortcut_conv=False, key=tf.GraphKeys.GLOBAL_VARIABLES):
    """Build one pre-activation bottleneck unit (1x1 -> 3x3 -> 1x1).

    Args:
        net: input feature map; channels are on the last axis.
        channel: output width of the first two convolutions; the last
            convolution (and the projection shortcut) outputs ``channel * 4``.
        is_train: forwarded as ``training`` to every batch normalization.
        holes: ``rate`` passed to the two 1x1 atrous convolutions.
        c_name: unused; kept so existing callers keep working.
        stride: stride of the 3x3 convolution and of the projection shortcut.
        shortcut_conv: if True the shortcut is a 1x1 convolution of the
            pre-activated input; otherwise it is the unit's input itself.
        key: additional collection every variable is added to.

    Returns:
        ``(residual, shortcut)``; the caller is expected to add them.
    """
    with tf.variable_scope('bottleneck_v2', reuse=tf.AUTO_REUSE):
        # Initializers and regularizer shared by all variables of the unit.
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)

        # Keep the raw input: the identity shortcut must bypass the
        # pre-activation, otherwise the skip path is no longer an identity
        # mapping (it would be BN + ReLU of the input).
        inputs = net

        # Pre-activation: batch normalization followed by ReLU.
        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='preact')
        net = tf.nn.relu(net)

        if shortcut_conv:
            # Projection shortcut: 1x1 convolution on the pre-activated
            # input so the shortcut matches the residual's output shape.
            with tf.variable_scope('shortcut', reuse=tf.AUTO_REUSE):
                kernel = tf.get_variable(initializer=w_initializer,
                                         shape=[1, 1, net.shape[-1],
                                                channel*4],
                                         name='weights',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                shortcut = tf.nn.conv2d(input=net, filter=kernel,
                                        strides=[1, stride, stride, 1],
                                        padding='SAME')
                biases = tf.get_variable(initializer=b_initializer,
                                         shape=channel*4, name='biases',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                shortcut = tf.nn.bias_add(shortcut, biases)
        else:
            # Identity shortcut: the untouched input of the unit.
            shortcut = inputs

        # Convolution 1: 1x1, reduces the width to `channel`.
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, net.shape[-1], channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            # This normalization is also named 'preact', so it has to live
            # inside the conv scope to get variables of its own.
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # Convolution 2: 3x3, carries the unit's stride.
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[3, 3, channel, channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=net, filter=kernel,
                               strides=[1, stride, stride, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # Convolution 3: 1x1, expands to channel * 4; no normalization or
        # activation afterwards, the next unit's pre-activation handles it.
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, channel, channel*4],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel*4, name='biases',
                                     regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)

    return net, shortcut
def block(net, name, unit, channel, is_train):
    """Chain ``unit`` bottleneck units under the variable scope ``name``.

    The first unit uses a convolutional shortcut, with stride 1 when ``name``
    is ``'block1'`` and stride 2 for any other name. The remaining units call
    ``bottleneck`` with its default arguments. After each unit the residual
    and the shortcut are summed and fed to the next unit.

    Returns:
        The output of the last unit, or ``net`` unchanged if ``unit`` is 0.
    """
    first_stride = 1 if name == 'block1' else 2
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        for index in range(unit):
            unit_scope = 'unit_' + str(index + 1)
            with tf.variable_scope(unit_scope, reuse=tf.AUTO_REUSE):
                if index:
                    residual, shortcut = bottleneck(net, channel, is_train)
                else:
                    residual, shortcut = bottleneck(net, channel, is_train,
                                                    stride=first_stride,
                                                    shortcut_conv=True)
                net = tf.add(residual, shortcut)
    return net
def resnet(input_, resnet_v2, is_train, classes):
    """Build the ResNet graph and return the classification logits.

    Args:
        input_: image batch with channels on the last axis.
        resnet_v2: key into ``UNITS`` selecting the depth; also used as the
            name of the outer variable scope.
        is_train: forwarded as ``training`` to batch normalization.
        classes: number of output classes.

    Returns:
        Logits produced by a 1x1 convolution on the globally pooled features;
        the two spatial axes have size 1.

    Raises:
        KeyError: if ``resnet_v2`` is not a key of ``UNITS``.
    """
    key = tf.GraphKeys.GLOBAL_VARIABLES
    # Resolve the configuration first so an unknown model name fails before
    # any variable or op is added to the graph.
    units = UNITS[resnet_v2]
    # Size the stem kernel from the input instead of assuming RGB; fall back
    # to 3 when the channel dimension is not statically known.
    in_channels = input_.shape.as_list()[-1] or 3

    with tf.variable_scope(resnet_v2, reuse=tf.AUTO_REUSE):
        # Initializers and regularizer shared by the stem and the logits.
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)

        # Stem: 7x7 convolution with stride 2 and 64 output channels.
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[7, 7, in_channels, 64],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=input_, filter=kernel,
                               strides=[1, 2, 2, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer, shape=64,
                                     name='biases', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)

        # 3x3 max pooling with stride 2.
        net = tf.nn.max_pool(value=net, ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1], padding='SAME')

        # Four residual stages.
        for i in range(4):
            net = block(net, 'block'+str(i+1), units[i],
                        CHANNELS[i], is_train)

        # Final normalization + ReLU, needed because the last unit ends
        # without an activation.
        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='postnorm')
        net = tf.nn.relu(net)

        # Global average pooling: the window covers the whole feature map,
        # so the spatial dimensions must be statically known.
        h, w = net.shape.as_list()[1:3]
        net = tf.nn.avg_pool(value=net, ksize=[1, h, w, 1],
                             strides=[1, 1, 1, 1], padding='VALID')

    # logits is not in scope 'resnet_v2' in order to fine-tune
    with tf.variable_scope('logits', reuse=tf.AUTO_REUSE):
        # 2048 with the stock CHANNELS table (512 * 4).
        features = net.shape.as_list()[-1]
        kernel = tf.get_variable(initializer=w_initializer,
                                 shape=[1, 1, features, classes],
                                 name='weights',
                                 regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.conv2d(input=net, filter=kernel,
                           strides=[1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable(initializer=b_initializer, shape=classes,
                                 name='biases', regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.bias_add(net, biases)
    return net
最后实验结果分析:
运行得实在太慢了,真心不想等了,模型跑得实在太慢了,等下一个专栏我专门介绍百度点石这个比赛的时候,再奉上完整的训练结果。