ResNet网络结构:
残差网络主要由多个残差块堆叠构成,每个残差块由2-3个卷积层组成,并且带有skip连接,残差块的结构如下:
上面是两种不同的残差(跳跃)结构,区别主要在于使用了不同尺寸的卷积核。左边结构的参数量比右边多很多(多出将近一倍),所以当网络很深时,使用右边的结构更合适。对于跳跃连接,当输入与输出的维度相同时,不需要做其他处理,两者直接相加即可;当两者维度不同时,需要先对输入进行变换以匹配输出的维度,主要有两种方式:1)用zero-padding增加维度;2)用1x1卷积增加维度。
整个网络的结构图如下:
ResNet V2和ResNet V1的区别主要在于:skip connection上的非线性激活函数(如ReLU)被替换为恒等映射(y=x);同时,ResNet V2在每一层中都使用了Batch Normalization。这样处理之后,新的残差学习单元比以前更容易训练,并且泛化性更强。ResNet V1的论文参考《Deep Residual Learning for Image Recognition》,ResNet V2的论文参考《Identity Mappings in Deep Residual Networks》。
ResNet V2的代码实现:
#coding=utf-8
from datetime import datetime
import tensorflow as tf
import collections
import time
import math
slim=tf.contrib.slim
# A Block describes one stack of residual units:
#   scope   - name used for the stack's variable scope
#   unit_fn - callable that builds a single residual unit
#   args    - list with one parameter tuple per unit in the stack
class Block(collections.namedtuple('Block', 'scope unit_fn args')):
    """A named tuple describing a ResNet block."""
# Down-sampling helper, implemented with a 1x1 max-pooling layer.
def subsample(inputs, factor, scope=None):
    """Down-sample `inputs` by `factor`.

    Args:
        inputs: tensor to down-sample.
        factor: stride of the pooling; 1 means "leave the input untouched".
        scope: optional scope name forwarded to `slim.max_pool2d`.

    Returns:
        `inputs` itself when `factor` is 1, otherwise the result of a 1x1
        max-pool applied with stride `factor`.
    """
    if factor != 1:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
    return inputs
# Default arguments for the slim layers used by the network.
def resnet_arg_scope(is_training=True, weight_decay=0.0001, batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5, batch_norm_scale=True):
    """Build the arg scope holding the default layer arguments for ResNet.

    Args:
        is_training: forwarded to batch norm as `is_training`.
        weight_decay: coefficient of the L2 regularizer applied to conv weights.
        batch_norm_decay: forwarded to batch norm as `decay`.
        batch_norm_epsilon: forwarded to batch norm as `epsilon`.
        batch_norm_scale: forwarded to batch norm as `scale`.

    Returns:
        The scope object yielded by the innermost `slim.arg_scope`, meant to be
        used as `with slim.arg_scope(resnet_arg_scope(...)):`.
    """
    bn_settings = dict(
        is_training=is_training,
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=batch_norm_scale,
        updates_collections=tf.GraphKeys.UPDATE_OPS,
    )
    # Every conv2d is L2-regularized and followed by batch norm + ReLU by default.
    conv_defaults = dict(
        weights_regularizer=slim.l2_regularizer(weight_decay),
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        normalizer_params=bn_settings,
    )
    with slim.arg_scope([slim.conv2d], **conv_defaults):
        # Stand-alone batch_norm calls share the same settings.
        with slim.arg_scope([slim.batch_norm], **bn_settings):
            # Max pooling defaults to 'SAME' padding.
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
# Convolution helper that pads explicitly when the stride is larger than 1.
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    """Apply a 2-D convolution, padding by hand for strided convolutions.

    Args:
        inputs: input tensor; the padding list below has four entries, so a
            rank-4 tensor is expected (presumably NHWC - TODO confirm).
        num_outputs: number of output channels.
        kernel_size: integer kernel size (used arithmetically for the padding).
        stride: convolution stride.
        scope: optional scope name forwarded to `slim.conv2d`.

    Returns:
        The output of `slim.conv2d`.
    """
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, padding='SAME', scope=scope)

    # Split kernel_size - 1 padding cells between the two sides of each padded
    # dimension; the trailing side gets the extra cell when the total is odd.
    total = kernel_size - 1
    before = total // 2
    after = total - before
    spatial = [before, after]
    padded = tf.pad(inputs, [[0, 0], spatial, spatial, [0, 0]])
    return slim.conv2d(padded, num_outputs, kernel_size, stride=stride, padding='VALID', scope=scope)
# Stack several Blocks; each Block is itself a stack of residual units.
@slim.add_arg_scope  # lets slim.arg_scope supply default arguments for this function
def stack_blocks_dense(net, blocks, outputs_collections=None):
    """Chain every residual unit of every block onto `net`.

    Args:
        net: input tensor.
        blocks: iterable of `Block` tuples. Each element of `block.args` is
            unpacked as `(depth, depth_bottleneck, stride)` and passed to
            `block.unit_fn`.
        outputs_collections: collection to which each block's output is added
            under the block's scope name.

    Returns:
        The output tensor of the last block, or `net` unchanged when `blocks`
        is empty.
    """
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    # Unpack the parameter triple of this residual unit.
                    unit_depth, unit_depth_bottleneck, unit_stride = unit
                    # Build one residual unit on top of the running output.
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)
            # Record the output of every block (not just the last one) so each
            # block is reachable through the end-points collection.
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    # Returning only after the loop guarantees that all blocks are built.
    return net
# Residual learning unit: the internals of one residual block (three conv layers).
@slim.add_arg_scope  # lets slim.arg_scope supply default arguments for this function
def bottleneck(inputs, depth, depth_bottleneck, stride, outputs_collections=None, scope=None):
    """Build one pre-activation bottleneck residual unit.

    Args:
        inputs: input tensor of rank at least 4 (enforced by `last_dimension`).
        depth: number of output channels of the third convolution.
        depth_bottleneck: number of output channels of the first and second
            convolutions.
        stride: stride of the second convolution (and of the shortcut); the
            first and third convolutions use stride 1.
        outputs_collections: collection to which the unit's output is added.
        scope: optional variable-scope name; defaults to 'bottleneck_v2'.

    Returns:
        The sum of the shortcut and residual branches.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        in_channels = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        # Pre-activation: batch norm followed by ReLU, applied before the convs.
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if depth != in_channels:
            # Channel counts differ: project the shortcut with a 1x1 conv so it
            # matches the output of the three-conv branch.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='short_cut')
        else:
            # Channel counts match: only down-sample according to `stride`.
            shortcut = subsample(inputs, stride, 'short_cut')

        # conv1: 1x1, stride 1, depth_bottleneck output channels.
        branch = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        # conv2: 3x3, stride `stride`, depth_bottleneck output channels.
        branch = conv2d_same(branch, depth_bottleneck, 3, stride, scope='conv2')
        # conv3: 1x1, stride 1, `depth` output channels, no norm / activation.
        branch = slim.conv2d(branch, depth, [1, 1], stride=1,
                             normalizer_fn=None, activation_fn=None, scope='conv3')

        # The skip connection itself.
        summed = shortcut + branch
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, summed)
# Main builder for the residual network.
def resnet_v2(inputs, blocks, num_classes=None, global_pools=True, include_root_block=True, reuse=None, scope=None):
    """Assemble a ResNet V2 from a list of blocks.

    Args:
        inputs: input tensor (the mean over axes 1 and 2 below presumably
            pools the spatial dimensions of an NHWC batch - TODO confirm).
        blocks: list of `Block` tuples passed to `stack_blocks_dense`.
        num_classes: when not None, a 1x1 'logits' convolution with this many
            outputs is appended and a softmax is stored under 'predictions'.
        global_pools: when true, averages over axes 1 and 2 (dims are kept).
        include_root_block: when true, prepends the 7x7/stride-2 convolution
            and the 3x3/stride-2 max pooling.
        reuse: forwarded to `tf.variable_scope`.
        scope: optional variable-scope name; defaults to 'resnet_v2'.

    Returns:
        A `(net, end_points)` pair: the final tensor and a dict built from the
        end-points collection.
    """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collections = sc.original_name_scope + '_end_points'
        # Route the outputs of these functions into the end-points collection.
        with slim.arg_scope([slim.conv2d, bottleneck, stack_blocks_dense],
                            outputs_collections=end_points_collections):
            net = inputs
            if include_root_block:
                # Root 7x7/s2 convolution with 64 outputs; no activation or
                # normalization here because every unit pre-activates its input.
                with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                # 3x3/s2 max pooling. Scoped 'pool1' rather than reusing the
                # convolution's 'conv1' name, so the op is labelled correctly.
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            # Stack all residual blocks.
            net = stack_blocks_dense(net, blocks)
            # Final batch norm + ReLU after the last pre-activation unit.
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pools:
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            end_points = slim.utils.convert_collection_to_dict(end_points_collections)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
# 50-layer residual network.
#
# Every unit is described by a triple (depth, depth_bottleneck, stride):
# `depth` is the output channel count of the third convolution,
# `depth_bottleneck` the output channel count of the first two, and `stride`
# the stride of the middle 3x3 convolution (the two 1x1 convolutions use
# stride 1). For example (256, 64, 2) stands for the unit
# [(1x1/s1, 64), (3x3/s2, 64), (1x1/s1, 256)].
def resnet_v2_50(inputs, num_classes=None, global_pool=True, reuse=None, scope='resnet_v2_50'):
    """Build ResNet V2 with 50 layers; arguments are forwarded to `resnet_v2`."""
    # (scope, depth, bottleneck depth, stride-1 unit count, ends with stride-2 unit)
    stages = (
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 3, True),
        ('block3', 1024, 256, 5, True),
        ('block4', 2048, 512, 3, False),
    )
    blocks = []
    for name, depth, inner_depth, plain_units, downsample in stages:
        units = [(depth, inner_depth, 1)] * plain_units
        if downsample:
            units.append((depth, inner_depth, 2))
        blocks.append(Block(name, bottleneck, units))
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
# 101-layer residual network.
def resnet_v2_101(inputs, num_classes=None, global_pool=True, reuse=None, scope='resnet_v2_101'):
    """Build ResNet V2 with 101 layers; arguments are forwarded to `resnet_v2`."""
    # (scope, depth, bottleneck depth, stride-1 unit count, ends with stride-2 unit)
    stages = (
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 3, True),
        ('block3', 1024, 256, 22, True),
        ('block4', 2048, 512, 3, False),
    )
    blocks = []
    for name, depth, inner_depth, plain_units, downsample in stages:
        units = [(depth, inner_depth, 1)] * plain_units
        if downsample:
            units.append((depth, inner_depth, 2))
        blocks.append(Block(name, bottleneck, units))
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
# 152-layer residual network.
def resnet_v2_152(inputs, num_classes=None, global_pool=True, reuse=None, scope='resnet_v2_152'):
    """Build ResNet V2 with 152 layers; arguments are forwarded to `resnet_v2`."""
    # (scope, depth, bottleneck depth, stride-1 unit count, ends with stride-2 unit)
    stages = (
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 7, True),
        ('block3', 1024, 256, 35, True),
        ('block4', 2048, 512, 3, False),
    )
    blocks = []
    for name, depth, inner_depth, plain_units, downsample in stages:
        units = [(depth, inner_depth, 1)] * plain_units
        if downsample:
            units.append((depth, inner_depth, 2))
        blocks.append(Block(name, bottleneck, units))
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
# 200-layer residual network.
def resnet_v2_200(inputs, num_classes=None, global_pool=True, reuse=None, scope='resnet_v2_200'):
    """Build ResNet V2 with 200 layers; arguments are forwarded to `resnet_v2`."""
    # (scope, depth, bottleneck depth, stride-1 unit count, ends with stride-2 unit)
    stages = (
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 23, True),
        ('block3', 1024, 256, 35, True),
        ('block4', 2048, 512, 3, False),
    )
    blocks = []
    for name, depth, inner_depth, plain_units, downsample in stages:
        units = [(depth, inner_depth, 1)] * plain_units
        if downsample:
            units.append((depth, inner_depth, 2))
        blocks.append(Block(name, bottleneck, units))
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
# Measure the wall-clock time of repeated session runs.
def time_tensorflow_run(session, target, info_string):
    """Run `target` repeatedly and print per-step timing statistics.

    The number of timed steps is read from the module-level name
    `num_batches`, which must be defined before this function is called.
    Ten extra warm-up steps are executed first and excluded from the
    statistics.

    Args:
        session: object whose `run(target)` method is timed.
        target: value passed to `session.run`.
        info_string: label included in the summary line.

    Returns:
        None; progress and the mean +/- standard deviation are printed.
    """
    num_steps_burn_in = 10  # warm-up steps, not included in the statistics
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s:step %d,duration=%.3f' % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches  # mean time per step
    # E[x^2] - E[x]^2 can round to a tiny negative number when the durations
    # are nearly identical; clamp so math.sqrt does not raise ValueError.
    vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
    sd = math.sqrt(vr)  # standard deviation
    print('%s:%s across %d steps,%.3f +/- %.3f sec/batch ' % (datetime.now(), info_string, num_batches, mn, sd))
# --- Benchmark the forward pass ---
batch_size = 32
height, width = 224, 224
# Number of timed steps; time_tensorflow_run reads this module-level name.
num_batches = 100
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)
init = tf.global_variables_initializer()
# Use the session as a context manager so it is closed (and its resources
# released) once the benchmark has finished.
with tf.Session() as sess:
    sess.run(init)
    time_tensorflow_run(sess, net, "Forward")
参考《TensorFlow 实战》