Implementing ResNet V2 in TensorFlow

ResNet network structure:

    A residual network is built by stacking many residual blocks. Each residual block consists of 2-3 convolutional layers together with a skip connection. The structure of a residual block is shown below:

    [Figure: two variants of the residual block]

    The figure above shows two variants of the residual block with its shortcut; they differ mainly in the convolution kernels used. The left one has far more parameters than the right one (roughly twice as many), so for very deep networks the right-hand (bottleneck) design is preferable. As for the shortcut itself, when the input and output have the same dimensions no extra processing is needed and the two are simply added. When the dimensions differ, the input has to be transformed to match the output dimension, in one of two ways: 1) zero-padding to increase the dimension, or 2) a 1x1 convolution to change the dimension.
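    As a minimal sketch of option 2 (written against the same tf.contrib.slim API used by the full implementation further below; the function name match_shortcut is illustrative only), the shortcut is either kept as an identity/subsampling or projected with a 1x1 convolution:

# Sketch only: build a shortcut whose depth and spatial size match the residual branch
def match_shortcut(inputs, depth, stride):
	depth_in = inputs.get_shape().as_list()[-1]
	if depth == depth_in:
		# same channel count: identity shortcut (downsample with the stride if needed)
		return inputs if stride == 1 else slim.max_pool2d(inputs, [1, 1], stride=stride)
	# different channel count: a 1x1 convolution changes the dimension (option 2 above)
	return slim.conv2d(inputs, depth, [1, 1], stride=stride,
	                   activation_fn=None, normalizer_fn=None, scope='shortcut')

    The bottleneck function in the code below handles exactly these two cases.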

    The overall network architecture is shown below:

    [Figure: overall ResNet architecture]

    The main differences between ResNet V2 and ResNet V1 are: the nonlinear activation (e.g. ReLU) on the skip-connection path is replaced by an identity mapping (y = x), and Batch Normalization together with ReLU is applied before every convolution (pre-activation). With these changes the new residual unit is easier to train and generalizes better than before. See the papers "Deep Residual Learning for Image Recognition" (ResNet V1) and "Identity Mappings in Deep Residual Networks" (ResNet V2).
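    The difference in ordering can be sketched as follows (a schematic only, not the exact implementation below; it uses the same tf.contrib.slim API, assumes x already has depth channels so the identity shortcut needs no projection, and the names unit_v1 / unit_v2 are illustrative):

def unit_v1(x, depth):
	# V1 (post-activation): conv -> BN -> ReLU inside the branch, one more ReLU after the addition
	f = slim.conv2d(x, depth, [3, 3], normalizer_fn=slim.batch_norm)                      # conv + BN + ReLU
	f = slim.conv2d(f, depth, [3, 3], normalizer_fn=slim.batch_norm, activation_fn=None)  # conv + BN
	return tf.nn.relu(x + f)

def unit_v2(x, depth):
	# V2 (pre-activation): BN -> ReLU come before each convolution;
	# the shortcut and the addition stay a pure identity mapping y = x + F(x)
	f = slim.conv2d(tf.nn.relu(slim.batch_norm(x)), depth, [3, 3], activation_fn=None)
	f = slim.conv2d(tf.nn.relu(slim.batch_norm(f)), depth, [3, 3], activation_fn=None)
	return x + f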

ResNet V2 code implementation:

#coding=utf-8
from datetime import datetime 
import tensorflow as tf
import collections
import time
import math
slim=tf.contrib.slim

# Define a named tuple. One Block describes a stack of residual units: scope is the block's name,
# unit_fn is the function that builds a single residual unit, and args is the list of parameters for each unit
class Block(collections.namedtuple('Block',['scope','unit_fn','args'])):
	'A named tuple describing a Resnet block'

# Define a downsampling function, implemented as 1x1 max pooling with the given stride
def subsample(inputs,factor,scope=None):
	if factor==1:
		return inputs
	else:
		return slim.max_pool2d(inputs,[1,1],stride=factor,scope=scope)

# Define default arguments for the commonly used slim functions
def resnet_arg_scope(is_training=True,weight_decay=0.0001,batch_norm_decay=0.997,batch_norm_epsilon=1e-5,batch_norm_scale=True):
	batch_norm_params={
		'is_training':is_training,
		'decay':batch_norm_decay,
		'epsilon':batch_norm_epsilon,
		'scale':batch_norm_scale,
		'updates_collections':tf.GraphKeys.UPDATE_OPS,
		}
	with slim.arg_scope(
		[slim.conv2d],
		weights_regularizer=slim.l2_regularizer(weight_decay),
		weights_initializer=slim.variance_scaling_initializer(),
		activation_fn=tf.nn.relu,
		normalizer_fn=slim.batch_norm,
		normalizer_params=batch_norm_params):
		with slim.arg_scope([slim.batch_norm],**batch_norm_params):
			with slim.arg_scope([slim.max_pool2d],padding='SAME') as arg_sc:
				return arg_sc
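
# For reference, a hedged usage example of these defaults (the tensor and scope names are
# illustrative and not part of the original script): inside the returned arg_scope, a plain
# slim.conv2d call automatically picks up the L2 weight regularizer, the variance-scaling
# initializer, batch normalization and the ReLU activation without repeating those arguments.
#
# with slim.arg_scope(resnet_arg_scope(is_training=True)):
# 	x = tf.random_uniform((1, 56, 56, 64))
# 	# equivalent to passing weights_regularizer / normalizer_fn / activation_fn explicitly
# 	y = slim.conv2d(x, 128, [3, 3], scope='example_conv')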

# Define a convolution that reproduces 'SAME' padding even when stride > 1,
# by padding explicitly and then convolving with 'VALID' padding
def conv2d_same(inputs,num_outputs,kernel_size,stride,scope=None):
	if stride==1:
		return slim.conv2d(inputs,num_outputs,kernel_size,stride=1,padding='SAME',scope=scope)
	else:
		pad_total=kernel_size-1
		pad_beg=pad_total//2
		pad_end=pad_total - pad_beg
		inputs=tf.pad(inputs,[[0,0],[pad_beg,pad_end],[pad_beg,pad_end],[0,0]])
		return slim.conv2d(inputs,num_outputs,kernel_size,stride=stride,padding='VALID',scope=scope)
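
# The explicit padding guarantees that for stride > 1 the output size is ceil(input_size / stride),
# independent of whether the input size is even or odd, which matches what 'SAME' padding gives.
# A small worked example (not part of the original code), with kernel_size=7, stride=2 on a 224x224 input:
#   pad_total = 7 - 1 = 6,  pad_beg = 3,  pad_end = 3
#   padded size: 224 + 3 + 3 = 230
#   'VALID' output size: (230 - 7) // 2 + 1 = 112  ->  a 112x112 feature map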

# Stack the residual units; each Block describes one stack (one stage of the network)
@slim.add_arg_scope  # this decorator lets slim.arg_scope supply default arguments for this function
def stack_blocks_dense(net,blocks,outputs_collections=None):
	for block in blocks:
		with tf.variable_scope(block.scope,'block',[net]) as sc:
			for i,unit in enumerate(block.args):
				with tf.variable_scope('unit_%d' %(i+1),values=[net]):
					# unpack the three elements of the tuple
					unit_depth,unit_depth_bottleneck,unit_stride=unit
					# call the bottleneck function to build a single residual unit
					net=block.unit_fn(net,depth=unit_depth,depth_bottleneck=unit_depth_bottleneck,stride=unit_stride)
			# collect the output of each block (inside the loop, so every block is recorded)
			net=slim.utils.collect_named_outputs(outputs_collections,sc.name,net)
	return net
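
# As an illustration of how a Block is expanded (the concrete numbers are taken from the
# resnet_v2_50 configuration further below):
#   Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)])
# produces three residual units under the scopes
#   block1/unit_1: bottleneck(depth=256, depth_bottleneck=64, stride=1)
#   block1/unit_2: bottleneck(depth=256, depth_bottleneck=64, stride=1)
#   block1/unit_3: bottleneck(depth=256, depth_bottleneck=64, stride=2)  # downsamples the feature map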
	


# Define the residual learning unit, i.e. the internal structure of one residual block:
# three convolutional layers plus the shortcut. This is the core building block.
@slim.add_arg_scope  # this decorator lets slim.arg_scope supply default arguments for this function
def bottleneck(inputs,depth,depth_bottleneck,stride,outputs_collections=None,scope=None):
	'''
		depth: output channels of the third convolutional layer
		depth_bottleneck: output channels of the first and second convolutional layers
		stride: stride of the second convolutional layer; the first and third use stride 1
	'''
	with tf.variable_scope(scope,'bottleneck_v2',[inputs]) as sc:
		depth_in=slim.utils.last_dimension(inputs.get_shape(),min_rank=4)
		preact=slim.batch_norm(inputs,activation_fn=tf.nn.relu,scope='preact') # pre-activation: BN + ReLU

		# If the input and output channels match, only subsample according to stride,
		# so that the shortcut's spatial size matches the output of the three conv layers
		if depth==depth_in:
			short_cut=subsample(inputs,stride,'short_cut')

		# If the input and output channels differ, a 1x1 convolution changes the channel
		# count of the shortcut to match the output of the three conv layers
		else:
			short_cut=slim.conv2d(preact,depth,[1,1],stride=stride,normalizer_fn=None,activation_fn=None,scope='short_cut')

		# first conv layer: 1x1 kernel, stride 1, output channels: depth_bottleneck
		residual=slim.conv2d(preact,depth_bottleneck,[1,1],stride=1,scope='conv1')

		# second conv layer: 3x3 kernel, stride: stride, output channels: depth_bottleneck
		residual=conv2d_same(residual,depth_bottleneck,3,stride,scope='conv2')

		# third conv layer: 1x1 kernel, stride 1, output channels: depth (no BN / activation here)
		residual=slim.conv2d(residual,depth,[1,1],stride=1,normalizer_fn=None,activation_fn=None,scope='conv3')

		# the skip connection: add the shortcut to the residual branch
		output=short_cut+residual

		return slim.utils.collect_named_outputs(outputs_collections,sc.name,output)

# Main function that assembles the residual network
def resnet_v2(inputs,blocks,num_classes=None,global_pool=True,include_root_block=True,reuse=None,scope=None):
	with tf.variable_scope(scope,'resnet_v2',[inputs],reuse=reuse) as sc:
		end_points_collections=sc.original_name_scope+'_end_points'
		# set default arguments for the commonly used functions
		with slim.arg_scope([slim.conv2d,bottleneck,stack_blocks_dense],outputs_collections=end_points_collections):
			net=inputs
			if include_root_block:

				# first a (7x7, stride 2, 64 output channels) convolution
				with slim.arg_scope([slim.conv2d],activation_fn=None,normalizer_fn=None):
					net=conv2d_same(net,64,7,stride=2,scope='conv1')

				# then a (3x3, stride 2) max pooling
				net=slim.max_pool2d(net,[3,3],stride=2,scope='pool1')

			# build the stacks of residual units
			net=stack_blocks_dense(net,blocks)

			# final batch normalization + ReLU
			net=slim.batch_norm(net,activation_fn=tf.nn.relu,scope='postnorm')
			if global_pool:
				net=tf.reduce_mean(net,[1,2],name='pool5',keep_dims=True)  # global average pooling
			if num_classes is not None:
				net=slim.conv2d(net,num_classes,[1,1],activation_fn=None,normalizer_fn=None,scope='logits')
			end_points=slim.utils.convert_collection_to_dict(end_points_collections)
			if num_classes is not None:
				end_points['predictions']=slim.softmax(net,scope='predictions')
			return net,end_points

# 50-layer residual network
'''
A triple such as (256, 64, 3) describes one residual unit made of three convolutional layers:
  256 is the output channel count of the third layer, the first two layers output 64 channels,
  and the middle layer uses stride 3 while the other two use stride 1. That unit would be
  [(1x1/s1, 64), (3x3/s3, 64), (1x1/s1, 256)]. (In the configurations below the stride is 1 or 2.)
'''
def resnet_v2_50(inputs,num_classes=None,global_pool=True,reuse=None,scope='resnet_v2_50'):
	blocks=[
		Block('block1',bottleneck,[(256,64,1)]*2+[(256,64,2)]),
		Block('block2',bottleneck,[(512,128,1)]*3+[(512,128,2)]),
		Block('block3',bottleneck,[(1024,256,1)]*5+[(1024,256,2)]),
		Block('block4',bottleneck,[(2048,512,1)]*3)
	]
	return resnet_v2(inputs,blocks,num_classes,global_pool,include_root_block=True,reuse=reuse,scope=scope)
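
# As a quick check on the layer count (simple arithmetic, not in the original post):
# the four blocks contain 3 + 4 + 6 + 3 = 16 bottleneck units, each with 3 convolutional layers,
# i.e. 48 layers; adding the initial 7x7 convolution and the final 1x1 'logits' convolution
# gives 50 layers. The same counting yields 101, 152 and 200 for the variants below.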


# 101-layer residual network
def resnet_v2_101(inputs,num_classes=None,global_pool=True,reuse=None,scope='resnet_v2_101'):
	blocks=[
		Block('block1',bottleneck,[(256,64,1)]*2+[(256,64,2)]),
		Block('block2',bottleneck,[(512,128,1)]*3+[(512,128,2)]),
		Block('block3',bottleneck,[(1024,256,1)]*22+[(1024,256,2)]),
		Block('block4',bottleneck,[(2048,512,1)]*3)
	]
	return resnet_v2(inputs,blocks,num_classes,global_pool,include_root_block=True,reuse=reuse,scope=scope)

# 152-layer residual network
def resnet_v2_152(inputs,num_classes=None,global_pool=True,reuse=None,scope='resnet_v2_152'):
	blocks=[
		Block('block1',bottleneck,[(256,64,1)]*2+[(256,64,2)]),
		Block('block2',bottleneck,[(512,128,1)]*7+[(512,128,2)]),
		Block('block3',bottleneck,[(1024,256,1)]*35+[(1024,256,2)]),
		Block('block4',bottleneck,[(2048,512,1)]*3)
	]
	return resnet_v2(inputs,blocks,num_classes,global_pool,include_root_block=True,reuse=reuse,scope=scope)

# 200-layer residual network
def resnet_v2_200(inputs,num_classes=None,global_pool=True,reuse=None,scope='resnet_v2_200'):
	blocks=[
		Block('block1',bottleneck,[(256,64,1)]*2+[(256,64,2)]),
		Block('block2',bottleneck,[(512,128,1)]*23+[(512,128,2)]),
		Block('block3',bottleneck,[(1024,256,1)]*35+[(1024,256,2)]),
		Block('block4',bottleneck,[(2048,512,1)]*3)
	]
	return resnet_v2(inputs,blocks,num_classes,global_pool,include_root_block=True,reuse=reuse,scope=scope)

# Function that measures the average computation time per batch
# (num_batches is a global defined in the test script below)
def time_tensorflow_run(session,target,info_string):
	num_steps_burn_in=10      # warm-up iterations, excluded from the statistics
	total_duration=0.0
	total_duration_squared=0.0

	for i in range(num_batches + num_steps_burn_in):
		start_time=time.time()
		_=session.run(target)
		duration=time.time()-start_time
		if i >= num_steps_burn_in:
			if not i % 10:
				print ('%s: step %d, duration=%.3f' %(datetime.now(),i - num_steps_burn_in,duration))
			total_duration+=duration
			total_duration_squared+=duration*duration

	# report the statistics once, after all batches have run
	mn=total_duration/num_batches                   # mean time per batch
	vr=total_duration_squared/num_batches - mn*mn   # variance
	sd=math.sqrt(vr)                                # standard deviation
	print ('%s: %s across %d steps, %.3f +/- %.3f sec/batch' %(datetime.now(),info_string,num_batches,mn,sd))

# --- Measure the forward-pass time ---
batch_size=32
height,width=224,224
inputs=tf.random_uniform((batch_size,height,width,3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
	net,end_points=resnet_v2_152(inputs,1000)
init=tf.global_variables_initializer()
sess=tf.Session()
sess.run(init)
num_batches=100
time_tensorflow_run(sess,net,"Forward")
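
# The script above times only the forward pass. To also time forward plus backward propagation
# (not part of the original post; a hedged sketch that reuses the same timing helper), one option
# is to build a dummy loss over the logits and time the gradient computation:
#
# loss = tf.nn.l2_loss(net)                            # dummy loss over the logits
# grads = tf.gradients(loss, tf.trainable_variables())
# time_tensorflow_run(sess, grads, "Forward-backward")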

Reference: the book 《TensorFlow实战》.

Reposted from blog.csdn.net/MOU_IT/article/details/81106626