本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者吴疆--------------
------点击此处链接至博客园原文------
若不加声明,本文默认以测试阶段调用VGGnet_test网络,代码中默认padding方式为SAME,区别于VALID,DEFAULT_PADDING = 'SAME'
1.两个装饰器(以conv1_1的执行过程为例)
@layer def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, biased=True,relu=True, padding=DEFAULT_PADDING, trainable=True):
注意@layer是个装饰器,这句话相当于conv=layer(conv)
@include_original def layer(op):
这句话则相当于layer=include_original(layer)
def include_original(dec):
    """Meta decorator that keeps the undecorated function reachable.

    Wraps the decorator ``dec`` so that whatever ``dec`` returns for a
    function ``f`` also carries ``f`` itself as its ``_original`` attribute.
    """
    def meta_decorator(f):
        wrapped = dec(f)
        # Expose the raw function so callers can bypass the decoration.
        wrapped._original = f
        return wrapped
    return meta_decorator
不太理解这里的装饰器include_original套装饰器layer,由注释得知装饰器include_original实现了原函数的可调用(有懂的同学麻烦教教我!此外,为何还传入self,指的是哪个类的实例化对象,layer装饰器中最后为何还要返回self)
因此,当在VGGnet_test.py中调用conv(3,3,64,1,1,name='conv1_1',trainable=False)相当于layer(conv)(3,3,64,1,1,name='conv1_1',trainable=False)
而layer(conv)返回layer_decorated函数指针,且op被赋值为conv,则相当于执行了layer_decorated(3,3,64,1,1,name='conv1_1',trainable=False)
其中args为(3,3,64,1,1)、kwargs为{'name':'conv1_1','trainable':False}
@include_original
def layer(op):
    """Decorator that turns ``op`` into a chainable network layer.

    The decorated method calls ``op`` with the inputs currently staged in
    ``self.inputs``, records the result in ``self.layers`` under the layer
    name, stages that result as the input of the next layer, and returns
    ``self`` so calls can be chained.
    """
    def layer_decorated(self, *args, **kwargs):
        # Use the caller-supplied name if there is one; otherwise fall back
        # to a generated name based on the op name. dict.setdefault is a
        # built-in method that also stores the value in kwargs, so ``op``
        # always receives a ``name`` keyword.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))

        # Figure out the layer inputs. E.g. VGGnet_test.py calls
        # self.feed('data') before conv1_1, so self.inputs then holds a
        # single element (the image placeholder, per the original notes).
        staged = len(self.inputs)
        if staged == 0:
            # Nothing was fed before this layer.
            raise RuntimeError('No input variables found for layer %s.' % name)
        if staged == 1:
            layer_input = self.inputs[0]
        else:
            # Several inputs: hand the op a copy of the whole list.
            layer_input = list(self.inputs)

        # Run the actual layer operation (e.g. conv).
        layer_output = op(self, layer_input, *args, **kwargs)
        # Record the output in the layer look-up table.
        self.layers[name] = layer_output
        # The output becomes the input of the next layer.
        self.feed(layer_output)
        # Return self to allow chained calls.
        return self

    return layer_decorated
需要注意的是,self.inputs为列表,通过feed()函数填入数据作为下一层输入,self.layers为字典,记录每一层的输出,此外,还应注意某些层多个输入的情况。
def include_original(dec):
    """Meta decorator that keeps the undecorated function reachable.

    Wraps the decorator ``dec`` so that whatever ``dec`` returns for a
    function ``f`` also carries ``f`` itself as its ``_original`` attribute.
    """
    def meta_decorator(f):
        wrapped = dec(f)
        # Expose the raw function so callers can bypass the decoration.
        wrapped._original = f
        return wrapped
    return meta_decorator
再来看看include_original装饰器(Meta decorator),实现了原函数的可调用功能,即如:猜想是实现了通过self.conv()调用卷积处理,此处代码未读懂。
2.Network新式类
class Network(object):
    """Base class that accumulates layer outputs while a network is built."""

    def __init__(self, inputs, trainable=True):
        """Store the initial inputs and build the network via ``setup()``.

        Args:
            inputs: anything ``dict()`` accepts; copied into ``self.layers``.
            trainable: stored unchanged on ``self.trainable``.
        """
        # Look-up table of layer outputs, seeded with the given inputs.
        self.layers = dict(inputs)
        # Values staged as the input of the next layer.
        self.inputs = []
        self.trainable = trainable
        # Must run last: it relies on the attributes set above.
        self.setup()
self.inputs为列表,通过feed()函数填入数据作为下一层输入,self.layers为字典,记录每一层的输出,各个网络处理层均在类内被定义,将在“3”中介绍。
--------------------------------(除网络处理层外的)其余7个函数-----------------------------------
def setup(self)函数,猜想若非执行Network类的子类,将触发异常(被__init__函数调用)
def setup(self): raise NotImplementedError('Must be subclassed.')
def load(self,data_path,session,ignore_missing=False) 加载如imagenet等预训练模型.npy文件
def load(self, data_path, session, ignore_missing=False):
    """Assign pretrained parameters from a ``.npy`` file to existing variables.

    The file is expected to hold a two-level dict: the outer key is used as
    a variable scope name, the inner key as the variable name inside it.

    Args:
        data_path: path of the ``.npy`` file to read.
        session: TensorFlow session used to run the assign ops.
        ignore_missing: when True, a ValueError raised while fetching or
            assigning a variable is reported and skipped; when False it is
            re-raised.

    Raises:
        ValueError: propagated from the lookup/assignment when
            ``ignore_missing`` is False.
    """
    # np.load is expected to return an object array wrapping the dict;
    # .item() unwraps it. allow_pickle=True is needed because NumPy >= 1.16.3
    # refuses to load pickled object arrays by default.
    # NOTE(review): unpickling can execute arbitrary code - only load model
    # files from a trusted source.
    data_dict = np.load(data_path, allow_pickle=True).item()
    for key in data_dict:
        # reuse=True: look up variables that already exist in this scope
        # instead of creating new ones.
        with tf.variable_scope(key, reuse=True):
            for subkey in data_dict[key]:
                try:
                    var = tf.get_variable(subkey)
                    session.run(var.assign(data_dict[key][subkey]))
                    print("assign pretrain model "+subkey+ " to "+key)
                except ValueError:
                    print("ignore "+key)
                    if not ignore_missing:
                        raise
此处item()函数未查到,另with tf.variable_scope(key, reuse=True)和session.run(var.assign(data_dict[key][subkey]))等相关tensorflow机制暂不明白
def feed(self, *args)构造上一层输出作为下一层的输入,在layer装饰器和网络文件(如VGGnet_test.py中)被调用
# self.layers is a dict; self.inputs is a list.
def feed(self, *args):
    """Stage the given layers as the inputs of the next operation.

    Each argument is either a layer name, which is looked up in
    ``self.layers``, or any other object, which is used as-is. The
    previous content of ``self.inputs`` is discarded.

    Args:
        *args: one or more layer names or layer outputs.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        AssertionError: if no argument is given.
        KeyError: if a name is not present in ``self.layers``.
    """
    assert len(args)!=0
    # basestring (str and unicode) only exists on Python 2; fall back to
    # str so the same code also runs on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    self.inputs = []
    for layer in args:
        if isinstance(layer, string_types):
            try:
                # Replace the name by the stored output of that layer.
                layer = self.layers[layer]
                print(layer)
            except KeyError:
                print(self.layers.keys())
                raise KeyError('Unknown layer name fed: %s'%layer)
        # Stage the (resolved) layer output as input for the next layer.
        self.inputs.append(layer)
    return self
def get_output(self, layer)取出由各层输出构成的字典self.layers中的某层(layer参数)输出,被test.py等调用
def get_output(self, layer):
    """Return the stored output of the layer named ``layer``.

    Raises:
        KeyError: if ``layer`` is not a key of ``self.layers``; the known
            keys are printed first.
    """
    try:
        return self.layers[layer]
    except KeyError:
        print(self.layers.keys())
        raise KeyError('Unknown layer name fed: %s' % layer)
def get_unique_name(self,prefix) 若未指定网络层名,则自动获取唯一的网络层名称(如self.layers中有两个以conv开头,则该层名为conv_3),实际上由于网络文件各层均指定了name,该函数的返回值并未被采用(但仍会被layer装饰器调用)
def get_unique_name(self, prefix):
    """Return an auto-generated layer name of the form ``<prefix>_<n>``.

    ``n`` is one more than the number of keys in ``self.layers`` that start
    with ``prefix`` (e.g. 'conv_3' when two keys already start with 'conv').
    """
    taken = sum(key.startswith(prefix) for key in self.layers)
    return '%s_%d' % (prefix, taken + 1)
name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) # 此处op为conv、max_pool等
def make_var(self, name, shape, initializer=None, trainable=True, regularizer=None)以tensorflow定义的格式创建变量,被网络相关处理层(如conv()函数)调用
def make_var(self, name, shape, initializer=None, trainable=True, regularizer=None):
    """Create or fetch a TensorFlow variable through ``tf.get_variable``.

    All arguments are forwarded unchanged; the result of
    ``tf.get_variable`` is returned.
    """
    variable = tf.get_variable(
        name,
        shape,
        initializer=initializer,
        trainable=trainable,
        regularizer=regularizer,
    )
    return variable
def validate_padding(self, padding)仅允许padding为SAME或VALID,否则抛出异常,被含padding的相关网络处理层(如conv()、upconv()函数)调用
def validate_padding(self, padding):
    """Check that ``padding`` is one of the two supported modes.

    Raises:
        AssertionError: if ``padding`` is neither 'SAME' nor 'VALID'.
    """
    allowed = ('SAME', 'VALID')
    assert padding in allowed
3.网络处理层
-----------------------------------卷积(卷积原理1、各类卷积点击这里)-----------------------------------------
def conv(...)
# Example from VGGnet_test.py:
#     self.conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False)
# The layer decorator then runs
#     layer_output = op(self, layer_input, *args, **kwargs)
# which amounts to
#     conv(self, layer_input, 3, 3, 64, 1, 1, name='conv1_1', trainable=False,
#          biased=True, relu=True, padding=DEFAULT_PADDING)
@layer
def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, biased=True,relu=True, padding=DEFAULT_PADDING, trainable=True):
    """2-D convolution with optional bias and ReLU.

    (Original credit: contribution by miraclebiu, and biased option.)

    Args:
        input: tensor to convolve; its last dimension is taken as the
            number of input channels.
        k_h, k_w: kernel height and width.
        c_o: number of output channels.
        s_h, s_w: stride along height and width.
        name: variable scope name for the layer's variables.
        biased: add a learned bias when True.
        relu: apply ReLU to the result when True.
        padding: 'SAME' or 'VALID'.
        trainable: forwarded to ``make_var`` for weights and biases.

    Returns:
        The convolution output, after the optional bias and ReLU.
    """
    self.validate_padding(padding)
    # Number of input channels, read from the last dimension of the input.
    c_i = input.get_shape()[-1]
    with tf.variable_scope(name):
        # Alternative kept from the original:
        # tf.truncated_normal_initializer(0.0, stddev=0.001)
        init_weights = tf.contrib.layers.variance_scaling_initializer(factor=0.01, mode='FAN_AVG', uniform=False)
        init_biases = tf.constant_initializer(0.0)
        # The kernel is shaped by four numbers: height, width, input
        # channels and output channels. Weight decay comes from
        # cfg.TRAIN.WEIGHT_DECAY (0.0005 according to the original notes).
        kernel = self.make_var('weights', [k_h, k_w, c_i, c_o], init_weights, trainable,
                               regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY))
        # Create the bias variable before the conv op, as the original did.
        biases = self.make_var('biases', [c_o], init_biases, trainable) if biased else None
        # Strides are [1, s_h, s_w, 1]: batch and channel strides stay at 1.
        output = tf.nn.conv2d(input, kernel, [1, s_h, s_w, 1], padding=padding)
        if biased:
            output = tf.nn.bias_add(output, biases)
        if relu:
            output = tf.nn.relu(output)
        return output
cfg.TRAIN.WEIGHT_DECAY 0.0005
卷积核由4个参数控制,tensorflow中卷积处理函数为tf.nn.conv2d(i,k,[1,s_h,s_w,1],padding=padding)
-------------------------------------反/逆卷积(反卷积原理点击这里)-----------------------------------------------
def upconv(...)
@layer
def upconv(self, input, shape, c_o, ksize=4, stride = 2, name = 'upconv', biased=False, relu=True, padding=DEFAULT_PADDING, trainable=True):
    """Transposed convolution (up-conv) with optional bias and ReLU.

    Args:
        input: 4-D tensor; dimension 3 is read as the input channel count.
        shape: target shape, or None. When None the output height and width
            are the input height and width multiplied by ``stride``;
            otherwise ``shape[1]`` and ``shape[2]`` are used.
        c_o: number of output channels.
        ksize: side length of the square kernel.
        stride: stride along height and width.
        name: variable scope name for the layer's variables.
        biased: add a learned bias when True.
        relu: apply ReLU to the result when True.
        padding: 'SAME' or 'VALID'; now forwarded to
            ``tf.nn.conv2d_transpose`` (it used to be validated but then
            ignored in favour of DEFAULT_PADDING).
        trainable: forwarded to ``make_var`` for weights and biases.

    Returns:
        The up-convolved tensor, after the optional bias and ReLU.
    """
    self.validate_padding(padding)
    c_in = input.get_shape()[3].value  # number of input channels
    in_shape = tf.shape(input)  # dynamic shape of the input
    if shape is None:
        # Alternative kept from the original:
        # h = ((in_shape[1] - 1) * stride) + 1
        # w = ((in_shape[2] - 1) * stride) + 1
        # NOTE(review): in * stride is the output size of a SAME-padded
        # transposed convolution; with padding='VALID' pass an explicit
        # ``shape`` - confirm against callers.
        h = in_shape[1] * stride
        w = in_shape[2] * stride
        new_shape = [in_shape[0], h, w, c_o]
    else:
        new_shape = [in_shape[0], shape[1], shape[2], c_o]
    # Pack the four (possibly dynamic) entries into a single 1-D tensor.
    output_shape = tf.stack(new_shape)

    # conv2d_transpose takes filters as [height, width, out_ch, in_ch].
    filter_shape = [ksize, ksize, c_o, c_in]

    with tf.variable_scope(name) as scope:
        # Alternative kept from the original:
        # tf.truncated_normal_initializer(0.0, stddev=0.01)
        init_weights = tf.contrib.layers.variance_scaling_initializer(factor=0.01, mode='FAN_AVG', uniform=False)
        filters = self.make_var('weights', filter_shape, init_weights, trainable,
                                regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY))
        # Fix: honour the caller's ``padding`` instead of DEFAULT_PADDING.
        deconv = tf.nn.conv2d_transpose(input, filters, output_shape,
                                        strides=[1, stride, stride, 1], padding=padding, name=scope.name)
        # The transposed conv loses shape information; reshape to regain it.
        deconv = tf.reshape(deconv, new_shape)

        if biased:
            init_biases = tf.constant_initializer(0.0)
            biases = self.make_var('biases', [c_o], init_biases, trainable)
            deconv = tf.nn.bias_add(deconv, biases)
        if relu:
            return tf.nn.relu(deconv)
        return deconv
tensorflow中反卷积处理函数为tf.nn.conv2d_transpose(...)
输出维度(4个参数)如何确定?反卷积原理?tf.stack函数?卷积核shape由4个参数确定?output_shape和new_shape的关系?
-------------------------------------------------(最大/平均)池化---------------------------------------------
def max_pool(...)
def avg_pool(...)
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
    """Max pooling with a ``k_h`` x ``k_w`` window and strides ``s_h``, ``s_w``.

    ``padding`` must be 'SAME' or 'VALID'. Returns the result of
    ``tf.nn.max_pool``.
    """
    self.validate_padding(padding)
    # Batch and channel dimensions are neither pooled nor strided.
    window = [1, k_h, k_w, 1]
    step = [1, s_h, s_w, 1]
    return tf.nn.max_pool(input, ksize=window, strides=step, padding=padding, name=name)


@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
    """Average pooling with a ``k_h`` x ``k_w`` window and strides ``s_h``, ``s_w``.

    ``padding`` must be 'SAME' or 'VALID'. Returns the result of
    ``tf.nn.avg_pool``.
    """
    self.validate_padding(padding)
    # Batch and channel dimensions are neither pooled nor strided.
    window = [1, k_h, k_w, 1]
    step = [1, s_h, s_w, 1]
    return tf.nn.avg_pool(input, ksize=window, strides=step, padding=padding, name=name)
tensorflow中(最大/平均)池化处理函数为tf.nn.max_pool(...)和tf.nn.avg_pool(...)
-------------------------------------------------非线性函数------------------------------------------------------
def relu(...)
@layer
def relu(self, input, name):
    """Apply ``tf.nn.relu`` to ``input`` and return the result."""
    activated = tf.nn.relu(input, name=name)
    return activated
-------------------------------------------------roi_pool------------------------------------------------------
def roi_pool(...)
# Example from VGGnet_test.py: self.roi_pool(7, 7, 1.0 / 16, name='pool_5').
# That layer is preceded by self.feed('conv5_3', 'rois'), so ``input`` is the
# two-element list built from 'conv5_3' and 'rois'.
@layer
def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
    """Run ``roi_pool_op.roi_pool`` on the two staged inputs.

    Args:
        input: two-element list; ``input[0]`` and ``input[1]`` are passed
            to the op in that order (feature map and rois in the example
            above).
        pooled_height, pooled_width, spatial_scale, name: forwarded to the op.

    Returns:
        Element 0 of the op's result.
    """
    # If an input is itself a tuple, only its first element is used.
    # The list is updated in place, as in the original.
    for idx in (0, 1):
        if isinstance(input[idx], tuple):
            input[idx] = input[idx][0]

    print(input)
    pooled = roi_pool_op.roi_pool(input[0], input[1],
                                  pooled_height,
                                  pooled_width,
                                  spatial_scale,
                                  name=name)
    return pooled[0]
以VGGnet_test.py为例,该层input为conv5_3和rois构成的列表,由roi_pool_op.roi_pool(...)执行(在roi_pooling_layer/roi_pooling_op.py中,实际由roi_pooling.so执行)
isinstance(input,tuple)为何要判断input是否为tuple
-------------------------------------------------psroi_pool(原理)------------------------------------------------------
def psroi_pool(...)
@layer
def psroi_pool(self, input, output_dim, group_size, spatial_scale, name):
    """Run ``psroi_pooling_op.psroi_pool`` on the two staged inputs.

    (Original credit: contribution by miraclebiu.)

    Args:
        input: two-element list; ``input[0]`` and ``input[1]`` are passed
            to the op in that order.
        output_dim, group_size, spatial_scale, name: forwarded to the op as
            keyword arguments.

    Returns:
        Element 0 of the op's result.
    """
    # If an input is itself a tuple, only its first element is used.
    # The list is updated in place, as in the original.
    for idx in (0, 1):
        if isinstance(input[idx], tuple):
            input[idx] = input[idx][0]

    pooled = psroi_pooling_op.psroi_pool(input[0], input[1],
                                         output_dim=output_dim,
                                         group_size=group_size,
                                         spatial_scale=spatial_scale,
                                         name=name)
    return pooled[0]
由psroi_pooling_op.psroi_pool(...)执行(在psroi_pooling_layer/psroi_pooling_op.py中,实际由psroi_pooling.so执行)
-------------------------------------------------proposal_layer---------------------------------------------------------
tf.py_func()接收的是tensor,然后将其转化为numpy array送入xxx函数,最后再将xxx函数输出的numpy array转化为tensor返回
4.其他
数据是如何通过sess.run馈入的,feed_dict与VGGnet_test类中相应变量的关系(仅定义了占位符)是什么机制