import tensorflow as tf
from networks.network import Network

# Number of classes to detect, plus one for background (20 object classes + 1).
n_classes = 21
# An arbitrary PxQ input image is first rescaled to a fixed MxN before entering
# the network; the VGG16 backbone below then downsamples by a total factor of
# 16 (four 2x2/stride-2 poolings), hence a feature stride of 16.
_feat_stride = [16,]
# Three anchor scales; combined with 3 aspect ratios this yields 9 anchors per
# feature-map location.
anchor_scales = [8, 16, 32]


class VGGnet_test(Network):
    """VGG16-based Faster R-CNN graph for test/inference mode.

    Builds, in order: the VGG16 conv backbone, the Region Proposal Network
    (RPN), the proposal layer, and the Fast R-CNN classification/regression
    head. Layer names must match the pretrained checkpoint exactly.
    """

    def __init__(self, trainable=True):
        self.inputs = []
        # Input image batch: [batch_size, H, W, channels].
        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        # im_info = [M, N, scale_factor]: for an arbitrary PxQ image reshaped
        # to the fixed MxN fed into Faster R-CNN, this records the resize info.
        self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
        # Dropout keep probability (fed at run time; kept for parity with the
        # training graph even though this is the test network).
        self.keep_prob = tf.placeholder(tf.float32)
        self.layers = dict({'data':self.data, 'im_info':self.im_info})
        self.trainable = trainable
        self.setup()

    def setup(self):
        '''
        Build the inference graph for an MxN input image.

        Conv layers: 13 conv + 13 relu + 4 pooling layers (VGG16).
        Every conv layer uses kernel_size=3, pad=1, stride=1: the pad=1
        zero-padding ring grows the input to (M+2)x(N+2), so a 3x3 conv
        outputs MxN again — conv layers never change spatial size.
        Every pooling layer uses kernel_size=2, pad=0, stride=2, halving both
        H and W. Four poolings therefore turn an MxN image into an
        (M/16)x(N/16) feature map, which is why _feat_stride is 16.
        :return:
        '''
        (self.feed('data')
             .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False)
             .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False)
             .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
             .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False)
             .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False)
             .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
             .conv(3, 3, 256, 1, 1, name='conv3_1')
             .conv(3, 3, 256, 1, 1, name='conv3_2')
             .conv(3, 3, 256, 1, 1, name='conv3_3')
             .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
             .conv(3, 3, 512, 1, 1, name='conv4_1')
             .conv(3, 3, 512, 1, 1, name='conv4_2')
             .conv(3, 3, 512, 1, 1, name='conv4_3')
             .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
             .conv(3, 3, 512, 1, 1, name='conv5_1')
             .conv(3, 3, 512, 1, 1, name='conv5_2')
             .conv(3, 3, 512, 1, 1, name='conv5_3'))

        # RPN: uses anchors plus a softmax to extract an initial set of
        # foreground anchors as candidate regions.
        (self.feed('conv5_3')
             .conv(3,3,512,1,1,name='rpn_conv/3x3')
             # 3 anchor scales * 3 aspect ratios = 9 anchors per feature-map
             # point, each scored foreground vs. background: 3*3*2 = 18
             # channels of classification scores.
             .conv(1,1,len(anchor_scales)*3*2,1,1,padding='VALID',relu = False,name='rpn_cls_score'))

        (self.feed('rpn_conv/3x3')
             # Box regression for each of the 9 anchors, storing
             # (dx, dy, dw, dh): 3*3*4 = 36 channels.
             .conv(1,1,len(anchor_scales)*3*4,1,1,padding='VALID',relu = False,name='rpn_bbox_pred'))

        # Reshape first, then apply softmax over the 2 fg/bg channels.
        (self.feed('rpn_cls_score')
             # shape (1, 9xH, W, 2)
             .reshape_layer(2,name = 'rpn_cls_score_reshape')
             # shape (1, 9xH, W, 2)
             .softmax(name='rpn_cls_prob'))

        # Reshape back again.
        (self.feed('rpn_cls_prob')
             # shape (1, H, W, 18): restores the 'rpn_cls_score' layout. The
             # two reshape_layer steps thus (1) arranged the data for softmax
             # and (2) restored the original layout, now holding softmax
             # scores instead of raw logits.
             .reshape_layer(len(anchor_scales)*3*2,name = 'rpn_cls_prob_reshape'))

        '''
        The proposal layer has 3 inputs:
        1. the fg/bg anchor classification result rpn_cls_prob_reshape,
        2. the corresponding bbox-regression deltas
           [d_x(A), d_y(A), d_w(A), d_h(A)] in rpn_bbox_pred,
        3. im_info;
        additionally _feat_stride = 16 records the downsampling factor.
        '''
        (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info')
             # network.proposal_layer with cfg_key='TEST' emits the estimated
             # proposal boxes (rois).
             .proposal_layer(_feat_stride, anchor_scales, 'TEST', name = 'rois'))

        # Fast R-CNN head: ROI-pool each proposal to 7x7 on conv5_3 (spatial
        # scale 1/16), then two 4096-wide FC layers and per-class softmax
        # scores.
        (self.feed('conv5_3', 'rois')
             .roi_pool(7, 7, 1.0/16, name='pool_5')
             .fc(4096, name='fc6')
             .fc(4096, name='fc7')
             .fc(n_classes, relu=False, name='cls_score')
             .softmax(name='cls_prob'))

        # Per-class bounding-box regression deltas (4 per class) from fc7.
        (self.feed('fc7')
             .fc(n_classes*4, relu=False, name='bbox_pred'))
# Faster-RCNN_TF code walkthrough (part 1): lib/networks/VGGnet_test.py
# Source attribution: blog.csdn.net/weixin_39881922/article/details/80171139