Faster-RCNN_TF code interpretation (2): Faster-RCNN_TF-master\lib\rpn_msr\proposal_layer_tf.py

```python
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np
import yaml
from fast_rcnn.config import cfg
from generate_anchors import generate_anchors
from lib.fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
from fast_rcnn.nms_wrapper import nms
import pdb

DEBUG = False
"""
Outputs object detection proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""

# Outputs the predicted proposal boxes.
# Inputs: rpn_cls_prob_reshape (rpn_cls_score after reshape-softmax-reshape)
#         rpn_bbox_pred (the predicted bbox regression deltas)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16,], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #     generate A anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # layer_params = yaml.load(self.param_str_)

    # Generate the 9 base anchors:
    # [[ -84.  -40.   99.   55.]
    #  [-176.  -88.  191.  103.]
    #  [-360. -184.  375.  199.]
    #  [ -56.  -56.   71.   71.]
    #  [-120. -120.  135.  135.]
    #  [-248. -248.  263.  263.]
    #  [ -36.  -80.   51.   95.]
    #  [ -80. -168.   95.  183.]
    #  [-168. -344.  183.  359.]]
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    # _num_anchors is 9
    _num_anchors = _anchors.shape[0]
    # (1, H, W, 2*9) -> (1, 2*9, H, W)
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    # (1, H, W, 4*9) -> (1, 4*9, H, W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape, [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1], rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])

    # im_info = [M, N, scale_factor]
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # TRAIN: 12000, TEST: 6000 (number of top-scoring boxes to keep before NMS)
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # TRAIN: 2000, TEST: 300 (number of top-scoring boxes to keep after NMS)
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # TRAIN: 0.7, TEST: 0.7 (NMS threshold)
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # TRAIN: 16, TEST: 16 (minimum proposal size in the original image)
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # The first set of _num_anchors channels are bg probs,
    # the second set are the fg probs, which we want.
    # Slicing the (2*9, H, W) feature map along the channel axis, the first
    # half holds the background scores of the 9 anchors at each location i
    # and the second half holds the foreground scores; keep only the fg part.
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
```
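To make the channel layout concrete, here is a toy sketch (sizes and data are made up for illustration, not taken from the repo) of how slicing the channel axis at `_num_anchors` separates the bg and fg probabilities:

```python
import numpy as np

# Toy sketch: A = 9 anchors means 2*A = 18 class-probability channels
# after the transpose to (1, 2*A, H, W).
A, H, W = 9, 3, 4
rpn_cls_prob = np.random.rand(1, 2 * A, H, W)

bg_scores = rpn_cls_prob[:, :A, :, :]  # first A channels: background probs
fg_scores = rpn_cls_prob[:, A:, :, :]  # last A channels: foreground probs
print(fg_scores.shape)  # (1, 9, 3, 4)
```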
```python
    # 1. Generate proposals from bbox deltas and shifted anchors
    # Height and width of the feature map
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    # Horizontal offsets, one per feature-map column. For a 600x1000 image
    # there are about 64 of them, because width = 1000/16 ≈ 64.
    shift_x = np.arange(0, width) * _feat_stride
    # Vertical offsets, one per feature-map row. For a 600x1000 image there
    # are about 39 of them, because height = 600/16 ≈ 39.
    shift_y = np.arange(0, height) * _feat_stride
    # np.meshgrid turns the coordinate vectors into coordinate matrices: the
    # new shift_x repeats the old shift_x as its rows (one row per element of
    # shift_y), and the new shift_y repeats the old shift_y as its columns
    # (one column per element of shift_x).
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # shift_x and shift_y are now 39x64 arrays; pairing the elements at each
    # position gives the offset on the image of that cell's anchors relative
    # to the 9 anchors of the top-left cell, i.e. 39*64 = 2496 offset pairs
    # in total. Adding these offsets to the base anchors yields all anchors,
    # so a 600x1000 image produces 2496*9 anchors, stored below in `anchors`.
    # Note: _feat_stride is not arbitrary. The VGG feature extractor has 4
    # max-pool layers and its conv layers use SAME padding, so each
    # feature-map point sees a (2^4)*(2^4) = 16*16 block of the input image.
    # Generating anchors from the feature map therefore means placing the 9
    # scaled anchors inside each 16*16 pixel block of the original image. To
    # locate every anchor on the original image, it suffices to take the 9
    # anchors defined for the top-left 16*16 block and record the offset of
    # every other 16*16 block relative to it. For width=3, height=4 the code
    # below produces:
    # [[ 0  0  0  0]
    #  [16  0 16  0]
    #  [32  0 32  0]
    #  [ 0 16  0 16]
    #  [16 16 16 16]
    #  [32 16 32 16]
    #  [ 0 32  0 32]
    #  [16 32 16 32]
    #  [32 32 32 32]
    #  [ 0 48  0 48]
    #  [16 48 16 48]
    #  [32 48 32 48]]
    # i.e. one (x1, y1, x2, y2) offset per 16*16 block.
    # numpy.ravel() flattens each array to 1-D; stacking and transposing
    # gives an array of shape (width*height, 4)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    # A = _num_anchors = 9
    A = _num_anchors
    # K = width*height, e.g. 2496
    K = shifts.shape[0]
```
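The shift construction is easy to verify in isolation. This standalone snippet reproduces the width=3, height=4 example above:

```python
import numpy as np

_feat_stride = 16
width, height = 3, 4  # tiny feature map, just for illustration

shift_x = np.arange(0, width) * _feat_stride   # [ 0 16 32]
shift_y = np.arange(0, height) * _feat_stride  # [ 0 16 32 48]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
print(shifts.shape)  # (12, 4): one (x1, y1, x2, y2) offset per cell
print(shifts)        # matches the 12x4 matrix shown in the comment above
```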
```python
    # Broadcasting the (1, A, 4) base-anchor array against the (K, 1, 4)
    # shift array adds each 4-element shift row to each 4-element anchor row,
    # giving a (K, A, 4) array. This is exactly what we want: _anchors holds
    # the top-left and bottom-right coordinates of the 9 anchors of the
    # top-left cell, while shifts holds the offset of each of the
    # width*height cells relative to that top-left cell. Adding the two gives
    # the top-left and bottom-right coordinates of all width*height*9 anchors.
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations.
    # Each anchor is refined by its own predicted (dx, dy, dw, dh), taken
    # from the rpn_bbox_pred layer, into a box G' (the proposal) that should
    # approximate the ground-truth box GT.
    # anchors: ((2496*9), 4), bbox_deltas: (1 * H * W * A, 4)
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    # im_info[0] is the image height (number of pixel rows) and im_info[1]
    # the width (number of pixel columns); force the boxes inside the image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2];
    # im_info[2] is the factor by which the original image was resized to
    # produce the network input)
    keep = _filter_boxes(proposals, min_size * im_info[2])
    # Keep only the qualifying proposals and their scores
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # numpy.argsort() returns indices in ascending order and [::-1] reverses
    # them, so `order` indexes the scores from highest to lowest
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        # Take the first pre_nms_topN (TRAIN: 12000, TEST: 6000)
        order = order[:pre_nms_topN]
    # Keep only the selected proposals and their scores
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
```
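The actual `bbox_transform_inv` lives in `fast_rcnn/bbox_transform.py`. As a reference, here is a minimal sketch of the standard Faster R-CNN decoding it performs (`bbox_transform_inv_sketch` is a hypothetical name, simplified to one delta per box; it is not the repo's exact code):

```python
import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) anchors."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]

    # Standard Faster R-CNN decoding: shift the center, scale the size
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred = np.zeros_like(deltas, dtype=np.float64)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred
```

A zero delta leaves the anchor unchanged, which is why well-trained RPN deltas stay small: they only nudge each anchor toward the nearby ground-truth box.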
```python
    # 8. return the top proposals (-> RoIs top)
    # nms returns the indices of the surviving boxes, already sorted by
    # score from highest to lowest
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        # Keep only the top post_nms_topN highest-scoring boxes
        # (TRAIN: 2000, TEST: 300)
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # proposals.shape[0] is the number of surviving proposals
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # The blob consists of [batch index (all 0), proposal];
    # its shape is (proposals.shape[0], 5)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
    # top[0].reshape(*(blob.shape))
    # top[0].data[...] = blob

    # [Optional] output scores blob
    # if len(top) > 1:
    #     top[1].reshape(*(scores.shape))
    #     top[1].data[...] = scores


def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    # Find the boxes that satisfy the size constraint and
    # store their indices in keep
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
```
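To see the end-to-end shapes, a hypothetical smoke test might look like the following. The sizes and random inputs are illustrative only, and `cfg` must already be configured as in the repo so that `cfg['TEST']` resolves the `RPN_*` parameters:

```python
import numpy as np
# from rpn_msr.proposal_layer_tf import proposal_layer  # repo module

# Illustrative sizes for a 600x1000 input with stride 16
H, W, A = 39, 64, 9
rpn_cls_prob_reshape = np.random.rand(1, H, W, 2 * A).astype(np.float32)
rpn_bbox_pred = (0.1 * np.random.randn(1, H, W, 4 * A)).astype(np.float32)
im_info = np.array([[600., 1000., 1.0]], dtype=np.float32)  # (h, w, scale)

rois = proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, 'TEST')
print(rois.shape)  # (N, 5): [batch_ind, x1, y1, x2, y2], N <= post_nms_topN
```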