Faster RCNN原理及Pytorch代码解读——RPN(五):生成候选区域

我们前面已经计算出了RPN的损失,而RPN的另一个功能就是区域生成,即生成较好的Proposal,以供下一个阶段进行细分类与回归。
整个过程的示意图如下(原文此处为一张示意图,图片未能保留):
这一部分的内容理解不难,首先是生成大小固定的全部Anchors,关于如何生成Anchors这一点在前面已经讲过了。然后将网络中得到的回归偏移作用到Anchor上使Anchor更加贴近于真值, 并修剪超出图像尺寸的Proposal,得到最初的建议区域。
之后按照分类网络输出的得分对Anchor排序,保留前12000个得分高的Anchors。 由于一个物体可能会有多个Anchors重叠对应,因此再应用非极大值抑制(NMS) 将重叠的框去掉,最后在剩余的Proposal中再次根据RPN的预测得分选择前2000个,作为最终的Proposal,输出到下一个阶段。
下面分阶段介绍代码

生成所有锚框

    def forward(self, input):
        """Turn RPN outputs into region proposals.

        This opening section unpacks the inputs, reads the mode-specific
        settings from ``cfg`` and builds the full anchor grid; the rest of
        the method applies the regression deltas, clips, sorts, runs NMS and
        returns the padded result.

        Args:
            input: tuple ``(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key)``.
                rpn_cls_prob: classification-branch output; the channels
                    from ``self._num_anchors`` onward are taken as the
                    foreground scores. Article example: (batch, 18, 37, 50).
                rpn_bbox_pred: regression-branch output. Article example:
                    (batch, 36, 37, 50).
                im_info: per-image size information, later passed to
                    ``clip_boxes`` (presumably rows of
                    (height, width, scale) -- TODO confirm against caller).
                cfg_key: key used to index ``cfg`` for the current mode
                    (presumably "TRAIN" or "TEST" -- confirm against cfg).

        Returns:
            Tensor of shape (batch, post_nms_topN, 5) holding the selected
            proposals, zero-padded when fewer proposals survive.
        """
        # Per the article, the first A channels are background probabilities
        # and the remaining A channels are foreground probabilities.
        scores = input[0][:, self._num_anchors:, :, :]  # foreground scores
        bbox_deltas = input[1]  # regression-branch output
        im_info = input[2]      # image size information
        cfg_key = input[3]      # selects the cfg section for the current mode

        # Max proposals kept before NMS (article: 12000 train / 6000 test).
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        # Proposals kept after NMS (article: 2000 train / 300 test).
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        # Overlap threshold handed to NMS.
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        # Minimum proposal size (at original image scale, per the article).
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        # ---- Anchor generation ----
        # One (x, y, x, y) shift per feature-map cell, scaled by the stride
        # so that shifts are expressed in input-image pixels.
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors  # anchors per feature-map cell (9 in the article)
        K = shifts.size(0)     # number of cells, feat_height * feat_width (1850 for 37x50)

        self._anchors = self._anchors.type_as(scores)
        # Broadcast (1, A, 4) base anchors against (K, 1, 4) shifts -> (K, A, 4).
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        # Flatten to (1, K*A, 4) and share across the batch -> (batch, K*A, 4).
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

回归偏移调整Anchor

        # Reshape the regression-branch predictions to line up with the anchors: (batch, 16650, 4)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()  # move the channel dim last
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)  # one row of 4 deltas per anchor

        # Reshape the foreground scores the same way so that each score lines
        # up with its anchor: (batch, 16650) in the article's example.
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Apply the predicted regression deltas to the anchors to obtain the
        # raw proposals: (batch, 16650, 4) in the article's example.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
def bbox_transform_inv(boxes, deltas, batch_size):
    """Apply predicted regression deltas to boxes.

    Args:
        boxes: tensor indexed as ``[batch, box, 4]`` holding
            (x1, y1, x2, y2) per box; (batch, 16650, 4) in the article.
        deltas: tensor indexed as ``[batch, box, 4*k]`` holding
            (dx, dy, dw, dh) groups; (batch, 16650, 4) in the article.
        batch_size: batch size. Unused here; kept so existing callers that
            pass it keep working.

    Returns:
        A new tensor with the same shape as ``deltas`` containing the
        shifted boxes as (x1, y1, x2, y2). ``deltas`` itself is not modified.
    """
    # Width/height use the inclusive-pixel "+ 1" convention.
    widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    ctr_x = boxes[:, :, 0] + 0.5 * widths
    ctr_y = boxes[:, :, 1] + 0.5 * heights

    # Strided slices keep the last dimension, so each is (batch, box, k).
    dx = deltas[:, :, 0::4]
    dy = deltas[:, :, 1::4]
    dw = deltas[:, :, 2::4]
    dh = deltas[:, :, 3::4]

    # Centre offsets are relative to the box size; size deltas are in log space.
    pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
    pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2)
    pred_w = torch.exp(dw) * widths.unsqueeze(2)
    pred_h = torch.exp(dh) * heights.unsqueeze(2)

    # Clone only to get a tensor of matching shape/dtype/device; every
    # column is overwritten below.
    pred_boxes = deltas.clone()
    # x1
    pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes

修剪超出边界的候选框

        # Clip proposals that extend past the image back onto the image
        # boundary; the shape is unchanged, (batch, 16650, 4) in the article.
        proposals = clip_boxes(proposals, im_info, batch_size)
def clip_boxes(boxes, im_shape, batch_size):
    """Clamp box coordinates in place to the bounds given by ``im_shape``.

    For each sample ``i`` in ``range(batch_size)``, the x columns
    (0, 2, 4, ...) are limited to ``[0, im_shape[i, 1] - 1]`` and the
    y columns (1, 3, 5, ...) to ``[0, im_shape[i, 0] - 1]``.

    Args:
        boxes: tensor indexed as ``[batch, box, coord]``; modified in place.
        im_shape: 2-D indexable whose row ``i`` supplies the bounds for
            sample ``i`` (presumably (height, width, ...) -- confirm
            against the caller).
        batch_size: number of leading samples of ``boxes`` to clip.

    Returns:
        The same ``boxes`` object, after clipping.
    """
    for idx in range(batch_size):
        x_limit = im_shape[idx, 1] - 1
        y_limit = im_shape[idx, 0] - 1
        sample = boxes[idx]  # a view, so the clamps below write through to boxes
        sample[:, 0::2].clamp_(0, x_limit)
        sample[:, 1::2].clamp_(0, y_limit)

    return boxes

排序筛选

        # Rank the proposals by the classification branch's foreground score
        scores_keep = scores  # (batch, 16650) in the article's example
        proposals_keep = proposals  # (batch, 16650, 4) in the article's example
        _, order = torch.sort(scores_keep, 1, True)  # per-sample indices, highest score first

        # Result buffer of shape (batch, post_nms_topN, 5): column 0 is the
        # batch index and columns 1-4 are the proposal box coordinates.
        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            # NOTE(review): no min-size filtering is actually performed in this loop.
            proposals_single = proposals_keep[i]  # proposals of sample i
            scores_single = scores_keep[i]  # foreground scores of sample i

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]  # score ranking of sample i

            # Keep only the pre_nms_topN best-scoring entries (12000 in training per the article).
            # NOTE(review): numel() counts the elements of the whole batch, not
            # of one sample; the slice below is safe either way.
            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            # Gather the selected proposals and their scores in ranked order.
            proposals_single = proposals_single[order_single, :]  # (12000, 4) in training per the article
            scores_single = scores_single[order_single].view(-1,1)  # (12000, 1) in training per the article

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            # Run NMS on the [x1, y1, x2, y2, score] rows; the CPU path is
            # forced unless cfg.USE_GPU_NMS is set.
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            # Keep at most post_nms_topN survivors (2000 in training per the article).
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            # Rows past num_proposal keep the zeros the buffer was created with.
            num_proposal = proposals_single.size(0)
            output[i,:,0] = i
            output[i,:num_proposal,1:] = proposals_single

        return output

Guess you like

Origin blog.csdn.net/weixin_41693877/article/details/107159304