前面我们已经计算出了RPN的损失,而RPN的另一个功能是区域生成,即生成质量较好的Proposal,以供下一个阶段进行细分类与回归。
整个过程的示意图如下
这一部分的内容理解不难,首先是生成大小固定的全部Anchors,关于如何生成Anchors这一点在前面已经讲过了。然后将网络中得到的回归偏移作用到Anchor上使Anchor更加贴近于真值, 并修剪超出图像尺寸的Proposal,得到最初的建议区域。
之后按照分类网络输出的前景得分对这些Proposal排序,保留得分最高的前12000个(训练阶段;测试阶段为6000个)。由于一个物体可能对应多个相互重叠的Proposal,因此再应用非极大值抑制(NMS)将重叠的框去掉,最后在剩余的Proposal中再次根据RPN的预测得分选择前2000个(测试阶段为300个),作为最终的Proposal,输出到下一个阶段。
下面分阶段介绍代码
生成所有锚框
def forward(self, input):
    """Turn RPN outputs into region proposals (part 1: setup and anchors).

    The article presents this method in several pieces; this piece reads
    the inputs and configuration and tiles the base anchors over every
    feature-map location.

    Args:
        input: tuple ``(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key)``.
            rpn_cls_prob: classification-branch output, e.g.
                (batch, 18, 37, 50) in the article's running example.
            rpn_bbox_pred: regression-branch output, e.g.
                (batch, 36, 37, 50).
            im_info: per-image size information; ``clip_boxes`` uses
                ``im_info[i, 0]`` as the y limit and ``im_info[i, 1]``
                as the x limit.
            cfg_key: key used to index ``cfg`` for the current mode:
                "TEST" or the training key (presumably "TRAIN"; the
                original note spelled it "TRAING").

    Returns:
        The selected proposals, shape (batch, post_nms_topN, 5); e.g.
        (batch, 2000, 5) with the training settings quoted below.
    """
    # Channels [0, num_anchors) are treated as background probabilities and
    # the remaining channels as foreground; keep only the foreground part.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]  # regression-branch output
    im_info = input[2]  # per-image size information
    cfg_key = input[3]  # mode key used to look up the settings below

    # Max number of proposals kept before NMS (article: 12000 train, 6000 test).
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of proposals kept after NMS (article: 2000 train, 300 test).
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # Overlap threshold passed to NMS.
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # Minimum proposal size (at original image scale); read here but not used
    # by the code shown in this article.
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    # ---- Anchor generation ----
    # One (x, y, x, y) shift per feature-map cell, expressed in input-image
    # pixels via the feature stride.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors  # anchors per feature-map cell (9 in the article)
    K = shifts.size(0)  # number of feature-map cells (1850 = 37 * 50 in the article)

    self._anchors = self._anchors.type_as(scores)
    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every base anchor shifted
    # to every cell.
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    # Flatten to (1, K*A, 4) and expand over the batch, e.g. (batch, 16650, 4).
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
回归偏移调整Anchor
# Continuation of the body of forward().
# Move the channel axis last so that each consecutive group of 4 regression
# values belongs to one anchor, then flatten to the anchors' layout
# (batch, K*A, 4), e.g. (batch, 16650, 4).
bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)
# Same reordering for the foreground probabilities -> (batch, K*A),
# e.g. (batch, 16650).
scores = scores.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)
# Apply the predicted offsets to the anchors to obtain the raw proposals,
# e.g. (batch, 16650, 4).
proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
def bbox_transform_inv(boxes, deltas, batch_size):
    """Apply predicted regression offsets to boxes.

    Args:
        boxes: reference boxes as (x1, y1, x2, y2) in the last axis,
            shape (batch, N, 4); e.g. (batch, 16650, 4) in the article.
        deltas: offsets (dx, dy, dw, dh), shape (batch, N, 4). The strided
            slicing below also accepts a last axis of 4 * k columns.
        batch_size: batch size; accepted for interface compatibility and
            not used in the computation.

    Returns:
        A new tensor shaped like ``deltas`` holding the shifted boxes as
        (x1, y1, x2, y2). ``deltas`` itself is not modified.
    """
    # Width/height and centre of every reference box. Sizes are computed
    # with the "+ 1" pixel convention used throughout this code.
    widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    ctr_x = boxes[:, :, 0] + 0.5 * widths
    ctr_y = boxes[:, :, 1] + 0.5 * heights

    # Split the offsets by component; a stride of 4 keeps a trailing axis.
    dx = deltas[:, :, 0::4]
    dy = deltas[:, :, 1::4]
    dw = deltas[:, :, 2::4]
    dh = deltas[:, :, 3::4]

    # Centre offsets are relative to the box size; size offsets are in log
    # space. unsqueeze(2) lets the per-box values broadcast over that axis.
    pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
    pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2)
    pred_w = torch.exp(dw) * widths.unsqueeze(2)
    pred_h = torch.exp(dh) * heights.unsqueeze(2)

    # Clone only to get a fresh tensor with the right shape, dtype and
    # device; every column is overwritten below.
    pred_boxes = deltas.clone()
    pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes
修剪超出边界的候选框
# Continuation of forward(): clamp proposals that extend past the image to the
# image border; the shape is unchanged, e.g. (batch, 16650, 4).
proposals = clip_boxes(proposals, im_info, batch_size)
def clip_boxes(boxes, im_shape, batch_size):
    """Clamp box coordinates in place so that they lie inside each image.

    Args:
        boxes: tensor indexed as ``boxes[i, :, c]`` whose last axis holds
            (x1, y1, x2, y2), optionally repeated.
        im_shape: per-image sizes; ``im_shape[i, 0]`` bounds the y
            coordinates and ``im_shape[i, 1]`` bounds the x coordinates.
        batch_size: number of leading entries of ``boxes`` to process.

    Returns:
        The same ``boxes`` tensor, modified in place.
    """
    for i in range(batch_size):
        # Convert the bounds to plain numbers so clamp_ always receives
        # scalar limits; valid coordinates are 0 .. size - 1.
        max_x = float(im_shape[i, 1]) - 1
        max_y = float(im_shape[i, 0]) - 1
        # Even columns are x1/x2 and odd columns are y1/y2; basic slicing
        # returns a view, so the in-place clamp writes through to boxes.
        boxes[i, :, 0::2].clamp_(0, max_x)
        boxes[i, :, 1::2].clamp_(0, max_y)
    return boxes
排序筛选
# 利用分类网络的得分对proposal进行排序
scores_keep = scores # (batch, 16650)
proposals_keep = proposals # (batch, 16650, 4)
_, order = torch.sort(scores_keep, 1, True)
# 生成结果矩阵, shape(batch, 2000, 5), 第一维是batch编号, 后四维是预测的偏移量
output = scores.new(batch_size, post_nms_topN, 5).zero_()
for i in range(batch_size):
# # 3. remove predicted boxes with either height or width < threshold
# # (NOTE: convert min_size to input image scale stored in im_info[2])
proposals_single = proposals_keep[i] # 取出单个样本的候选框
scores_single = scores_keep[i] # 取出单个样本的的前景概率
# # 4. sort all (proposal, score) pairs by score from highest to lowest
# # 5. take top pre_nms_topN (e.g. 6000)
order_single = order[i] # 取出单个样本的的前景概率排序索引
# 选取前12000个(训练阶段)
if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
order_single = order_single[:pre_nms_topN]
# 取得分最高的前12000(训练阶段)
proposals_single = proposals_single[order_single, :] # shape(12000, 4)
scores_single = scores_single[order_single].view(-1,1) # shape(12000, 1)
# 6. apply nms (e.g. threshold = 0.7)
# 7. take after_nms_topN (e.g. 300)
# 8. return the top proposals (-> RoIs top)
# 进行NMS,在此利用GPU进行计算,提高效率
keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
keep_idx_i = keep_idx_i.long().view(-1)
# 最终选择前2000个,作为最终的Proposal输出
if post_nms_topN > 0:
keep_idx_i = keep_idx_i[:post_nms_topN]
proposals_single = proposals_single[keep_idx_i, :]
scores_single = scores_single[keep_idx_i, :]
# padding 0 at the end.
num_proposal = proposals_single.size(0)
output[i,:,0] = i
output[i,:num_proposal,1:] = proposals_single
return output