本文实现了 mAP 计算所需的函数,其中包括各类别精确率(Precision)和召回率(Recall)的计算。
阅读代码之前,需要先了解 mAP 的概念和计算方式。
具体原理可以查看: https://www.cnblogs.com/lixiunan/articles/9566627.html
以下代码是参考上文实现的,建议配合上文一起理解。
def calc_detection_voc_prec_rec(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
                                gt_difficults=None, iou_thresh=0.5):
    """Compute per-class precision and recall curves for object detection.

    All five main arguments are per-image sequences of equal length
    (one entry per image):

    Args:
        pred_bboxes: predicted boxes, one (N, 4) array-like per image.
            An empty list is accepted for an image without detections.
        pred_labels: predicted class ids, one (N,) array-like per image.
        pred_scores: prediction confidences, one (N,) array-like per image.
        gt_bboxes: ground-truth boxes, one (M, 4) array-like per image.
        gt_labels: ground-truth class ids, one (M,) array-like per image.
        gt_difficults: accepted for interface compatibility only; it is
            ignored and every ground-truth box counts as non-difficult.
        iou_thresh: minimum IoU for a prediction to match a ground-truth box.

    Returns:
        A tuple ``(prec, rec)`` of two lists of length ``opt.classes``.
        ``prec[l]`` / ``rec[l]`` are 1-D arrays over the predictions of
        class ``l`` sorted by descending score, or ``None`` when class ``l``
        appears neither in the predictions nor in the ground truth. A class
        that has ground truth but no prediction yields empty arrays.

    Notes:
        Relies on the module-level ``opt.classes`` and on ``bbox_iou2``,
        which is expected to return an (N, M) IoU matrix (predictions along
        axis 0, ground truth along axis 1).
    """
    n_class = opt.classes
    # Number of ground-truth boxes per class; the recall denominator.
    gt_number = np.zeros(n_class)
    # Per class: 1 for a true positive, 0 for a false positive.
    TP_FP = defaultdict(list)
    # Per class: prediction confidences, aligned with TP_FP.
    pred_score_all = defaultdict(list)

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label in zip(
            pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels):
        pred_bbox = np.asarray(pred_bbox)
        pred_label = np.asarray(pred_label)
        pred_score = np.asarray(pred_score)
        gt_bbox = np.asarray(gt_bbox)
        gt_label = np.asarray(gt_label)

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            l = int(l)
            gt_bbox_l = gt_bbox[gt_label == l]
            # Count ground truth unconditionally: boxes the detector missed
            # entirely must still lower recall.
            gt_number[l] += len(gt_bbox_l)

            pred_mask = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask]
            pred_score_l = pred_score[pred_mask]

            hits = np.zeros(len(pred_bbox_l))
            if len(pred_bbox_l) and len(gt_bbox_l):
                iou = np.asarray(bbox_iou2(pred_bbox_l, gt_bbox_l))
                best_iou = iou.max(axis=1)
                best_gt = iou.argmax(axis=1)
                claimed = np.zeros(len(gt_bbox_l), dtype=bool)
                # Visit predictions from most to least confident so that each
                # ground-truth box is credited to at most one prediction;
                # later duplicates stay false positives.
                for p in np.argsort(-pred_score_l, kind="stable"):
                    g = best_gt[p]
                    if best_iou[p] >= iou_thresh and not claimed[g]:
                        claimed[g] = True
                        hits[p] = 1

            # Extending with empty arrays still registers the class, so a
            # class with ground truth but no predictions is not reported as
            # None.
            pred_score_all[l].extend(pred_score_l)
            TP_FP[l].extend(hits)

    prec = [None] * n_class
    rec = [None] * n_class
    for l in pred_score_all:
        score = np.asarray(pred_score_all[l], dtype=float)
        tp_fp = np.asarray(TP_FP[l], dtype=float)
        order = np.argsort(-score, kind="stable")
        # Cumulative true positives down the ranked prediction list.
        tp = np.cumsum(tp_fp[order])
        # Rank k covers the top-k predictions, i.e. tp + fp == k.
        rank = np.arange(1, len(tp) + 1)
        prec[l] = tp / rank
        if gt_number[l] > 0:
            rec[l] = tp / gt_number[l]
        else:
            rec[l] = np.zeros(len(tp))
    return prec, rec
AP 的计算方法见上文的参考链接。
以下代码对数据集逐批进行预测,并整理出上述函数所需的输入(预测框、预测类别、置信度以及真值框和真值类别):
# Run the detector over the dataset and collect, image by image, the
# predictions and ground truth consumed by the precision/recall function.
# Appends to the existing lists gt_bboxes, gt_labels, pred_bboxes,
# pred_labels and pred_scores.
for batch_i, (img_path, imgs, targets) in enumerate(tqdm(dataloader, desc="Detecting objects")):
    # Skip batches that carry no ground truth at all.
    if len(targets) == 0:
        continue
    imgs = imgs.cuda()
    with torch.no_grad():
        outputs = model(imgs)
        # NOTE(review): the class count (80) and conf_thres (0.8) are
        # hard-coded here; confirm they agree with opt.classes and with the
        # confidence threshold intended for evaluation.
        outputs = non_max_suppression(
            outputs, 80, conf_thres=0.8, nms_thres=opt.nms_thres)
    # Debugging aid: draw predictions and ground truth.
    # draw_predict(imgs, outputs, 'output')
    # draw_predict(imgs, targets, 'targets', 'gt')

    # NOTE(review): relative coordinates are scaled by a fixed 416; this
    # presumably is the network input size -- confirm against the dataloader.
    img_size = 416
    # Iterate over the samples actually present; the last batch may be
    # smaller than opt.batch_size.
    for i in range(len(targets)):
        # Each target row is used as (label, cx, cy, w, h) in relative
        # coordinates, zero-padded to a fixed number of rows.
        target = targets[i, :, 1:5].numpy()
        gt_label = targets[i, :, 0].numpy()
        # Convert centre/size boxes to corner form (x1, y1, x2, y2) in pixels.
        gt_bbox = np.zeros((target.shape[0], 4))
        gt_bbox[:, 0] = (target[:, 0] - target[:, 2] / 2.0) * img_size
        gt_bbox[:, 1] = (target[:, 1] - target[:, 3] / 2.0) * img_size
        gt_bbox[:, 2] = (target[:, 0] + target[:, 2] / 2.0) * img_size
        gt_bbox[:, 3] = (target[:, 1] + target[:, 3] / 2.0) * img_size
        # Drop the zero-padding rows.
        keep = np.sum(gt_bbox, axis=1) > 0
        gt_label = gt_label[keep]
        gt_bbox = gt_bbox[keep]

        output = outputs[i]
        if output is not None:
            # Columns 0-3: box, column 4: confidence, last column: class id.
            pred_boxes = output[:, :4].cpu().numpy()
            scores = output[:, 4].cpu().numpy()
            pred_label = output[:, -1].cpu().numpy().astype(int)
        else:
            # No detection survived NMS for this image.
            pred_boxes = []
            scores = []
            pred_label = []

        gt_bboxes.append(gt_bbox)
        gt_labels.append(gt_label)
        pred_bboxes.append(pred_boxes)
        pred_labels.append(pred_label)
        pred_scores.append(scores)