The article refers to the problem that the MAP and AP calculation in eval.py are 0, which has been solved! ! ! (If it is not the case of the category name, you can try to refer to this article)
AP=0 appears in the YOLOX training VOC format data set. You can check: Solve the problem that AP is 0 during YOLOX training
1. Problem description:
Today, in the process of re-learning and researching YOLOX with a classmate, I found that the AP of the data used is 0. I
also sent a solution about the YOLOX AP of 0 before, but the main reason for this problem is that there is xml标签
no difficult
such voc_eval
An error in calculating AP caused by a parameter
2. Solve the problem:
Since it is known that difficult
the problem is caused by the parameter, then modify it accordingly
1. First comment out difficult
the operation of getting parameters from the xml file
for obj in tree.findall("object"):
obj_struct = {
}
obj_struct["name"] = obj.find("name").text
obj_struct["pose"] = obj.find("pose").text
obj_struct["truncated"] = int(obj.find("truncated").text)
# obj_struct["difficult"] = int(obj.find("difficult").text)
bbox = obj.find("bndbox")
2. Modifying difficult
the value of
does not exist difficult
, so it will cause difficult = np.array([x["difficult"] for x in R]).astype(bool)
an error, which will lead to npos = npos + sum(~difficult)
an error, which will cause rec = tp / float(npos)
an error, and then fp
the tp
calculation of will also be wrong
if ovmax > ovthresh:
if not R["difficult"][jmax]:
if not R["det"][jmax]:
tp[d] = 1.0
R["det"][jmax] = 1
else:
fp[d] = 1.0
else:
fp[d] = 1.0
Because none difficult
means that difficult
all are 0
, then we only need to assign difficult to 长度为 R
an array of all zeros, that is:
difficult = np.zeros(len(R)).astype(np.bool)
As for npos can be modified or not modified,
# 自增,非difficult样本数量,如果数据集没有 difficult,npos数量 就是 gt数量。
npos = npos + sum(~difficult)
To modify:
# len(R) 即为 gt数量
npos = npos + len(R)
Complete the above modification, and then run the program, you can get the correct AP value
Below is the modified completevoc_eval.py
#!/usr/bin/env python3
# Code are based on
# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py
# Copyright (c) Bharath Hariharan.
# Copyright (c) Megvii, Inc. and its affiliates.
import os
import pickle
import xml.etree.ElementTree as ET
import numpy as np
def parse_rec(filename):
"""Parse a PASCAL VOC xml file"""
tree = ET.parse(filename)
objects = []
for obj in tree.findall("object"):
obj_struct = {
}
obj_struct["name"] = obj.find("name").text
obj_struct["pose"] = obj.find("pose").text
obj_struct["truncated"] = int(obj.find("truncated").text)
# obj_struct["difficult"] = int(obj.find("difficult").text)
bbox = obj.find("bndbox")
obj_struct["bbox"] = [
int(bbox.find("xmin").text),
int(bbox.find("ymin").text),
int(bbox.find("xmax").text),
int(bbox.find("ymax").text),
]
objects.append(obj_struct)
return objects
def voc_ap(rec, prec, use_07_metric=False):
"""
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
VOC 07 11 point method (default:False).
"""
if use_07_metric:
# 11 point metric
ap = 0.0
for t in np.arange(0.0, 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.0
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.0], rec, [1.0]))
mpre = np.concatenate(([0.0], prec, [0.0]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def voc_eval(
detpath,
annopath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5,
use_07_metric=False,
):
# first load gt
if not os.path.isdir(cachedir):
os.mkdir(cachedir)
cachefile = os.path.join(cachedir, "annots.pkl")
# read list of images
with open(imagesetfile, "r") as f:
lines = f.readlines()
imagenames = [x.strip() for x in lines]
if not os.path.isfile(cachefile):
# load annots
recs = {
}
for i, imagename in enumerate(imagenames):
recs[imagename] = parse_rec(annopath.format(imagename))
if i % 100 == 0:
print(f"Reading annotation for {
i + 1}/{
len(imagenames)}")
# save
print(f"Saving cached annotations to {
cachefile}")
with open(cachefile, "wb") as f:
pickle.dump(recs, f)
else:
# load
with open(cachefile, "rb") as f:
recs = pickle.load(f)
# extract gt objects for this class
class_recs = {
}
npos = 0
for imagename in imagenames:
R = [obj for obj in recs[imagename] if obj["name"] == classname]
bbox = np.array([x["bbox"] for x in R])
# difficult = np.array([x["difficult"] for x in R]).astype(bool)
difficult = np.zeros(len(R)).astype(np.bool)
det = [False] * len(R)
# npos = npos + len(R)
npos = npos + sum(~difficult)
class_recs[imagename] = {
"bbox": bbox, "difficult": difficult, "det": det}
# read dets
detfile = detpath.format(classname)
with open(detfile, "r") as f:
lines = f.readlines()
if len(lines) == 0:
return 0, 0, 0
splitlines = [x.strip().split(" ") for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
BBGT = R["bbox"].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
ih = np.maximum(iymax - iymin + 1.0, 0.0)
inters = iw * ih
# union
uni = (
(bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
+ (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) - inters
)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > ovthresh:
if not R["difficult"][jmax]:
if not R["det"][jmax]:
tp[d] = 1.0
R["det"][jmax] = 1
else:
fp[d] = 1.0
else:
fp[d] = 1.0
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(npos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap