Article Directory
1. Calculate the IOU
Calculate the IOU of two bboxes
import numpy as np
def ComputeIOU(boxA, boxB):
    """Compute the IoU of two boxes given as [x1, y1, x2, y2].

    Widths and heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``,
    i.e. both corner coordinates are treated as inclusive.

    Args:
        boxA: first box, indexable as [x1, y1, x2, y2].
        boxB: second box, same layout.

    Returns:
        Intersection area divided by union area; 0 when the boxes do not
        overlap.
    """
    # Corners of the intersection rectangle.
    x1 = np.maximum(boxA[0], boxB[0])
    y1 = np.maximum(boxA[1], boxB[1])
    x2 = np.minimum(boxA[2], boxB[2])
    y2 = np.minimum(boxA[3], boxB[3])
    # Clamp the extent at zero so disjoint boxes yield an empty intersection.
    # np.maximum compares element-wise; np.max(value, 0) would interpret the
    # 0 as the `axis` argument and never clamp.
    w = np.maximum(x2 - x1 + 1, 0)
    h = np.maximum(y2 - y1 + 1, 0)
    area = w * h
    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    iou = area / (area_a + area_b - area)
    return iou
# Two 3x3 boxes (inclusive coordinates) that overlap in a 2x2 region.
boxA, boxB = [1, 1, 3, 3], [2, 2, 4, 4]
IOU = ComputeIOU(boxA, boxB)
To compute the IoU between two groups of bboxes, the two nested for loops can be replaced by vectorized matrix operations, which is much faster.
import numpy as np
def iou_batch(bb_test, bb_gt):
    """
    From SORT: computes the pairwise IoU between two sets of bboxes in the
    form [x1,y1,x2,y2] (extra trailing columns such as a score are ignored).

    Args:
        bb_test: array of M boxes with shape (M, >=4), or a single 1-D box.
        bb_gt: array of N boxes with shape (N, >=4), or a single 1-D box.

    Returns:
        Array of shape (M, N); element [i, j] is the IoU of bb_test[i] and
        bb_gt[j]. Widths/heights are x2 - x1 and y2 - y1 (no +1 offset).
    """
    # Promote a single 1-D box to shape (1, C) so the column indexing below
    # always addresses the coordinate axis.
    bb_test = np.atleast_2d(bb_test)
    bb_gt = np.atleast_2d(bb_gt)
    bb_gt = np.expand_dims(bb_gt, 0)      # (1, N, C)
    bb_test = np.expand_dims(bb_test, 1)  # (M, 1, C)
    # np.max() reduces to a single maximum, whereas np.maximum() compares
    # element-wise with broadcasting, e.g.
    #   np.maximum([[12, 32]], [[10], [30]])
    #   -> [[12, 32],
    #       [30, 32]]
    xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
    yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
    xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
    yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
    # Clamp at zero so non-overlapping pairs get an empty intersection.
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h
    area_test = (bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
    area_gt = (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1])
    o = wh / (area_test + area_gt - wh)
    return o
# Two rows per array; iou_batch reads columns 0-3 as x1, y1, x2, y2.
detections = np.array([
    [10, 10, 20, 30, 0.95],
    [30, 15, 40, 40, 0.95],
])
trackers = np.array([
    [12, 12, 22, 32, 1],
    [32, 15, 42, 42, 1],
])
iou_matrix = iou_batch(detections, trackers)
print(type(iou_matrix))
2、NMS
Non-Maximum Suppression (NMS): as the name suggests, elements that are not maxima are suppressed and only the maxima are kept. It is mainly used in object detection, object tracking, 3D reconstruction, data mining, etc.
nms
Algorithm core
Sort the bboxes by confidence (from high to low). Each time, take the bbox with the highest confidence, compute its IoU with the remaining bboxes, and remove those whose IoU exceeds the threshold. Repeat this on the remaining bboxes until none are left.
code example
def hard_nms(preds, iou_thresh=0.7, score_th=None, condidates_num=200):
    """
    Greedy (hard) non-maximum suppression.

    Params:
        preds(numpy.array): detections before nms, with shape (N, 5) and
            rows laid out as [x1, y1, x2, y2, score]
        iou_thresh(float): boxes whose IoU with a kept box exceeds this
            value are discarded
        score_th: detection threshold (optional); rows scoring below it are
            dropped before suppression
        condidates_num(int): maximum number of boxes to keep
    Return:
        keeps(numpy.array): the kept rows of ``preds`` in descending score
            order, or None when ``preds`` is empty
    """
    # if no bbox in preds
    if preds.size == 0:
        return None
    # sort by scores (ascending, so the best box is always the last row)
    bboxes = preds[np.argsort(preds[:, 4])]
    if score_th:
        bboxes = bboxes[bboxes[:, 4] >= score_th]
    keeps = []
    while len(bboxes) > 0:
        current = bboxes[-1]
        keeps.append(current)
        # stop once enough boxes are kept or only one box was left
        if len(bboxes) == 1 or len(keeps) == condidates_num:
            break
        bboxes = bboxes[:-1]
        # iou_batch indexes coordinates along the last axis of 2-D inputs,
        # so the single current box is passed as a (1, 5) array.
        ious = iou_batch(current[np.newaxis, :], bboxes).flatten()
        bboxes = bboxes[ious <= iou_thresh]
    return np.array(keeps)
The version above uses the iou_batch from the previous section to compute the IoU, but that recomputes the box areas on every call. Instead, you can compute the areas of all bboxes once up front and reuse them directly.
def nms(preds, iou_thresh=0.5, score_th=None):
    """Greedy NMS that computes every box area once up front.

    Args:
        preds: numpy array of detections, rows [x1, y1, x2, y2, score].
        iou_thresh: boxes whose IoU with a kept box exceeds this are dropped.
        score_th: optional score threshold; rows scoring below it are
            removed before suppression.

    Returns:
        Array of the kept rows in descending score order, or None when
        ``preds`` is empty.
    """
    if preds.size == 0:
        return None
    boxes = np.array(preds)
    # Preliminary filtering by the confidence threshold.
    if score_th:
        boxes = boxes[boxes[:, 4] >= score_th]
    # Cache the columns and the (inclusive-coordinate) areas.
    left, top, right, bottom, conf = (boxes[:, c] for c in range(5))
    areas = (right - left + 1) * (bottom - top + 1)
    # `order` holds row indices sorted by ascending score; only this index
    # array has to be maintained while boxes are suppressed.
    order = np.argsort(conf)
    kept = []
    while order.size > 0:
        best, rest = order[-1], order[:-1]
        kept.append(boxes[best])
        if order.size == 1:
            break
        # IoU of the best box against every remaining candidate.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        inter = np.maximum(ix2 - ix1 + 1, 0.) * np.maximum(iy2 - iy1 + 1, 0.)
        ious = inter / (areas[best] + areas[rest] - inter)
        # `ious` has one entry per element of `rest` (the best box is not
        # compared with itself), so the boolean mask is applied to `rest`.
        # Indexing `order` with np.where(ious <= iou_thresh) selects the same
        # entries, because np.where returns positions rather than a
        # full-length boolean array.
        order = rest[ious <= iou_thresh]
    return np.array(kept)
# Four sample detections, rows are [x1, y1, x2, y2, score].
dets = np.array([
    [187, 82, 337, 317, 0.9],
    [150, 67, 305, 282, 0.75],
    [246, 121, 368, 304, 0.8],
    [1, 1, 200, 300, 0.2],
])
# IoU threshold 0.5, score threshold 0.5.
dets_nms = nms(dets, 0.5, 0.5)
print(dets_nms)
soft-nms
In dense scenes where different objects overlap heavily, NMS removes neighboring lower-confidence boxes purely on the basis of IoU, which causes missed detections. Raising the IoU threshold instead may lead to false detections.
Therefore, some scholars proposed soft-nms, as shown in the figure.
Algorithm core
Boxes whose IoU exceeds the threshold are not deleted outright; instead, their confidence is reduced. The higher the overlap, the lower the resulting confidence.
- There are usually two kinds of methods, one kind of linear attenuation:
$$ s_i = \begin{cases} s_i, & \mathrm{IoU}(M, b_i) < N_t \\ s_i \left(1 - \mathrm{IoU}(M, b_i)\right), & \mathrm{IoU}(M, b_i) \ge N_t \end{cases} $$
This is a discontinuous function: below the threshold the score is unchanged, while above it the score is multiplied by a coefficient less than 1, so there is a jump at $N_t$. The authors argue that the penalty function should be continuous; otherwise it causes abrupt changes in the ranking of the boxes.
- The other is Gaussian smooth decay:
$$ s_i = s_i \, e^{-\frac{\mathrm{IoU}(M, b_i)^2}{\sigma}}, \quad \forall b_i \notin D $$
A larger penalty is applied to the score of a box that overlaps M heavily (its score is multiplied by a small coefficient), and a smaller penalty to a box that overlaps M only slightly. If the IoU is 0, there is no penalty.
code example
The following code is my own implementation and differs from the official one; I have not verified that it works in all cases.
def soft_nms(preds, score_th=0.25, sigma=0.5):
    """Soft-NMS with Gaussian score decay.

    Instead of deleting boxes that overlap the current best box, each
    remaining score is multiplied by ``exp(-iou**2 / sigma)``; boxes whose
    score is no longer above ``score_th`` are then discarded.

    :param preds: detections, list/array of [x1, y1, x2, y2, score]
    :param score_th: confidence threshold; only boxes scoring strictly
        above it are kept
    :param sigma: Gaussian decay parameter, usually 0.5
    :return: array of selected rows (carrying their decayed scores) in
        selection order, an empty array when no box passes the threshold,
        or None when ``preds`` is empty. ``preds`` itself is not modified.
    """
    if len(preds) == 0:
        return None
    # Work on a float copy so the in-place decay below neither touches the
    # caller's data nor fails on integer input.
    bboxes = np.array(preds, dtype=float)
    if score_th:
        bboxes = bboxes[bboxes[:, 4] > score_th]
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # now[i] is True while box i is still a candidate; it becomes False once
    # the box has been selected or its score is no longer above score_th.
    # A boolean ndarray keeps the mask dtype boolean even when there are no
    # boxes (a mask built from an empty list would be float and could not be
    # used as an index).
    now = np.ones(len(bboxes), dtype=bool)
    res = []
    while True:
        # Scores change every round, so the ordering must be recomputed.
        idxs = np.argsort(bboxes[:, 4])
        # Drop boxes that were already selected or fell below the threshold.
        idxs = idxs[now[idxs]]
        if len(idxs) == 0:
            break
        cur = idxs[-1]
        res.append(bboxes[cur])
        if len(idxs) == 1:
            break
        now[cur] = False
        rest = idxs[:-1]
        # IoU of the current best box with every other candidate.
        xx1 = np.maximum(x1[cur], x1[rest])
        yy1 = np.maximum(y1[cur], y1[rest])
        xx2 = np.minimum(x2[cur], x2[rest])
        yy2 = np.minimum(y2[cur], y2[rest])
        w = np.maximum(xx2 - xx1 + 1, 0.)
        h = np.maximum(yy2 - yy1 + 1, 0.)
        iner = w * h
        outer = area[cur] + area[rest] - iner
        ious = iner / outer
        # The IoUs are ordered like `rest`, so the penalty is applied through
        # the same index array.
        bboxes[rest, 4] *= np.exp(-(ious * ious) / sigma)
        # Re-test every box against the threshold so the mask stays aligned
        # with `bboxes`.
        now &= bboxes[:, 4] > score_th
    return np.array(res)
# Same four sample detections, rows are [x1, y1, x2, y2, score].
dets = np.array([
    [187, 82, 337, 317, 0.9],
    [150, 67, 305, 282, 0.75],
    [246, 121, 368, 304, 0.8],
    [1, 1, 200, 300, 0.2],
])
# Score threshold 0.5, default sigma.
dets_nms = soft_nms(dets, 0.5)
print(dets_nms)
'''SOFT_NMS
[[187. 82. 337. 317. 0.9 ]
[246. 121. 368. 304. 0.57206931]]
'''
'''NMS
[[187. 82. 337. 317. 0.9]
[246. 121. 368. 304. 0.8]]
'''
3. K-means clustering anchors
For a detailed explanation of how the anchor boxes are obtained, refer to the k-means clustering procedure used in YOLOv3.
import numpy as np
def iou_batch(boxs, clusters):
    """Pairwise IoU between boxes and cluster centres given as (width, height).

    Each pair is compared as if both rectangles shared a corner, so the
    intersection is min(width) * min(height).

    Returns an array of shape (len(boxs), len(clusters)).
    """
    box_wh = np.expand_dims(boxs, 1)          # (num_boxes, 1, 2)
    cluster_wh = np.expand_dims(clusters, 0)  # (1, num_clusters, 2)
    box_w, box_h = box_wh[..., 0], box_wh[..., 1]
    clu_w, clu_h = cluster_wh[..., 0], cluster_wh[..., 1]
    overlap = np.minimum(box_w, clu_w) * np.minimum(box_h, clu_h)
    union = box_w * box_h + clu_w * clu_h - overlap
    return overlap / union
def avg_iou(box, cluster):
    """Return the mean, over all boxes, of each box's best IoU with any cluster.

    Args:
        box: array of (width, height) rows.
        cluster: array of (width, height) cluster centres.
    """
    # iou_batch yields a (num_boxes, num_clusters) matrix; take the best
    # cluster per box, then average over the boxes.
    return np.mean(np.max(iou_batch(box, cluster), axis=1))
def kmeans(box, k):
    """Cluster (width, height) boxes into k anchors using 1 - IoU as distance.

    Args:
        box: array of shape (num_boxes, 2) holding (width, height) rows.
        k: number of clusters; must not exceed the number of boxes.

    Returns:
        Array of shape (k, 2) with the cluster centres (per-cluster medians),
        in the same dtype as ``box``.
    """
    # Total number of boxes.
    row = box.shape[0]
    # Assignment from the previous iteration. None (rather than an all-zero
    # array) guarantees at least one centroid update, even when every box is
    # initially assigned to cluster 0.
    last_clu = None
    # Reseeds NumPy's global RNG with fresh entropy, so the initial centres
    # differ between runs regardless of any seed set by the caller.
    np.random.seed()
    # Pick k distinct boxes as the initial cluster centres (fancy indexing
    # copies, so `box` is not modified by the updates below).
    cluster = box[np.random.choice(row, k, replace=False)]
    while True:
        # Distance of every box to every centre: 1 - IoU.
        distance = 1 - iou_batch(box, cluster)
        # Index of the closest centre for each box.
        label = np.argmin(distance, axis=1)
        # Stop once the assignment no longer changes.
        if last_clu is not None and (last_clu == label).all():
            break
        # Move each centre to the median of its members.
        for j in range(k):
            members = box[label == j]
            # An empty cluster has no median (NaN); keep its previous centre.
            if len(members) > 0:
                cluster[j] = np.median(members, axis=0)
        last_clu = label
    return cluster
if __name__ == '__main__':
    num = 100
    SIZE = 446
    np.random.seed(123)
    # Draw `num` random (width, height) pairs, truncated to integers.
    data = np.array(
        [np.random.uniform(20, 100, 2).astype(dtype=int) for _ in range(num)]
    )
    # Run the k-means clustering to obtain 9 anchors.
    out = kmeans(data, 9)
    # Sort the anchors by their first column, ascending.
    order = np.argsort(out[:, 0])
    out = out[order]
    print('acc:{:.2f}%'.format(avg_iou(data, out) * 100))
    print(out)
4. Hand-written quick sort
Quick sort is a classic sorting algorithm with an average time complexity of O(n log n) and a worst case of O(n^2).
The following is a handwritten quick sort using recursion: each time, take the element in the middle of the array as the pivot, put the elements less than or equal to the pivot on the left and those greater than it on the right, then recurse on both sides. It feels a bit like being given the pre-order and in-order traversals of a tree and asked for the post-order traversal.
def quick_sort(nums):
    """Return the elements of ``nums`` in ascending order (recursive quick sort).

    The middle element is used as the pivot; elements less than or equal to
    it go to the left partition, the others to the right. Inputs with fewer
    than two elements are returned as-is.
    """
    if len(nums) < 2:
        return nums
    mid = len(nums) // 2
    pivot = nums[mid]
    # Partition every element except the pivot itself.
    smaller = [v for i, v in enumerate(nums) if i != mid and v <= pivot]
    larger = [v for i, v in enumerate(nums) if i != mid and not v <= pivot]
    return quick_sort(smaller) + [pivot] + quick_sort(larger)
import random
# Build k random integers in [1, 100] and show them before and after sorting.
k = 10
nums = [random.randint(1, 100) for _ in range(k)]
print(nums)
print(quick_sort(nums))