# lib/datasets/ds_utils.py
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
def unique_boxes(boxes, scale=1.0):
    """Return the sorted indices of the unique boxes.

    Each row of ``boxes`` is multiplied by ``scale``, rounded to the nearest
    integer and collapsed into a single integer key by a dot product with
    the weights ``[1, 1e3, 1e6, 1e9]``. Rows that produce the same key are
    treated as duplicates and only the first occurrence is kept.

    Args:
        boxes: 2-D array with four columns (one box per row).
        scale: Factor applied to the coordinates before rounding; a larger
            value makes the comparison finer-grained.

    Returns:
        1-D integer array with the row indices of the first occurrence of
        every distinct box, in ascending order.

    Note:
        The weights are spaced by a factor of 1000, so distinct boxes can
        share a key once a scaled coordinate reaches 1000 or more.
    """
    weights = np.array([1, 1e3, 1e6, 1e9])
    # ``np.int`` was removed in NumPy 1.24; use an explicit 64-bit integer so
    # the keys (up to ~1e12) also fit on platforms with a 32-bit default int.
    hashes = np.round(boxes * scale).dot(weights).astype(np.int64)
    # Only the first-occurrence indices are needed, not the unique keys.
    _, index = np.unique(hashes, return_index=True)
    return np.sort(index)
def xywh_to_xyxy(boxes):
    """Convert boxes from [x y w h] format to [x1 y1 x2 y2] format.

    The far corner is computed as ``x2 = x + w - 1`` and ``y2 = y + h - 1``,
    i.e. the resulting corner coordinates are inclusive.

    Args:
        boxes: 2-D array whose first four columns are ``x, y, w, h``.

    Returns:
        2-D array with columns ``x1, y1, x2, y2``.
    """
    top_left = boxes[:, 0:2]
    size = boxes[:, 2:4]
    # Subtract 1 because the bottom-right corner is inclusive.
    bottom_right = top_left + size - 1
    return np.hstack((top_left, bottom_right))
def xyxy_to_xywh(boxes):
    """Convert boxes from [x1 y1 x2 y2] format to [x y w h] format.

    Width and height are computed as ``w = x2 - x1 + 1`` and
    ``h = y2 - y1 + 1``, i.e. the corner coordinates are treated as
    inclusive.

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.

    Returns:
        2-D array with columns ``x, y, w, h``.
    """
    origin = boxes[:, 0:2]
    far_corner = boxes[:, 2:4]
    # Add 1 because both corners belong to the box.
    extent = far_corner - origin + 1
    return np.hstack((origin, extent))
def validate_boxes(boxes, width=0, height=0):
    """Assert that every box in ``boxes`` is well-formed and inside the image.

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.
        width: Exclusive upper bound for ``x2``.
        height: Exclusive upper bound for ``y2``.

    Raises:
        AssertionError: If any box has a negative top-left coordinate, has
            its bottom-right corner before its top-left corner, or reaches
            ``width`` / ``height``.

    Note:
        With the default ``width=0`` / ``height=0`` any non-empty ``boxes``
        fails the bound checks, so callers are expected to pass the real
        image size. The checks are ``assert`` statements and are therefore
        skipped when Python runs with ``-O``.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]

    # The top-left corner must not be negative.
    assert (left >= 0).all()
    assert (top >= 0).all()
    # The bottom-right corner must not precede the top-left corner.
    assert (right >= left).all()
    assert (bottom >= top).all()
    # The bottom-right corner must lie strictly inside the image bounds.
    assert (right < width).all()
    assert (bottom < height).all()
def filter_small_boxes(boxes, min_size):
    """Return the indices of boxes that are at least ``min_size`` on each side.

    Width and height are measured as ``x2 - x1`` and ``y2 - y1`` (no ``+ 1``
    is applied here).

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.
        min_size: Minimum width and height a box must have to be kept.

    Returns:
        1-D integer array with the row indices of the boxes to keep.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Use the same inclusive comparison for both dimensions; previously the
    # height check was strict (``>``) while the width check was ``>=``.
    keep = np.where((w >= min_size) & (h >= min_size))[0]
    return keep