def transform(self, img, lbl):
    """Convert an image/label pair of numpy arrays into torch tensors.

    The last axis of ``img`` is reversed, ``self.mean_bgr`` is subtracted in
    float64, and the axes are reordered from (0, 1, 2) to (2, 0, 1).

    Args:
        img: 3-D numpy array whose last axis holds the colour channels.
        lbl: numpy array of labels.

    Returns:
        Tuple ``(img, lbl)``: a float32 tensor and an int64 tensor.
    """
    # astype() allocates a new array, so the caller's image is left untouched
    # by the in-place mean subtraction.
    flipped = img[:, :, ::-1].astype(np.float64)
    flipped -= self.mean_bgr
    channels_first = flipped.transpose(2, 0, 1)
    img_tensor = torch.from_numpy(channels_first).float()
    lbl_tensor = torch.from_numpy(lbl).long()
    return img_tensor, lbl_tensor
def untransform(self, img, lbl):
    """Convert an image/label tensor pair back into numpy arrays.

    The image axes are reordered from (0, 1, 2) to (1, 2, 0),
    ``self.mean_bgr`` is added back, the result is cast to uint8 and the
    last axis is reversed.

    Args:
        img: image tensor with the channel axis first.
        lbl: label tensor.

    Returns:
        Tuple ``(img, lbl)`` of numpy arrays; ``img`` is uint8.
    """
    # Tensor.numpy() shares memory with the tensor.  Copy before the in-place
    # add so the caller's tensor is not modified (otherwise calling this
    # twice on the same tensor would add the mean twice).
    img = img.numpy().transpose(1, 2, 0).copy()
    # Keep the add in-place so the sum is stored in the tensor's dtype before
    # the uint8 cast, exactly as before.
    img += self.mean_bgr
    img = img.astype(np.uint8)
    img = img[:, :, ::-1]
    lbl = lbl.numpy()
    return img, lbl
@register_op
class StitchBusTruck(BaseOperator):
    """Paste a randomly rotated vehicle cut-out into the sample image.

    Three pools of cut-outs are loaded once, at construction time, from
    paths relative to the current working directory:

    * state 0: ``label/van_bus_truck.txt`` with masks in ``label/bus_mask``
    * state 1: ``label/car_label.txt`` with masks in ``label/car_mask``
    * state 2: ``label/middle_car.txt`` with masks in ``label/middle_car``

    Each label line is tab separated: image name, ground points, bbox,
    1-based label, angle line.

    Args:
        prob (float): a uniform draw ``p`` in [0, 1) selects the pool:
            ``p <= prob`` -> state 0, ``prob <= p <= 0.75`` -> state 1,
            ``0.75 < p < 1.0`` -> state 2.  Samples with at most two ground
            truth entries have ``p`` forced to 1.0 and are therefore
            returned unchanged unless ``prob >= 1.0``.
        max_trial (int): maximum number of random rotations tried before
            giving up on a sample.
    """

    def __init__(self, prob=0.5, max_trial=1):
        super(StitchBusTruck, self).__init__()
        self.prob = prob
        self.max_trial = max_trial
        # name* : list of cut-out names; anno_dict* : name -> annotation dict
        # with keys 'ground_points', 'label', 'bbox', 'angle_line', 'img'.
        self.name, self.anno_dict = self._load_annotations(
            'label/van_bus_truck.txt', 'label/bus_mask')
        self.name_car, self.anno_dict_car = self._load_annotations(
            'label/car_label.txt', 'label/car_mask')
        self.name_middle, self.anno_dict_middle = self._load_annotations(
            'label/middle_car.txt', 'label/middle_car')
        print('*' * 10, 'stitch truck bus van and car', '*' * 10)

    @staticmethod
    def _load_annotations(label_path, mask_dir):
        """Parse one label file and load the matching mask images.

        Returns:
            Tuple ``(names, annos)``: names in file order and a dict mapping
            each name to its annotation.
        """
        names = []
        annos = {}
        # The context manager closes the label file even if parsing fails.
        with open(label_path, 'r') as label_file:
            for raw_line in label_file:
                if not raw_line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                fields = raw_line.split('\t')
                imgname = fields[0]
                names.append(imgname)
                # SECURITY: eval() executes whatever the label file contains.
                # Only use trusted label files; ast.literal_eval would be a
                # safer choice if the fields are always plain literals.
                annos[imgname] = {
                    'ground_points': eval(fields[1]),
                    'bbox': [int(v) for v in eval(fields[2])],
                    'label': int(fields[3]) - 1,  # file labels are 1-based
                    'angle_line': eval(fields[4]),
                }
        for imgname in names:
            mask_img = cv2.imread(
                os.path.join(os.getcwd(), mask_dir, imgname + '.png'))
            annos[imgname]['img'] = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB)
        return names, annos

    @staticmethod
    def _rotation_matrix(img, angle, scale, begin_x=0, begin_y=0):
        """Build the affine matrix shared by rotate_image and rotate_point.

        Returns:
            Tuple ``(rot_mat, nw, nh)`` where ``nw``/``nh`` are the width and
            height of the box that encloses the rotated image.
        """
        h, w = img.shape[:2]
        rangle = np.deg2rad(angle)
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        rot_mat = cv2.getRotationMatrix2D(
            (nw * 0.5 + begin_x, nh * 0.5 + begin_y), angle, scale)
        # Shift so the rotated content is centred in the enlarged canvas.
        rot_move = np.dot(
            rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    def rotate_image(self, img, angle, scale=1.):
        """Rotate ``img`` by ``angle`` degrees on a canvas large enough to
        hold the whole rotated image (Lanczos interpolation)."""
        rot_mat, nw, nh = self._rotation_matrix(img, angle, scale)
        return cv2.warpAffine(
            img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_point(self, img, xmin, ymin, angle, scale=1., begin_x=0, begin_y=0):
        """Apply the rotation used for ``img`` to the point (xmin, ymin).

        ``begin_x``/``begin_y`` offset the rotation centre.  The result is
        truncated to a pair of ints.
        """
        rot_mat, _, _ = self._rotation_matrix(
            img, angle, scale, begin_x, begin_y)
        rotated = np.dot(rot_mat, np.array([xmin, ymin, 1]))
        return int(rotated[0]), int(rotated[1])

    def _rotate_shifted(self, mask_img, coords, angle, bbox, delta_x, delta_y):
        """Rotate a flat ``[x0, y0, x1, y1, ...]`` sequence about the cut-out
        and shift every point by (delta_x, delta_y); returns a flat list."""
        shifted = []
        for k in range(0, len(coords), 2):
            x, y = self.rotate_point(
                mask_img, coords[k], coords[k + 1], angle, 1,
                begin_x=bbox[0], begin_y=bbox[1])
            shifted.extend((x + delta_x, y + delta_y))
        return shifted

    @staticmethod
    def _paste(img, patch, top, left):
        """Copy the non-black pixels of ``patch`` into ``img`` in place, with
        the patch's upper-left corner at (top, left), cropped to the image."""
        img_h, img_w = img.shape[:2]
        patch_h, patch_w = patch.shape[:2]
        # Clamp to the image so negative offsets cannot wrap around to the
        # opposite edge and large offsets cannot index out of range.
        y0, x0 = max(top, 0), max(left, 0)
        y1, x1 = min(top + patch_h, img_h), min(left + patch_w, img_w)
        if y0 >= y1 or x0 >= x1:
            return
        visible = patch[y0 - top:y1 - top, x0 - left:x1 - left]
        # All-zero pixels are the background produced by the rotation.
        opaque = visible.any(axis=2)
        img[y0:y1, x0:x1][opaque] = visible[opaque]

    @staticmethod
    def _append_rows(sample, key, rows):
        """Append ``rows`` to the array stored under ``sample[key]``."""
        sample[key] = np.concatenate((sample[key], rows))

    def __call__(self, sample, context=None):
        """Stitch one cut-out into ``sample`` and extend its annotations.

        ``sample['image']`` is modified in place.  Returns ``sample``.
        """
        cur_prob = np.random.uniform(0., 1.)
        # NOTE(review): the original test also required
        # ``0 <= count([1]) <= 2 or 0 <= count([3]) <= 2`` on the class list,
        # which is always true for at most two entries, so only the number of
        # ground truth entries matters.  Confirm this is the intended rule.
        if len(sample['gt_class']) <= 2:
            cur_prob = 1.0

        if cur_prob <= self.prob:
            state, names, annos = 0, self.name, self.anno_dict
        elif self.prob <= cur_prob <= 0.75:
            state, names, annos = 1, self.name_car, self.anno_dict_car
        elif 0.75 < cur_prob < 1.0:
            state, names, annos = 2, self.name_middle, self.anno_dict_middle
        else:
            return sample
        anno = annos[np.random.choice(names, 1)[0]]

        img = sample['image']
        img_h, img_w = img.shape[:2]
        gt_bbox = sample['gt_bbox']

        bbox = anno['bbox']
        mask_label = anno['label']
        # The resized cut-out does not depend on the trial, so build it once.
        mask_img = cv2.resize(
            anno['img'], (bbox[2] - bbox[0], bbox[3] - bbox[1]))
        mask_h, mask_w = mask_img.shape[:2]
        # float32 bounds keep the clipped box float32.
        box_limit = np.array([img_w - 1, img_h - 1] * 2, dtype='float32')
        min_angle, max_angle = (6, 15) if state == 0 else (8, 35)

        for _ in range(self.max_trial):
            if random.random() < 0.5:
                angle = random.randint(-max_angle, -min_angle)
            else:
                angle = random.randint(min_angle, max_angle)

            rot_img = self.rotate_image(mask_img, angle, 1)
            mask_h1, mask_w1 = rot_img.shape[:2]
            # The rotated canvas is larger than the cut-out; shift by half
            # the growth so it stays centred on the original bbox.
            delta_x = (mask_w - mask_w1) // 2
            delta_y = (mask_h - mask_h1) // 2

            corners = self._rotate_shifted(
                mask_img,
                [bbox[0], bbox[1], bbox[2], bbox[1],
                 bbox[2], bbox[3], bbox[0], bbox[3]],
                angle, bbox, delta_x, delta_y)
            xs, ys = corners[0::2], corners[1::2]
            rot_box = np.array(
                [[min(xs), min(ys), max(xs), max(ys)]]).astype('float32')
            rot_box = np.clip(rot_box, 0, box_limit)

            # Reject placements that overlap existing boxes too much.
            res_bigcar, res_car = iou_judge(rot_box, gt_bbox)
            if state == 0:
                rejected = max(res_bigcar) > 0.35
            elif state == 1:
                rejected = max(res_car) > 0.1
            else:
                rejected = max([max(res_bigcar), max(res_car)]) > 0.2
            if rejected:
                continue

            self._paste(img, rot_img, bbox[1] + delta_y, bbox[0] + delta_x)

            ground_point = np.array([self._rotate_shifted(
                mask_img, anno['ground_points'][:10],
                angle, bbox, delta_x, delta_y)]).astype('float32')
            angle_line = self._rotate_shifted(
                mask_img, anno['angle_line'][:4],
                angle, bbox, delta_x, delta_y)

            add = self._append_rows
            add(sample, 'is_crowd', np.array([[0]], dtype='int32'))
            add(sample, 'gt_class', np.array([[mask_label]], dtype='int32'))
            add(sample, 'gt_bbox', rot_box)
            add(sample, 'gt_score', np.array([[1.]], dtype='float32'))
            if sample['gt_poly'] is None:
                sample['gt_poly'] = [None]
            else:
                # list.append() returns None, so assigning its result would
                # wipe the polygons; build the extended list instead.
                sample['gt_poly'] = list(sample['gt_poly']) + [None]
            add(sample, 'gt_occluded', np.array([[0]], dtype='int32'))
            add(sample, 'gt_occluded_wgt', np.array([[0.]], dtype='float32'))
            add(sample, 'gt_truncated', np.array([[0]], dtype='int32'))
            add(sample, 'gt_truncated_wgt', np.array([[0.]], dtype='float32'))
            add(sample, 'gt_tru_occ_wgt', np.array([[1.]], dtype='float32'))
            add(sample, 'gt_fisheye_keypoint', ground_point)
            add(sample, 'gt_fisheye_keypoint_wgt',
                np.ones((1, 10), dtype='float32'))
            add(sample, 'gt_fisheye_keypoint_lbl',
                np.ones((1, 10), dtype='int32'))
            add(sample, 'gt_fisheye_angleline',
                np.array([angle_line], dtype='float32'))
            add(sample, 'gt_fisheye_angleline_wgt',
                np.ones((1, 4), dtype='float32'))
            add(sample, 'gt_angle_sin', np.zeros((1, 2), dtype='float32'))
            add(sample, 'gt_angle_sin_wgt', np.ones((1, 2), dtype='float32'))

            (cur_gt_angle_bin, cur_gt_phi_cos, cur_gt_phi_sin,
             cur_gt_angle_bin_flip, cur_gt_phi_cos_flip, cur_gt_phi_sin_flip,
             cur_gt_phi_wgt, cur_gt_phi_wgt_flip) = cal_angle(angle_line)
            add(sample, 'gt_angle_bin', cur_gt_angle_bin)
            add(sample, 'gt_phi_sin', cur_gt_phi_sin)
            add(sample, 'gt_phi_cos', cur_gt_phi_cos)
            add(sample, 'gt_angle_bin_flip', cur_gt_angle_bin_flip)
            add(sample, 'gt_phi_sin_flip', cur_gt_phi_sin_flip)
            add(sample, 'gt_phi_cos_flip', cur_gt_phi_cos_flip)
            add(sample, 'gt_phi_wgt', cur_gt_phi_wgt)
            add(sample, 'gt_phi_wgt_flip', cur_gt_phi_wgt_flip)

            # Zero-filled rows with zero weights for the stitched object.
            for suffix, width in (('', 480), ('1', 720), ('2', 960)):
                add(sample, 'gt_target_x' + suffix,
                    np.zeros((1, width), dtype=np.float32))
                add(sample, 'gt_target_y' + suffix,
                    np.zeros((1, width), dtype=np.float32))
                add(sample, 'gt_target_wgt' + suffix,
                    np.zeros((1, 5), dtype='float32'))
            add(sample, 'gt_source', np.array([[0]], dtype='int32'))

            # One stitched object per call: stop at the first placement that
            # passes the overlap check.
            break

        sample['image'] = img
        return sample
class ResizeAndPad(BaseOperator):
    """Scale the image so its longer side equals ``target_dim`` and pad it
    with zeros to a ``target_dim`` x ``target_dim`` square.

    Ground truth boxes, when present, are scaled by the same factor and
    clipped to ``[0, target_dim - 1]``.

    Args:
        target_dim (int): side length of the square output image.
        interp (int): OpenCV interpolation flag, ``cv2.INTER_LINEAR`` by
            default.
    """

    def __init__(self, target_dim=512, interp=cv2.INTER_LINEAR):
        super(ResizeAndPad, self).__init__()
        self.target_dim = target_dim
        self.interp = interp

    def __call__(self, sample, context=None):
        """Resize and pad ``sample['image']``; returns the updated sample."""
        orig_w = sample['w']
        orig_h = sample['h']
        side = self.target_dim
        ratio = self.target_dim / max(orig_h, orig_w)
        new_w = int(orig_w * ratio)
        new_h = int(orig_h * ratio)

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            factors = np.array([ratio] * 4, dtype=np.float32)
            sample['gt_bbox'] = np.clip(
                sample['gt_bbox'] * factors, 0, side - 1)

        resized = np.array(cv2.resize(
            sample['image'], (new_w, new_h), interpolation=self.interp))
        # The resized image sits in the upper-left corner; the rest is zero.
        padded = np.zeros((side, side, 3), dtype=resized.dtype)
        padded[:new_h, :new_w, :] = resized

        sample['h'] = side
        sample['w'] = side
        sample['image'] = padded
        # im_info keeps the height and width from before resizing, plus the
        # scale factor.
        sample['im_info'] = np.asarray([orig_h, orig_w, ratio], dtype=np.float32)
        # NOTE(review): im_shape is a fixed [96, 96] regardless of
        # target_dim - confirm this is what downstream code expects.
        sample['im_shape'] = np.asarray([96, 96], dtype=np.float32)
        return sample