图像中的数据增强及变换

    def transform(self, img, lbl):
        """Turn an image / label pair of NumPy arrays into network-ready tensors.

        The channel (last) axis of ``img`` is reversed (RGB -> BGR), the image
        is converted to float64, ``self.mean_bgr`` is subtracted, and the axes
        are reordered from (H, W, C) to (C, H, W).

        Args:
            img: 3-D array whose last axis holds the colour channels.
            lbl: array accepted by ``torch.from_numpy``.

        Returns:
            tuple: ``(image, label)`` -- a float32 tensor and an int64 tensor.
        """
        # astype() allocates a new array, so the caller's image is left untouched
        # by the in-place subtraction below.
        bgr = img[:, :, ::-1].astype(np.float64)
        bgr -= self.mean_bgr
        channel_first = np.transpose(bgr, (2, 0, 1))
        img_tensor = torch.from_numpy(channel_first).float()
        lbl_tensor = torch.from_numpy(lbl).long()
        return img_tensor, lbl_tensor

    def untransform(self, img, lbl):
        """Convert an image / label pair of tensors back to NumPy arrays.

        The image is reordered from (C, H, W) to (H, W, C), ``self.mean_bgr``
        is added back, the result is cast to uint8 and the channel axis is
        reversed (BGR -> RGB).

        Args:
            img: 3-D tensor supporting ``.numpy()``, channel axis first.
            lbl: tensor supporting ``.numpy()``.

        Returns:
            tuple: ``(image, label)`` as NumPy arrays; the image is uint8.
        """
        # Tensor.numpy() shares storage with the tensor and transpose() only
        # returns a view, so adding the mean in place would silently modify the
        # caller's tensor.  Work on a private copy instead.
        img = img.numpy().transpose(1, 2, 0).copy()
        img += self.mean_bgr
        # NOTE(review): astype(uint8) truncates and does not clip; values
        # outside [0, 255] are not saturated.  Kept as in the original.
        img = img.astype(np.uint8)
        img = img[:, :, ::-1]
        lbl = lbl.numpy()
        return img, lbl

@register_op
class StitchBusTruck(BaseOperator):
    """Paste a randomly rotated vehicle cut-out into a sample and extend its GT.

    Three pools of cut-outs are loaded at construction time, each from a
    tab-separated label file plus a directory of ``<name>.png`` mask images
    (paths are relative to the current working directory):

    * ``label/van_bus_truck.txt`` / ``label/bus_mask``   -> pool 0
    * ``label/car_label.txt``     / ``label/car_mask``   -> pool 1
    * ``label/middle_car.txt``    / ``label/middle_car`` -> pool 2

    Args:
        prob (float): a uniform draw ``<= prob`` selects pool 0; otherwise a
            draw ``<= 0.75`` selects pool 1 and a draw in ``(0.75, 1)``
            selects pool 2.
        max_trial (int): maximum number of rotation attempts per sample; the
            first attempt that passes the overlap check is used.
    """

    # Exclusive upper bound for pasted pixel coordinates; the appended box is
    # clipped to ``_CANVAS_SIZE - 1``.
    # NOTE(review): presumably the training images are 1280x1280 -- confirm.
    _CANVAS_SIZE = 1280

    # Largest tolerated overlap value returned by ``iou_judge`` per pool.
    _MAX_OVERLAP = (0.35, 0.1, 0.2)

    def __init__(self, prob=0.5, max_trial=1):
        super(StitchBusTruck, self).__init__()
        self.prob = prob
        self.max_trial = max_trial

        self.name, self.anno_dict = self._load_annotations(
            'label/van_bus_truck.txt', 'label/bus_mask')
        self.name_car, self.anno_dict_car = self._load_annotations(
            'label/car_label.txt', 'label/car_mask')
        self.name_middle, self.anno_dict_middle = self._load_annotations(
            'label/middle_car.txt', 'label/middle_car')

        print('*' * 10, 'stitch truck bus van and car', '*' * 10)

    @staticmethod
    def _load_annotations(label_file, mask_dir):
        """Read one label file and the mask image of every entry it lists.

        Each non-blank line holds five tab-separated fields: image name,
        ground points (x1, y1, x2, y2, ...), bbox (x1, y1, x2, y2), a label
        (stored minus one) and an angle line.

        Returns:
            tuple: ``(names, annos)`` -- the image names in file order and a
            dict mapping each name to its ``ground_points``, ``label``,
            ``bbox``, ``angle_line`` and ``img`` (the mask converted with
            ``cv2.COLOR_BGR2RGB``).
        """
        names = []
        annos = {}
        with open(label_file, 'r') as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                fields = line.split('\t')
                imgname = fields[0]
                # NOTE(security): eval() executes arbitrary expressions found
                # in the label file; only use label files from a trusted
                # source (ast.literal_eval would be the safe alternative if
                # the fields are plain literals).
                ground_points = eval(fields[1])
                raw_bbox = eval(fields[2])
                angle_line = eval(fields[4])
                names.append(imgname)
                annos[imgname] = {
                    'ground_points': ground_points,
                    'label': int(fields[3]) - 1,
                    'bbox': [int(v) for v in raw_bbox],
                    'angle_line': angle_line,
                }

        for imgname in names:
            # cv2.imread returns None for a missing/unreadable file, in which
            # case cvtColor raises.
            mask_img = cv2.imread(os.path.join(os.getcwd(), mask_dir, imgname + '.png'))
            annos[imgname]['img'] = cv2.cvtColor(mask_img, cv2.COLOR_BGR2RGB)
        return names, annos

    @staticmethod
    def _rotation_matrix(img, angle, scale, begin_x=0, begin_y=0):
        """Build the affine matrix shared by ``rotate_image`` / ``rotate_point``.

        Returns:
            tuple: ``(rot_mat, nw, nh)`` where ``nw`` x ``nh`` is the size of
            the axis-aligned box that encloses the rotated, scaled image.
        """
        w = img.shape[1]
        h = img.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians

        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale

        rot_mat = cv2.getRotationMatrix2D(
            (nw * 0.5 + begin_x, nh * 0.5 + begin_y), angle, scale)
        # Shift by the rotated offset between the old and the new canvas.
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    def rotate_image(self, img, angle, scale=1.):
        """Rotate ``img`` by ``angle`` degrees on a canvas enlarged to fit it.

        Returns:
            The warped image (Lanczos interpolation) of size
            ``ceil(nw)`` x ``ceil(nh)``.
        """
        rot_mat, nw, nh = self._rotation_matrix(img, angle, scale)
        return cv2.warpAffine(
            img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_point(self, img, xmin, ymin, angle, scale=1., begin_x=0, begin_y=0):
        """Map the point ``(xmin, ymin)`` through the rotation used for ``img``.

        ``begin_x`` / ``begin_y`` offset the rotation centre.

        Returns:
            tuple: the transformed ``(x, y)``, truncated to ints.
        """
        rot_mat, _, _ = self._rotation_matrix(img, angle, scale, begin_x, begin_y)
        Q1 = np.dot(rot_mat, np.array([xmin, ymin, 1]))
        return int(Q1[0]), int(Q1[1])

    @staticmethod
    def _random_angle(state):
        """Draw a rotation angle in degrees: +/-[6, 15] for pool 0, else +/-[8, 35]."""
        low, high = (6, 15) if state == 0 else (8, 35)
        if random.random() < 0.5:
            return random.randint(-high, -low)
        return random.randint(low, high)

    @staticmethod
    def _paste(img, patch, top, left, limit_y, limit_x):
        """Copy the non-black pixels of ``patch`` into ``img`` in place.

        ``(top, left)`` is the destination of the patch's first pixel. Rows and
        columns that would land below 0 or at/after ``limit_y`` / ``limit_x``
        are dropped, so nothing wraps around through negative indexing.
        """
        patch_h, patch_w = patch.shape[:2]
        row0 = max(0, -top)
        col0 = max(0, -left)
        row1 = min(patch_h, limit_y - top)
        col1 = min(patch_w, limit_x - left)
        if row1 <= row0 or col1 <= col0:
            return
        visible = patch[row0:row1, col0:col1]
        # A pixel counts as background when all of its channels sum to zero.
        opaque = visible.sum(axis=2) > 0
        target = img[top + row0:top + row1, left + col0:left + col1]
        target[opaque] = visible[opaque]

    @staticmethod
    def _append_ground_truth(sample, mask_label, rot_box, keypoints, angle_line):
        """Append one row for the stitched object to every GT field of ``sample``."""

        def extend(key, row):
            sample[key] = np.concatenate((sample[key], row))

        extend('is_crowd', np.array([[0]], dtype='int32'))
        extend('gt_class', np.array([[mask_label]], dtype='int32'))
        extend('gt_bbox', rot_box)
        extend('gt_score', np.array([[1.]], dtype='float32'))

        if sample['gt_poly'] is None:
            sample['gt_poly'] = [None]
        else:
            # list.append returns None; assigning its result (as the original
            # did) would wipe the polygon list.
            sample['gt_poly'].append(None)

        extend('gt_occluded', np.array([[0]], dtype='int32'))
        extend('gt_occluded_wgt', np.array([[0.]], dtype='float32'))
        extend('gt_truncated', np.array([[0]], dtype='int32'))
        extend('gt_truncated_wgt', np.array([[0.]], dtype='float32'))
        extend('gt_tru_occ_wgt', np.array([[1.]], dtype='float32'))

        extend('gt_fisheye_keypoint', keypoints)
        extend('gt_fisheye_keypoint_wgt', np.ones((1, 10), dtype='float32'))
        extend('gt_fisheye_keypoint_lbl', np.ones((1, 10), dtype='int32'))
        extend('gt_fisheye_angleline', np.array([angle_line], dtype='float32'))
        extend('gt_fisheye_angleline_wgt', np.ones((1, 4), dtype='float32'))

        extend('gt_angle_sin', np.zeros((1, 2), dtype='float32'))
        extend('gt_angle_sin_wgt', np.ones((1, 2), dtype='float32'))

        (angle_bin, phi_cos, phi_sin,
         angle_bin_flip, phi_cos_flip, phi_sin_flip,
         phi_wgt, phi_wgt_flip) = cal_angle(angle_line)
        extend('gt_angle_bin', angle_bin)
        extend('gt_phi_sin', phi_sin)
        extend('gt_phi_cos', phi_cos)
        extend('gt_angle_bin_flip', angle_bin_flip)
        extend('gt_phi_sin_flip', phi_sin_flip)
        extend('gt_phi_cos_flip', phi_cos_flip)
        extend('gt_phi_wgt', phi_wgt)
        extend('gt_phi_wgt_flip', phi_wgt_flip)

        # Zero targets with zero weights for the three target widths.
        for suffix, width in (('', 480), ('1', 720), ('2', 960)):
            extend('gt_target_x' + suffix, np.zeros((1, width), dtype=np.float32))
            extend('gt_target_y' + suffix, np.zeros((1, width), dtype=np.float32))
            extend('gt_target_wgt' + suffix, np.zeros((1, 5), dtype='float32'))

        extend('gt_source', np.array([[0]], dtype='int32'))

    def __call__(self, sample, context=None):
        """Stitch one cut-out into ``sample['image']`` and append its ground truth.

        Samples with at most two ground-truth objects get the draw forced to
        1.0, which returns them unchanged unless ``prob >= 1``. For the other
        samples, up to ``max_trial`` random rotations are tried; an attempt is
        rejected when the overlap reported by ``iou_judge`` exceeds the pool's
        threshold.

        Returns:
            The (possibly modified) ``sample``.
        """
        cur_prob = np.random.uniform(0., 1.)
        # The original additionally required
        # ``0 <= b.count([1]) <= 2 or 0 <= b.count([3]) <= 2`` for samples with
        # one or two objects, which is always true for such samples.
        # NOTE(review): confirm whether a real class filter was intended.
        if len(sample['gt_class']) <= 2:
            cur_prob = 1.0

        if cur_prob <= self.prob:
            state, names, annos = 0, self.name, self.anno_dict
        elif self.prob <= cur_prob <= 0.75:
            state, names, annos = 1, self.name_car, self.anno_dict_car
        elif 0.75 < cur_prob < 1.0:
            state, names, annos = 2, self.name_middle, self.anno_dict_middle
        else:
            return sample
        mask_img_idx = np.random.choice(names, 1)[0]

        img = sample['image']
        img_h, img_w, _channels = img.shape
        gt_bbox = sample['gt_bbox']

        entry = annos[mask_img_idx]
        bbox = entry['bbox']
        ground_point = entry['ground_points']
        angle_line = entry['angle_line']
        mask_label = entry['label']

        mask_img = cv2.resize(entry['img'], (bbox[2] - bbox[0], bbox[3] - bbox[1]))
        mask_h, mask_w, _channels = mask_img.shape

        # Never write outside the actual image, nor beyond the nominal canvas.
        limit_y = min(self._CANVAS_SIZE, img_h)
        limit_x = min(self._CANVAS_SIZE, img_w)

        for _trial in range(self.max_trial):
            angle = self._random_angle(state)
            rot_img = self.rotate_image(mask_img, angle, 1)
            mask_h1, mask_w1 = rot_img.shape[:2]

            # The rotated patch is larger than the mask; centre it on the bbox.
            delta_x = (mask_w - mask_w1) // 2
            delta_y = (mask_h - mask_h1) // 2

            def place(px, py):
                x, y = self.rotate_point(mask_img, px, py, angle, 1,
                                         begin_x=bbox[0], begin_y=bbox[1])
                return x + delta_x, y + delta_y

            corners = [place(bbox[0], bbox[1]), place(bbox[2], bbox[1]),
                       place(bbox[2], bbox[3]), place(bbox[0], bbox[3])]
            xs = [c[0] for c in corners]
            ys = [c[1] for c in corners]
            rot_box = np.array([[min(xs), min(ys), max(xs), max(ys)]])
            rot_box = rot_box.astype('float32')
            rot_box = np.clip(rot_box, 0, self._CANVAS_SIZE - 1)

            res_bigcar, res_car = iou_judge(rot_box, gt_bbox)
            if state == 0:
                overlap = max(res_bigcar)
            elif state == 1:
                overlap = max(res_car)
            else:
                overlap = max(max(res_bigcar), max(res_car))
            if overlap > self._MAX_OVERLAP[state]:
                continue

            self._paste(img, rot_img, bbox[1] + delta_y, bbox[0] + delta_x,
                        limit_y, limit_x)

            # Five (x, y) ground points; the last pair is the centre point.
            flat_points = []
            for k in range(0, 10, 2):
                flat_points.extend(place(ground_point[k], ground_point[k + 1]))
            keypoints = np.array([flat_points]).astype('float32')

            angle_x1, angle_y1 = place(angle_line[0], angle_line[1])
            angle_x2, angle_y2 = place(angle_line[2], angle_line[3])
            new_angle_line = [angle_x1, angle_y1, angle_x2, angle_y2]

            self._append_ground_truth(sample, mask_label, rot_box, keypoints,
                                      new_angle_line)
            # One successful stitch per call: further trials would paste the
            # same cut-out again on top of itself.
            break

        sample['image'] = img
        return sample

class ResizeAndPad(BaseOperator):
    """Scale image and boxes so the longer side fits, then zero-pad to a square.

    Args:
        target_dim (int): side length of the square output canvas.
        interp (int): OpenCV interpolation flag, default `cv2.INTER_LINEAR`.
    """

    def __init__(self, target_dim=512, interp=cv2.INTER_LINEAR):
        super(ResizeAndPad, self).__init__()
        self.target_dim = target_dim
        self.interp = interp

    def __call__(self, sample, context=None):
        """Resize ``sample['image']`` / ``sample['gt_bbox']`` and pad the image.

        The scale factor is ``target_dim / max(h, w)`` using the sizes stored
        in ``sample['w']`` and ``sample['h']``. The resized image is placed in
        the top-left corner of a zero-filled ``target_dim`` x ``target_dim`` x 3
        canvas. ``sample['im_info']`` receives the original height, width and
        the scale.

        Returns:
            The updated ``sample``.
        """
        src_w, src_h = sample['w'], sample['h']
        side = self.target_dim
        scale = side / max(src_h, src_w)
        new_w, new_h = int(src_w * scale), int(src_h * scale)

        has_boxes = 'gt_bbox' in sample and len(sample['gt_bbox']) > 0
        if has_boxes:
            # x1, y1, x2, y2 all share the same factor; keep boxes on the canvas.
            factors = np.array([scale] * 4, dtype=np.float32)
            sample['gt_bbox'] = np.clip(sample['gt_bbox'] * factors, 0, side - 1)

        resized = np.array(
            cv2.resize(sample['image'], (new_w, new_h), interpolation=self.interp))
        canvas = np.zeros((side, side, 3), dtype=resized.dtype)
        canvas[:new_h, :new_w, :] = resized

        sample['h'] = side
        sample['w'] = side
        sample['image'] = canvas
        sample['im_info'] = np.asarray([src_h, src_w, scale], dtype=np.float32)
        # NOTE(review): im_shape is a fixed [96, 96] regardless of target_dim --
        # confirm this is what downstream code expects.
        sample['im_shape'] = np.asarray([96, 96], dtype=np.float32)
        return sample

图像中的数据增强及变换

猜你喜欢