1. Cut
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
class RandomSampleCrop(object):
    """Randomly crop a patch from the image, SSD-style.

    Repeatedly samples candidate crop rectangles until one satisfies a
    randomly chosen jaccard-overlap constraint, then keeps only the
    ground-truth boxes whose centers fall inside the crop.

    Arguments:
        image (ndarray): the HWC image being input during training
        boxes (ndarray): the original bounding boxes, shape (n, 4), x1,y1,x2,y2
        labels (ndarray): the class labels for each bbox
    Return:
        (img, boxes, labels): the cropped image, the surviving boxes clipped
        and shifted into the crop's coordinate frame, and their labels.
    """
    def __init__(self):
        # Each option is either None (keep the whole image) or a
        # (min_iou, max_iou) pair; None inside a pair means unbounded.
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        # NOTE(review): boxes/labels default to None but are indexed
        # unconditionally below -- callers must supply both.
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels
            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')
            # max trials (50) for this mode before re-drawing a mode
            for _ in range(50):
                current_image = image
                # crop size: 30%-100% of each original dimension
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)
                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)
                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])
                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)
                # is min and max overlap constraint satisfied? if not try again
                # NOTE(review): this `and` matches the upstream SSD code, but
                # rejecting only when BOTH bounds are violated at once looks
                # suspicious -- an `or` of the two violations may be the
                # intended check; confirm against the reference implementation
                # before changing.
                if overlap.min() < min_iou and max_iou < overlap.max():
                    continue
                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]
                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                # mask in all gt boxes that are above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # mask in all gt boxes that are under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                # mask in boxes where both m1 and m2 are true
                mask = m1 * m2
                # have any valid boxes? try again if not
                if not mask.any():
                    continue
                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()
                # take only matching gt labels
                current_labels = labels[mask]
                # use the tighter of the box corner and the crop corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]
                return current_image, current_boxes, current_labels
def debug_random_crop():
    """Visual smoke test: crop a sample image and draw the surviving boxes."""
    cropper = RandomSampleCrop()
    image = cv2.imread('./test.jpg')
    print(image.shape)
    gt_boxes = np.array([[68, 62, 311, 523],
                         [276, 235, 498, 535],
                         [480, 160, 701, 510]])
    gt_labels = np.array([[1],
                          [1],
                          [1]])
    out_img, out_boxes, out_labels = cropper(image, gt_boxes, gt_labels)
    print('==current_image.shape:', out_img.shape)
    print('==current_boxes:', out_boxes)
    print('==current_labels:', out_labels)
    for x1, y1, x2, y2 in out_boxes:
        cv2.rectangle(out_img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', out_img)


if __name__ == '__main__':
    debug_random_crop()
After the random crop, the image above becomes the result shown below.
2. Expand
def expand(image, boxes, filler):
    """
    Perform a zooming-out operation by placing the image in a larger canvas of filler material.

    Helps the detector learn to find smaller objects.

    :param image: image, a tensor of dimensions (3, original_h, original_w)
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param filler: RGB values of the filler material, a list like [R, G, B]
    :return: expanded image, updated bounding box coordinates
    """
    # Calculate dimensions of the proposed expanded (zoomed-out) image.
    original_h = image.size(1)
    original_w = image.size(2)
    max_scale = 4
    scale = random.uniform(1, max_scale)
    new_h = int(scale * original_h)
    new_w = int(scale * original_w)

    # Create the enlarged canvas filled with the filler colour.
    filler = torch.FloatTensor(filler)  # (3)
    new_image = torch.ones((3, new_h, new_w), dtype=torch.float) * filler.unsqueeze(1).unsqueeze(1)  # (3, new_h, new_w)
    # Note - do not use expand() like new_image = filler.unsqueeze(1).unsqueeze(1).expand(3, new_h, new_w)
    # because all expanded values will share the same memory, so changing one pixel will change all

    # Place the original image at random coordinates in this new image
    # (origin at the top-left of the canvas).
    # `random` here is numpy.random, whose randint high bound is EXCLUSIVE and
    # which raises ValueError when low == high; the `+ 1` both fixes a crash
    # when int truncation makes new_w == original_w (randint(0, 0)) and allows
    # the rightmost/bottom-most placement the inclusive bound intends.
    left = random.randint(0, new_w - original_w + 1)
    right = left + original_w
    top = random.randint(0, new_h - original_h + 1)
    bottom = top + original_h
    new_image[:, top:bottom, left:right] = image

    # Shift the bounding boxes by the paste offset.
    new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(
        0)  # (n_objects, 4), n_objects is the no. of objects in this image
    return new_image, new_boxes
def torch_cutout():
    """Visual check for the tensor-based `expand`: load an image, draw its
    ground-truth boxes, expand, and draw the shifted boxes.

    NOTE(review): `Image` (presumably PIL.Image) and `FT` (presumably
    torchvision.transforms.functional) are not imported in this file --
    confirm where they come from before running.
    """
    # hard-coded sample annotation (boundary coords x1, y1, x2, y2)
    info = {"boxes": [[52, 86, 470, 419],
                      [157, 43, 288, 166]],
            "labels": [13, 15], "difficulties": [0, 0]}
    image = Image.open('./2008_000008.jpg', mode='r')
    image = image.convert('RGB')
    bboxs = info['boxes']
    lables = info['labels']
    difficulties = info['difficulties']
    # RGB -> BGR copy so OpenCV draws with the expected channel order
    img = np.array(image)[..., ::-1].copy()
    for box in bboxs:
        x1, y1, x2, y2 = box
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        cv2.rectangle(img, (x1,y1), (x2,y2),color=(0,0,255),thickness=2)
    cv2.imwrite('./img_rect.jpg', img)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    new_image = FT.to_tensor(image)
    boxes = torch.FloatTensor(bboxs)
    labels = torch.LongTensor(lables)  # (n_objects)
    difficulties = torch.ByteTensor(difficulties)  # (n_objects)
    # new_image, new_boxes, new_labels, new_difficulties = random_crop(new_image, boxes, labels, difficulties)
    # print('new_image, new_boxes, new_labels, new_difficulties', new_image.shape, new_boxes, new_labels, new_difficulties)
    new_image, new_boxes = expand(new_image, boxes, filler=mean)
    # CHW [0,1] tensor back to HWC 0-255, then RGB -> BGR for OpenCV
    fin_img = new_image.permute(1, 2, 0).numpy()*255.
    fin_img = fin_img[..., ::-1].copy()
    print('fin_img.shape:', fin_img.shape)
    fin_boxes = new_boxes.numpy()
    print(fin_boxes)
    for box in fin_boxes:
        x1, y1, x2, y2 = box
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        # NOTE(review): x1..y2 are floats here; cv2.rectangle expects integer
        # coordinates -- confirm this works on the target OpenCV version.
        cv2.rectangle(fin_img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./fin_img_rect.jpg', fin_img)


if __name__ == '__main__':
    torch_cutout()
After expansion, the image above becomes the result shown below.
The complete detection augmentation pipeline combines random cropping, expansion, resizing, and the other transforms below:
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
def intersect(box_a, box_b):
    """Return the intersection area between each box in `box_a` and `box_b`.

    box_a: (n, 4) array of x1,y1,x2,y2 boxes; box_b: a single (4,) box.
    Returns an (n,) array of intersection areas (0 where boxes are disjoint).
    """
    lower_right = np.minimum(box_a[:, 2:], box_b[2:])
    upper_left = np.maximum(box_a[:, :2], box_b[:2])
    side_lengths = np.clip(lower_right - upper_left, a_min=0, a_max=np.inf)
    return side_lengths[:, 0] * side_lengths[:, 1]
def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap (IoU) of a set of boxes against one box.

    The jaccard overlap is simply the intersection over union of two boxes:
    A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes, 4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap of box_b with every box in box_a,
        Shape: [num_boxes]
        (the previous docstring claimed a 2-D [num_boxes, 4] result, which
        was wrong -- the column products below collapse to one value per box)
    """
    inter = intersect(box_a, box_b)
    # per-box areas of box_a ...
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [num_boxes]
    # ... and the scalar area of box_b
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # scalar
    union = area_a + area_b - inter
    # Degenerate (zero-area) box pairs would divide by zero here; callers in
    # this file pass positive-area boxes.
    return inter / union  # [num_boxes]
class Compose(object):
    """Chain several (image, boxes, labels) transforms into one callable.

    Args:
        transforms (List[Transform]): list of transforms to compose.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        # Thread the (img, boxes, labels) triple through every transform.
        for transform in self.transforms:
            img, boxes, labels = transform(img, boxes, labels)
        return img, boxes, labels
class Lambda(object):
    """Wrap a user-supplied lambda as an (image, boxes, labels) transform."""

    def __init__(self, lambd):
        # only genuine lambdas/functions are accepted
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)
class ConvertFromInts(object):
    """Cast the image to float32 so later arithmetic transforms can scale it."""

    def __call__(self, image, boxes=None, labels=None):
        float_image = image.astype(np.float32)
        return float_image, boxes, labels
class Normalize(object):
    """Scale pixels to [0, 1], then standardise with per-channel mean/std."""

    def __init__(self, mean=None, std=None):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        scaled = image.astype(np.float32) / 255.
        normalised = (scaled - self.mean) / self.std
        return normalised, boxes, labels
class ToAbsoluteCoords(object):
    """Convert boxes from fractional [0, 1] coords to absolute pixels (in place)."""

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        # x coordinates (cols 0, 2) scale with width, y (cols 1, 3) with height
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height
        return image, boxes, labels
class ToPercentCoords(object):
    """Convert boxes from absolute pixel coords to fractional [0, 1] (in place)."""

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        # x coordinates (cols 0, 2) divide by width, y (cols 1, 3) by height
        boxes[:, 0::2] /= width
        boxes[:, 1::2] /= height
        return image, boxes, labels
class Resize(object):
    """Resize the image to a fixed `size` x `size` square.

    Boxes pass through untouched (in this file's pipeline they are in
    percent coords by the time Resize runs, so no rescaling is needed).
    """

    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        square = (self.size, self.size)
        return cv2.resize(image, square), boxes, labels
class RandomSaturation(object):
    """With probability 0.5, scale the saturation channel of an HSV image."""

    def __init__(self, lower=0.5, upper=1.5):
        assert upper >= lower, "contrast upper must be >= lower."
        assert lower >= 0, "contrast lower must be non-negative."
        self.lower = lower
        self.upper = upper

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            # multiply channel 1 (saturation) in place by a random factor
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
class RandomHue(object):
    """With probability 0.5, shift the hue channel by a random delta, wrapping to [0, 360)."""

    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 0] += random.uniform(-self.delta, self.delta)
            # wrap hue values back into the [0, 360) range
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, boxes, labels
class RandomLightingNoise(object):
    """With probability 0.5, permute the colour channels at random."""

    def __init__(self):
        # all 6 possible orderings of the 3 channels
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            chosen = self.perms[random.randint(len(self.perms))]
            image = SwapChannels(chosen)(image)
        return image, boxes, labels
class ConvertColor(object):
    """Convert the image between the BGR and HSV colour spaces."""

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            converted = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'HSV' and self.transform == 'BGR':
            converted = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            # only BGR<->HSV is supported
            raise NotImplementedError
        return converted, boxes, labels
class RandomContrast(object):
    """With probability 0.5, multiply the (float) image by a random contrast factor."""

    def __init__(self, lower=0.5, upper=1.5):
        assert upper >= lower, "contrast upper must be >= lower."
        assert lower >= 0, "contrast lower must be non-negative."
        self.lower = lower
        self.upper = upper

    # expects a float image
    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
class RandomBrightness(object):
    """With probability 0.5, add a uniform random offset in [-delta, delta] to the image."""

    def __init__(self, delta=32):
        assert 0.0 <= delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image += random.uniform(-self.delta, self.delta)
        return image, boxes, labels
class ToCV2Image(object):
    """Convert a CHW torch tensor back to an HWC float32 numpy image."""

    def __call__(self, tensor, boxes=None, labels=None):
        array = tensor.cpu().numpy().astype(np.float32)
        return array.transpose((1, 2, 0)), boxes, labels
class ToTensor(object):
    """Convert an HWC numpy image to a CHW float32 torch tensor."""

    def __call__(self, cvimage, boxes=None, labels=None):
        tensor = torch.from_numpy(cvimage.astype(np.float32))
        return tensor.permute(2, 0, 1), boxes, labels
class RandomSampleCrop(object):
    """Randomly crop a patch from the image, SSD-style.

    Repeatedly samples candidate crop rectangles until one satisfies a
    randomly chosen jaccard-overlap constraint, then keeps only the
    ground-truth boxes whose centers fall inside the crop.

    Arguments:
        image (ndarray): the HWC image being input during training
        boxes (ndarray): the original bounding boxes, shape (n, 4), x1,y1,x2,y2
        labels (ndarray): the class labels for each bbox
    Return:
        (img, boxes, labels): the cropped image, the surviving boxes clipped
        and shifted into the crop's coordinate frame, and their labels.
    """
    def __init__(self):
        # Each option is either None (keep the whole image) or a
        # (min_iou, max_iou) pair; None inside a pair means unbounded.
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        # NOTE(review): boxes/labels default to None but are indexed
        # unconditionally below -- callers must supply both.
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels
            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')
            # max trials (50) for this mode before re-drawing a mode
            for _ in range(50):
                current_image = image
                # crop size: 30%-100% of each original dimension
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)
                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)
                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])
                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)
                # is min and max overlap constraint satisfied? if not try again
                # NOTE(review): this `and` matches the upstream SSD code, but
                # rejecting only when BOTH bounds are violated at once looks
                # suspicious -- an `or` of the two violations may be the
                # intended check; confirm against the reference implementation
                # before changing.
                if overlap.min() < min_iou and max_iou < overlap.max():
                    continue
                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]
                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                # mask in all gt boxes that are above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # mask in all gt boxes that are under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                # mask in boxes where both m1 and m2 are true
                mask = m1 * m2
                # have any valid boxes? try again if not
                if not mask.any():
                    continue
                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()
                # take only matching gt labels
                current_labels = labels[mask]
                # use the tighter of the box corner and the crop corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]
                return current_image, current_boxes, current_labels
class Expand(object):
    """With probability 0.5, zoom out: paste the image at a random spot on a
    larger mean-filled canvas and shift the boxes by the paste offset."""

    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        # build the enlarged canvas and fill it with the mean colour
        canvas = np.zeros((int(height * ratio), int(width * ratio), depth),
                          dtype=image.dtype)
        canvas[:, :, :] = self.mean
        canvas[int(top):int(top + height),
               int(left):int(left + width)] = image

        # shift the boxes into the canvas coordinate frame
        shifted = boxes.copy()
        shifted[:, :2] += (int(left), int(top))
        shifted[:, 2:] += (int(left), int(top))
        return canvas, shifted, labels
class RandomMirror(object):
    """With probability 0.5, flip the image horizontally and mirror the boxes."""

    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if random.randint(2):
            image = image[:, ::-1]
            boxes = boxes.copy()
            # mirrored box: new x1 = width - old x2, new x2 = width - old x1
            boxes[:, 0], boxes[:, 2] = width - boxes[:, 2], width - boxes[:, 0]
        return image, boxes, classes
class SwapChannels(object):
    """Reorder an image's channels according to a swap tuple.

    Args:
        swaps (int triple): final order of channels, e.g. (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """Return `image` with its last (channel) axis reordered to `self.swaps`."""
        return image[:, :, self.swaps]
class PhotometricDistort(object):
    """SSD photometric jitter: random brightness, then a random choice of
    applying contrast before or after the HSV saturation/hue distortions."""

    def __init__(self):
        # full chain; one of the two RandomContrast endpoints is dropped per call
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        # keep contrast either at the front or at the back of the chain
        if random.randint(2):
            chain = self.pd[:-1]
        else:
            chain = self.pd[1:]
        return Compose(chain)(im, boxes, labels)
class SSDAugmentation(object):
    """Full SSD training-time augmentation pipeline.

    Applies, in order: int->float cast, percent->absolute box coords,
    photometric distortion, random expand, random sample crop, random
    horizontal mirror, absolute->percent box coords, resize to a `size`
    square, and mean/std normalisation.

    NOTE(review): Expand(self.mean) fills its canvas with `mean`, which is in
    0-1 scale (the same value Normalize subtracts AFTER dividing by 255),
    while the image at the Expand stage is still in 0-255 scale -- so the
    expand filler is nearly black. Confirm whether a 0-255-scale mean was
    intended there.
    """
    def __init__(self, size=300, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)):
        # mean/std are per-channel values in 0-1 scale (presumably BGR order,
        # judging by the reversed ImageNet constants -- confirm)
        self.mean = mean
        self.size = size
        self.std = std
        self.augment = Compose([
            ConvertFromInts(),
            ToAbsoluteCoords(),
            PhotometricDistort(),
            Expand(self.mean),
            RandomSampleCrop(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            Normalize(self.mean, self.std)
        ])

    def __call__(self, img, boxes, labels):
        return self.augment(img, boxes, labels)
def debug_random_crop():
    """Visual smoke test: crop a sample image and draw the surviving boxes."""
    cropper = RandomSampleCrop()
    image = cv2.imread('./test.jpg')
    print(image.shape)
    gt_boxes = np.array([[68, 62, 311, 523],
                         [276, 235, 498, 535],
                         [480, 160, 701, 510]])
    gt_labels = np.array([[1],
                          [1],
                          [1]])
    out_img, out_boxes, out_labels = cropper(image, gt_boxes, gt_labels)
    print('==current_image.shape:', out_img.shape)
    print('==current_boxes:', out_boxes)
    print('==current_labels:', out_labels)
    for x1, y1, x2, y2 in out_boxes:
        cv2.rectangle(out_img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', out_img)


if __name__ == '__main__':
    debug_random_crop()
3. Rotate
import os
import cv2
import numpy as np
import json
import imgaug as ia
from imgaug import augmenters as iaa
def may_augment_poly(aug, img_shape, poly):
    """Project a polygon's points through an imgaug augmenter.

    aug: a deterministic imgaug augmenter (so it matches the image transform);
    img_shape: shape of the image the polygon lives in;
    poly: iterable of (x, y) points.
    Returns the augmented polygon as a list of (x, y) tuples.
    """
    kps = [ia.Keypoint(pt[0], pt[1]) for pt in poly]
    augmented = aug.augment_keypoints(
        [ia.KeypointsOnImage(kps, shape=img_shape)])[0]
    return [(kp.x, kp.y) for kp in augmented.keypoints]
def get_express_code_txt():
    """Rotate labelled express-sheet images with imgaug and re-project the
    labelled QR-code polygons through the same transform.

    Reads each .jpg and its labelme-style .json sidecar from `path`, builds a
    deterministic Fliplr+Affine augmenter, applies it to the image and to the
    polygon keypoints, then draws and saves the augmented polygons.

    NOTE(review): `cal_stand_points` and `polygon_area1` are not defined in
    this file -- they must be imported/defined elsewhere; confirm before use.
    """
    path = './标好快递单二维码数据'
    imgs_list_path = [os.path.join(path, i) for i in os.listdir(path) if '.jpg' in i]
    for i, img_list_path in enumerate(imgs_list_path):
        if i < 1:  # only process the first image while debugging
            print('==img_list_path:', img_list_path)
            img = cv2.imread(img_list_path)
            json_list_path = img_list_path.replace('.jpg', '.json')
            with open(json_list_path, 'r') as file:
                json_info = json.load(file)
            shapes = json_info['shapes']
            output_points = []
            for shape in shapes:
                # np.int32 replaces the np.int alias (removed in NumPy >= 1.24);
                # int32 is also the depth cv2.polylines expects.
                points = np.array(shape['points']).astype(np.int32)
                points = cal_stand_points(points)
                points = polygon_area1(points)
                output_points.append(list(map(int, (points.reshape(-1).tolist()))))
            print('==output_points:', output_points)
            seq = iaa.Sequential([
                iaa.Fliplr(0.5),
                iaa.Affine(
                    rotate=(0, 360),   # random rotation in 0-360 degrees
                    scale=(0.7, 1.0),  # shrink to 70-100% (pads with black borders)
                ),  # affects keypoints too
            ])
            # freeze the random state so image and keypoints get the SAME transform
            seq_def = seq.to_deterministic()
            image_aug = seq_def.augment_image(img)
            print('==image_aug.shape:', image_aug.shape)
            line_polys = []
            polys = np.array(output_points).reshape(-1, 4, 2).astype(np.int32)
            print('==polys:', polys.shape)
            for poly in polys:
                new_poly = may_augment_poly(seq_def, img.shape, poly)
                line_polys.append(new_poly)
            print('=line_polys:', line_polys)
            # debug: draw the augmented polygons onto the augmented image
            for line_poly in line_polys:
                cv2.polylines(image_aug, [np.array(line_poly).reshape(-1, 1, 2).astype(np.int32)], True, (0, 0, 255), thickness=2)
            cv2.imwrite('./image_aug.jpg', image_aug)


if __name__ == '__main__':
    get_express_code_txt()
The accompanying labelme JSON annotation file:
{
"version": "4.2.10",
"shapes": [
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
207.6190476190476,
689.2857142857143
],
[
613.5714285714286,
545.2380952380953
],
[
654.047619047619,
635.7142857142858
],
[
254.04761904761904,
777.3809523809524
]
],
"flags": {}
},
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
500.4761904761905,
883.3333333333334
],
[
858.8095238095239,
757.1428571428572
],
[
881.4285714285716,
796.4285714285714
],
[
513.5714285714286,
925.0
]
],
"flags": {}
},
{
"shape_type": "polygon",
"group_id": null,
"label": "code",
"points": [
[
595.7142857142858,
1059.5238095238096
],
[
960.0,
933.3333333333334
],
[
981.4285714285716,
973.8095238095239
],
[
606.4285714285714,
1101.1904761904761
]
],
"flags": {}
}
],
"lineColor": [
0,
255,
0,
128
],
"fillColor": [
255,
0,
0,
128
],
"imageHeight": 1422,
"imageData": null,
"imageWidth": 1152,
"imagePath": "72.jpg",
"flags": {}
}
Left: the original image. Right: the augmented (rotated) image.