目标检测中的增强方式

目标检测的数据增强对于提高模型的泛化能力很有帮助,往往会用在线的方式进行,即在训练的过程中随机地对batch中的图片进行某些图像增强变换。但是离线的方式有时候也是必不可少的。下面介绍一个比较好用的图像增强库:https://github.com/albumentations-team/albumentations ,这个库是基于 https://github.com/aleju/imgaug 开发的。

安装过程非常简单,直接输入下面的即可

pip install albumentations

下面介绍进行图像增强的用法,详细的解释见代码注释

1.目标分类任务

目标分类任务不涉及bounding box,直接对整幅图像进行数据增强,返回增强后的图像。

import numpy as np
import cv2
from matplotlib import pyplot as plt

from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose,Rotate,VerticalFlip
)  # 图像变换函数


image_path = './data/1/1.jpg'
image = cv2.imread(image_path)  # OpenCV loads images in BGR channel order
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError('Could not read image: ' + image_path)

# A transform is called with keyword arguments and returns a dict; the
# augmented image is stored under the 'image' key.
# Alternatives to try in place of the VerticalFlip below:
# aug = Rotate(p=1)
# img = aug(image=image)['image']
# aug = RandomRotate90(p=1)
# img = aug(image=image)['image']
aug = VerticalFlip(p=1)  # p is the probability of applying the transform
img = aug(image=image)['image']

# More alternatives:
# aug = IAAPerspective(scale=0.2, p=1)
# img_IAAPerspective = aug(image=image)['image']
#
# aug = ShiftScaleRotate(p=1)
# img = aug(image=image)['image']
cv2.imwrite('2.jpg', img)  # still BGR, which is what cv2.imwrite expects

2.目标检测

对于目标检测等任务会要求数据增强不仅仅返回增强之后的图片,还应该返回增强后的图片中目标的bounding box信息。

import os
import numpy as np
import cv2
from matplotlib import pyplot as plt

#引入图像增强的方式
from albumentations import (
    BboxParams,
    Blur,
    CenterCrop,
    Compose,
    Crop,
    Flip,
    HorizontalFlip,
    RandomCrop,
    RandomRotate90,
    Resize,
    Rotate,
    VerticalFlip,
)
# Default colour for the box outline drawn by visualize_bbox.
# NOTE(review): the tuple has no inherent channel order; it shows as red when
# the result is displayed as RGB (plt.imshow) — confirm that is intended.
BOX_COLOR = (255, 0, 0)
# Colour of the class-name text drawn by visualize_bbox.
TEXT_COLOR = (255, 255, 255)

def visualize_bbox(img, bbox, class_id, class_idx_to_name, color=BOX_COLOR, thickness=20):
    """Draw one bounding box and its class label onto ``img``.

    The box must be in pascal_voc format ``[x_min, y_min, x_max, y_max]``.
    For coco boxes ``[x_min, y_min, width, height]`` convert them first
    (``x_max = x_min + width``, ``y_max = y_min + height``) or adapt the
    unpacking below.

    Args:
        img: Image array to draw on; OpenCV draws into it in place.
        bbox: Four box coordinates; each one is truncated to ``int``.
        class_id: Key used to look up the label in ``class_idx_to_name``.
        class_idx_to_name: Mapping from class id to the label text.
        color: Colour of the box outline and of the label background.
        thickness: Line thickness of the box outline.

    Returns:
        The ``img`` that was passed in, with the box and label drawn on it.
    """
    x_min, y_min, x_max, y_max = (int(coord) for coord in bbox)
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    class_name = class_idx_to_name[class_id]
    # NOTE(review): the label is measured with thickness 21 but drawn with
    # putText's default thickness, so the background strip is larger than the
    # rendered text — confirm this is intended.
    (text_width, text_height), _ = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 21)

    # Filled strip (thickness -1) sitting on top of the box's upper-left corner.
    # It uses the caller's colour so the label background always matches the
    # outline instead of silently falling back to BOX_COLOR.
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color, -1)
    cv2.putText(img, class_name, (x_min, y_min - int(0.3 * text_height)), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                TEXT_COLOR, lineType=cv2.LINE_AA)
    return img


def visualize(annotations, category_id_to_name):
    """Show the annotated image with every bounding box and label drawn on it.

    Args:
        annotations: Dict with the keys ``'image'``, ``'bboxes'`` and
            ``'category_id'``; the i-th category id labels the i-th box.
        category_id_to_name: Mapping from category id to the label text.
    """
    # Draw on a copy so the caller's image stays untouched.
    canvas = annotations['image'].copy()
    for box_index, box in enumerate(annotations['bboxes']):
        label_id = annotations['category_id'][box_index]
        canvas = visualize_bbox(canvas, box, label_id, category_id_to_name)

    plt.figure()
    plt.imshow(canvas)
    plt.show()

def get_aug(aug, min_area=0., min_visibility=0.):
    """Wrap a list of transforms in a bounding-box-aware ``Compose`` pipeline.

    Args:
        aug: Transforms handed to ``Compose`` unchanged.
        min_area: Forwarded to ``BboxParams`` as ``min_area``.
        min_visibility: Forwarded to ``BboxParams`` as ``min_visibility``.

    Returns:
        The ``Compose`` object built from ``aug`` and the box parameters.
    """
    # The format must match how the boxes are encoded; change 'pascal_voc'
    # here if the annotations use a different bbox format (e.g. coco).
    box_settings = BboxParams(
        format='pascal_voc',
        min_area=min_area,
        min_visibility=min_visibility,
        label_fields=['category_id'],
    )
    return Compose(aug, bbox_params=box_settings)
# The upstream albumentations example downloads its sample image instead:
# image = download_image('http://images.cocodataset.org/train2017/000000386298.jpg')
image_path = './data/images/0101_003.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError('Could not read image: ' + image_path)
# cv2.imread yields BGR, but visualize() displays through matplotlib, which
# interprets the array as RGB; convert so the colours are not swapped.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Boxes are pascal_voc [x_min, y_min, x_max, y_max]. With several boxes,
# category_id needs one entry per box, in the same order.
annotations = {
    'image': image,
    'bboxes': [[704, 432, 1744, 1552], [682, 1740, 1800, 2440]],
    'category_id': [1, 1],
}
category_id_to_name = {1: 'table'}
visualize(annotations, category_id_to_name)

# Alternative transforms; swap any of them in for the Blur below.
# aug = get_aug([VerticalFlip(p=1)])
# augmented = aug(**annotations)
# visualize(augmented, category_id_to_name)

# aug = get_aug([HorizontalFlip(p=1)])
# augmented = aug(**annotations)
# visualize(augmented, category_id_to_name)

# aug = get_aug([CenterCrop(p=1, height=1000, width=2000)])
# augmented = aug(**annotations)
# visualize(augmented, category_id_to_name)

# aug = get_aug([RandomRotate90(p=1)])
# augmented = aug(**annotations)
# print(augmented['category_id'])
# visualize(augmented, category_id_to_name)

aug = get_aug([Blur(p=1, blur_limit=9)])
augmented = aug(**annotations)
print(augmented['category_id'])
visualize(augmented, category_id_to_name)

以上代码演示了目标检测中的数据增强。该库一共提供了两类增强方式:一种是Pixel-level transforms(像素级变换,只改变图像本身,不影响bounding box),另一种是Spatial-level transforms(空间级变换,会同时改变图像和bounding box)。

当然除了目标检测,该库同样适用于目标分类、目标分割等任务,详细的用法见该库提供的文档:https://albumentations.readthedocs.io/en/latest/index.html

猜你喜欢

转载自blog.csdn.net/breeze_blows/article/details/103236808