Imgaug (image enhancement) study notes for deep learning

imgaug (image enhancement) for deep learning

foreword

These are my study notes on imgaug, compiled from the following articles:

  1. Imgaug (image enhancement) of Python third-party module
  2. Image Processing Essentials (5): Imgaug's Enhanced Mark BoundingBox

imgaug is a packaged python library for image augmentation, which supports transformation of keypoints and bounding boxes together.

1. Installation and uninstallation

 
# Install from PyPI
pip install imgaug

# Uninstall
pip uninstall imgaug

2. Examples

2.1 Basic use

First define a transformation sequence, and then directly pass in the image batch:

from imgaug import augmenters as iaa
 
seq = iaa.Sequential([
    iaa.Crop(px=(0, 16)), # crop each side of the image by 0 to 16 px (randomly chosen)
    iaa.Fliplr(0.5), # 0.5 is the probability: horizontally flip 50% of the images
    iaa.GaussianBlur(sigma=(0, 3.0)) # blur images with a sigma between 0 and 3.0
])
 
for batch_idx in range(1000):
    # 'images' should be either a 4D numpy array of shape (N, height, width, channels)
    # or a list of 3D numpy arrays, each of shape (height, width, channels).
    # Grayscale images must have shape (height, width, 1) each.
    # All images must have numpy's dtype uint8. Values are expected to be in
    # the range 0-255.
    images = load_batch(batch_idx)
    images_aug = seq.augment_images(images)
    train_on_images(images_aug)

2.2 Contains examples of commonly used transformations

import cv2
import numpy as np
from imgaug import augmenters as iaa
import imgaug as ia

# Define a lambda that wraps the augmenter passed to it in iaa.Sometimes with p=0.5
sometimes = lambda aug: iaa.Sometimes(0.5, aug)
# Build the augmentation pipeline (bound to the name aug) that defines the augmentation methods
aug = iaa.Sequential(
    [
        iaa.Fliplr(0.5),  # mirror 50% of the images horizontally
        iaa.Flipud(0.2),  # flip 20% of the images vertically (upside down)
        sometimes(iaa.Crop(percent=(0, 0.1))),
        # Uses the sometimes helper defined above: crop a random subset of the images,
        # with a crop amount of 0 to 10%.
        # Alternative: sometimes(iaa.Crop(px=(0, 16))) picks the crop amount from 0-16 pixels from the edge

        # Apply an affine transformation to some of the images
        sometimes(iaa.Affine(
            scale={
    
    "x": (0.8, 1.2), "y": (0.8, 1.2)},  # scale images to between 80% and 120%
            translate_percent={
    
    "x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by between -20% and +20%
            rotate=(-45, 45),  # rotate by between -45 and +45 degrees
            shear=(-16, 16),  # shear by between -16 and +16 degrees (rectangle becomes parallelogram)
            order=[0, 1],  # use nearest-neighbour or bilinear interpolation
            cval=(0, 255),  # constant fill value, from 0 (black) to 255 (white)
            mode=ia.ALL  # how to fill the area outside the image
        )),

        # Apply between 0 and 5 of the augmenters below. Note the use of SomeOf
        iaa.SomeOf((0, 5),
                   [
                       # Convert some of the images to a superpixel representation
                       # (the original author notes this was the first time they saw superpixels used for augmentation)
                       sometimes(
                           iaa.Superpixels(
                               p_replace=(0, 1.0),
                               n_segments=(20, 200)
                           )
                       ),

                       # Blur with one of Gaussian, average or median blur. Note the use of OneOf
                       iaa.OneOf([
                           iaa.GaussianBlur((0, 3.0)),
                           iaa.AverageBlur(k=(2, 7)),  # kernel size between 2 and 7; with k=((5, 7), (1, 3)) the kernel height is 5-7 and the width 1-3
                           iaa.MedianBlur(k=(3, 11)),
                       ]),

                       # Sharpen
                       iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),

                       # Emboss effect
                       iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),

                       # Edge detection: detected edges are set to 0 or 255 and overlaid on the original image
                       sometimes(iaa.OneOf([
                           iaa.EdgeDetect(alpha=(0, 0.7)),
                           iaa.DirectedEdgeDetect(
                               alpha=(0, 0.7), direction=(0.0, 1.0)
                           ),
                       ])),

                       # Add Gaussian noise
                       iaa.AdditiveGaussianNoise(
                           loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5
                       ),

                       # Either set 1% to 10% of the pixels to black,
                       # or cover 3% to 15% of the pixels with black squares of 2% to 5% of the image size
                       iaa.OneOf([
                           iaa.Dropout((0.01, 0.1), per_channel=0.5),
                           iaa.CoarseDropout(
                               (0.03, 0.15), size_percent=(0.02, 0.05),
                               per_channel=0.2
                           ),
                       ]),

                       # Invert pixel intensities with 5% probability, i.e. an intensity v becomes 255-v
                       iaa.Invert(0.05, per_channel=True),

                       # Add a random value between -10 and 10 to the pixel values
                       iaa.Add((-10, 10), per_channel=0.5),

                       # Multiply the pixel values by a number between 0.5 and 1.5
                       iaa.Multiply((0.5, 1.5), per_channel=0.5),

                       # Change the contrast of the whole image by a factor between 0.5 and 2.0
                       iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),

                       # Convert RGB to grayscale, weight it by alpha and blend it onto the original image
                       iaa.Grayscale(alpha=(0.0, 1.0)),

                       # Move pixels locally around; this method is also seen in MNIST data augmentation
                       sometimes(
                           iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)
                       ),

                       # Distort local areas of the image
                       sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05)))
                   ],

                   random_order=True  # apply these operations to the images in random order
                   )
    ],
    random_order=True  # apply these operations to the images in random order
)

# Data augmentation of a single image.
# Read the image in colour (3 channels, BGR order): ia.show_grid() expects a
# batch of shape (N, H, W, C) and the composite canvas built at the end has
# 3 channels, neither of which works with a 2D grayscale image.
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread() returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image '1.jpg'")
h, w = image.shape[:2]
enhance_num = 32

aug_example_img = aug.augment_image(image=image)
print(image.shape, aug_example_img.shape)

# Build a batch that contains enhance_num copies of the image and augment it.
example_images = np.array([image] * enhance_num, dtype=np.uint8)
aug_imgs = aug(images=example_images)
# Equivalent call: aug_imgs = aug.augment_images(images=example_images)

# Show the images. OpenCV stores channels as BGR, so reverse the channel axis
# to get RGB for display.
ia.show_grid(aug_imgs[..., ::-1], rows=4, cols=8)

# Save every augmented image to its own file.
for i, img in enumerate(aug_imgs):
    cv2.imwrite("aug_%d.jpg" % i, img)

# Save all augmented images side by side in one picture, separated by a
# 10 px wide black gap.
stride = w + 10
write_img = np.zeros(shape=(h, stride * enhance_num, 3), dtype=np.uint8)
for j, item in enumerate(aug_imgs):
    write_img[:, j * stride: j * stride + w, :] = item
cv2.imwrite("aug_all.jpg", write_img)

3 Augmenters common functions

First import the Augmenters class

from imgaug import augmenters as iaa

3.1 iaa.Sequential()

Generate a Sequential function prototype for processing images:

from imgaug import augmenters as iaa
iaa.Sequential(children=None,
               random_order=False,
               name=None,
               deterministic=False,
               random_state=None)

parameter:

  • children: Augmenter or collection of Augmenters to apply to the image. Default None
  • random_order: bool type, default False. Whether to apply a different sequence of Augmenter lists to each batch of images. When set to True, the processing order of pictures between different batches will be different, but the order in the same batch is the same.
  • deterministic: bool type, default False.

3.2 iaa.SomeOf()

Apply only some of the Augmenters to the images instead of all of them. For example: 20 transformations can be defined, but only 5 of them are selected each time. It does not, however, support always selecting one particular Augmenter.

Function prototype:

from imgaug import augmenters as iaa
iaa.SomeOf(n=None,
           children=None,
           random_order=False,
           name=None,
           deterministic=False,
           random_state=None)

parameter:

  • n: How many to choose from the total Augmenters. Can be an int, tuple, list or random value.
  • random_order: Whether the order is different each time.

example:

# Pick one of the two flips each time
seq = iaa.SomeOf(1, [
     iaa.Fliplr(1.0),
     iaa.Flipud(1.0)
 ])
imgs_aug = seq.augment_images(imgs)
 
# Use 1 to 3 of the Augmenters each time; the order of the Augmenters is the same in every batch.
seq = iaa.SomeOf((1, 3), 
        [
             iaa.Fliplr(1.0),
             iaa.Flipud(1.0),
             iaa.GaussianBlur(1.0)
 ])
imgs_aug = seq.augment_images(imgs)
 
# Use 1 up to all of the Augmenters each time; the order of the Augmenters differs between batches.
seq = iaa.SomeOf((1, None), 
    [
     iaa.Fliplr(1.0),
     iaa.Flipud(1.0),
     iaa.GaussianBlur(1.0)
 ], random_order=True)
imgs_aug = seq.augment_images(imgs)

3.3 iaa.OneOf()

Choose from a range of Augmenters to transform one at a time.

iaa.OneOf(children,
          name=None,
          deterministic=False,
          random_state=None)

The meaning of the parameters is the same as above.

3.4 iaa.Sometimes()

Apply some Augmenters to some of the pictures in the batch, and apply another Augmenters to the remaining pictures.

iaa.Sometimes(p=0.5,
              then_list=None,
              else_list=None,
              name=None,
              deterministic=False,
              random_state=None)
  • p: float. The proportion of pictures that will be augmented with then_list.
  • then_list: Augmenter collection. It is applied to a picture with probability p.
  • else_list: Augmenter collection. It is applied to a picture with probability 1-p. Note that each picture is transformed by exactly one of then_list or else_list, never both.

3.5 iaa.WithColorspace()

Transforms an image in a specific color space. That is: first transform the picture from one color space to another, then transform the image in another color space, and finally transform back to the original color space.

iaa.WithColorspace(to_colorspace,
                   from_colorspace='RGB',
                   children=None,
                   name=None,
                   deterministic=False,
                   random_state=None)
  • to_colorspace: The color space to transform. The following options are available: RGB, BGR, GRAY, CIE, YCrCb, HSV, HLS, Lab, Luv
  • from_colorspace: The original color space, default RGB.
  • children: The transformation to perform.
# First convert the image from RGB to HSV, then add 10 to the H channel, then convert back to RGB.
aug = iaa.WithColorspace(to_colorspace="HSV", from_colorspace="RGB",
                         children=iaa.WithChannels(0, iaa.Add(10)))

3.6 iaa.WithChannels()

Select a Channel from the picture to transform, and then merge the channel back after the transform.

iaa.WithChannels(channels=None,
                 children=None,
                 name=None,
                 deterministic=False,
                 random_state=None)

parameter:

  • channels: int or int list. Which channels are to be used for transformation.
  • children: What changes should be made after the channel is selected.

3.7 iaa.Noop()

No transformations are performed. In some cases, you just want to use an Augmenter as a placeholder, so that you can continue to call the augment_image() function, but the actual transformation is not performed. This can be used for example when testing.

3.8 iaa.Lambda()

Customize some transformation functions.

iaa.Lambda(func_images,
           func_keypoints,
           name=None,
           deterministic=False,
           random_state=None)

parameter:

  • func_images: Call this function for each image. The function must return the transformed image. The function has the form:
function(images, random_state, parents, hooks)
  • func_keypoints: A function to transform the keypoints of each image. This function returns the transformed keypoint. The function form is:
function(keypoints_on_images, random_state, parents, hooks)

example:

def func_images(images, random_state, parents, hooks):
    """Zero out every second entry along the second axis of ``images``.

    ``images`` is indexed with four axes and modified in place; with an
    (N, H, W, C) batch (presumably the layout imgaug passes in - confirm
    against the caller) this blacks out every other row of each image.
    ``random_state``, ``parents`` and ``hooks`` are accepted only to match
    the callback signature and are not used.

    Returns the same ``images`` object that was passed in.
    """
    # Same key as images[:, ::2, :, :], spelled out as explicit slices.
    every_other_row = (
        slice(None),
        slice(None, None, 2),
        slice(None),
        slice(None),
    )
    images[every_other_row] = 0
    return images
 
def func_keypoints(keypoints_on_images, random_state, parents, hooks):
    """Return ``keypoints_on_images`` unchanged.

    ``random_state``, ``parents`` and ``hooks`` are accepted only to match
    the callback signature and are not used.
    """
    return keypoints_on_images
 
# Build a Lambda augmenter from the func_images and func_keypoints callbacks.
aug = iaa.Lambda(
    func_images=func_images,
    func_keypoints=func_keypoints
)

Turn the pixels in every two lines of each picture into black strips, and keep the key points.

3.9 iaa.AssertShape()

Asserts the shape of the images and keypoints that are about to be transformed. Throws an exception if the shape does not match.

iaa.AssertShape(shape,
                check_images=True,
                check_keypoints=True,
                name=None,
                deterministic=False,
                random_state=None)

parameter:

  • shape: tuple, usually in the form (N, H, W, C). Each element of the tuple can be: None, an int, a tuple of two ints, or a list of ints. If None, all values are acceptable. If it is an int, only that exact value is accepted at the corresponding position. If it is a tuple of ints, such as (a, b), the value x at the corresponding position must satisfy a <= x < b. If it is a list of ints, the value at the corresponding position must be one of the values in the list.
# Check that every input image is 32x32x3; if so apply the horizontal flip, otherwise raise an error
seq = iaa.Sequential([
    iaa.AssertShape((None, 32, 32, 3)),
    iaa.Fliplr(0.5)
])
 
# First check that the height satisfies 32<=H<64, that the width is 32 and that the number of channels is 1 or 3. If all of these hold apply the horizontal flip, otherwise raise an error.
seq = iaa.Sequential([
    iaa.AssertShape((None, (32, 64), 32, [1, 3])),
    iaa.Fliplr(0.5)
])

3.10 iaa.Scale()

Scales the image to a fixed size.

iaa.Scale(size,
          interpolation='cubic',
          name=None,
          deterministic=False,
          random_state=None)

parameter:

  • size: String "keep", at this time, keep the original size of the image without scaling. If it is an integer n, it is scaled to (n, n). If it is a float v, each image will be scaled to (H*v, W*v), and the size of each image is still different at this time. If it is a tuple type (a, b), if there is at least one decimal in a and b, pick a number from [a, b] as the scaling factor. If a and b are both integers, pick an integer from [a, b] as the scaled size. If it is a list, the numbers in the list are either all integers or all decimals (cannot be mixed). If it is a dict type, the dict must have two keys: height and width. The value of each key can still be selected according to the above method. In addition, the value of the key can also be "keep-aspect-ratio", which means scaling according to the ratio.
  • interpolation: scaling method. If it is All, one will be randomly selected from the following: nearest, linear, area, cubic, and note that each picture may be different. If it is int, it should be one of the following: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC. If it is a string, this method will be used all the time and must be one of the following: nearest, linear, area, cubic. If it is an int list or string list, each picture will randomly select one from it.

3.11 iaa.CropAndPad()

Crop or pad. When filling, the filled area is black.

iaa.CropAndPad(px=None,
               percent=None,
               pad_mode='constant',
               pad_cval=0,
               keep_size=True,
               sample_independently=True,
               name=None,
               deterministic=False,
               random_state=None)

parameter:

  • px: Number of pixels to crop (negative values) or pad (positive values). Note that px and percent cannot be set at the same time. If None, pixel-level cropping/padding is not used. An int or an int list works the same as above. If it is a tuple with 4 elements, the 4 elements represent (top, right, bottom, left) respectively, and each element can be an int, an int tuple or an int list.
  • percent: crop or pad proportionally, same as px. But both cannot exist at the same time.
  • pad_mode: padding mode. Can be All, string, string list. The optional padding methods are: constant, edge, linear_ramp, maximum, median, minimum, reflect, symmetric, wrap. The specific meaning can be found in the numpy documentation.
  • pad_cval: float、int、float tuple、int tuple、float list、int list. Select padding value when pad_mode=constant.
  • keep_size: bool type. Cropping or padding changes the image size. If set to True, the image is scaled back to its original size after the crop or pad.
  • sample_independently : bool type. If set to False, the value selected from px or percent will be applied to the four directions each time.

3.12 iaa.Pad()

Same as iaa.CropAndPad(), but it only pads: the values are non-negative amounts of padding.

3.13 iaa.Crop()

Same as iaa.CropAndPad(), but it only crops: the values are non-negative amounts to crop (as in iaa.Crop(px=(0, 16)) in section 2.1), not negative numbers.

3.14 iaa.Fliplr()

Horizontal mirror flip.

iaa.Fliplr(p=0, name=None, deterministic=False, random_state=None)

parameter:

  • p: int or float, the probability of flipping each picture

3.15 iaa.Flipud()

Vertical flip (upside down). The parameters are the same as for iaa.Fliplr() above.

3.16 iaa.ChangeColorspace()

Change image space.

iaa.ChangeColorspace(to_colorspace, from_colorspace='RGB', alpha=1.0, name=None, deterministic=False, random_state=None)

parameter:

  • to_colorspace: See above.
  • from_colorspace: See above.
  • alpha: The alpha value of the new color space when overwriting the old color space. for int, float, int tuple, float tuple.

3.17 iaa.Grayscale()

Convert the image into a grayscale image.

iaa.Grayscale(alpha=0, from_colorspace='RGB', name=None, deterministic=False, random_state=None)

parameter:

  • alpha: The alpha value of the new color space when overwriting the old color space.

3.18 iaa.GaussianBlur()

Gaussian blur.

iaa.GaussianBlur(sigma=0, name=None, deterministic=False, random_state=None)

parameter:

  • sigma: The standard deviation of the Gaussian kernel. Can be a float or a float tuple. Common values: 0 means no blur, 3 means strong blur.

3.19 iaa.AverageBlur()

Blur by taking the mean value of the neighboring pixels.

iaa.AverageBlur(k=1, name=None, deterministic=False, random_state=None)

parameter:

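  • k: window (kernel) size. Can be an int or an int tuple. If it is a tuple whose elements are themselves tuples, e.g. ((5, 7), (1, 3)), the first element gives the range of the kernel height and the second the range of the kernel width, so the height and width of the window can differ.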

3.20 iaa.MedianBlur()

Blur by taking the median value of the neighboring pixels.

iaa.MedianBlur(k=1, name=None, deterministic=False, random_state=None)

Same as above.

3.21 iaa.Convolve()

Apply convolution to an image.

iaa.Convolve(matrix=None, name=None, deterministic=False, random_state=None)
  • matrix: convolution matrix.

3.22 iaa.Sharpen()

sharpen.

iaa.Sharpen(alpha=0, lightness=1, name=None, deterministic=False, random_state=None)

3.23 iaa.Emboss()

Embossed effect.

iaa.Emboss(alpha=0, strength=1, name=None, deterministic=False, random_state=None)

3.24 iaa.EdgeDetect()

Edge detection.

iaa.EdgeDetect(alpha=0, name=None, deterministic=False, random_state=None)

3.25 iaa.DirectedEdgeDetect()

Edge detection in a specific direction.

iaa.DirectedEdgeDetect(alpha=0, direction=(0.0, 1.0), name=None, deterministic=False, random_state=None)

3.26 iaa.Add()

Randomly add a value.

iaa.Add(value=0, per_channel=False, name=None, deterministic=False, random_state=None)

3.27 iaa.AddElementwise()

Add by pixel.

iaa.AddElementwise(value=0, per_channel=False, name=None, deterministic=False, random_state=None)

3.28 iaa.AdditiveGaussianNoise()

Add Gaussian noise.

iaa.AdditiveGaussianNoise(loc=0, scale=0, per_channel=False, name=None, deterministic=False, random_state=None)

3.29 iaa.Multiply()

Multiplies each pixel in the image by a value to make the image brighter or darker.

iaa.Multiply(mul=1.0, per_channel=False, name=None, deterministic=False, random_state=None)

3.30 iaa.MultiplyElementwise()

Multiply by pixel value.

iaa.MultiplyElementwise(mul=1.0, per_channel=False, name=None, deterministic=False, random_state=None)

3.31 iaa.Dropout()

Randomly remove some pixels, that is, turn these pixels into 0.

iaa.Dropout(p=0, per_channel=False, name=None, deterministic=False, random_state=None)

3.32 iaa.CoarseDropout()

Set the value of the rectangle to 0.

iaa.CoarseDropout(p=0, size_px=None, size_percent=None, per_channel=False, min_size=4, name=None, deterministic=False, random_state=None)

3.33 iaa.Invert()

Turn each pixel value p into 255-p.

iaa.Invert(p=0, per_channel=False, min_value=0, max_value=255, name=None, deterministic=False, random_state=None)

3.34 iaa.ContrastNormalization()

Change the contrast of the image.

iaa.ContrastNormalization(alpha=1.0, per_channel=False, name=None, deterministic=False, random_state=None)

3.35 iaa.Affine()

Affine transformation. Including: Translation, Rotation, Zoom, and Shear. An affine transformation usually produces some new pixels, and we need to specify how these new pixels are generated, which is done by setting the two parameters cval and mode. The parameter order is used to set the interpolation method.

iaa.Affine(scale=1.0,
           translate_percent=None,
           translate_px=None,
           rotate=0.0,
           shear=0.0,
           order=1,
           cval=0,
           mode='constant',
           name=None, deterministic=False, random_state=None)

parameter:

  • scale: Image scaling factor. 1 means no scaling, 0.5 means zooming out to 50% of the original. This parameter can be a float, a float tuple, or a dict. If it is a float, all pictures are scaled by this ratio. If it is a float tuple, a value is picked at random for scaling, and the x-axis and y-axis share the same scaling factor. If it is a dict, it should have two keys, x and y; the value of each can be a float or a float tuple, and in this case the scaling ratios of the x-axis and y-axis can differ.
  • translate_percent: translation ratio, 0 means no translation, 0.5 means 50% translation. It can be float, float tuple, dict, and the specific meaning is the same as scale. Use positive or negative to indicate the translation direction.
  • translate_px: Translate in pixels. It can be int, int tuple, dict, and the specific meaning is the same as translate_percent.
  • rotate: The rotation angle in degrees, between 0 and 360; the sign indicates the direction. Can be a float or a float tuple.
  • shear: The shear angle in degrees, between 0 and 360; the sign indicates the direction. Can be float, int, float tuple, int tuple.
  • order: Interpolation order, same as defined in skimage. Orders 0 and 1 are fast, 3 is slower, and 4 and 5 are particularly slow. Can be an int, an int list, or ia.ALL. If it is ia.ALL, an interpolation method is selected at random from all of them each time.
    • 0: nearest neighbor interpolation.
    • 1: bilinear interpolation (default).
    • 2: Biquadratic interpolation (not recommended).
    • 3: Bicubic interpolation.
    • 4: Bi-quartic.
    • 5: Bi-quintic.
  • cval: The constant value used to fill newly created pixels after the transformation; it only takes effect when mode=constant. Can be int, float, tuple, ia.ALL. If it is ia.ALL, the fill value is chosen at random from [0, 255].
  • mode: How to fill the blank pixels after transformation. can be string, string list, ia.ALL. The basic usage is the same as above. The selection range of strings is:
    • constant: Pad with a constant.
    • edge: Edge padding.
    • symmetric: Mirror symmetrical fill.
    • reflect: Pads with the reflection of the vector mirrored on the first and last values of the vector along each axis.
    • wrap: Pads with the wrap of the vector along the axis. The first values are used to pad the end and the end values are used to pad the beginning.

3.36 iaa.PiecewiseAffine()

Place a regular grid of points on the image and randomly move the pixels around these points. This results in a localized distortion.

iaa.PiecewiseAffine(scale=0,
                    nb_rows=4,
                    nb_cols=4,
                    order=1,
                    cval=0,
                    mode='constant',
                    name=None, deterministic=False, random_state=None)

3.37 iaa.ElasticTransformation()

Transform by moving local pixels.

iaa.ElasticTransformation(alpha=0,
                          sigma=0,
                          name=None,
                          deterministic=False,
                          random_state=None)

4. keypoint transformation

imgaug supports transforming key points in an image while transforming the image. Examples are as follows:

import imgaug as ia
import matplotlib.pyplot as plt
from imgaug import augmenters as iaa

# seed() is a function of the top-level imgaug package, not of imgaug.augmenters.
ia.seed(1)

image = ia.quokka(size=(256, 256))

# Define 4 keypoints
keypoints = ia.KeypointsOnImage([
    ia.Keypoint(x=65, y=100),
    ia.Keypoint(x=75, y=200),
    ia.Keypoint(x=100, y=100),
    ia.Keypoint(x=200, y=80)
], shape=image.shape)

# Define the sequence of transformations
seq = iaa.Sequential([
    iaa.Multiply((1.2, 1.5)),  # change brightness, does not affect keypoints
    iaa.Affine(
        rotate=10,
        scale=(0.5, 0.7)
    )  # rotate by 10 degrees and scale, affects keypoints
])

# Freeze the sequence so that the image and then the keypoints can be
# transformed with exactly the same sampled parameters.
# to_deterministic() has to be called again for every batch, otherwise
# all batches are augmented with the same transformation.
seq_det = seq.to_deterministic()

# The augment_* methods work on lists/batches. There is only one image here,
# so take the image and its keypoints back out with [0].
image_aug = seq_det.augment_images([image])[0]
keypoints_aug = seq_det.augment_keypoints([keypoints])[0]

# print coordinates before/after augmentation (see below)
# use after.x_int and after.y_int to get rounded integer coordinates
for i, (before, after) in enumerate(
        zip(keypoints.keypoints, keypoints_aug.keypoints)):
    print("Keypoint %d: (%.8f, %.8f) -> (%.8f, %.8f)" % (
        i, before.x, before.y, after.x, after.y)
    )

# Draw the keypoints on the images.
# image with keypoints before/after augmentation (shown below)
image_before = keypoints.draw_on_image(image, size=7)
image_after = keypoints_aug.draw_on_image(image_aug, size=7)

fig, axes = plt.subplots(2, 1, figsize=(20, 15))
plt.subplots_adjust(left=0.2, bottom=0.2, right=0.8, top=0.8, hspace=0.3, wspace=0.0)
axes[0].set_title("image before")
axes[0].imshow(image_before)
axes[1].set_title("image after augmentation")
axes[1].imshow(image_after)

plt.show()

5. Bounding Boxes transformation

Corresponding to BBox, there are two APIs, one for generating BBox and the other for integrating BBox.

  • BoundingBox

  • BoundingBoxesOnImage

5.1 BBox

For BBox, it is usually used to mark objects in target detection, here is a rectangle. Therefore, it is represented by the coordinates of the upper left corner point and the lower right corner point (x1, y1, x2, y2).

It is only affected by image enhancement techniques that change the geometric appearance of the image, not by things like Gaussian noise.

API: BoundingBox

Input parameters of BBox:

imgaug.augmentables.bbs.BoundingBox(x1, y1, x2, y2, label=None):

As you can see, it contains 5 inputs, 4 coordinates and 1 object category label label.

BoundingBox has some important properties: .x1, .y1, .x2, .y2, .height, .width, .center_x, .center_y, .area

Some methods of BBox:

  • project(from_shape, to_shape) : Project a bbox from an image of one size to another.

  • *extend([all_sides], [top], [right], [bottom], [left]) *

  • *intersection(other, [default]) * : intersection

  • *union(other) * : union

  • *iou(other) * : intersection over union (IoU)

  • *is_fully_within_image(image) * : Determine whether the bbox is fully within the image

  • *is_partly_within_image(image) * : Determine whether at least part of the bbox is within the image

  • *clip_out_of_image(image) * : Clip off the part of the BBox that lies outside the image

  • *shift([x], [y]) * : move bbox

  • *draw_on_image(image, [color], [alpha], [size], [copy], [raise_if_out_of_image]) *: draw BBox and its label

  • *draw_label_on_image(image, [color], [color_text], [color_bg], [alpha], [size], [size_text], [height], [copy], [raise_if_out_of_image]) * : draw only the label

  • *draw_box_on_image(image, [color], [alpha], [size], [copy], [raise_if_out_of_image]) * : draw only the border

  • *extract_from_image(image, [pad], [pad_max], [prevent_zero_size]) * : Extract the pixels contained in the bounding box from the image

API:BoundingBoxesOnImage

BoundingBoxesOnImage() input parameters:

imgaug.augmentables.bbs.BoundingBoxesOnImage(bounding_boxes, shape)

Some methods included in BoundingBoxesOnImage:

  • on(image) : recalculate the bbox after the graphic changes

  • *from_xyxy_array(xyxy, shape) *: Generated by (N, 4) numpy array

  • *to_xyxy_array([dtype]) *: Generate a numpy array of (N, 4)

  • *draw_on_image(image, [color], [alpha], [size], [copy], [raise_if_out_of_image]) *: Draw the bboxes on the image

  • *remove_out_of_image([fully], [partly]) *: Remove some bboxes that are completely or partially not in the image

  • *clip_out_of_image() *: Clip all bboxes to the image, cutting off the parts that lie outside it

  • *shift([x], [y]) *: translate all bboxes

In addition, imgaug has other APIs for augmenting bounding boxes, either together with the images, aug.augment(images=…, bounding_boxes=…), or on their own, aug.augment_bounding_boxes().

5.2 Draw bbox

import imgaug as ia
import imageio
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
%matplotlib inline
ia.seed(1)
img = imageio.imread("samoye.jpg")
bbs = BoundingBoxesOnImage([
    BoundingBox(x1=65, y1=1, x2=417, y2=396)], shape=img.shape)
ia.imshow(bbs.draw_on_image(img, size=2))

png

5.3 Drawing after applying augmentation methods to images

 # Define the augmentation
from imgaug import augmenters as iaa
ia.seed(1)
 
seq = iaa.Sequential([iaa.GammaContrast(1.5),
                      iaa.Affine(translate_percent={
    
    "x":0.1}, scale=0.8)])
 
# Apply it to the image and the bboxes together
image_aug, bbs_aug = seq(image=img, bounding_boxes=bbs)

# Read out the resulting coordinates
bbs_aug[0].x1, bbs_aug[0].y1, bbs_aug[0].x2, bbs_aug[0].y2

# Draw
ia.imshow(bbs_aug.draw_on_image(image_aug, size=2))

png

image_aug, bbs_aug = iaa.Affine(rotate=45)(image=img, bounding_boxes=bbs)
ia.imshow(bbs_aug.draw_on_image(image_aug, size=1))

png

In the official tutorial the bbox is still visible after the image is rotated, but it no longer fits the rotated target tightly. With the image used here, the bbox cannot be seen at all after the rotation. The official tutorial explains that this problem stems from non-object pixels being part of the bounding box: after rotation, a new bounding box must be drawn that still incorporates these non-object pixels.

The draw_on_image() method of bbox can also adjust the color thickness and transparency of bbox through parameters such as color, size, and alpha.

import numpy as np
# Draw the same bboxes twice with different colour, thickness and transparency.
# The result of draw_on_image() is bound to a new name each time; img itself
# is passed unchanged to both calls.
image_bbs = bbs.draw_on_image(img, color=[255, 0, 0], size=3)
print("color=[255,0,0],size=3,")
ia.imshow(image_bbs)
print("color=[0,255,0],size=10,alpha=0.5")
image_bbs_1 = bbs.draw_on_image(img, color=[0, 255, 0], size=10, alpha=0.5)
ia.imshow(image_bbs_1)
color=[255,0,0],size=3,

png

color=[0,255,0],size=10,alpha=0.5

png

It can be seen from this that when the size is large, the border of the bbox near the edge may not be drawn.

In addition, bbox is usually used for target detection, and the label of the target needs to be added for visualization. The bbox also contains the label field. When the bbox includes a label when it is initialized, the label will be displayed directly when it is displayed; if the label is not assigned when it is initialized, it needs to be assigned before it can be displayed.

bbs_label = bbs.deepcopy()
bbs_label[0].label = "dog"

image_bbs = bbs_label.draw_on_image(img, size=1)
ia.imshow(image_bbs)

png

But because the upper border is close to the edge of the image, the label is not drawn and displayed.

Change an image...

image = imageio.imread("fox.jpg")

bbox = BoundingBoxesOnImage([BoundingBox(x1=58, y1=19, x2=203, y2=183)
                            ], shape=image.shape)
bbox[0].label = "fox"
image_bbox = bbox.draw_on_image(image, size=2)
ia.imshow(image_bbox)

png

5.4 Extract the target area

BoundingBox contains the extract_from_image(image) method, which can extract the image of the target area.

fox = bbox.bounding_boxes[0].extract_from_image(image)
ia.imshow(fox)

png

If you want to extract the target and some areas around the target, you can use the extend method in combination to extend the bbox first, and then extract the image of the extended bbox area.

fox = bbox.bounding_boxes[0].extend(
    all_sides=0, left=30, right=10).extract_from_image(image)
ia.imshow(fox)

png

In addition to the extend method, the shift method can move the bbox , just like Keypoints.

bb = bbox.bounding_boxes[0].shift(x=20, y=20)
ia.imshow(bb.draw_on_image(image, size=2))
ia.imshow(bb.extract_from_image(image))

png

png

To sum up, extend can change the shape of the bbox, expand or shrink the bbox; and shift can only translate the bbox without affecting the size of the bbox.

Note that when the original bbox is moved by 20 pixels along the y-axis, part of the bbox ends up outside the image. As the second image above shows, the extracted area then has a black border. If you do not want this, you can add the parameter pad=False; as shown in the figure below, the extracted image then has no black border.

bb = bbox.bounding_boxes[0].shift(x=20, y=20)
ia.imshow(bb.extract_from_image(image, pad=False))

png

5.5 Cropping bbox

When the image is transformed, the corresponding bbox may be partly outside the image, and the .clip_out_of_image() method can be used to cut off the part of the bounding box outside the image plane. The following code first moves the bbox to part outside the image, and crops the bbox.

print("-------------------")
print("shifted by 50 px - y")
print("-------------------")
bb = bbox.bounding_boxes[0].shift(y=50)
print("before clip")
ia.imshow(bb.draw_on_image(image, size=2))
ia.imshow(bb.extract_from_image(image))
bb_clip =bb.clip_out_of_image(image.shape)
print("after clip")
ia.imshow(bb_clip.draw_on_image(image, size=2))
ia.imshow(bb_clip.extract_from_image(image))
-------------------
shifted by 50 px - y
-------------------
before clip

png

png

after clip

png

png

It can be seen that when the bbox outside the image is cut off, the bbox displayed in the original image can display the boundary well.

5.6 Project the bbox onto other images

Like Keypoints, bbox also has a method to keep the bbox consistent after the image is scaled:

  • project

  • on

import numpy as np
import imageio
import imgaug as ia
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
%matplotlib inline
# Load the example image from disk.
img = imageio.imread("koala.jpg")
# Two bounding boxes given by their corner coordinates, bound to the shape
# of the image they were annotated on.
bbox = BoundingBoxesOnImage([BoundingBox(x1=26, y1=8, x2=109, y2=160)
                            , BoundingBox(x1=39, y1=4, x2=245, y2=194)], shape=img.shape)
# Attach a class label to each box.
bbox[0].label = "koala"
bbox[1].label = "koala"
ia.imshow(bbox.draw_on_image(img, size=2))
print(img.shape)

png

(194, 259, 3)

Scale the image to 120*120

# Resize the image to 120x120; the boxes in `bbox` still refer to the
# original image shape at this point.
img_resize = ia.imresize_single_image(img, (120, 120))
ia.imshow(img_resize)

png

# Drawing the unmodified boxes on the resized image shows the mismatch:
# their coordinates still refer to the original image size.
print("Bounding box without changes:")
ia.imshow(bbox.draw_on_image(img_resize, size=2))

# project() works on a single BoundingBox, so every box is converted on its
# own. draw_on_image() keeps its default copy=True here: drawing in place
# (copy=False) would modify img_resize itself, and each later figure would
# then also contain the boxes drawn by the earlier calls.
print("Bounding box with project(from, to)")
for single_bb in bbox.bounding_boxes:
    projected_bb = single_bb.project(from_shape=img.shape, to_shape=img_resize.shape)
    ia.imshow(projected_bb.draw_on_image(img_resize, size=2))

# on() converts the whole BoundingBoxesOnImage to the new image shape at once.
print("Bounding box with on(shape)")
ia.imshow(bbox.on(img_resize.shape).draw_on_image(img_resize, color=(255,255,0), size=2))
Bounding box without changes:

png

Bounding box with project(from, to)

png

png

Bounding box with on(shape)

png

5.7 Intersection, union, and intersection over union (IoU)

IoU is usually calculated between bboxes, and the corresponding method is provided in imgaug:

  • BoundingBox.intersection(other_bounding_box)

  • BoundingBox.union(other_bounding_box)

  • BoundingBox.iou(other_bounding_box)

1. intersection

# Intersection of the two koala boxes, returned as a bounding box itself.
bb_intersection = bbox.bounding_boxes[0].intersection(bbox.bounding_boxes[1])
ia.imshow(bb_intersection.draw_on_image(img, size=2))

png

As shown in the figure above, the returned bb_intersection is actually the bbox of the intersection of two bboxes.

In addition, information such as the height, width, and area of the intersection can be read from bb_intersection.

# Report the height, width and area attributes of bb_intersection.
print("The intersection has a height of %.4f, width of %.4f and an area of %.4f"%(
bb_intersection.height, bb_intersection.width, bb_intersection.area))
The intersection has a height of 152.0000, width of 70.0000 and an area of 10640.0000

2. union

# Union of the two koala boxes, returned as a bounding box itself.
bb_union = bbox.bounding_boxes[0].union(bbox.bounding_boxes[1])
ia.imshow(bb_union.draw_on_image(img, size=2))

png

As shown in the figure above, the returned bb_union is actually the bbox of the union of two bboxes.

Similarly, information such as the height, width, and area of the union can be read from bb_union.

# Report the height, width and area attributes of bb_union.
print("The union has a height of %.4f, width of %.4f and an area of %.4f"%(
bb_union.height, bb_union.width, bb_union.area))
The union has a height of 190.0000, width of 219.0000 and an area of 41610.0000

3. IoU

# Intersection over union (IoU) of the two koala boxes.
iou = bbox.bounding_boxes[0].iou(bbox.bounding_boxes[1])
print("IoU: %.4f"%(iou))
IoU: 0.2588

6. Stochastic Parameter

When augmenting, we want every image to be transformed differently, which is achieved by sampling the parameters at random. Reproducing an earlier transformation, however, then requires deterministic mode, which is cumbersome. To avoid this, use stochastic parameters. A stochastic parameter is usually an abstract probability distribution, such as a normal distribution or a uniform distribution. Almost every augmenter accepts such parameters, which makes it very convenient to control the range of the sampled values. They can all be combined with determinism.

example:

from imgaug import augmenters as iaa
from imgaug import parameters as iap
 
# Pipeline in which every augmenter draws its value from an explicit
# stochastic parameter (iap.*) instead of a fixed number or a plain range.
seq = iaa.Sequential([
    # Blur strength sigma is sampled uniformly from [0.0, 1.0].
    iaa.GaussianBlur(
        sigma=iap.Uniform(0.0, 1.0)
    ),
    # Contrast factor is 1.0, 1.5 or 3.0 with probability 0.5, 0.3 and 0.2.
    # LinearContrast is used instead of ContrastNormalization, which imgaug
    # deprecated in favour of LinearContrast.
    iaa.LinearContrast(
        iap.Choice(
            [1.0, 1.5, 3.0],
            p=[0.5, 0.3, 0.2]
        )
    ),
    # Rotation angle follows a normal distribution (mean 0, std 30); the
    # pixel translation is a Poisson(3) sample with a randomly chosen sign.
    iaa.Affine(
        rotate=iap.Normal(0.0, 30),
        translate_px=iap.RandomSign(iap.Poisson(3))
    ),
    # Beta(0.5, 0.5) samples lie in [0, 1]; "* 2 - 1.0" maps them to
    # [-1, 1] and "* 64" to [-64, 64]. Discretize turns them into integers.
    iaa.AddElementwise(
        iap.Discretize(
            (iap.Beta(0.5, 0.5) * 2 - 1.0) * 64
        )
    ),
    # Brightness factor: 1.0 plus the positive-signed sample of Normal(0, 0.1).
    iaa.Multiply(
        iap.Positive(iap.Normal(0.0, 0.1)) + 1.0
    )
])

All available probability distributions are:

6.1 Normal distribution

Normal(loc, scale): The mean is loc, the standard deviation is scale.

from imgaug import parameters as iap
# Four normal distributions, written as Normal(loc, scale).
# In the last two the mean is itself a stochastic parameter: either one of
# the values -3 / 3, or a uniform sample from (-3, 3).
params = [
    iap.Normal(0, 1),
    iap.Normal(5, 3),
    iap.Normal(iap.Choice([-3, 3]), 1),
    iap.Normal(iap.Uniform(-3, 3), 1)
]
# Plot the distributions side by side in a grid.
iap.show_distributions_grid(params)

6.2 Laplace distribution

Laplace(loc, scale): peak loc, width scale:

from imgaug import parameters as iap
# Four Laplace distributions, written as Laplace(loc, scale).
# In the last two the peak position is itself a stochastic parameter: either
# one of the values -3 / 3, or a uniform sample from (-3, 3).
params = [
    iap.Laplace(0, 1),
    iap.Laplace(5, 3),
    iap.Laplace(iap.Choice([-3, 3]), 1),
    iap.Laplace(iap.Uniform(-3, 3), 1)
]
# Plot the distributions side by side in a grid.
iap.show_distributions_grid(params)

6.3 Other continuous probability distributions include:

  • Chi-square distribution (ChiSquare)
  • Weibull distribution
  • Uniform distribution
  • Beta distribution

6.4 Discrete Probability Distributions

  • Binomial distribution (Binomial)
  • Discrete Uniform
  • Poisson distribution

6.5 Mathematical operations on distributions

imgaug supports arithmetic operations on stochastic parameters. This makes it possible to modify the values drawn from a distribution, or to combine several distributions with each other. Supported operations are:

  • Add
  • Subtract
  • Multiply
  • Divide
  • Power

6.6 Special parameters

Supported special parameters are:

  • Deterministic
  • Choice
  • Clip
  • Discretize
  • Absolute
  • RandomSign
  • ForceSign
  • Positive
  • Negative
  • FromLowerResolution

See the documentation for specific meaning and usage.

7. Blending/Overlaying images

An augmenter normally replaces the input image with its transformed version and discards the original. Sometimes we only want to change parts of the image, or to combine the original image with the transformed one. This can be done by blending the images before and after the transformation with a certain weight (the α parameter), or by using a pixel-wise mask.
An example is as follows:

# NOTE(review): ContrastNormalization and, presumably, the Alpha /
# SimplexNoiseAlpha / FrequencyNoiseAlpha family are deprecated in newer
# imgaug releases -- confirm against the installed version.

# First row: blend the median-blurred image with the input, using a blend
# weight drawn from the range (0.0, 1.0).
iaa.Alpha(
    (0.0, 1.0),
    first=iaa.MedianBlur(11),
    per_channel=True
)

# Second row: blend an edge-detected image with the input through a
# simplex-noise mask.
iaa.SimplexNoiseAlpha(
    first=iaa.EdgeDetect(1.0),
    per_channel=False
)

# Third row: same mask type, but the second branch is contrast-adjusted
# rather than left untouched.
iaa.SimplexNoiseAlpha(
    first=iaa.EdgeDetect(1.0),
    second=iaa.ContrastNormalization((0.5, 2.0)),
    per_channel=0.5
)

# Fourth row: frequency-noise mask blending a slightly rotated/translated
# image with a hue/saturation-shifted one.
iaa.FrequencyNoiseAlpha(
    first=iaa.Affine(
        rotate=(-10, 10),
        translate_px={
    
    "x": (-4, 4), "y": (-4, 4)}
    ),
    second=iaa.AddToHueAndSaturation((-40, 40)),
    per_channel=0.5
)

# Fifth row: blending augmenters can be nested -- both branches of the outer
# simplex-noise blend are themselves blending augmenters.
iaa.SimplexNoiseAlpha(
    first=iaa.SimplexNoiseAlpha(
        first=iaa.EdgeDetect(1.0),
        second=iaa.ContrastNormalization((0.5, 2.0)),
        per_channel=True
    ),
    second=iaa.FrequencyNoiseAlpha(
        exponent=(-2.5, -1.0),
        first=iaa.Affine(
            rotate=(-10, 10),
            translate_px={
    
    "x": (-4, 4), "y": (-4, 4)}
        ),
        second=iaa.AddToHueAndSaturation((-40, 40)),
        per_channel=True
    ),
    per_channel=True,
    aggregation_method="max",
    sigmoid=False
)

8. Example for target detection

from imgaug import augmenters as iaa
import cv2
import numpy as np
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

import copy
# Augmentation pipeline for object detection: the image and its bounding
# boxes are passed through the same pipeline so that the boxes follow the
# geometric transforms applied to the image.
seq = iaa.Sequential([
    # Apply between 1 and 5 of the augmenters listed below to each image.
    iaa.SomeOf((1, 5),
        [
            iaa.Fliplr(1),  # horizontal mirror; p=1, so it always flips when selected
            iaa.Flipud(1),  # vertical flip; p=1, so it always flips when selected
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-25, 25),
                shear=(-8, 8)
            ),                                                   # affine transform
            iaa.OneOf([
                iaa.GaussianBlur((0, 3.0)),                      # Gaussian blur
                iaa.AverageBlur(k=(2, 7)),                       # mean blur, k is the kernel size
                iaa.MedianBlur(k=(3, 11)),                       # median blur
            ]),
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),  # sharpen
            iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),       # like sharpen, with an embossed look
            iaa.AdditiveGaussianNoise(                           # additive Gaussian noise
                loc=0, scale=(0.0, 0.05*255)
            ),
            iaa.Invert(0.05, per_channel=True),                  # invert channels with probability 0.05
            iaa.Add((-10, 10), per_channel=0.5),                 # add a value from [-10, 10] to the pixels
            iaa.AddElementwise((-40, 40)),                       # add a separate value from [-40, 40] per pixel
            iaa.Multiply((0.5, 1.5)),                            # change the image brightness
            iaa.MultiplyElementwise((0.5, 1.5)),                 # multiply every pixel by its own factor
            # LinearContrast replaces ContrastNormalization, which imgaug
            # deprecated in favour of LinearContrast.
            iaa.LinearContrast((0.5, 2.0)),                      # change the contrast (wide range)
            iaa.LinearContrast((0.75, 1.5)),                     # strengthen or weaken the contrast (mild range)
            iaa.PiecewiseAffine(scale=(0.01, 0.05)),             # distort local regions by different amounts
            # Either drop 1-10% of all pixels (set them to black), or drop
            # pixels on a version of the image at 2-5% of the original size,
            # which results in large dropped rectangles.
            iaa.OneOf([
                iaa.Dropout((0.01, 0.1), per_channel=0.5),
                iaa.CoarseDropout(
                    (0.03, 0.15), size_percent=(0.02, 0.05),
                    per_channel=0.2
                ),
            ]),
        ],
        random_order=True  # apply the selected augmenters in random order
    )
],random_order=True)


# np.fromfile + cv2.imdecode is used to read the image file.
image_path = r'E:\pythonProject\dataset\0\0.jpg'
src = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
if src is None:
    # imdecode returns None when the buffer cannot be decoded; fail here with
    # a clear message instead of an AttributeError on src.shape below.
    raise ValueError("Could not decode image: " + image_path)
bndbox = [120, 120, 240, 240]  # x1, y1, x2, y2

# Draw the original box on a copy so that `src` stays untouched for augmentation.
img = copy.deepcopy(src)
cv2.rectangle(img, (bndbox[0], bndbox[1]), (bndbox[2], bndbox[3]), (0, 255, 255))
bbs = BoundingBoxesOnImage([
    BoundingBox(x1=bndbox[0], y1=bndbox[1], x2=bndbox[2], y2=bndbox[3])], shape=src.shape)

# Augment the image and its bounding boxes together.
image_aug, bbs_aug = seq(image=src, bounding_boxes=bbs)

# Some augmenters (e.g. a vertical flip) can hand back a strided view of the
# input; OpenCV drawing functions need a contiguous buffer to draw in place.
image_aug = np.ascontiguousarray(image_aug)
cv2.rectangle(image_aug, (int(bbs_aug[0].x1), int(bbs_aug[0].y1)), (int(bbs_aug[0].x2), int(bbs_aug[0].y2)), (0, 255, 255))

cv2.imshow("img", img)
cv2.imshow("image_aug", image_aug)
cv2.waitKey()

9. Example for Semantic Segmentation

import imgaug.augmenters as iaa  # 导入iaa
import cv2
import glob
import os
import numpy as np

if __name__ == '__main__':
    # Offline augmentation for semantic segmentation: every image/mask pair
    # is augmented 5 times and the results are written to the output folders.
    img_dir = '/train/images/'      # input image directory
    msk_dir = '/train/masks/'       # input mask (label) directory
    img_type = '.png'
    img_tmp_dir = '/tmp/images/'    # output directory for augmented images
    msk_tmp_dir = '/tmp/masks/'     # output directory for augmented masks

    # cv2.imwrite does not create missing directories, it just fails to write.
    os.makedirs(img_tmp_dir, exist_ok=True)
    os.makedirs(msk_tmp_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order. Sort both listings so the
    # i-th image is paired with the i-th mask (assumes that images and masks
    # use matching file names -- verify for your dataset).
    img_list = sorted(os.listdir(img_dir))
    msk_list = sorted(os.listdir(msk_dir))

    # Augmentation strategy: exactly one of the augmenters below is picked for
    # every call. Built once, outside the loop, because it does not depend on
    # the current file.
    seq = iaa.SomeOf(1, [
        iaa.Fliplr(p=1),    # horizontal flip
        iaa.Flipud(p=1),    # vertical flip
        iaa.GaussianBlur(sigma=(0, 3.0)),   # Gaussian blur
        iaa.Sharpen(alpha=(0, 0.3), lightness=(0.9, 1.1)),  # sharpen
        iaa.Affine(scale=(0.9, 1), translate_percent=(0, 0.1), rotate=(-40, 40), cval=0, mode='constant'),   # affine transform
        iaa.CropAndPad(px=(-10, 0), percent=None, pad_mode='constant', pad_cval=0, keep_size=True), # crop, then resize back
        iaa.PiecewiseAffine(scale=(0, 0.05), nb_rows=4, nb_cols=4, cval=0),     # random deformation driven by control points
        # LinearContrast replaces ContrastNormalization, which imgaug
        # deprecated in favour of LinearContrast.
        iaa.LinearContrast((0.75, 1.5), per_channel=True),  # contrast change with a factor from 0.75-1.5, sampled per channel
        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),  # Gaussian noise
        iaa.Multiply((0.8, 1.2), per_channel=0.2),  # multiply pixel values by 0.8-1.2 to change brightness or colour
    ])

    # zip stops at the shorter listing, so a missing mask no longer raises an
    # IndexError halfway through the run.
    for i, (img_name, msk_name) in enumerate(zip(img_list, msk_list)):
        img = cv2.imread(os.path.join(img_dir, img_name))
        msk = cv2.imread(os.path.join(msk_dir, msk_name))
        if img is None or msk is None:
            # cv2.imread returns None for files it cannot read as an image.
            print("skipping unreadable pair: {} / {}".format(img_name, msk_name))
            continue

        # Add a leading batch axis, since the pipeline is called with batches.
        img = np.expand_dims(img, axis=0).astype(np.float32)
        msk = np.expand_dims(msk, axis=0).astype(np.int32)

        # splitext removes only the extension, so names that contain
        # additional dots are not truncated.
        img_stem = os.path.splitext(img_name)[0]
        msk_stem = os.path.splitext(msk_name)[0]

        # Augment the image and its segmentation mask together.
        for j in range(5):
            img_aug, msk_aug = seq(images=img, segmentation_maps=msk)
            img_out = os.path.join(img_tmp_dir, img_stem + "_" + str(j) + img_type)
            msk_out = os.path.join(msk_tmp_dir, msk_stem + "_" + str(j) + img_type)
            cv2.imwrite(img_out, img_aug[0])
            # The mask was read with 3 channels; only the first one is saved.
            cv2.imwrite(msk_out, msk_aug[0,:,:,0])
        print("正在进行数据增强{}".format(i))


Guess you like

Origin blog.csdn.net/qq_45723275/article/details/129276991