python深度学习训练样本图像增强

深度学习样本标完后图片增强标签内容不变02_train图片增强后-CSDN博客

将下方代码中两处成对的“## begin”标记之间被注释掉的代码全部取消注释后，即可随机从 A 图中截取一部分区域复制到 B 图中，从而实现图像增强。

功能的优化

import numpy as np
import cv2
import copy
import random
import argparse
import glob

def random_gray(em):
    """Pick a random near-black or near-white BGR colour based on ``em``.

    Args:
        em: Brightness value that is compared against 128.

    Returns:
        tuple: ``(B, G, R)``. Each channel is drawn independently from
        ``[0, 30)`` when ``em >= 128`` and from ``[196, 226)`` otherwise.
    """
    low, high = (0, 30) if em >= 128 else (196, 226)
    # Three separate draws, in B, G, R order.
    blue = np.random.randint(low, high)
    green = np.random.randint(low, high)
    red = np.random.randint(low, high)
    return (blue, green, red)

def get_hull(axis_list):
    """Return the convex hull of a point set.

    Args:
        axis_list: Point array handed straight to ``cv2.convexHull``.

    Returns:
        The hull points (``returnPoints=True``), requested with
        ``clockwise=True`` orientation.
    """
    return cv2.convexHull(axis_list, clockwise=True, returnPoints=True)

def generate_curve(img, control_points, color, thickness):
    """Draw a cubic Bezier curve (a hair-like bent line) onto ``img``.

    Args:
        img: Image that is drawn on in place.
        control_points: Indexable holding the four Bezier control points
            at positions 0..3.
        color: Colour forwarded to ``cv2.polylines``.
        thickness: Line thickness forwarded to ``cv2.polylines``.

    Returns:
        The same ``img`` object, now containing the curve.
    """
    p0, p1, p2, p3 = (control_points[k] for k in range(4))
    sample_count = 100  # number of points sampled along the curve
    # Evaluate the cubic Bernstein form once per sample parameter t.
    path = [
        np.power(1 - t, 3) * p0
        + 3 * np.power(1 - t, 2) * t * p1
        + 3 * (1 - t) * np.power(t, 2) * p2
        + np.power(t, 3) * p3
        for t in np.linspace(0, 1, sample_count)
    ]
    # Converting to int32 truncates the sampled coordinates.
    path = np.array(path, dtype=np.int32)
    cv2.polylines(img, [path], isClosed=False, color=color, thickness=thickness)
    return img

def rotation(img, angle):
    """Rotate ``img`` about its centre while keeping the canvas size.

    Args:
        img: Input image; only its first two dimensions (rows, cols) are read.
        angle: Rotation angle in degrees, forwarded to
            ``cv2.getRotationMatrix2D`` with ``scale=1``.

    Returns:
        The rotated image with the same width and height as the input.
    """
    rows, cols = img.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle=angle, scale=1)
    # The interpolation mode must be passed by keyword: the fourth positional
    # parameter of cv2.warpAffine is ``dst``, so a positional INTER_NEAREST
    # is not interpreted as the interpolation flag.
    return cv2.warpAffine(img, M, (cols, rows), flags=cv2.INTER_NEAREST)

def random_jitter(input, dx, dy):
    """Translate an image by ``(dx, dy)`` pixels, keeping the canvas size.

    Args:
        input: Image to shift; only its first two dimensions are read.
        dx: Shift along the x axis (columns).
        dy: Shift along the y axis (rows).

    Returns:
        The shifted image with the same width and height as the input.
    """
    shift = np.float32([[1, 0, dx], [0, 1, dy]])  # 2x3 translation matrix
    rows, cols = input.shape[:2]
    # The interpolation mode must be passed by keyword: the fourth positional
    # parameter of cv2.warpAffine is ``dst``, not ``flags``.
    return cv2.warpAffine(input, shift, (cols, rows), flags=cv2.INTER_NEAREST)

def black_edge_crop(img, H, W):
    """Cut a 30-pixel border off ``img`` and scale the rest back to ``(H, W)``.

    Args:
        img: Image indexed with three axes (rows, cols, channels).
        H: Height used both for the crop bounds and the output size.
        W: Width used both for the crop bounds and the output size.

    Returns:
        The cropped region resized to width ``W`` and height ``H``.
    """
    border = 30
    inner = img[border:H - border, border:W - border, :]
    return cv2.resize(inner, dsize=(W, H))

def random_h(img):
    """Shift the hue of a BGR image by a random amount in ``[-5, 5]``.

    Only pixels whose hue lies strictly between 5 and 175 are shifted, so
    the adjusted hue cannot leave the range 1..179.

    Args:
        img: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        The hue-shifted image converted back to BGR.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hue = hsv[:, :, 0]
    shiftable = (hue > 5) & (hue < 175)
    shift = np.random.randint(-5, 6)
    # Build the offset as a signed integer array. Multiplying the unsigned
    # hue plane by a negative Python int raises OverflowError on NumPy >= 2.
    offset = np.where(shiftable, shift, 0)
    hsv[:, :, 0] = hue + offset
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def brightness_adjustment(img):
    """Scale image brightness by a random factor of 0.9, 1.0 or 1.1.

    Args:
        img: Image accepted by ``cv2.addWeighted``.

    Returns:
        ``img`` blended with an all-zero image using weights ``c`` and
        ``1 - c``, where ``c`` is the random factor.
    """
    step = np.random.randint(low=-1, high=2, size=None, dtype='l')
    gain = 1 + step / 10.
    # Blending with a black image multiplies every pixel by ``gain``.
    return cv2.addWeighted(img, gain, np.zeros_like(img), 1 - gain, 0)

def change_channel(img, sd):
    """Return a copy of a 3-channel image with its channels permuted.

    With the input channels called ``b, g, r`` (indices 0, 1, 2), ``sd``
    selects the output order:

        0 -> (b, r, g)    1 -> (b, g, r)    2 -> (r, g, b)
        3 -> (r, b, g)    4 -> (g, b, r)    5 -> (g, r, b)

    Args:
        img: Array of shape ``(rows, cols, 3)``.
        sd: Permutation selector in ``0..5``.

    Returns:
        A new C-contiguous array with the same shape and dtype as ``img``.

    Raises:
        ValueError: If ``sd`` is not in ``0..5`` or ``img`` does not have
            exactly three channels.
    """
    orders = {
        0: (0, 2, 1),
        1: (0, 1, 2),
        2: (2, 1, 0),
        3: (2, 0, 1),
        4: (1, 0, 2),
        5: (1, 2, 0),
    }
    order = orders.get(sd)
    if order is None:
        # Previously an unknown selector left the result unbound.
        raise ValueError("sd must be an integer in 0..5, got %r" % (sd,))
    if img.ndim != 3 or img.shape[2] != 3:
        raise ValueError("img must have shape (rows, cols, 3), got %r" % (img.shape,))
    # Fancy indexing may return a non-contiguous layout; make it contiguous
    # so later in-place OpenCV drawing calls can use the result.
    return np.ascontiguousarray(img[:, :, list(order)])

def random_flip(input, flag):
    """Flip an array according to ``flag``.

    Args:
        input: Array to flip.
        flag: 1 flips left-right, 2 flips up-down, 3 does both; any other
            value leaves the array untouched.

    Returns:
        The flipped array as produced by ``np.fliplr`` / ``np.flipud``, or
        the very same ``input`` object when no flip is selected.
    """
    if flag not in (1, 2, 3):
        return input
    flipped = input
    if flag in (1, 3):
        flipped = np.fliplr(flipped)
    if flag in (2, 3):
        flipped = np.flipud(flipped)
    return flipped

def gen_diffs(ex, mask, diff_src, num_diff, H, W):
    """Paste random convex patches taken from ``diff_src`` into ``ex``.

    For each of ``num_diff`` iterations a random rectangle is chosen, random
    points inside it are collected, and their convex hull is filled with 1 in
    ``mask`` and with black in ``ex``. Afterwards the mask is dilated, stacked
    to three channels, blurred, and used to blend ``diff_src`` into ``ex``.

    Args:
        ex: Image that receives the patches (copied before each fill).
        mask: Label mask; hull regions are set to 1 (copied before each fill).
        diff_src: Image whose pixels are blended in where the mask is set.
        num_diff: Number of patches to generate.
        H: Height bound used when drawing random rectangle positions.
        W: Width bound used when drawing random rectangle positions.

    Returns:
        tuple: ``(ex, mask)`` - the blended uint8 image and the dilated
        single-channel mask.

    NOTE(review): presumably ``ex`` and ``diff_src`` are (H, W, 3) images with
    values in 0..255 and ``mask`` is (H, W) - confirm against the caller.
    """
    big_cnt = 0  # becomes 1 once a large patch has been generated
    for b in range(num_diff):
        # Randomly choose the top-left corner plus width and height.
        # With probability 1/30, and at most once per call, use a large
        # rectangle (side 100..299) instead of a small one (side 10..49).
        if np.random.randint(0, 30) == 0 and big_cnt == 0:
            x_l, y_l, w, h = np.random.randint(low=99, high=W - 300, size=None, dtype='l'), \
                             np.random.randint(low=99, high=H - 300, size=None, dtype='l'), \
                             np.random.randint(low=100, high=300, size=None, dtype='l'), \
                             np.random.randint(low=100, high=300, size=None, dtype='l')
            big_cnt = 1
            big_flag = 1
        else:
            x_l, y_l, w, h = np.random.randint(low=49, high=W - 50, size=None, dtype='l'), \
                             np.random.randint(low=49, high=H - 50, size=None, dtype='l'), \
                             np.random.randint(low=10, high=50, size=None, dtype='l'), \
                             np.random.randint(low=10, high=50, size=None, dtype='l')
            big_flag = 0

        # Generate the difference inside this rectangle and set the matching
        # region of the blank mask to 1 (white) so it can serve as the label.
        # The rectangle centre is always included, so there is at least one point.
        points = [[x_l + w // 2, y_l + h // 2]]
        for i in range(x_l, x_l + w):
            for j in range(y_l, y_l + h):
                # Each pixel is kept with probability 1/5000 (large patch)
                # or 1/100 (small patch).
                if big_flag == 1:
                    if np.random.randint(0, 5000) == 1:
                        points.append([i, j])
                else:
                    if np.random.randint(0, 100) == 1:
                        points.append([i, j])
        random.shuffle(points)
        pts = np.asarray([points], dtype=np.int32)
        # Swap the first two axes of the hull so it can be passed to fillPoly.
        hull = get_hull(pts).transpose(1, 0, 2)
        mask = cv2.fillPoly(mask.copy(), hull, color=1)
        ex = cv2.fillPoly(ex.copy(), hull, color=(0, 0, 0))

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    # NOTE(review): the third positional parameter of cv2.dilate is ``dst``,
    # not ``iterations`` - confirm whether ``1`` was meant as iterations.
    mask = cv2.dilate(mask.copy(), kernel, 1)
    mask_filled = np.stack((mask, mask, mask), axis=2)
    mask_filled = cv2.GaussianBlur(mask_filled, (3, 3), 0, 0)
    # Blend in the 0..1 range: keep ``ex`` where the mask is 0 and take
    # ``diff_src`` where it is 1.
    ex = (255 * ((ex / 255) * (1 - mask_filled) + (diff_src / 255) * mask_filled)).astype(np.uint8)
    return ex, mask

def one_curve(ex, mask, H, W):
    """Draw one random thin Bezier curve on an image and on its label mask.

    Args:
        ex: Image to draw on; a copy is drawn, the input is left untouched.
        mask: Label mask; the curve is drawn onto it in place with value 1.
        H: Height used to bound the random control points.
        W: Width used to bound the random control points.

    Returns:
        tuple: ``(ex, mask)`` - the image copy with the curve, and the mask.
    """
    def _drift(previous, spread, limit):
        # Random step in [-spread, spread), clamped to [0, limit - 1].
        return max(min(previous + np.random.randint(-spread, spread), limit - 1), 0)

    start_x = np.random.randint(20, W - 20)
    start_y = np.random.randint(20, H - 20)
    anchors = [[start_x, start_y]]
    # Each later control point wanders further from the previous one.
    for spread in (20, 40, 80):
        prev_x, prev_y = anchors[-1]
        next_x = _drift(prev_x, spread, W)
        next_y = _drift(prev_y, spread, H)
        anchors.append([next_x, next_y])
    control_points = np.array(anchors, dtype=np.int32)
    thickness = np.random.randint(1, 3)

    # Draw the curve alone on a blank mask to find which pixels it covers.
    curve_only = generate_curve(np.zeros_like(mask), control_points, 1, thickness)
    mask = generate_curve(mask, control_points, 1, thickness)
    covered = curve_only == 1
    ex_mean = ex[covered].sum() / np.sum(covered)
    # The curve colour is chosen from the brightness under the curve.
    ex = generate_curve(ex.copy(), control_points, random_gray(ex_mean / 3), thickness)
    return ex, mask

def noisy(noise_typ, image):
    """Add Gaussian or salt-and-pepper noise to an image.

    Args:
        noise_typ: ``"gauss"`` or ``"s&p"``.
        image: Image with values in 0..255. ``"gauss"`` requires three
            dimensions (rows, cols, channels).

    Returns:
        A new image. For ``"gauss"`` the result is min-max normalised and
        returned as uint8. For ``"s&p"`` it is a copy of ``image`` with
        random pixels set to 255 (salt) or 0 (pepper) in all channels.

    Raises:
        ValueError: If ``noise_typ`` is not a supported noise type.
    """
    if noise_typ == "gauss":
        row, col, ch = image.shape
        mean = 0
        var = 0.001
        sigma = var ** 0.5
        gauss = np.random.normal(mean, sigma, (row, col, ch))
        scaled = image / 255
        # Randomly add or subtract the noise field.
        if np.random.randint(0, 2) == 1:
            noisy_img = scaled + gauss
        else:
            noisy_img = scaled - gauss
        noisy_img = cv2.normalize(noisy_img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        return np.uint8(noisy_img * 255)

    if noise_typ == "s&p":
        s_vs_p = 0.5
        amount = 0.004
        out = np.copy(image)
        height, width = image.shape[:2]
        if height == 0 or width == 0:
            return out
        # Salt first, then pepper.
        for value, share in ((255, s_vs_p), (0, 1. - s_vs_p)):
            count = int(np.ceil(amount * image.size * share))
            # randint's upper bound is exclusive, so use the full dimension.
            # ``dim - 1`` would never touch the last row/column and would
            # fail on a dimension of size 1.
            rows = np.random.randint(0, height, count)
            cols = np.random.randint(0, width, count)
            out[rows, cols] = value
        return out

    # Previously an unknown type fell through and returned None.
    raise ValueError("unsupported noise_typ: %r (expected 'gauss' or 's&p')" % (noise_typ,))

def add_noise(image):
    """Apply Gaussian noise (90% of calls) or a motion blur (10%) to an image.

    Args:
        image: Image of shape (rows, cols, channels) with values in 0..255.

    Returns:
        The degraded image. Both branches return the input's dtype; the
        Gaussian branch used to return float64.
    """
    if np.random.rand() < 0.9:
        # Gaussian noise: one 2-D field shared by every channel.
        row, col, ch = image.shape
        var = np.random.rand() * 0.3 * 256
        sigma = var ** 0.5
        gauss = sigma * np.random.randn(row, col)
        noisy = image + gauss[:, :, np.newaxis]
        # Clip, then cast back so callers get an image they can show or save.
        return np.clip(noisy, 0, 255).astype(image.dtype)

    # Motion blur: a horizontal or vertical line kernel of random length.
    sizes = [3, 5, 7, 9, 11, 15]
    size = sizes[np.random.randint(len(sizes))]
    kernel_motion_blur = np.zeros((size, size))
    middle = (size - 1) // 2
    if np.random.rand() < 0.5:
        kernel_motion_blur[middle, :] = 1
    else:
        kernel_motion_blur[:, middle] = 1
    kernel_motion_blur = kernel_motion_blur / size
    return cv2.filter2D(image, -1, kernel_motion_blur)

def virtual_light(img):
    """Brighten or darken a random circular spot to mimic a light source.

    Args:
        img: Image of shape (rows, cols, channels); rows and cols must both
            exceed 100 so that a centre can be drawn.

    Returns:
        A uint8 image of the same shape. The offset is strongest at the
        random centre and fades linearly to zero at the spot radius.
    """
    height, width = img.shape[:2]
    # Random centre; the radius is the smaller of its two coordinates.
    center_x = np.random.randint(50, width - 50)
    center_y = np.random.randint(50, height - 50)
    reach = min(center_x, center_y)
    # Random intensity, then a random sign.
    intensity = np.random.randint(-20, 20)
    sign = 1 if np.random.randint(0, 2) == 1 else -1

    # Squared distance of every pixel from the centre.
    dy = center_y - np.arange(height).reshape(-1, 1) - 0.5
    dx = center_x - np.arange(width).reshape(1, -1) - 0.5
    dist_sq = dy ** 2 + dx ** 2

    # Linear falloff inside the radius, nothing outside it.
    falloff = intensity * (1 - np.sqrt(dist_sq) / reach)
    falloff = np.where(dist_sq >= reach ** 2, 0, falloff)
    delta = np.clip(falloff * sign, -255, 255).astype("int32")

    return np.clip(img + delta[..., np.newaxis], 0, 255).astype("uint8")

def random_resize(img, right, up, fg):
    """Zoom in, keep, or zoom out an image while preserving its size.

    Args:
        img: Image with at least two dimensions.
        right: Padding added on the left and right sides (zoom-out mode).
        up: Padding added on the top and bottom sides (zoom-out mode).
        fg: Mode selector - 0 crops 20 pixels from every side and resizes
            back; 1 leaves the content unchanged; any other value pads with
            zeros and resizes back.

    Returns:
        A fresh copy of the result with the same height and width as ``img``.
    """
    H, W = img.shape[0], img.shape[1]
    if fg == 1:
        return img.copy()
    if fg == 0:
        # Slicing the first two axes covers colour and grayscale alike.
        source = img[20:H - 20, 20:W - 20]
    else:
        source = cv2.copyMakeBorder(img, int(up), int(up), int(right), int(right),
                                    cv2.BORDER_CONSTANT, value=0)
    scaled = cv2.resize(source, dsize=(W, H), interpolation=cv2.INTER_NEAREST)
    return scaled.copy()

def add_time_noise(image):
    """Add or subtract low-amplitude Gaussian noise with random parameters.

    The noise mean and standard deviation are each drawn from
    ``{0.0, 0.1, ..., 2.9}``.

    Args:
        image: Array with values in 0..255.

    Returns:
        A uint8 array of the same shape, clipped to 0..255.
    """
    mean = np.random.randint(0, 30) / 10.
    spread = np.random.randint(0, 30) / 10.
    perturbation = np.random.normal(mean, spread, image.shape)
    # Randomly decide whether the noise is added or subtracted.
    if np.random.randint(0, 2) == 0:
        perturbed = image + perturbation
    else:
        perturbed = image - perturbation
    return np.clip(perturbed, 0, 255).astype(np.uint8)

def rgb_norm(image):
    """Standardise each channel of a 3-channel image independently.

    Every channel is shifted by its mean and divided by its standard
    deviation (plus a small epsilon so the division is always defined).

    Args:
        image: Image that ``cv2.split`` separates into exactly three channels.

    Returns:
        The three standardised channels merged back in their original order.
    """
    eps = 1e-7

    def _standardize(channel):
        return (channel - np.mean(channel)) / (np.std(channel) + eps)

    first, second, third = cv2.split(image)
    return cv2.merge((_standardize(first), _standardize(second), _standardize(third)))

def make_datasets(args):
    """Augment every image listed in the module-level ``imgs`` list.

    Each image is read, resized to ``args.HW`` and passed through the
    augmentations enabled on ``args``. The result is optionally shown next to
    the source and optionally saved beside the input as
    ``*_augumented.png``.

    Args:
        args: Namespace providing ``HW`` (height, width) and the boolean
            switches ``rot``, ``jit``, ``crop``, ``hue``, ``bright``,
            ``channel_change``, ``flip``, ``curve``, ``noise``, ``blur``,
            ``light``, ``time_noise``, ``show_result`` and ``save_result``.

    Returns:
        None. Results are delivered through the display window and files.
    """
    H, W = args.HW
    for path in imgs:
        # Blank label mask. Created unconditionally because the curve
        # augmentation below needs it even when patch copying is disabled.
        mask = np.zeros((H, W))

        ## begin (optional: copy random patches from image A into image B)
        # num_diff = np.random.randint(low=12, high=20, size=None, dtype='l')  # number of patches
        ## begin

        src = cv2.imread(path)
        if src is None:
            # cv2.imread returns None for unreadable files; skip them
            # instead of failing inside cv2.resize.
            print('skip unreadable image: %s' % path)
            continue
        # cv2.resize takes dsize as (width, height).
        src = cv2.resize(src, dsize=(W, H))
        src_show = copy.deepcopy(src)

        if args.rot:
            angle = np.random.randint(low=-10, high=11) / 4
            src = rotation(img=src, angle=angle)
        if args.jit:
            dx, dy = np.random.randint(low=-5, high=6, size=2)
            src = random_jitter(src, dx, dy)
        if args.crop:
            src = black_edge_crop(src, H, W)
        if args.hue:
            src = random_h(src)
        if args.bright:
            src = brightness_adjustment(src)
        if args.channel_change:
            sd = np.random.randint(low=0, high=6)
            src = change_channel(src, sd=sd)
        if args.flip:
            # Only 1, 2 and 3 flip; 0, 4 and 5 leave the image unchanged.
            sd = np.random.randint(low=0, high=6)
            src = random_flip(src, sd)

        ## begin (optional: copy random patches from image A into image B)
        # diff_img = cv2.imread(imgs[np.random.randint(0, len(imgs))])
        # diff_src = cv2.resize(diff_img, dsize=(W, H))
        # src, _ = gen_diffs(src, mask, diff_src, num_diff, H, W)
        ## begin

        if args.curve:
            src, mask = one_curve(src, mask, H, W)

        if args.noise:
            # One draw picks the noise type so each has probability 1/3.
            # Drawing again in the elif skewed the split.
            noise_kind = np.random.randint(0, 3)
            if noise_kind == 0:
                src = noisy(noise_typ='gauss', image=src)
            elif noise_kind == 1:
                src = noisy(noise_typ='s&p', image=src)
            else:
                src = add_noise(src)
        if args.blur:
            k = np.random.randint(1, 3) * 2 + 1  # kernel size 3 or 5
            src = cv2.GaussianBlur(src, (k, k), 0)
        if args.light:
            src = virtual_light(src)

        # NOTE(review): randint(0, 2) only yields 0 or 1, so the padding mode
        # of random_resize is never selected here - confirm this is intended.
        fg = np.random.randint(0, 2)
        right = np.random.randint(10, 15)
        up = np.random.randint(10, 15)
        src = random_resize(src, right, up, fg)

        if args.time_noise:
            src = add_time_noise(src)

        if args.show_result:
            cv2.imshow('left is source, right is augumented', np.hstack((src_show, src)))
            cv2.waitKey(500)

        if args.save_result:
            cv2.imwrite(path.replace('.png', '_augumented.png'), src)


if __name__ == '__main__':
    # Script entry point: parse the switches, collect the PNG files under
    # --imgs_path into the module-level ``imgs`` list, and augment them.
    import os

    def _str2bool(value):
        """Convert a command-line token such as 'True' or 'false' to a bool."""
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', '1', 'yes', 'y', 'on'):
            return True
        if text in ('false', '0', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser()
    # nargs/type let the size be given on the command line: --HW 480 640
    parser.add_argument('--HW', nargs=2, type=int, default=[640, 640], help='height, width of image')
    parser.add_argument('--imgs_path', default="D:\\BUFFER\\Pycharm\\Images\\", help='path of background images')
    # Without an explicit type every passed value is a non-empty string and
    # therefore truthy, so "--jit False" would switch the option ON.
    parser.add_argument('--jit', type=_str2bool, default=False, help='translation augument')            # translation jitter
    parser.add_argument('--rot', type=_str2bool, default=False, help='rotation augument')               # rotation
    parser.add_argument('--channel_change', type=_str2bool, default=False, help='channel augument')     # channel permutation
    parser.add_argument('--crop', type=_str2bool, default=True, help='random crop')                     # border crop
    parser.add_argument('--hue', type=_str2bool, default=False, help='color augument')                  # random hue shift
    parser.add_argument('--bright', type=_str2bool, default=False, help='brightness augument')          # random brightness
    parser.add_argument('--flip', type=_str2bool, default=False, help='random flip')                    # random flip
    parser.add_argument('--light', type=_str2bool, default=False, help='virtual light')                 # simulated lighting
    parser.add_argument('--noise', type=_str2bool, default=False, help='random noise')                  # random noise
    parser.add_argument('--blur', type=_str2bool, default=False, help='guass blur')                     # random Gaussian blur
    parser.add_argument('--curve', type=_str2bool, default=False, help='add something thin and long')   # thin curved foreign objects
    parser.add_argument('--time_noise', type=_str2bool, default=False, help='temporal noise')           # simulated temporal noise
    parser.add_argument('--show_result', type=_str2bool, default=True, help='show result')              # display the result
    parser.add_argument('--save_result', type=_str2bool, default=True, help='save result')              # save the result

    args = parser.parse_args()
    # os.path.join avoids the invalid '\*' escape sequence and works with
    # or without a trailing separator on any platform.
    imgs = glob.glob(os.path.join(args.imgs_path, '*.png'))
    L = len(imgs)
    make_datasets(args)

猜你喜欢

转载自blog.csdn.net/moonlightpeng/article/details/133775131
今日推荐