深度学习样本标完后图片增强标签内容不变02_train图片增强后-CSDN博客
将代码中两处 `## begin` … `## begin` 之间被注释掉的部分都取消注释，即可实现随机从 A 图中截取一部分粘贴到 B 图中的图像增强。
功能的优化
import numpy as np
import cv2
import copy
import random
import argparse
import glob
def random_gray(em):
    """Pick a random near-gray colour that contrasts with brightness ``em``.

    Args:
        em: Brightness value of the area being drawn over; it is compared
            against 128.

    Returns:
        A 3-tuple ``(B, G, R)``. Each channel is drawn independently from
        [0, 30) when ``em >= 128`` and from [196, 226) otherwise.
    """
    # Bright surroundings get a dark stroke, everything else a light one.
    low, high = (0, 30) if em >= 128 else (196, 226)
    # One draw per channel, in B, G, R order.
    return tuple(np.random.randint(low, high) for _ in range(3))
def get_hull(axis_list):
    """Compute the convex hull of a point set with ``cv2.convexHull``.

    Args:
        axis_list: Point array accepted by ``cv2.convexHull``.

    Returns:
        The hull points (``returnPoints=True``), requested in clockwise
        orientation (``clockwise=True``).
    """
    return cv2.convexHull(axis_list, clockwise=True, returnPoints=True)
def generate_curve(img, control_points, color, thickness):
    """Draw a cubic Bezier curve (a bent, hair-like line) onto ``img``.

    Args:
        img: Image to draw on; ``cv2.polylines`` draws into it in place.
        control_points: Indexable holding the four Bezier control points at
            positions 0..3 (the caller in this file passes a (4, 2) array).
        color: Colour passed to ``cv2.polylines``.
        thickness: Line thickness passed to ``cv2.polylines``.

    Returns:
        ``img`` with the open polyline drawn on it.
    """
    num_points = 100  # number of samples taken along the curve
    p0 = control_points[0]
    p1 = control_points[1]
    p2 = control_points[2]
    p3 = control_points[3]

    samples = []
    for t in np.linspace(0, 1, num_points):
        u = 1 - t
        # Bernstein weights of the cubic Bezier basis.
        w0 = np.power(u, 3)
        w1 = 3 * np.power(u, 2) * t
        w2 = 3 * u * np.power(t, 2)
        w3 = np.power(t, 3)
        samples.append(w0 * p0 + w1 * p1 + w2 * p2 + w3 * p3)

    # polylines needs integer pixel coordinates.
    polyline = np.array(samples, dtype=np.int32)
    cv2.polylines(img, [polyline], isClosed=False, color=color, thickness=thickness)
    return img
def rotation(img, angle):
    """Rotate ``img`` about its centre by ``angle`` degrees, keeping its size.

    Args:
        img: Image array; only its first two dimensions (rows, cols) are read.
        angle: Rotation angle in degrees, forwarded to
            ``cv2.getRotationMatrix2D`` with ``scale=1``.

    Returns:
        The rotated image with the same (cols, rows) output size, resampled
        with nearest-neighbour interpolation.
    """
    rows, cols = img.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle=angle, scale=1)
    # The 4th positional parameter of warpAffine is ``dst``, not ``flags``;
    # the interpolation mode must be passed by keyword to take effect.
    return cv2.warpAffine(img, M, (cols, rows), flags=cv2.INTER_NEAREST)
def random_jitter(input, dx, dy):
    """Translate an image by ``(dx, dy)`` pixels, keeping its size.

    Args:
        input: Image array; only its first two dimensions (rows, cols) are
            read.
        dx: Horizontal shift placed in the translation matrix.
        dy: Vertical shift placed in the translation matrix.

    Returns:
        The shifted image with the same (cols, rows) output size, resampled
        with nearest-neighbour interpolation.
    """
    translation = np.float32([[1, 0, dx], [0, 1, dy]])  # 2x3 translation matrix
    rows, cols = input.shape[:2]
    # The 4th positional parameter of warpAffine is ``dst``, not ``flags``;
    # the interpolation mode must be passed by keyword to take effect.
    return cv2.warpAffine(input, translation, (cols, rows), flags=cv2.INTER_NEAREST)
def black_edge_crop(img, H, W):
    """Cut a 30-pixel border off a 3-D image and resize back to ``(W, H)``.

    Args:
        img: Image array indexed as ``[row, col, channel]``.
        H: Height used both for the crop bound and the output size.
        W: Width used both for the crop bound and the output size.

    Returns:
        The cropped region resized with ``cv2.resize`` to ``dsize=(W, H)``.
    """
    margin = 30
    inner = img[margin:H - margin, margin:W - margin, :]
    return cv2.resize(inner, dsize=(W, H))
def random_h(img):
    """Shift the hue of a BGR image by one random offset in [-5, 5].

    Only pixels whose hue lies strictly between 5 and 175 are shifted, so the
    shifted value stays inside that range's +/-5 neighbourhood and never
    wraps around.

    Args:
        img: BGR image accepted by ``cv2.cvtColor`` (assumed 8-bit, where
            OpenCV stores hue in the first HSV channel -- TODO confirm for
            other depths).

    Returns:
        A BGR image with the hue channel shifted.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    shift = np.random.randint(-5, 6)

    # Work in a signed type: combining a uint8 array with a negative Python
    # int raises OverflowError under NumPy 2 promotion rules.
    hue = hsv[:, :, 0].astype(np.int32)
    shiftable = (hue > 5) & (hue < 175)
    hue[shiftable] += shift

    hsv[:, :, 0] = hue.astype(hsv.dtype)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
def brightness_adjustment(img):
    """Scale image brightness by a random gain of 0.9, 1.0 or 1.1.

    Args:
        img: Image array passed to ``cv2.addWeighted``.

    Returns:
        ``img`` blended with an all-zero image using weights ``gain`` and
        ``1 - gain``.
    """
    step = np.random.randint(low=-1, high=2, size=None, dtype='l')  # -1, 0 or 1
    gain = (1 + step / 10.)
    black = np.zeros_like(img)
    return cv2.addWeighted(img, gain, black, 1 - gain, 0)
def change_channel(img, sd):
    """Return a copy of a 3-channel image with its channels permuted.

    Args:
        img: Array of shape (rows, cols, 3).
        sd: Permutation selector in 0..5. With the input channels numbered
            (0, 1, 2) the output channel order is
            0 -> (0, 2, 1), 1 -> (0, 1, 2) (unchanged), 2 -> (2, 1, 0),
            3 -> (2, 0, 1), 4 -> (1, 0, 2), 5 -> (1, 2, 0).

    Returns:
        A new contiguous array with the same shape and dtype as ``img``.

    Raises:
        ValueError: If ``img`` does not have exactly three channels or ``sd``
            is not one of 0..5 (previously an unknown ``sd`` failed with an
            unbound local variable).
    """
    channel_orders = {
        0: (0, 2, 1),
        1: (0, 1, 2),
        2: (2, 1, 0),
        3: (2, 0, 1),
        4: (1, 0, 2),
        5: (1, 2, 0),
    }
    if img.ndim != 3 or img.shape[2] != 3:
        raise ValueError('change_channel expects an image with 3 channels')
    order = channel_orders.get(sd)
    if order is None:
        raise ValueError('sd must be an integer in 0..5, got %r' % (sd,))
    # Fancy indexing copies; make the result contiguous for OpenCV consumers.
    return np.ascontiguousarray(img[:, :, list(order)])
def random_flip(input, flag):
    """Flip an array according to ``flag``.

    Args:
        input: Array to flip.
        flag: 1 flips left-right, 2 flips up-down, 3 does both; any other
            value returns ``input`` untouched.

    Returns:
        The flipped array (as produced by ``np.fliplr`` / ``np.flipud``), or
        ``input`` itself when no flip is requested.
    """
    if flag not in (1, 2, 3):
        return input
    flipped = input
    if flag in (1, 3):
        flipped = np.fliplr(flipped)
    if flag in (2, 3):
        flipped = np.flipud(flipped)
    return flipped
def gen_diffs(ex, mask, diff_src, num_diff, H, W):
    """Paste ``num_diff`` random convex patches of ``diff_src`` into ``ex``.

    For every patch a random rectangle is chosen, random points inside it
    (plus its centre) are sampled, and their convex hull is filled into
    ``mask``. The mask is dilated and blurred, then used to blend
    ``diff_src`` over ``ex``. The mask accumulates over iterations, so the
    blend of each iteration covers all patches generated so far.

    Args:
        ex: Image to modify; it is copied, the caller's array is not changed.
        mask: Single-channel label image; it is copied before being filled.
        diff_src: Image supplying the pasted content; must broadcast against
            ``ex`` in the blend.
        num_diff: Number of patches to generate.
        H: Image height used to bound the random rectangles.
        W: Image width used to bound the random rectangles.

    Returns:
        ``(ex, mask)``: the blended uint8 image and the accumulated mask.

    Raises:
        ValueError: If patches are requested for an image smaller than
            100 pixels in either dimension (no rectangle fits).
    """
    if num_diff > 0 and (W < 100 or H < 100):
        raise ValueError('gen_diffs needs H and W of at least 100 pixels')

    # Loop-invariant: the structuring element never changes.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    # A large rectangle needs low=99 < high=W-300 (resp. H-300).
    big_fits = W - 300 > 99 and H - 300 > 99
    big_used = False

    for _ in range(num_diff):
        # Roughly one patch in 30 is large, and at most one per call.
        wants_big = np.random.randint(0, 30) == 0
        if wants_big and not big_used and big_fits:
            x_l = np.random.randint(99, W - 300)
            y_l = np.random.randint(99, H - 300)
            w = np.random.randint(100, 300)
            h = np.random.randint(100, 300)
            big_used = True
            keep_one_in = 5000
        else:
            x_l = np.random.randint(49, W - 50)
            y_l = np.random.randint(49, H - 50)
            w = np.random.randint(10, 50)
            h = np.random.randint(10, 50)
            keep_one_in = 100

        # Each pixel of the rectangle is kept with probability 1/keep_one_in;
        # one vectorised draw replaces a Python-level loop over every pixel.
        picked = np.argwhere(np.random.randint(0, keep_one_in, size=(w, h)) == 1)
        # The rectangle centre is always included so the hull is never empty.
        offsets = np.vstack(([[w // 2, h // 2]], picked))
        pts = (offsets + np.array([x_l, y_l])).astype(np.int32)[np.newaxis]

        hull = get_hull(pts).transpose(1, 0, 2)
        mask = cv2.fillPoly(mask.copy(), hull, color=1)
        ex = cv2.fillPoly(ex.copy(), hull, color=(0, 0, 0))

        # ``iterations`` and ``sigmaY`` are passed by keyword: the next
        # positional slot of both functions is ``dst``.
        mask = cv2.dilate(mask, kernel, iterations=1)
        mask_filled = np.stack((mask, mask, mask), axis=2)
        mask_filled = cv2.GaussianBlur(mask_filled, (3, 3), sigmaX=0, sigmaY=0)

        # Alpha-blend in [0, 1] space using the blurred mask as the weight.
        ex = (255 * ((ex / 255) * (1 - mask_filled) + (diff_src / 255) * mask_filled)).astype(np.uint8)
    return ex, mask
def one_curve(ex, mask, H, W):
    """Draw one random hair-like Bezier curve on ``ex`` and label it in ``mask``.

    Args:
        ex: Image to draw on; a copy is drawn on and returned.
        mask: Label image; the curve is drawn into it with value 1 via
            ``generate_curve``.
        H: Image height used to bound the control points.
        W: Image width used to bound the control points.

    Returns:
        ``(ex, mask)``: the image with the curve drawn in a contrasting gray
        and the mask with the curve marked.
    """
    # Start at least 20 px from the border, then random-walk three more
    # control points with growing step range, clamped to the image.
    px = np.random.randint(20, W - 20)
    py = np.random.randint(20, H - 20)
    anchors = [[px, py]]
    for reach in (20, 40, 80):
        px = max(min(px + np.random.randint(-reach, reach), W - 1), 0)
        py = max(min(py + np.random.randint(-reach, reach), H - 1), 0)
        anchors.append([px, py])
    control_points = np.array(anchors, dtype=np.int32)

    thickness = np.random.randint(1, 3)

    # A scratch mask holding only this curve, used to sample the background.
    curve_only = generate_curve(np.zeros_like(mask), control_points, 1, thickness)
    mask = generate_curve(mask, control_points, 1, thickness)

    on_curve = curve_only == 1
    # Sum over all selected values divided by the pixel count; divided by 3
    # below to obtain the per-channel mean under the curve.
    ex_mean = ex[on_curve].sum() / np.sum(on_curve)
    stroke = random_gray(ex_mean / 3)
    ex = generate_curve(ex.copy(), control_points, stroke, thickness)
    return ex, mask
def noisy(noise_typ, image):
    """Add Gaussian or salt-and-pepper noise to a 3-D image.

    Args:
        noise_typ: ``"gauss"`` or ``"s&p"``.
        image: Image array of shape (rows, cols, channels).

    Returns:
        A new uint8 image for ``"gauss"`` (noise added or subtracted in
        [0, 1] space, then min-max normalised with ``cv2.normalize`` and
        scaled back to 0..255), or a noisy copy of ``image`` for ``"s&p"``.

    Raises:
        ValueError: If ``noise_typ`` is not a supported noise type
            (previously ``None`` was returned silently).
    """
    if noise_typ == "gauss":
        var = 0.001
        sigma = var ** 0.5
        gauss = np.random.normal(0, sigma, image.shape)
        scaled = image / 255
        # The sign of the noise is chosen at random.
        blended = (scaled + gauss) if np.random.randint(0, 2) == 1 else (scaled - gauss)
        blended = cv2.normalize(blended, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        return np.uint8(blended * 255)

    if noise_typ == "s&p":
        s_vs_p = 0.5    # share of salt among the noisy pixels
        amount = 0.004  # noise amount relative to image.size
        out = np.copy(image)
        height, width = image.shape[:2]
        # Salt first (255), then pepper (0).
        for share, value in ((s_vs_p, 255), (1. - s_vs_p, 0)):
            count = int(np.ceil(amount * image.size * share))
            # randint's upper bound is exclusive, so pass the full extent;
            # ``extent - 1`` never reached the last row/column and failed on
            # 1-pixel dimensions.
            rows = np.random.randint(0, height, count)
            cols = np.random.randint(0, width, count)
            out[rows, cols, :3] = value
        return out

    raise ValueError('unsupported noise type: %r' % (noise_typ,))
def add_noise(image):
    """Add Gaussian noise (90% of calls) or a motion blur (10%) to an image.

    Args:
        image: Image array of shape (rows, cols, channels) with values in
            the 0..255 range.

    Returns:
        A new image with the same shape and dtype as ``image``. The Gaussian
        branch uses one noise field shared by all channels and a random
        variance below ``0.3 * 256``; the blur branch applies a horizontal
        or vertical box kernel of random odd size.
    """
    if np.random.rand() < 0.9:
        # Gaussian noise.
        rows, cols, _ = image.shape
        var = np.random.rand() * 0.3 * 256
        sigma = var ** 0.5
        gauss = sigma * np.random.randn(rows, cols)
        # Broadcasting applies the same noise value to every channel.
        noisy = np.clip(image + gauss[:, :, np.newaxis], 0, 255)
        if np.issubdtype(image.dtype, np.integer):
            noisy = np.rint(noisy)
        # Cast back so both branches return the input dtype; the float
        # result would otherwise leak into later display/save steps.
        return noisy.astype(image.dtype)

    # Motion blur.
    sizes = [3, 5, 7, 9, 11, 15]
    size = sizes[np.random.randint(len(sizes))]
    kernel_motion_blur = np.zeros((size, size))
    middle = (size - 1) // 2
    if np.random.rand() < 0.5:
        kernel_motion_blur[middle, :] = 1  # horizontal streak
    else:
        kernel_motion_blur[:, middle] = 1  # vertical streak
    kernel_motion_blur = kernel_motion_blur / size
    return cv2.filter2D(image, -1, kernel_motion_blur)
def virtual_light(img):
    """Brighten or darken a random circular spot to imitate a light source.

    The spot centre lies at least 50 px from every border, its radius is the
    smaller of the two centre coordinates, and the intensity change falls
    off linearly from the centre to zero at the radius.

    Args:
        img: Image array of shape (rows, cols, channels).

    Returns:
        A uint8 image with the light offset added and clipped to 0..255.
    """
    rows, cols = img.shape[:2]

    # Spot centre and radius.
    centerX = np.random.randint(50, cols - 50)
    centerY = np.random.randint(50, rows - 50)
    radius = min(centerX, centerY)

    # Peak intensity change and a random sign.
    strength = 0 + np.random.randint(-20, 20)
    sign = 1 if np.random.randint(0, 2) == 1 else -1

    # Squared distance of every pixel centre to the spot centre.
    row_offsets = centerY - np.arange(rows).reshape(-1, 1) - 0.5
    col_offsets = centerX - np.arange(cols).reshape(1, -1) - 0.5
    sq_dist = row_offsets ** 2 + col_offsets ** 2

    # Linear falloff inside the radius, nothing outside it.
    falloff = strength * (1 - np.sqrt(sq_dist) / radius)
    falloff = np.where(sq_dist >= radius ** 2, 0, falloff)
    delta = np.clip(falloff * sign, -255, 255).astype("int32")

    lit = img + delta[..., np.newaxis]
    return np.clip(lit, 0, 255).astype("uint8")
def random_resize(img, right, up, fg):
    """Zoom in, keep, or zoom out an image while preserving its size.

    Args:
        img: 2-D or 3-D image array.
        right: Left/right padding width used when zooming out.
        up: Top/bottom padding height used when zooming out.
        fg: 0 crops 20 px from every side and resizes back (zoom in);
            1 leaves the image as is; any other value pads with zeros and
            resizes back (zoom out).

    Returns:
        A copy of the resulting image with the original (H, W) size; resizing
        uses nearest-neighbour interpolation.
    """
    H, W = img.shape[:2]
    if fg == 1:
        return img.copy()

    if fg == 0:
        # Slicing the first two axes works for both 2-D and 3-D images.
        resized_from = img[20:H - 20, 20:W - 20]
    else:
        resized_from = cv2.copyMakeBorder(img, int(up), int(up), int(right), int(right),
                                          cv2.BORDER_CONSTANT, value=0)
    img_resized = cv2.resize(resized_from, dsize=(W, H), interpolation=cv2.INTER_NEAREST)
    return img_resized.copy()
def add_time_noise(image):
    """Add or subtract low-amplitude Gaussian noise with random parameters.

    The noise mean and standard deviation are each drawn from
    {0.0, 0.1, ..., 2.9}; the noise is added or subtracted with equal
    probability.

    Args:
        image: Image array with values in the 0..255 range.

    Returns:
        The noisy image clipped to 0..255 and cast to uint8.
    """
    mu = np.random.randint(0, 30) / 10.
    sigma = np.random.randint(0, 30) / 10.
    noise = np.random.normal(mu, sigma, image.shape)
    # Draw of 0 adds the noise, 1 subtracts it.
    if np.random.randint(0, 2) != 0:
        noise = -noise
    return np.clip(image + noise, 0, 255).astype(np.uint8)
def rgb_norm(image):
    """Standardise each of the three channels to zero mean and unit variance.

    Args:
        image: 3-channel image accepted by ``cv2.split``.

    Returns:
        The three standardised channels merged back with ``cv2.merge``. A
        small epsilon (1e-7) is added to each standard deviation to avoid
        division by zero on constant channels.
    """
    eps = 1e-7
    first, second, third = cv2.split(image)
    standardised = tuple(
        (channel - np.mean(channel)) / (np.std(channel) + eps)
        for channel in (first, second, third)
    )
    return cv2.merge(standardised)
def make_datasets(args, img_paths=None):
    """Augment every input image according to the flags in ``args``.

    Each image is read, resized to ``args.HW``, passed through the enabled
    augmentations in a fixed order, then optionally shown next to the
    resized original and/or written beside the input with the suffix
    ``_augumented``.

    Args:
        args: Namespace providing ``HW`` (height, width) and the boolean
            flags ``rot``, ``jit``, ``crop``, ``hue``, ``bright``,
            ``channel_change``, ``flip``, ``curve``, ``noise``, ``blur``,
            ``light``, ``time_noise``, ``show_result`` and ``save_result``.
        img_paths: Optional list of image paths. When omitted, the
            module-level ``imgs`` list built by the script entry point is
            used.

    Returns:
        None. Results are only displayed and/or written to disk.
    """
    if img_paths is None:
        img_paths = imgs  # module-level list set by the script entry point
    H, W = args.HW

    for path in img_paths:
        ## begin
        # Optional: paste random patches from image A into image B.
        # num_diff = np.random.randint(low=12, high=20, size=None, dtype='l')  # number of patches
        ## begin

        # Blank label image; it was previously left undefined, which made
        # the curve augmentation fail with a NameError.
        mask = np.zeros((H, W))

        src = cv2.imread(path)
        if src is None:
            # imread returns None for unreadable files; skip instead of crashing.
            print('skip unreadable image: {}'.format(path))
            continue
        # cv2.resize takes dsize as (width, height).
        src = cv2.resize(src, dsize=(W, H))
        src_show = copy.deepcopy(src)

        if args.rot:
            angle = np.random.randint(low=-10, high=11) / 4
            src = rotation(img=src, angle=angle)
        if args.jit:
            dx, dy = np.random.randint(low=-5, high=6, size=2)
            src = random_jitter(src, dx, dy)
        if args.crop:
            src = black_edge_crop(src, H, W)
        if args.hue:
            src = random_h(src)
        if args.bright:
            src = brightness_adjustment(src)
        if args.channel_change:
            sd = np.random.randint(low=0, high=6)
            src = change_channel(src, sd=sd)
        if args.flip:
            # random_flip only acts on 1, 2 and 3; other draws mean "no flip".
            sd = np.random.randint(low=0, high=6)
            src = random_flip(src, sd)

        ## begin
        # Optional: paste random patches from image A into image B.
        # diff_img = cv2.imread(img_paths[np.random.randint(0, len(img_paths))])
        # diff_src = cv2.resize(diff_img, dsize=(W, H))
        # src, _ = gen_diffs(src, mask, diff_src, num_diff, H, W)
        ## begin

        if args.curve:
            src, mask = one_curve(src, mask, H, W)
        if args.noise:
            # Draw once so the three noise kinds are equally likely; drawing
            # again in the elif skewed the probabilities.
            noise_kind = np.random.randint(0, 3)
            if noise_kind == 0:
                src = noisy(noise_typ='gauss', image=src)
            elif noise_kind == 1:
                src = noisy(noise_typ='s&p', image=src)
            else:
                src = add_noise(src)
        if args.blur:
            k = np.random.randint(1, 3) * 2 + 1  # kernel size 3 or 5
            src = cv2.GaussianBlur(src, (k, k), 0)
        if args.light:
            src = virtual_light(src)

        # NOTE(review): randint(0, 2) only yields 0 or 1, so the zero-padding
        # branch of random_resize is never selected -- confirm whether
        # randint(0, 3) was intended.
        fg = np.random.randint(0, 2)
        right = np.random.randint(10, 15)
        up = np.random.randint(10, 15)
        src = random_resize(src, right, up, fg)

        if args.time_noise:
            src = add_time_noise(src)
        if args.show_result:
            cv2.imshow('left is source, right is augumented', np.hstack((src_show, src)))
            cv2.waitKey(500)
        if args.save_result:
            cv2.imwrite(path.replace('.png', '_augumented.png'), src)
if __name__ == '__main__':
    # Script entry point: parse the augmentation switches, collect the PNG
    # files under --imgs_path and run make_datasets on them.
    import os

    def _str2bool(value):
        """Parse a command-line boolean such as ``True``/``false``/``1``/``0``."""
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if text in ('0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean, got %r' % (value,))

    parser = argparse.ArgumentParser()
    # Two integers, e.g. ``--HW 480 640``; without nargs/type the value
    # arrived as a single string and could not be unpacked into H, W.
    parser.add_argument('--HW', nargs=2, type=int, default=[640, 640], help='height, width of image')
    parser.add_argument('--imgs_path', default="D:\\BUFFER\\Pycharm\\Images\\", help='path of background images')
    # Every switch is parsed with _str2bool: without a type, any value given
    # on the command line (including "False") was a non-empty, truthy string.
    parser.add_argument('--jit', type=_str2bool, default=False, help='translation augument')  # jitter (translation)
    parser.add_argument('--rot', type=_str2bool, default=False, help='rotation augument')  # rotation
    parser.add_argument('--channel_change', type=_str2bool, default=False, help='channel augument')  # channel permutation
    parser.add_argument('--crop', type=_str2bool, default=True, help='random crop')  # border crop
    parser.add_argument('--hue', type=_str2bool, default=False, help='color augument')  # random hue shift
    parser.add_argument('--bright', type=_str2bool, default=False, help='brightness augument')  # random brightness
    parser.add_argument('--flip', type=_str2bool, default=False, help='random flip')  # random flip
    parser.add_argument('--light', type=_str2bool, default=False, help='virtual light')  # simulated light spot
    parser.add_argument('--noise', type=_str2bool, default=False, help='random noise')  # random noise
    parser.add_argument('--blur', type=_str2bool, default=False, help='guass blur')  # random Gaussian blur
    parser.add_argument('--curve', type=_str2bool, default=False, help='add something thin and long')  # thin, hair-like foreign objects
    parser.add_argument('--time_noise', type=_str2bool, default=False, help='motion blur')  # simulated temporal noise
    parser.add_argument('--show_result', type=_str2bool, default=True, help='show result')  # display the result
    parser.add_argument('--save_result', type=_str2bool, default=True, help='save result')  # write the result to disk
    args = parser.parse_args()

    # os.path.join avoids the invalid '\*' escape sequence and works on any OS.
    imgs = glob.glob(os.path.join(args.imgs_path, '*.png'))
    L = len(imgs)
    make_datasets(args)