Basic concepts
The core of semantic segmentation: pixel-level classification
Basic flow of a semantic segmentation algorithm
- Input: an image (RGB)
- Algorithm: a deep learning model
- Output: the classification result (a single-channel map with the same size as the input)
- Training process:
- Input: image + label
- Forward: out = model(image)
- Compute the loss: loss = loss_func(out, label)
- Backward: loss.backward()
- Update the weights: optimizer.minimize(loss)
Evaluation metrics
mAcc (mean accuracy): the per-class pixel accuracy, averaged over all categories.
mIoU (mean Intersection over Union): the per-class IoU between the predicted mask and the label, averaged over all categories.
Here "mean" indicates that the per-class IoU values are averaged over the categories that appear in the labels.
For both metrics, higher is better.
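Both metrics can be computed from a per-class confusion matrix accumulated over prediction/label pixel pairs. Below is a minimal NumPy sketch (confusion_matrix and macc_miou are illustrative helper names, not part of the code used later):

import numpy as np

def confusion_matrix(pred, gt, num_classes, ignore_index=255):
    # pred, gt: integer class maps with the same shape
    valid = gt != ignore_index
    index = num_classes * gt[valid].astype(np.int64) + pred[valid].astype(np.int64)
    hist = np.bincount(index, minlength=num_classes ** 2)
    return hist.reshape(num_classes, num_classes)  # rows: ground truth, columns: prediction

def macc_miou(hist):
    tp = np.diag(hist).astype(np.float64)
    # per-class accuracy: TP / (TP + FN); per-class IoU: TP / (TP + FP + FN)
    acc = tp / np.maximum(hist.sum(axis=1), 1)
    iou = tp / np.maximum(hist.sum(axis=1) + hist.sum(axis=0) - tp, 1)
    return acc.mean(), iou.mean()

In practice the matrix is accumulated over the whole validation set before averaging, and categories that never appear in the labels are usually excluded from the mean.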
Concrete implementation
Data (data preprocessing)
import random
import cv2
import numpy as np
import paddle.fluid as fluid
import os
class Transform(object):
def __init__(self, size=256):
self.size=size
def __call__(self, input, label):
input = cv2.resize(input, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, (self.size, self.size), interpolation=cv2.INTER_NEAREST)  # resize the label, not the input; nearest keeps class ids intact
return input, label
class BasicDataLoader(object):
def __init__(self,
image_folder,
image_list_file,
transform=None,
shuffle=True):
        # store the constructor arguments
self.image_folder = image_folder
self.image_list_file = image_list_file
self.transform = transform
self.shuffle = shuffle
self.data_list = self.read_list()
def read_list(self):
        # build the list of (image_path, label_path) pairs from the list file
data_list = []
with open(self.image_list_file) as infile:
for line in infile:
data_path = os.path.join(self.image_folder,line.split()[0])
label_path = os.path.join(self.image_folder, line.split()[1])
data_list.append((data_path, label_path))
        if self.shuffle:
            random.shuffle(data_list)
return data_list
def preprocess(self, data, label):
        # preprocessing: check that image and label sizes match, apply the transform, add a channel axis to the label
h, w, c = data.shape
h_gt, w_gt = label.shape
        assert h == h_gt, "image and label heights do not match"
        assert w == w_gt, "image and label widths do not match"
if self.transform:
data, label = self.transform(data, label)
label = label[:, :, np.newaxis]
return data, label
def __len__(self):
return len(self.data_list)
def __call__(self):
        # when called, act as a generator that yields (data, label) pairs
for data_path, label_path in self.data_list:
data = cv2.imread(data_path, cv2.IMREAD_COLOR)
data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
data, label = self.preprocess(data, label)
yield data, label
def main():
batch_size = 5
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
transform = Transform(256)
# create BasicDataloader instance
basic_dataloader = BasicDataLoader(
image_folder = './dummy_data',
image_list_file = './dummy_data/list.txt',
transform = transform,
shuffle = True
)
        # create a fluid.io.DataLoader instance (create it first, then attach our own dataset loader)
dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        # set our BasicDataLoader as the sample generator for the fluid dataloader
dataloader.set_sample_generator(basic_dataloader,
batch_size=batch_size,
places=place)
num_epoch = 2
for epoch in range(1, num_epoch+1):
print(f'Epoch [{epoch}/{num_epoch}]:')
for idx, (data, label) in enumerate(dataloader):
print(f'iter {idx}, Data shape: {data.shape}, Label shape:{label.shape}')
if __name__ == '__main__':
main()
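For reference, read_list expects each line of image_list_file to hold two whitespace-separated paths relative to image_folder, the image first and its label mask second. A hypothetical list.txt (the second file name is made up for illustration):

JPEGImages/2008_000064.jpg GroundTruth_trainval_png/2008_000064.png
JPEGImages/2008_000075.jpg GroundTruth_trainval_png/2008_000075.png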
Beyond this basic transform, we define a set of classes implementing common data preprocessing operations such as flipping, cropping, and border padding.
import cv2
import numpy as np
import random
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, label=None):
for t in self.transforms:
image, label = t(image, label)
return image, label
class Normalize(object):
def __init__(self, mean_val, std_val, val_scale=1):
# set val_scale = 1 if mean and std are in range (0,1)
# set val_scale to other value, if mean and std are in range (0,255)
self.mean = np.array(mean_val, dtype=np.float32)
self.std = np.array(std_val, dtype=np.float32)
self.val_scale = 1/255.0 if val_scale==1 else 1
def __call__(self, image, label=None):
image = image.astype(np.float32)
image = image * self.val_scale
image = image - self.mean
image = image * (1 / self.std)
return image, label
class ConvertDataType(object):
def __call__(self, image, label=None):
if label is not None:
label = label.astype(np.int64)
return image.astype(np.float32), label
# Pad: add borders around the image. If size is an int, the padded image is
# a square of that size; if it is a tuple or list, it gives (height, width).
class Pad(object):
def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
# set val_scale to 1 if mean_val is in range (0, 1)
# set val_scale to 255 if mean_val is in range (0, 255)
factor = 255 if val_scale == 1 else 1
if isinstance(size, int):
self.size_height, self.size_width = size, size
else:
self.size_height, self.size_width = size[0], size[1]
self.ignore_label = ignore_label
self.mean_val=mean_val
# from 0-1 to 0-255
if isinstance(self.mean_val, (tuple,list)):
self.mean_val = [int(x* factor) for x in self.mean_val]
else:
self.mean_val = int(self.mean_val * factor)
def __call__(self, image, label=None):
h, w, c = image.shape
pad_h = max(self.size_height - h, 0)
pad_w = max(self.size_width - w, 0)
pad_h_half = int(pad_h / 2)
pad_w_half = int(pad_w / 2)
if pad_h > 0 or pad_w > 0:
image = cv2.copyMakeBorder(image,
top=pad_h_half,
left=pad_w_half,
bottom=pad_h - pad_h_half,
right=pad_w - pad_w_half,
borderType=cv2.BORDER_CONSTANT,
value=self.mean_val)
if label is not None:
label = cv2.copyMakeBorder(label,
top=pad_h_half,
left=pad_w_half,
bottom=pad_h - pad_h_half,
right=pad_w - pad_w_half,
borderType=cv2.BORDER_CONSTANT,
value=self.ignore_label)
return image, label
# CenterCrop: output_size may be an int, or a (height, width) tuple or list
class CenterCrop(object):
def __init__(self, output_size):
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
self.output_size = output_size
def _get_params(self, img):
th, tw = self.output_size
h, w, _ = img.shape
assert th <= h and tw <= w, "output size is bigger than image size"
x = int(round((w - tw) / 2.0))
y = int(round((h - th) / 2.0))
return x, y
def __call__(self, img, label=None):
x, y = self._get_params(img)
th, tw = self.output_size
if label is not None:
return img[y:y + th, x:x + tw], label[y:y + th, x:x + tw]
else:
return img[y:y + th, x:x + tw], label
# Resize: size may be an int, or a tuple or list
class Resize(object):
def __init__(self, size, interpolation=1):
if isinstance(size, int):
self.size = (size, size)
else:
self.size = size
self.interpolation = interpolation
    def __call__(self, img, label=None):
        # interpolation must be passed as a keyword: the third positional
        # argument of cv2.resize is dst, not interpolation
        if label is not None:
            return cv2.resize(img, self.size, interpolation=self.interpolation), \
                   cv2.resize(label, self.size, interpolation=cv2.INTER_NEAREST)  # nearest keeps class ids intact
        else:
            return cv2.resize(img, self.size, interpolation=self.interpolation), label
# RandomFlip: with probability 0.5, flip the image. code=0 flips vertically,
# code=1 horizontally, code=-1 both vertically and horizontally.
class RandomFlip(object):
def __init__(self, code=0):
self.prob = 0.5
self.code = code
def __call__(self, img, label=None):
if np.random.random() < self.prob:
if label is not None:
return cv2.flip(img, self.code), cv2.flip(label, self.code)
else:
return cv2.flip(img, self.code), label
return img, label
# RandomCrop: crop a patch of the given size from a random location.
# img_size may be an int, or a (width, height) tuple or list.
class RandomCrop(object):
def __init__(self, img_size):
if isinstance(img_size, int):
self.img_width, self.img_height = img_size, img_size
else:
self.img_width, self.img_height = img_size[0], img_size[1]
def __call__(self, img, label=None):
return self.Random_crop(img, label)
def Random_crop(self, img, label):
height, width, _ = img.shape
width_range = width - self.img_width
height_range = height - self.img_height
        random_ws = np.random.randint(width_range + 1)   # +1 so a crop equal to the image size is valid
        random_hs = np.random.randint(height_range + 1)
random_wd = self.img_width + random_ws
random_hd = self.img_height + random_hs
img = img[random_hs:random_hd, random_ws:random_wd]
if label is not None:
label = label[random_hs:random_hd, random_ws:random_wd]
return img, label
# Scale: resize the image by a float ratio
class Scale(object):
def __init__(self, ratio, interpolation=1):
self.ratio = ratio
self.interpolation = interpolation
    def __call__(self, img, label=None):
        h, w, _ = img.shape  # img.shape is (height, width, channels)
        # cv2.resize takes dsize as (width, height); interpolation must be a keyword
        if label is not None:
            return cv2.resize(img, (int(w * self.ratio), int(h * self.ratio)), interpolation=self.interpolation), \
                   cv2.resize(label, (int(w * self.ratio), int(h * self.ratio)), interpolation=cv2.INTER_NEAREST)
        else:
            return cv2.resize(img, (int(w * self.ratio), int(h * self.ratio)), interpolation=self.interpolation), label
# RandomScale: resize by a ratio drawn uniformly from a range.
# The input may be a single float (fixed ratio) or a (low, high) tuple or list.
class RandomScale(object):
    def __init__(self, range_data, interpolation=1):
        self.range_data = range_data
        self.interpolation = interpolation
    def __call__(self, img, label=None):
        # draw a fresh ratio on every call, so each sample gets its own scale
        # (sampling in __init__ would fix the ratio for the object's lifetime)
        if isinstance(self.range_data, (int, float)):
            ratio = self.range_data
        else:
            ratio = random.uniform(self.range_data[0], self.range_data[1])
        h, w, _ = img.shape
        if label is not None:
            return cv2.resize(img, (int(w * ratio), int(h * ratio)), interpolation=self.interpolation), \
                   cv2.resize(label, (int(w * ratio), int(h * ratio)), interpolation=cv2.INTER_NEAREST)
        else:
            return cv2.resize(img, (int(w * ratio), int(h * ratio)), interpolation=self.interpolation), label
def main():
image = cv2.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
label = cv2.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')
# crop_size
img_1 = RandomCrop((300, 200))(image)[0]
cv2.imwrite('RandomCrop.png', img_1)
# Transform: RandomScale, RandomFlip, Pad, RandomCrop
img_2 = RandomScale((0.5, 3))(image)[0]
img_2 = RandomFlip(0)(img_2)[0]
img_2 = Pad(700)(img_2)[0]
img_2 = RandomCrop((400, 300))(img_2)[0]
    cv2.imwrite('Transform.png', img_2)
for i in range(10):
# call transform
img = RandomScale((0.5, 3))(image)[0]
img = RandomFlip(0)(img)[0]
img = Pad((700, 700))(img)[0]
img = RandomCrop((400, 300))(img)[0]
# save image
cv2.imwrite('Transform_{}.png'.format(i+1), img)
print('Transform_{}.png'.format(i+1) + ' has been saved to disk')
if __name__ == "__main__":
main()
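The training script in the Training section below imports a TrainAugmentation class from basic_data_preprocessing, which this post never lists. A plausible minimal version simply composes the classes defined above (the pipeline order and the default mean/std here are assumptions, not the author's exact settings):

class TrainAugmentation(object):
    def __init__(self, image_size=256, mean_val=0, std_val=1.0):
        # assumed order: random scale and flip first, then pad up to at least
        # the crop size, random-crop to a fixed size, convert dtypes, normalize
        self.augment = Compose([RandomScale((0.5, 2.0)),
                                RandomFlip(1),
                                Pad(image_size, mean_val=mean_val),
                                RandomCrop(image_size),
                                ConvertDataType(),
                                Normalize(mean_val, std_val)])
    def __call__(self, image, label):
        return self.augment(image, label)

Padding before the random crop guarantees the image is at least crop-sized even when the random scale shrinks it.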
Net (building the deep learning network)
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, Pool2D  # layers needed by the model
from paddle.fluid.dygraph import base
import numpy as np
np.set_printoptions(precision=2)  # print precision for numpy output
class BasicModel(fluid.dygraph.Layer):
# BasicModel contains:
# 1. pool: 4x4 max pool op, with stride 4
# 2. conv: 3x3 kernel size, takes RGB image as input and output num_classes channels,
# note that the feature map size should be the same
# 3. upsample: upsample to input size
#
# TODOs:
# 1. The model takes an random input tensor with shape (1, 3, 8, 8)
# 2. The model outputs a tensor with same HxW size of the input, but C = num_classes
# 3. Print out the model output in numpy format
    # Analogous to the PyTorch workflow: subclass fluid.dygraph.Layer,
    # define the layers in __init__, then chain them together in forward.
def __init__(self, num_classes=59):
super(BasicModel, self).__init__()
        self.pool1 = Pool2D(pool_size=4, pool_stride=4, pool_type='max')  # 4x4 max pool, stride 4
        self.conv2 = Conv2D(3, num_classes, 3, padding=1)  # 3x3 conv: RGB -> num_classes channels, same HxW
def forward(self, inputs):
        x = self.pool1(inputs)
        x = self.conv2(x)
        # upsample back to the input spatial size
        x = fluid.layers.interpolate(x, out_shape=(inputs.shape[2], inputs.shape[3]))
return x
def main():
place = paddle.fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
model = BasicModel(num_classes=59)
model.eval()
        input_data = np.random.uniform(-1, 1, [1, 3, 8, 8]).astype('float32')
print('Input data shape: ', input_data.shape)
        input_data = base.to_variable(input_data)  # numpy array -> dygraph variable
        output_data = model(input_data)
        output_data = output_data.numpy()
print('Output data shape: ', output_data.shape)
if __name__ == "__main__":
main()
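With num_classes=59, the 4x4 stride-4 max pool reduces the 8x8 input to 2x2, the padded 3x3 convolution keeps that size while mapping 3 channels to 59, and interpolate restores the input resolution, so the script should print an input shape of (1, 3, 8, 8) and an output shape of (1, 59, 8, 8).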
Training (network training)
Computing the loss
import paddle.fluid as fluid

eps = 1e-8  # avoid division by zero when every pixel is ignored

def Basic_SegLoss(preds, labels, ignore_index=255):
    n, c, h, w = preds.shape
    # softmax_with_cross_entropy expects the class logits in the last dimension
    preds = fluid.layers.transpose(preds, [0, 2, 3, 1])
    # mask out pixels marked with the ignore label
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')
    loss = fluid.layers.softmax_with_cross_entropy(preds, labels)
    loss = loss * mask
    # average over valid (non-ignored) pixels only
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)
    return avg_loss
Define the training process for a single epoch
def train(dataloader, model, criterion, optimizer, epoch, total_batch):
model.train()
train_loss_meter = AverageMeter()
for batch_id, data in enumerate(dataloader):
image = data[0]
label = data[1]
image = fluid.layers.transpose(image, (0, 3, 1, 2))
pred = model(image)
loss = criterion(pred, label)
loss.backward()
optimizer.minimize(loss)
model.clear_gradients()
n = image.shape[0]
train_loss_meter.update(loss.numpy()[0], n)
print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
f"Step[{batch_id:04d}/{total_batch:04d}], " +
f"Average Loss: {train_loss_meter.avg:4f}")
return train_loss_meter.avg
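The train function relies on an AverageMeter imported from utils, which is not listed in the post. A minimal sketch of the usual implementation:

class AverageMeter(object):
    # keeps a running average of a scalar, e.g. the per-batch loss
    def __init__(self):
        self.reset()
    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
    def update(self, val, n=1):
        self.val = val       # most recent value
        self.sum += val * n  # weighted by the number of samples
        self.count += n
    @property
    def avg(self):
        return self.sum / max(self.count, 1)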
Chained together into a complete training script (the train function above is defined in this same file):
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation
parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='basic')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./work/dummy_data')
parser.add_argument('--image_list_file', type=str, default='./work/dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./output')
parser.add_argument('--save_freq', type=int, default=2)
args = parser.parse_args()
def main():
# Step 0: preparation
place = paddle.fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
# Step 1: Define training dataloader
basic_augmentation = TrainAugmentation(image_size=256)
basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
image_list_file=args.image_list_file,
transform=basic_augmentation,
shuffle=True)
train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
use_multiprocess=True)
train_dataloader.set_sample_generator(basic_dataloader,
batch_size=args.batch_size,
places=place)
total_batch = int(len(basic_dataloader) / args.batch_size)
# Step 2: Create model
if args.net == "basic":
model = BasicModel()
else:
            raise NotImplementedError(f"args.net: {args.net} is not supported!")
# Step 3: Define criterion and optimizer
criterion = Basic_SegLoss
# create optimizer
optimizer = AdamOptimizer(learning_rate=args.lr,
parameter_list=model.parameters())
# Step 4: Training
for epoch in range(1, args.num_epochs+1):
train_loss = train(train_dataloader,
model,
criterion,
optimizer,
epoch,
total_batch)
print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")
if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss:.4f}")
                # save model and optimizer states
model_dict = model.state_dict()
fluid.save_dygraph(model_dict, model_path)
optimizer_dict = optimizer.state_dict()
fluid.save_dygraph(optimizer_dict, model_path)
print(f'----- Save model: {model_path}.pdparams')
print(f'----- Save optimizer: {model_path}.pdopt')
if __name__ == "__main__":
main()
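Assuming the script above is saved as train_basic.py (the filename is an assumption), it can be run with the argparse defaults, overriding flags as needed:

python train_basic.py --net basic --lr 0.001 --num_epochs 10 --batch_size 4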