PaddlePaddle image segmentation from beginner to practice (1): semantic segmentation basics

Basic concept

The core of the semantic segmentation algorithm: pixel-level classification

Basic flow of semantic segmentation algorithm

  1. Input: Image (RGB)
  2. Algorithm: deep learning model
  3. Output: classification result (single-channel image consistent with the input size)
  4. Training process:
    1. Input: image + label
    2. Forward: out = model(image)
    3. Calculate the loss: loss = loss_func(out,label)
    4. Backward pass: loss.backward()
    5. Update weight: optimizer.minimize(loss) 

Evaluation index

mAcc

Result style:

Actual calculation process:

mIOU

Here, "mean" indicates that the per-class IoU is averaged over all classes that appear in the labels: $\mathrm{mIoU} = \frac{1}{C}\sum_{c=1}^{C}\frac{TP_c}{TP_c + FP_c + FN_c}$.

For both evaluation metrics, higher is better.

Specific implementation process

Figure (data preprocessing related)

import random
import cv2
import numpy as np
import paddle.fluid as fluid
import os


class Transform(object):
    """Resize an image/label pair to a fixed square size.

    Args:
        size: Edge length in pixels of the square output (default 256).
    """

    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        """Return ``(input, label)``, both resized to ``(size, size)``.

        Args:
            input: Image array accepted by ``cv2.resize``.
            label: Label map array accepted by ``cv2.resize``.
        """
        dsize = (self.size, self.size)
        input = cv2.resize(input, dsize, interpolation=cv2.INTER_LINEAR)
        # Resize the label itself (the original resized `input` a second time),
        # and use nearest-neighbour so that no blended, non-existent class ids
        # are created along region borders.
        label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)

        return input, label


class BasicDataLoader(object):
    """Sample generator that yields ``(image, label)`` pairs from a list file.

    Each non-blank line of ``image_list_file`` holds two whitespace-separated
    paths (image first, label second), both relative to ``image_folder``.

    Args:
        image_folder: Directory the paths in the list file are joined onto.
        image_list_file: Text file listing the image/label pairs.
        transform: Optional callable ``transform(data, label)`` returning the
            transformed ``(data, label)`` pair.
        shuffle: When true, the sample list is shuffled once after reading;
            when false, the order of the list file is kept.
    """

    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        # Parameters supplied by the caller.
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        """Read the list file and return a list of ``(data_path, label_path)``."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                data_path = os.path.join(self.image_folder, fields[0])
                label_path = os.path.join(self.image_folder, fields[1])
                data_list.append((data_path, label_path))
        # Honour the `shuffle` flag; previously the list was always shuffled.
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        """Check sizes, apply the optional transform and add a label channel.

        Args:
            data: Image array of shape ``(h, w, c)``.
            label: Label array of shape ``(h, w)``.

        Returns:
            ``(data, label)`` where ``label`` has a trailing axis of length 1.

        Raises:
            AssertionError: If image and label height/width differ.
        """
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, f"Image height {h} does not match label height {h_gt}"
        assert w == w_gt, f"Image width {w} does not match label width {w_gt}"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        """Return the number of samples listed in the list file."""
        return len(self.data_list)

    def __call__(self):
        """Iterate over the samples, yielding preprocessed ``(data, label)``.

        The image is read in colour and converted from BGR to RGB; the label
        is read as a single-channel grayscale image.

        Raises:
            FileNotFoundError: If an image or label file cannot be read.
        """
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            # cv2.imread signals failure by returning None instead of raising.
            if data is None:
                raise FileNotFoundError(f"Cannot read image file: {data_path}")
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if label is None:
                raise FileNotFoundError(f"Cannot read label file: {label_path}")
            data, label = self.preprocess(data, label)

            yield data, label


def main():
    """Iterate the dummy dataset for two epochs and print each batch's shapes."""
    place = fluid.CPUPlace()
    batch_size = 5
    num_epoch = 2

    with fluid.dygraph.guard(place):
        # Build the custom sample generator with a 256-pixel resize transform.
        transform = Transform(256)
        basic_dataloader = BasicDataLoader(image_folder='./dummy_data',
                                           image_list_file='./dummy_data/list.txt',
                                           transform=transform,
                                           shuffle=True)

        # Create the fluid DataLoader first (it works together with the
        # Paddle dataset loading machinery) ...
        dataloader = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        # ... then attach our own sample generator to it.
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)

        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, batch in enumerate(dataloader):
                data, label = batch
                print(f'iter {idx}, Data shape: {data.shape}, Label shape:{label.shape}')


if __name__ == '__main__':
    main()

For data augmentation, we additionally define a set of transform classes that perform common preprocessing operations such as flipping, cropping, and border padding.

import cv2
import numpy as np
import random

class Compose(object):
    """Chain several ``(image, label)`` transforms into a single callable.

    Args:
        transforms: Iterable of callables, each taking ``(image, label)`` and
            returning a new ``(image, label)`` pair.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Feed the pair through every transform in order and return it."""
        current_image, current_label = image, label
        for step in self.transforms:
            current_image, current_label = step(current_image, current_label)
        return current_image, current_label


class Normalize(object):
    """Scale an image, subtract a mean and divide by a standard deviation.

    Args:
        mean_val: Mean value(s); stored as a float32 array.
        std_val: Standard deviation value(s); stored as a float32 array.
        val_scale: Pass 1 when ``mean_val``/``std_val`` are in the range
            (0, 1): pixel values are then multiplied by 1/255 first. Any other
            value (mean/std in the range (0, 255)) leaves pixels unscaled.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        if val_scale == 1:
            self.val_scale = 1 / 255.0
        else:
            self.val_scale = 1

    def __call__(self, image, label=None):
        """Return the normalised float32 image and the untouched label."""
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        return centered * (1 / self.std), label


class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""

    def __call__(self, image, label=None):
        """Return ``(image as float32, label as int64 or None)``."""
        converted_label = None if label is None else label.astype(np.int64)
        return image.astype(np.float32), converted_label


# Add a border. If size is an int, the padded image is a square of that size;
# if it is a tuple or list, its two values give the height and width, in that order.
class Pad(object):
    """Pad an image (and label) with a constant border up to a minimum size.

    Args:
        size: Target size; an int gives a square, otherwise ``size[0]`` is
            the height and ``size[1]`` the width.
        ignore_label: Constant used to fill the padded area of the label.
        mean_val: Constant (scalar, tuple or list) used to fill the padded
            area of the image.
        val_scale: Pass 1 when ``mean_val`` is in the range (0, 1); it is
            then multiplied by 255. Any other value (``mean_val`` already in
            the range (0, 255)) leaves it unscaled. The result is truncated
            to int either way.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        if isinstance(size, int):
            size = (size, size)
        self.size_height, self.size_width = size[0], size[1]
        self.ignore_label = ignore_label

        # Bring mean_val from the 0-1 range to the 0-255 range when requested.
        factor = 255 if val_scale == 1 else 1
        self.mean_val = mean_val
        if isinstance(self.mean_val, (tuple, list)):
            self.mean_val = [int(channel * factor) for channel in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)

    def __call__(self, image, label=None):
        """Return the padded ``(image, label)``; inputs already big enough pass through."""
        h, w, _ = image.shape
        pad_h = max(self.size_height - h, 0)
        pad_w = max(self.size_width - w, 0)

        if not (pad_h > 0 or pad_w > 0):
            return image, label

        # Split the padding between both sides; the odd pixel, if any, goes
        # to the bottom/right edge.
        top = int(pad_h / 2)
        left = int(pad_w / 2)
        border = dict(top=top,
                      left=left,
                      bottom=pad_h - top,
                      right=pad_w - left,
                      borderType=cv2.BORDER_CONSTANT)

        image = cv2.copyMakeBorder(image, value=self.mean_val, **border)
        if label is not None:
            label = cv2.copyMakeBorder(label, value=self.ignore_label, **border)
        return image, label


# The input is an int, or a tuple or list.
class CenterCrop(object):
    """Crop a centred window out of an image and, if given, its label.

    Args:
        output_size: An int for a square crop, or a ``(height, width)`` pair.
    """

    def __init__(self, output_size):
        is_square = isinstance(output_size, int)
        self.output_size = (output_size, output_size) if is_square else output_size

    def _get_params(self, img):
        """Return the ``(x, y)`` top-left corner of the centred crop window."""
        crop_h, crop_w = self.output_size
        img_h, img_w, _ = img.shape
        assert crop_h <= img_h and crop_w <= img_w, "output size is bigger than image size"
        left = int(round((img_w - crop_w) / 2.0))
        top = int(round((img_h - crop_h) / 2.0))
        return left, top

    def __call__(self, img, label=None):
        """Return the cropped ``(img, label)``; a ``None`` label stays ``None``."""
        left, top = self._get_params(img)
        crop_h, crop_w = self.output_size
        window = (slice(top, top + crop_h), slice(left, left + crop_w))
        cropped_img = img[window]
        if label is None:
            return cropped_img, label
        return cropped_img, label[window]


# Resize the image; size may be an int, or a tuple or list.
class Resize(object):
    """Resize an image (and label) to a fixed size.

    Args:
        size: An int for a square output, or a pair that is passed to
            ``cv2.resize`` as ``dsize``.
        interpolation: OpenCV interpolation flag used for the image
            (default 1).
    """

    def __init__(self, size, interpolation=1):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        """Return the resized ``(img, label)``; a ``None`` label stays ``None``."""
        # `interpolation` must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, so the flag was previously not
        # being used as the interpolation mode.
        img = cv2.resize(img, self.size, interpolation=self.interpolation)
        if label is not None:
            # Labels hold class ids, so use nearest-neighbour to avoid
            # creating blended, non-existent ids.
            label = cv2.resize(label, self.size, interpolation=cv2.INTER_NEAREST)
        return img, label


# Random flip: code=0 flips vertically, code=1 flips horizontally, code=-1 flips both horizontally and vertically.
class RandomFlip(object):
    """Flip an image (and label) with probability ``self.prob`` (0.5).

    Args:
        code: Flip code handed to ``cv2.flip``.
    """

    def __init__(self, code=0):
        self.prob = 0.5
        self.code = code

    def __call__(self, img, label=None):
        """Return either the flipped pair or the inputs unchanged."""
        # Draw one random number per call; bail out early when not flipping.
        if not (np.random.random() < self.prob):
            return img, label

        flipped_img = cv2.flip(img, self.code)
        if label is None:
            return flipped_img, label
        return flipped_img, cv2.flip(label, self.code)


# Random crop: cut a region of the given size from a random location in the image.
# The size may be an int, tuple, or list.
class RandomCrop(object):
    """Crop a randomly placed window of fixed size from an image and label.

    Args:
        img_size: An int for a square crop, or a ``(width, height)`` pair.
    """

    def __init__(self, img_size):
        if isinstance(img_size, int):
            self.img_width, self.img_height = img_size, img_size
        else:
            self.img_width, self.img_height = img_size[0], img_size[1]

    def __call__(self, img, label=None):
        """Return the randomly cropped ``(img, label)``."""
        return self.Random_crop(img, label)

    def Random_crop(self, img, label):
        """Crop ``img`` (and ``label`` when not ``None``) at a random offset.

        Raises:
            ValueError: If the crop window is larger than the image.
        """
        height, width = img.shape[:2]
        width_range = width - self.img_width
        height_range = height - self.img_height
        if width_range < 0 or height_range < 0:
            raise ValueError(
                "crop size ({}, {}) is bigger than image size ({}, {})".format(
                    self.img_width, self.img_height, width, height))
        # randint's upper bound is exclusive, so add 1: this makes the last
        # valid offset reachable and also handles a crop that is exactly as
        # large as the image (range 0), which previously raised ValueError.
        random_ws = np.random.randint(width_range + 1)
        random_hs = np.random.randint(height_range + 1)
        random_wd = self.img_width + random_ws
        random_hd = self.img_height + random_hs
        img = img[random_hs:random_hd, random_ws:random_wd]
        if label is not None:
            label = label[random_hs:random_hd, random_ws:random_wd]
        return img, label


# Scale by a fixed ratio; the input is a float.
class Scale(object):
    """Scale an image (and label) by a fixed ratio, keeping the aspect ratio.

    Args:
        ratio: Factor applied to both height and width.
        interpolation: OpenCV interpolation flag used for the image
            (default 1).
    """

    def __init__(self, ratio, interpolation=1):
        self.ratio = ratio
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        """Return the scaled ``(img, label)``; a ``None`` label stays ``None``."""
        # shape is (height, width, ...); cv2.resize expects dsize=(width, height).
        height, width = img.shape[:2]
        dsize = (int(width * self.ratio), int(height * self.ratio))

        # `interpolation` must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, not the interpolation flag.
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            # Nearest-neighbour keeps label values valid class ids.
            label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return img, label


# Random scale; the input is a float, or a tuple or list.
class RandomScale(object):
    """Scale an image (and label) by a ratio drawn uniformly from a range.

    Args:
        range_data: A single number (fixed ratio) or a ``(low, high)`` pair
            from which a new ratio is drawn on every call.
        interpolation: OpenCV interpolation flag used for the image
            (default 1).

    Attributes:
        ratio: The ratio used by the most recent call (or the initial draw).
    """

    def __init__(self, range_data, interpolation=1):
        if isinstance(range_data, (int, float)):
            self._ratio_range = None
            self.ratio = range_data
        else:
            self._ratio_range = (range_data[0], range_data[1])
            # Initial draw keeps `ratio` populated right after construction.
            self.ratio = random.uniform(*self._ratio_range)
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        """Return the scaled ``(img, label)``; a ``None`` label stays ``None``."""
        if self._ratio_range is not None:
            # Re-draw per call; sampling only in __init__ made a reused
            # instance (e.g. inside a pipeline) apply one fixed ratio forever.
            self.ratio = random.uniform(*self._ratio_range)

        # shape is (height, width, ...); cv2.resize expects dsize=(width, height).
        height, width = img.shape[:2]
        dsize = (int(width * self.ratio), int(height * self.ratio))

        # `interpolation` must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, not the interpolation flag.
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            # Nearest-neighbour keeps label values valid class ids.
            label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return img, label


def main():
    """Run the augmentation classes on one sample image and save the results.

    Writes ``RandomCrop.png``, ``Transform.png`` and ``Transform_1.png`` to
    ``Transform_10.png`` into the current working directory.

    Raises:
        FileNotFoundError: If the sample image cannot be read.
    """
    image_path = './work/dummy_data/JPEGImages/2008_000064.jpg'
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure; fail here with a clear message
    # rather than inside the first transform.
    if image is None:
        raise FileNotFoundError('Cannot read image: {}'.format(image_path))

    # crop_size
    img_1 = RandomCrop((300, 200))(image)[0]
    cv2.imwrite('RandomCrop.png', img_1)

    # Transform: RandomScale, RandomFlip, Pad, RandomCrop
    img_2 = RandomScale((0.5, 3))(image)[0]
    img_2 = RandomFlip(0)(img_2)[0]
    img_2 = Pad(700)(img_2)[0]
    img_2 = RandomCrop((400, 300))(img_2)[0]
    # Output name fixed from the typo 'Transfoimgrm.png'.
    cv2.imwrite('Transform.png', img_2)

    for i in range(10):
        # call transform
        img = RandomScale((0.5, 3))(image)[0]
        img = RandomFlip(0)(img)[0]
        img = Pad((700, 700))(img)[0]
        img = RandomCrop((400, 300))(img)[0]
        #  save image
        out_name = 'Transform_{}.png'.format(i + 1)
        cv2.imwrite(out_name, img)
        print(out_name + ' has been saved to disk')

if __name__ == "__main__":
    main()

Net (deep learning network construction)

import paddle
import paddle.fluid as fluid                    
from paddle.fluid.dygraph import Conv2D,Pool2D  #TODO 导入需要的层    
from paddle.fluid.dygraph import base  #TODO   
import numpy as np
np.set_printoptions(precision=2)  #打印精度


class BasicModel(fluid.dygraph.Layer):
    """Minimal segmentation network: pool -> conv -> upsample.

    BasicModel contains:
    1. pool:   4x4 max pool op, with stride 4
    2. conv:   3x3 kernel size, takes RGB image as input and output
               num_classes channels, note that the feature map size should
               be the same
    3. upsample: upsample to input size

    TODOs:
    1. The model takes an random input tensor with shape (1, 3, 8, 8)
    2. The model outputs a tensor with same HxW size of the input, but
       C = num_classes
    3. Print out the model output in numpy format
    """

    # Same workflow as building a model in PyTorch: inherit from
    # fluid.dygraph.Layer, define the layers in __init__, then chain them
    # together inside forward.

    def __init__(self, num_classes=59):
        super().__init__()

        self.pool1 = Pool2D(pool_size=4, pool_stride=4)
        self.conv2 = Conv2D(3, num_classes, 3, padding=1)

    def forward(self, inputs):
        """Return per-class scores resized to the spatial size of ``inputs``."""
        target_hw = (inputs.shape[2], inputs.shape[3])
        features = self.conv2(self.pool1(inputs))
        return fluid.layers.interpolate(features, out_shape=target_hw)

def main():
    """Push one random (1, 3, 8, 8) tensor through BasicModel and print shapes."""
    # Use the GPU when this Paddle build supports CUDA; otherwise fall back
    # to the CPU instead of failing on a hard-coded CUDAPlace(0).
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        model.eval()
        input_data = np.random.uniform(-1, 1, [1, 3, 8, 8]).astype('float32')
        print('Input data shape: ', input_data.shape)
        # Wrap the numpy array as a dygraph variable, run the model, and
        # convert the result back to numpy.
        input_data = base.to_variable(input_data)
        output_data = model(input_data)
        output_data = output_data.numpy()
        print('Output data shape: ', output_data.shape)

if __name__ == "__main__":
    main()

Training (network training test)

Calculate loss

def Basic_SegLoss(preds, labels, ignore_index=255, eps=1e-8):
    """Mean softmax cross-entropy over the pixels that are not ignored.

    Args:
        preds: Network output; transposed here with perm [0, 2, 3, 1] so the
            class axis becomes the last one.
        labels: Integer class ids, compared against ``ignore_index``
            (presumably shaped (n, h, w, 1) to match the transposed preds;
            verify against the data loader).
        ignore_index: Label value excluded from the loss.
        eps: Small constant added to the denominator so a batch made up
            entirely of ignored pixels does not divide by zero.

    Returns:
        The loss averaged over the non-ignored pixels.
    """
    preds = fluid.layers.transpose(preds, [0, 2, 3, 1])

    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')

    # Forward ignore_index so the op itself skips those labels; they are
    # normally outside the valid class range.
    loss = fluid.layers.softmax_with_cross_entropy(preds, labels,
                                                   ignore_index=ignore_index)
    loss = loss * mask
    # mean(loss) / mean(mask) == sum(loss) / number of non-ignored pixels.
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)

    return avg_loss

Define the training procedure for a single epoch

def train(dataloader, model, criterion, optimizer, epoch, total_batch, num_epochs=None):
    """Run one training epoch and return the running average loss.

    Args:
        dataloader: Iterable yielding batches whose first two items are the
            image and the label.
        model: Network to train; switched to train mode here.
        criterion: Callable ``criterion(pred, label)`` returning the loss.
        optimizer: Optimizer whose ``minimize(loss)`` updates the weights.
        epoch: Current epoch number (used for logging only).
        total_batch: Number of batches per epoch (used for logging only).
        num_epochs: Total number of epochs (used for logging only). Defaults
            to the module-level ``args.num_epochs`` when omitted.

    Returns:
        The ``avg`` value of the loss meter after the last batch.
    """
    if num_epochs is None:
        # Backward-compatible default: read the script-level CLI arguments.
        num_epochs = args.num_epochs

    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]

        # Move the last axis to position 1 (presumably NHWC -> NCHW; confirm
        # against the data loader).
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        loss = criterion(pred, label)

        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        # ':.4f' (precision 4) replaces ':4f', which only set a minimum width.
        print(f"Epoch[{epoch:03d}/{num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:.4f}")

    return train_loss_meter.avg

Put everything together into a complete training script

import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation


def _build_parser():
    """Create the command-line parser holding all training options."""
    cli = argparse.ArgumentParser()
    # Model selection and optimisation settings.
    cli.add_argument('--net', type=str, default='basic')
    cli.add_argument('--lr', type=float, default=0.001)
    cli.add_argument('--num_epochs', type=int, default=10)
    cli.add_argument('--batch_size', type=int, default=4)
    # Dataset location.
    cli.add_argument('--image_folder', type=str, default='./work/dummy_data')
    cli.add_argument('--image_list_file', type=str, default='./work/dummy_data/list.txt')
    # Checkpoint output directory and save interval.
    cli.add_argument('--checkpoint_folder', type=str, default='./output')
    cli.add_argument('--save_freq', type=int, default=2)
    return cli


parser = _build_parser()

# Parsed once at import time; the rest of the script reads settings from `args`.
args = parser.parse_args()


def main():
    """Train the selected model and periodically save checkpoints.

    All settings are read from the module-level ``args``.

    Raises:
        NotImplementedError: If ``args.net`` names an unsupported model.
    """
    # Step 0: preparation
    # Use the GPU when this Paddle build supports CUDA; otherwise fall back
    # to the CPU instead of failing on a hard-coded CUDAPlace(0).
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                              use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        # Number of full batches per epoch (used for logging only).
        total_batch = len(basic_dataloader) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            # Save every `save_freq` epochs and always after the final epoch.
            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # save model and optimizer states (both under the same path prefix)
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')


if __name__ == "__main__":
    main()

 

Guess you like

Origin blog.csdn.net/fan1102958151/article/details/109282977