Use DeepLabv3+ to train your own dataset from scratch

Introduction

To segment the features we want from an image, the blogger investigated several semantic segmentation methods that can be deployed. This post mainly explains how to use DeepLabv3+ to train your own dataset.

1. Prepare the data set

First of all, we need to know what kind of dataset the DeepLabv3+ network expects. The overall layout of the dataset is as follows:
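A rough sketch of that layout, using the folder names referenced throughout this post:

PV
├── ImageSets
│   └── Segmentation        # train.txt / trainval.txt / val.txt
├── JPEGImages              # original .jpg images
└── SegmentationClass       # segmentation masks (.png)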

Here, PV is the name of my dataset, and it contains three folders. The first is ImageSets, whose Segmentation subfolder stores the txt files listing the training and validation images; JPEGImages stores the original images, and SegmentationClass stores the segmentation masks. Next, we can officially start preparing the dataset.

1. First, we use the labelme tool to annotate our dataset. The annotation procedure itself is not described here; in the end you get the original JPG images and the corresponding JSON files.

2. Convert the JSON files to images with the code below, adjusting the JSON directory, output directory and label.txt path to your own setup:

from __future__ import print_function

import argparse
import glob
import os
import os.path as osp
import sys
import imgviz
import numpy as np
import labelme


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--input_dir",default="E:\File\Pycharm\hed-master\hed-data\PV_label_deeplab", help="input annotated directory") #json路径
    parser.add_argument("--output_dir",default="E:\File\Pycharm\hed-master\hed-data\zw", help="output dataset directory")  #输出地址
    parser.add_argument("--labels",default="E:\File\Pycharm\hed-master\hed-data\labels\label.txt", help="labels file")  #标签txt
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationClassVisualization")
        )
    print("Creating dataset:", args.output_dir)

    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)
    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_lbl_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".npy"
        )
        out_png_file = osp.join(
            args.output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not args.noviz:
            out_viz_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )

        with open(out_img_file, "wb") as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)

        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)

        np.save(out_lbl_file, lbl)

        if not args.noviz:
            viz = imgviz.label2rgb(
                label=lbl,
                # use "image=" instead of "img=": with this version of the labelme/imgviz API the old keyword raises an error
                # img=imgviz.rgb2gray(img),
                image=imgviz.rgb2gray(img),
                font_size=15,
                label_names=class_names,
                loc="rb",
            )
            imgviz.io.imsave(out_viz_file, viz)


if __name__ == "__main__":
    main()
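For reference, the label.txt read by this script must start with __ignore__ and _background_ (the asserts above check exactly that), followed by one class name per line. For a single-class dataset it would look something like this (the class name PV here is just an example):

__ignore__
_background_
PV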

After executing the above code, the output directory will contain JPEGImages, SegmentationClass, SegmentationClassPNG, SegmentationClassVisualization and class_names.txt.

That leaves only the ImageSets folder to prepare.

3. Generate train.txt, trainval.txt and val.txt

Point the code below at the JPEGImages folder (the original images) generated above, then run it:

import os
import numpy as np
root = r"E:\File\Pycharm\pytorch-deeplab-xception-master\dataset\PV\JPEGImages"
output = r"E:\File\Pycharm\pytorch-deeplab-xception-master\dataset\PV\ImageSets\Segmentation"
filename = []
# walk the folder that holds the original images and collect every file name
# dirs = os.listdir(root)
for root, dirs, files in os.walk(root):
    for file in files:
        print(file)
        filename.append(file[:-4])  # strip the extension before storing


# shuffle the list of file names
np.random.shuffle(filename)
# split into training / trainval / validation sets, default ratio 6:2:2
train = filename[:int(len(filename)*0.6)]
trainval = filename[int(len(filename)*0.6):int(len(filename)*0.8)]
val = filename[int(len(filename)*0.8):]

# write train.txt, trainval.txt and val.txt
with open(os.path.join(output, 'train.txt'), 'w') as f1, open(os.path.join(output, 'trainval.txt'), 'w') as f2, open(os.path.join(output, 'val.txt'), 'w') as f3:
    for i in train:
        f1.write(i + '\n')
    for i in trainval:
        f2.write(i + '\n')
    for i in val:
        f3.write(i + '\n')

print('Done!')

After executing the above code, the three txt files will be generated. At this point all the files we need are ready; we just have to arrange them in the folder layout given at the beginning.

2. Add the dataset code

1. Download the project code: GitHub - jfzhang95/pytorch-deeplab-xception: DeepLab v3+ model in PyTorch. Support different backbones.  After downloading, extract it to any directory.

2. Open mypath.py and add your own dataset. Our dataset name is PV, and the PV folder contains the three subfolders prepared above.
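The change itself is small; below is a minimal sketch, assuming the Path.db_root_dir() structure used by the repo (the PV path is just an example and should point at the folder that contains ImageSets/, JPEGImages/ and SegmentationClass/):

# mypath.py (sketch): add a branch that returns the root folder of the PV dataset
class Path(object):
    @staticmethod
    def db_root_dir(dataset):
        if dataset == 'pascal':
            return '/path/to/datasets/VOCdevkit/VOC2012/'
        elif dataset == 'PV':
            return r'E:\File\Pycharm\pytorch-deeplab-xception-master\dataset\PV'
        else:
            print('Dataset {} not available.'.format(dataset))
            raise NotImplementedError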

3. In the dataloaders/datasets/ directory, make a copy of pascal.py and rename it after your dataset, here PV.py. Inside PV.py, change the number of classes and the dataset name (the parts highlighted in red in the original screenshot).
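A minimal sketch of those edits, assuming the class layout of pascal.py in the repo (only the changed lines are shown; NUM_CLASSES = 2 is just an example for background plus one class):

# PV.py (sketch): copied from pascal.py, with the class count and dataset name changed
from torch.utils.data import Dataset
from mypath import Path

class VOCSegmentation(Dataset):
    NUM_CLASSES = 2  # background + the classes listed in your label.txt

    def __init__(self, args, base_dir=Path.db_root_dir('PV'), split='train'):
        ...  # keep the rest of pascal.py's __init__ and methods unchanged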

4. Modify dataloaders/utils.py: above the get_cityscapes_labels() function, add the colour settings for your dataset, with one colour per class (pick whatever colours you like).
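A minimal sketch of such a colour table, mirroring the style of get_pascal_labels() in the same file (class names and colours are just examples):

import numpy as np

# dataloaders/utils.py (sketch): one RGB colour per class, in class-index order
def get_PV_labels():
    return np.asarray([
        [0, 0, 0],      # 0: _background_
        [128, 0, 0],    # 1: first foreground class
    ])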

Then modify the decode_segmap() function in the same file so that it calls the newly added function (the part marked in red in the original screenshot).
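A sketch of the added branch, assuming the if/elif chain on dataset that the repo's decode_segmap() already uses:

# decode_segmap() (sketch): add a branch for the PV dataset next to the existing ones
def decode_segmap(label_mask, dataset, plot=False):
    if dataset == 'pascal' or dataset == 'coco':
        n_classes = 21
        label_colours = get_pascal_labels()
    elif dataset == 'PV':
        n_classes = 2                    # same value as NUM_CLASSES in PV.py
        label_colours = get_PV_labels()  # the colour table added above
    # ... the rest of the function stays unchanged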

5. Modify __init__.py under the dataloaders folder:

Add the reading code for your own dataset (PV also has to be imported at the top of that file, next to the existing dataset imports):

    if args.dataset == 'PV':
        train_set = PV.VOCSegmentation(args, split='train')
        val_set = PV.VOCSegmentation(args, split='val')

        num_class = train_set.NUM_CLASSES
        train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
        test_loader = None

        return train_loader, val_loader, test_loader, num_class

At this point, the code is ready to read our own dataset.

3. Training and testing

1. Modify the training parameters in train.py, in particular register your dataset name for the --dataset argument (shown as a screenshot in the original post).
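A rough sketch of that change, assuming train.py registers --dataset through argparse as in the repo (add the new name to choices and, if you like, make it the default):

import argparse

parser = argparse.ArgumentParser(description="PyTorch DeeplabV3Plus Training")
# train.py (sketch): register the new dataset name so "--dataset PV" is accepted
parser.add_argument('--dataset', type=str, default='PV',
                    choices=['pascal', 'coco', 'cityscapes', 'PV'],
                    help='dataset name (default: PV)')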

2. Run the training command:

python train.py --backbone mobilenet --lr 0.007 --workers 1 --epochs 50 --batch-size 8 --gpu-ids 0 --checkname deeplab-mobilenet

Generally the training will now start successfully, but many readers run into an AssertionError. In most cases it is raised by the file-existence asserts in PV.py (the location shown in the original screenshot):

The cause is usually simple: the image names listed in your txt files do not match the file names in the image folders. Check carefully that the names are correct, and also check the extensions, because the original images are JPG while the segmentation masks are PNG.

Another cause is a trailing newline at the end of a txt file, which makes the program treat the empty line as another image (with an empty name, just ".jpg"); simply delete the newline.

If there are still problems, add print(_image) and print(_cat) yourself and check whether the printed paths look right.
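To check everything in one pass, a small sanity-check sketch like the one below can also help; it assumes the folder layout from the beginning of this post and that the masks in SegmentationClass are PNG files (adjust the root path to your own):

import os

root = r"E:\File\Pycharm\pytorch-deeplab-xception-master\dataset\PV"
for split in ("train", "trainval", "val"):
    txt = os.path.join(root, "ImageSets", "Segmentation", split + ".txt")
    with open(txt) as f:
        for line in f:
            name = line.strip()
            if not name:
                print(split + ".txt contains an empty line -- delete the trailing newline")
                continue
            jpg = os.path.join(root, "JPEGImages", name + ".jpg")
            png = os.path.join(root, "SegmentationClass", name + ".png")
            for path in (jpg, png):
                if not os.path.isfile(path):
                    print("missing:", path)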

3. Testing

There is no test code in the original repository, so here is a simple demo; improvements will be added later.

#
# demo.py
#
import argparse
import os
import numpy as np
import time

import torch
import torch.nn as nn
from modeling.deeplab import *
from dataloaders import custom_transforms as tr
from PIL import Image
from torchvision import transforms
from dataloaders.utils import  *
from torchvision.utils import make_grid, save_image
 
def main():
 
    parser = argparse.ArgumentParser(description="PyTorch DeeplabV3Plus Training")
    parser.add_argument('--in-path', type=str,  default='/root/home/zyx/Seg552_VOC/test',
                        help='image to test')
    # parser.add_argument('--out-path', type=str, required=True, help='mask image to save')
    parser.add_argument('--backbone', type=str, default='resnet',
                        choices=['resnet', 'xception', 'drn', 'mobilenet'],
                        help='backbone name (default: resnet)')
    parser.add_argument('--ckpt', type=str, default='deeplab-resnet.pth',
                        help='saved model')
    parser.add_argument('--out-stride', type=int, default=16,
                        help='network output stride (default: 16)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--gpu-ids', type=str, default='0',
                        help='use which gpu to train, must be a \
                        comma-separated list of integers only (default=0)')
    parser.add_argument('--dataset', type=str, default='belt',
                        choices=['pascal', 'coco', 'cityscapes','belt'],
                        help='dataset name (default: pascal)')
    parser.add_argument('--crop-size', type=int, default=513,
                        help='crop image size')
    parser.add_argument('--num_classes', type=int, default=2,
                        help='number of classes')
    parser.add_argument('--sync-bn', type=bool, default=None,
                        help='whether to use sync bn (default: auto)')
    parser.add_argument('--freeze-bn', type=bool, default=False,
                        help='whether to freeze bn parameters (default: False)')
 
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError('Argument --gpu_ids must be a comma-separated list of integers only')
 
    if args.sync_bn is None:
        if args.cuda and len(args.gpu_ids) > 1:
            args.sync_bn = True
        else:
            args.sync_bn = False
    model_s_time = time.time()
    model = DeepLab(num_classes=args.num_classes,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    model = nn.DataParallel(model)

    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    if args.cuda:
        model = model.cuda()
    model_u_time = time.time()
    model_load_time = model_u_time-model_s_time
    print("model load time is {}".format(model_load_time))
 
    composed_transforms = transforms.Compose([
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()])
    for name in os.listdir(args.in_path):
        s_time = time.time()
        image = Image.open(args.in_path+"/"+name).convert('RGB')
 
        # image = Image.open(args.in_path).convert('RGB')
        target = Image.open(args.in_path+"/"+name).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
 
        model.eval()
        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)
 
        # pass the dataset name so the mask is decoded with the matching colour table
        grid_image = make_grid(decode_seg_map_sequence(torch.max(output[:3], 1)[1].detach().cpu().numpy(),
                                                       dataset=args.dataset),
                               3, normalize=False, range=(0, 255))
        save_image(grid_image,args.in_path+"/"+"{}_mask.png".format(name[0:-4]))
        u_time = time.time()
        img_time = u_time-s_time
        print("image:{} time: {} ".format(name,img_time))
        # save_image(grid_image, args.out_path)
        # print("type(grid) is: ", type(grid_image))
        # print("grid_image.shape is: ", grid_image.shape)
    print("image save in in_path.")
if __name__ == "__main__":
   main()
 
# Usage: python demo.py --in-path your_test_image_dir --ckpt your_checkpoint.pth
# (the --out-path argument is commented out above; masks are saved as <name>_mask.png next to the inputs)
 

To be continued...

If you have any questions, just leave a message and the blogger will answer them one by one.


Original post: blog.csdn.net/qq_39149619/article/details/131896081