Convert json to txt and divide data sets in YOLOV5-seg

This article is purely a record of personal learning, if there is any infringement, it will be deleted

1. Instance segmentation annotation

labelimg: only for rectangular frame labeling

labelme: Polygon labeling is possible. If you want to use yolov5-seg, you have to use labelme for labeling

There are many blogs about how to download, so I won’t repeat them here.

There is one thing I need to suggest. After labeling a picture, save the label. You can put the label and picture in a folder, which is also convenient for our next conversion.

Two, json to txt

import json
import os
import glob
import os.path as osp


def labelme2yolov2Seg(jsonfilePath="", resultDirPath="", classList=["buds"]):
    """
    此函数用来将labelme软件标注好的数据集转换为yolov5_7.0sege中使用的数据集
    :param jsonfilePath: labelme标注好的*.json文件所在文件夹
    :param resultDirPath: 转换好后的*.txt保存文件夹
    :param classList: 数据集中的类别标签
    :return:
    """
    # 0.创建保存转换结果的文件夹
    if(not os.path.exists(resultDirPath)):
        os.mkdir(resultDirPath)

    # 1.获取目录下所有的labelme标注好的Json文件,存入列表中
    jsonfileList = glob.glob(osp.join(jsonfilePath, "*.json"))
    print(jsonfileList)  # 打印文件夹下的文件名称

    # 2.遍历json文件,进行转换
    for jsonfile in jsonfileList:
        # 3. 打开json文件
        with open(jsonfile, "r") as f:
            file_in = json.load(f)

            # 4. 读取文件中记录的所有标注目标
            shapes = file_in["shapes"]

            # 5. 使用图像名称创建一个txt文件,用来保存数据
            with open(resultDirPath + "\\" + jsonfile.split("\\")[-1].replace(".json", ".txt"), "w") as file_handle:
                # 6. 遍历shapes中的每个目标的轮廓
                for shape in shapes:
                    # 7.根据json中目标的类别标签,从classList中寻找类别的ID,然后写入txt文件中
                    file_handle.writelines(str(classList.index(shape["label"])) + " ")

                    # 8. 遍历shape轮廓中的每个点,每个点要进行图像尺寸的缩放,即x/width, y/height
                    for point in shape["points"]:
                        x = point[0]/file_in["imageWidth"]  # mask轮廓中一点的X坐标
                        y = point[1]/file_in["imageHeight"]  # mask轮廓中一点的Y坐标
                        file_handle.writelines(str(x) + " " + str(y) + " ")  # 写入mask轮廓点

                    # 9.每个物体一行数据,一个物体遍历完成后需要换行
                    file_handle.writelines("\n")
            # 10.所有物体都遍历完,需要关闭文件
            file_handle.close()
        # 10.所有物体都遍历完,需要关闭文件
        f.close()

if __name__ == "__main__":
    jsonfilePath = "E:\ML-data\VOC\images-aug"  # 要转换的json文件所在目录
    resultDirPath = "E:\ML-data\VOC\images-aug"  # 要生成的txt文件夹
    labelme2yolov2Seg(jsonfilePath=jsonfilePath, resultDirPath=resultDirPath, classList=["buds"]) # 更改为自己的类别名


3. Divide the dataset

import os, shutil, random
import numpy as np

postfix = 'jpg'  # 这里要注意下,文件夹里的图片格式要都是jpg,如果是PNG那就都得是PNG
base_path = 'E:\Deep learning\yolov5-7.0\VOCdata\images'   # 转化完的图片和txt所在的文件夹
dataset_path = 'E:\Deep learning\yolov5-7.0\VOCdata\data_path'  # 新建一个文件夹
val_size, test_size = 0.1, 0.0  # 这里把test设置为0,也就是train:val = 9:1
# val_size = 0.1

os.makedirs(dataset_path, exist_ok=True)
os.makedirs(f'{dataset_path}/images', exist_ok=True)
os.makedirs(f'{dataset_path}/images/train', exist_ok=True)
os.makedirs(f'{dataset_path}/images/val', exist_ok=True)
os.makedirs(f'{dataset_path}/images/test', exist_ok=True)
os.makedirs(f'{dataset_path}/labels/train', exist_ok=True)
os.makedirs(f'{dataset_path}/labels/val', exist_ok=True)
os.makedirs(f'{dataset_path}/labels/test', exist_ok=True)

path_list = np.array([i.split('.')[0] for i in os.listdir(base_path) if 'txt' in i])
random.shuffle(path_list)
train_id = path_list[:int(len(path_list) * (1 - val_size - test_size))]
# train_id = path_list[:int(len(path_list) * (1 - val_size))]

# val_id = path_list[int(len(path_list) * (1 - val_size - test_size)):int(len(path_list) * (1 - test_size))]
val_id = path_list[int(len(path_list) * (1 - val_size - test_size)):int(len(path_list) * (1 - test_size))]
test_id = path_list[int(len(path_list) * (1 - test_size)):]

for i in train_id:
    shutil.copy(f'{base_path}/{i}.{postfix}', f'{dataset_path}/images/train/{i}.{postfix}')
    shutil.copy(f'{base_path}/{i}.txt', f'{dataset_path}/labels/train/{i}.txt')

for i in val_id:
    shutil.copy(f'{base_path}/{i}.{postfix}', f'{dataset_path}/images/val/{i}.{postfix}')
    shutil.copy(f'{base_path}/{i}.txt', f'{dataset_path}/labels/val/{i}.txt')

for i in test_id:
    shutil.copy(f'{base_path}/{i}.{postfix}', f'{dataset_path}/images/test/{i}.{postfix}')
    shutil.copy(f'{base_path}/{i}.txt', f'{dataset_path}/labels/test/{i}.txt')

Guess you like

Origin blog.csdn.net/Zeng999212/article/details/129370999