1. File Placement

JPEGImages: stores the dataset images
labels: stores the label txt files in YOLO format
DataSet: stores the images and txt files after they have been split into train/val/test

2. Code

The code is taken from the reference below. On Linux, any '\\' path separator in the code must be changed to '/'. Put the script in the same directory as the folders listed above and run it; if it succeeds, the required sub-folders are generated inside DataSet. The split ratio can be set to whatever you want. Reference: YOLOv5 dataset division script (train, val, test)_yolov5 val_Blog of All-powerful Cloud-CSDN Blog

import os, shutil, random
from tqdm import tqdm
 
def split_img(img_path, label_path, split_list):
    """Randomly split a YOLO dataset into train / val / test subsets.

    Every regular file found in ``img_path`` is treated as an image. Each
    image and its label file (located via ``toLabelPath``) are copied into
    ``DataSet/images/<subset>`` and ``DataSet/labels/<subset>``. ``DataSet``
    is a relative path, so it is created under the current working
    directory; files already present there from an earlier run are left in
    place.

    Args:
        img_path: Directory holding the source images.
        label_path: Directory holding the label ``.txt`` files.
        split_list: Three ratios ``[train, val, test]``. ``train`` is the
            fraction of all images; the remainder is shared between val and
            test in the proportion ``val : test``.

    Raises:
        ValueError: If ``train`` is outside ``[0, 1]`` or ``val``/``test``
            is negative.
    """
    data_root = 'DataSet'
    subsets = ('train', 'val', 'test')

    # Build the target paths before touching the file system so they are
    # always defined, even when the output folders already exist.
    img_dirs = {s: os.path.join(data_root, 'images', s) for s in subsets}
    label_dirs = {s: os.path.join(data_root, 'labels', s) for s in subsets}

    if os.path.isdir(data_root):
        print('文件目录已存在')
    for directory in list(img_dirs.values()) + list(label_dirs.values()):
        os.makedirs(directory, exist_ok=True)

    train, val, test = split_list
    if not 0 <= train <= 1 or val < 0 or test < 0:
        raise ValueError(
            'split_list must be [train, val, test] with 0 <= train <= 1 '
            'and non-negative val/test, got %r' % (split_list,)
        )

    # Sort first so that a seeded ``random`` gives a reproducible split, and
    # skip sub-directories, which cannot be copied as files.
    candidates = (os.path.join(img_path, name)
                  for name in sorted(os.listdir(img_path)))
    all_img_path = [path for path in candidates if os.path.isfile(path)]
    random.shuffle(all_img_path)

    n_train = int(train * len(all_img_path))
    n_rest = len(all_img_path) - n_train
    held_out = val + test
    # With val == test == 0 there is no ratio to apply; nothing goes to val.
    n_val = int(val / held_out * n_rest) if held_out else 0

    partitions = {
        'train': all_img_path[:n_train],
        'val': all_img_path[n_train:n_train + n_val],
        'test': all_img_path[n_train + n_val:],
    }

    for subset in subsets:
        progress = tqdm(partitions[subset], desc=subset + ' ', ncols=80,
                        unit='img')
        for img in progress:
            _copy(img, img_dirs[subset])
            _copy(toLabelPath(img, label_path), label_dirs[subset])
 
 
def _copy(from_path, to_path):
    """Copy the file ``from_path`` to ``to_path`` via ``shutil.copy``.

    ``to_path`` may be a directory, in which case the file keeps its name.
    """
    shutil.copy(src=from_path, dst=to_path)
 
def toLabelPath(img_path, label_path):
    """Return the path of the label file that belongs to an image.

    The label file has the image's base name with its extension replaced
    by ``.txt`` and is located in ``label_path``.

    Args:
        img_path: Path to the image file (any extension).
        label_path: Directory containing the label files.

    Returns:
        ``label_path`` joined with ``<image stem>.txt``.
    """
    # basename/splitext handle the platform's separators and any image
    # extension, unlike splitting on '/' and the literal '.jpg'.
    stem = os.path.splitext(os.path.basename(img_path))[0]
    return os.path.join(label_path, stem + '.txt')
 
def main():
    """Split the ``JPEGImages`` / ``labels`` dataset with an 80/10/10 ratio."""
    # Split ratios in the order [train, val, test].
    ratios = [0.8, 0.1, 0.1]
    split_img('JPEGImages', 'labels', ratios)
 
# Run the split only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

3.DataSet folder

DataSet
  ---images   # images after the split
     -----train
     -----test
     -----val
  ---labels   # label txt files after the split
     -----train
     -----test
     -----val

4. Data yaml file

In the data yaml file, set the paths to the split image folders as relative paths (relative to train.py)
nc: the number of categories in your own dataset
names: the category names of your own dataset

Data set division in YOLO format in Linux

1. File Placement

2. Code

3.DataSet folder

4. Data yaml file

Guess you like