Python: split YOLO-format images and their annotation files into a training set and a validation set, and copy them into the corresponding folders

Read the comments in the code before using it; the configuration values at the top must be adjusted to your setup.

What the following code does:

  1. Create a `datasets` folder in the same directory, containing the training-set and validation-set subfolders.
  2. Randomly assign `train_rate` (80%) of the data to the training set and the remaining 1 - `train_rate` (20%) to the validation set, then copy the files to the corresponding folders.
import numpy as np
import os
from shutil import copy2
import random

# --------- Source configuration: adjust these values to your setup
# Parent directory of the source folders.  Built with os.path.join so the
# separator is correct on every platform (the trailing separator is kept so
# the value is still '..\\' on Windows, as before).
orgin_path = os.path.join(os.pardir, '')
# Folder containing the source images.
orgin_image_path = os.path.join(orgin_path, 'images')
# Folder containing the YOLO .txt annotation files.
orgin_label_path = os.path.join(orgin_path, 'YOLO')
# Fraction of the images that goes into the training set.
train_rate = 0.8
# File extensions (including the leading dot) that are treated as images.
support_suffix = ['.tiff', '.png', '.jpg']

# --------- Output layout configuration
path = r'datasets'  # dataset root dir
train = r'images/train'  # train images (relative to 'path')
val = r'images/val'  # val images (relative to 'path')
train_label = r'labels/train'  # train labels (relative to 'path')
val_label = r'labels/val'  # val labels (relative to 'path')

# What the code below does:
# 1. Creates the 'datasets' folder (relative to the current working
#    directory) with the training-set and validation-set subfolders.
# 2. Randomly assigns train_rate (80%) of the data to the training set and
#    the remaining 1 - train_rate (20%) to the validation set, and copies the
#    files into the corresponding folders.

# Full output folders.  Each f-string is kept on a single line: replacement
# fields broken across lines are a SyntaxError before Python 3.12.
train_path = f'{path}/{train}'
val_path = f'{path}/{val}'
train_label_path = f'{path}/{train_label}'
val_label_path = f'{path}/{val_label}'


def check_file(path_data):
    """Ensure that the directory ``path_data`` exists.

    Missing parent directories are created as well.  Calling this for a
    directory that already exists is a no-op.

    Args:
        path_data: Path of the directory to create.

    Raises:
        FileExistsError: If ``path_data`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # "if not exists: makedirs" and rejects a regular file at that path
    # instead of silently accepting it as if it were a directory.
    os.makedirs(path_data, exist_ok=True)


# Make sure all four output folders exist before any file is copied.
for output_dir in (train_path, val_path, train_label_path, val_label_path):
    check_file(output_dir)

# Collect the image files from the source image folder.
# os.path.splitext is used instead of splitting on '.', so entries without an
# extension (e.g. a folder named "png") or hidden files such as ".png" are no
# longer mistaken for images.  The extension check is case-insensitive, and
# sub-directories are skipped.
all_data = os.listdir(orgin_image_path)  # every entry in the image folder
all_data_img = [
    name for name in all_data
    if os.path.splitext(name)[1].lower() in support_suffix
    and os.path.isfile(os.path.join(orgin_image_path, name))
]
num_all_data = len(all_data_img)
print("分离图片总数: " + str(num_all_data))

# Shuffled indices into all_data_img; the first train_rate share of them
# becomes the training set, the rest the validation set.
# (The two debug dumps of the whole index list were removed: they flood the
# console on large datasets.)
index_list = list(range(num_all_data))
random.shuffle(index_list)

for num, i in enumerate(index_list):
    image_name = all_data_img[i]
    fileName = os.path.join(orgin_image_path, image_name)
    # The annotation file shares the image's base name, with a .txt extension.
    yolo_label = os.path.splitext(image_name)[0] + ".txt"
    yolo_label_file = os.path.join(orgin_label_path, yolo_label)

    # Same split rule as before: positions below num_all_data * train_rate
    # go to the training set.
    if num < num_all_data * train_rate:
        image_dst, label_dst = train_path, train_label_path
    else:
        image_dst, label_dst = val_path, val_label_path

    copy2(fileName, image_dst)
    # An image without an annotation file no longer aborts the run halfway
    # through (leaving a partially split dataset); the image is copied and
    # the missing label is reported.
    if os.path.isfile(yolo_label_file):
        copy2(yolo_label_file, label_dst)
    else:
        print("warning: no label file found for " + image_name
              + " (expected " + yolo_label_file + "); copied image only")
print("finished!")

Guess you like

Origin blog.csdn.net/TY_GYY/article/details/128940872