1. File Placement
JPEGImages: stores the images of the dataset
labels: stores the label txt files in YOLO format
DataSet: stores the split images and txt files
2. Code
The code is taken from the blog post cited at the end of this paragraph. On Linux, the '\\' path separators in that code need to be changed to '/'. Put the script in the same location as the folders described above and run it; if it succeeds, the required folders are generated inside DataSet. The data can be split according to whatever ratio you want. Reference: "YOLOv5 dataset division script (train, val, test)_yolov5 val", Blog of All-powerful Cloud, CSDN Blog.
import os, shutil, random
from tqdm import tqdm
def split_img(img_path, label_path, split_list):
    """Randomly split a folder of images and their labels into train/val/test.

    Every entry returned by ``os.listdir(img_path)`` is treated as an image.
    For each image, the matching label path is obtained from ``toLabelPath``
    and both files are copied into ``DataSet/images/<subset>`` and
    ``DataSet/labels/<subset>``. ``DataSet`` is a relative path, so it is
    created in the current working directory.

    Args:
        img_path: Directory containing the images.
        label_path: Directory containing the label files.
        split_list: Three ratios ``[train, val, test]``. ``train`` is the
            fraction of all images used for training; the remaining images
            are divided between val and test in the proportion ``val:test``.

    Raises:
        ValueError: If ``split_list`` does not hold exactly three values or
            a ratio asks for more images than are available.
        OSError: If a directory cannot be created or a file cannot be copied
            (for example, an image has no label file).
    """
    Data = 'DataSet'
    subsets = ('train', 'val', 'test')

    # The directory paths are computed unconditionally so that they are
    # always defined, even when the output tree already exists.
    img_dirs = {name: os.path.join(Data, 'images', name) for name in subsets}
    label_dirs = {name: os.path.join(Data, 'labels', name) for name in subsets}

    if os.path.isdir(Data):
        print('文件目录已存在')
    # exist_ok=True lets the script be re-run against an existing DataSet
    # folder; genuine failures (e.g. permissions) still propagate.
    for name in subsets:
        os.makedirs(img_dirs[name], exist_ok=True)
        os.makedirs(label_dirs[name], exist_ok=True)

    train, val, test = split_list
    all_img_path = [os.path.join(img_path, img) for img in os.listdir(img_path)]

    # Draw the training subset, then keep the rest in listing order.
    train_img = random.sample(all_img_path, int(train * len(all_img_path)))
    picked = set(train_img)
    remaining = [path for path in all_img_path if path not in picked]

    # Share what is left between val and test. With val + test == 0 there is
    # no proportion to compute, so nothing goes to val.
    holdout = val + test
    val_count = int(val / holdout * len(remaining)) if holdout else 0
    val_img = random.sample(remaining, val_count)
    picked = set(val_img)
    test_img = [path for path in remaining if path not in picked]

    plan = (
        ('train ', 'train', train_img),
        ('val ', 'val', val_img),
        ('test ', 'test', test_img),
    )
    for desc, name, images in plan:
        for img in tqdm(images, desc=desc, ncols=80, unit='img'):
            _copy(img, img_dirs[name])
            _copy(toLabelPath(img, label_path), label_dirs[name])
def _copy(from_path, to_path):
    """Copy the file ``from_path`` to ``to_path`` using ``shutil.copy``.

    ``to_path`` is handed to ``shutil.copy`` as the destination; nothing is
    returned.
    """
    shutil.copy(src=from_path, dst=to_path)
def toLabelPath(img_path, label_path):
    """Return the label file path that corresponds to an image path.

    The label file has the image's base name with its extension replaced by
    ``.txt`` and is located in ``label_path``.

    Args:
        img_path: Path of the image file.
        label_path: Directory that holds the label files.

    Returns:
        ``label_path`` joined with ``<image stem>.txt``.
    """
    # basename/splitext use the platform's separator rules and strip only the
    # final extension, so names such as 'shot.jpg_01.jpg' or 'cat.png' map to
    # 'shot.jpg_01.txt' and 'cat.txt'.
    stem, _ext = os.path.splitext(os.path.basename(img_path))
    return os.path.join(label_path, stem + '.txt')
def main():
    """Split the images in ``JPEGImages`` and the labels in ``labels``."""
    # Dataset split ratio, ordered as [train, val, test].
    ratios = [0.8, 0.1, 0.1]
    split_img(img_path='JPEGImages', label_path='labels', split_list=ratios)


# Run the split only when this file is executed as a script.
if __name__ == '__main__':
    main()
3. DataSet folder
DataSet
---images # images after splitting
-----train
-----test
-----val
---labels # txt label files after splitting
-----train
-----test
-----val
4. Data yaml file
In the data yaml file, fill in the image paths with the relative paths of the split image folders (relative to train.py)
nc: the number of categories of your own dataset
names: the category name of your own dataset