YOLOv5配置本地目标检测数据集

1 数据集文件相对位置

data_path# 数据文件
├──images
	├──train# 此目录存放训练数据exp 001.jpg 002.jpg...
	├──val# 此目录存放验证数据
	├──test# 此目录存放测试数据
├──labels
	├──train# 此目录存放训练数据对应的标签exp 001.txt 002.txt...
	├──val# 此目录存放验证数据对应的标签
	├──test# 此目录存放测试数据对应的标签
├──train.txt# 此文本存放训练数据的路径exp ./images/train/001.jpg
├──val.txt# 此目录存放验证数据的路径
├──test.txt# 此目录存放测试数据的路径

2.1 xml转txt





import os
import xml.etree.ElementTree as ET
from decimal import Decimal

dirpath = r'E:\Dataset\RemoteSensingImage\5-DIOR\Annotations'  # 原来存放xml文件的目录
newdir = r'E:\Dataset\RemoteSensingImage\5-DIOR\labels'  # 修改label后形成的txt目录
# xml_path=r'E:\Dataset\RemoteSensingImage\5-DIOR\Annotations\00001.xml'
labels=['airplane','airport','baseballfield','basketballcourt','bridge','chimney','dam','Expressway-Service-area','Expressway-toll-station',\
        'golffield','groundtrackfield','harbor','overpass','ship','stadium','storagetank','tenniscourt','trainstation','vehicle','windmill']

for fp in os.listdir(dirpath):
    root = ET.parse(os.path.join(dirpath, fp)).getroot()
    xmin, ymin, xmax, ymax = 0, 0, 0, 0
    sz = root.find('size')
    width = float(sz[0].text)
    height = float(sz[1].text)
    filename = root.find('filename').text
    # print(fp)
    with open(os.path.join(newdir, fp.split('.')[0] + '.txt'), 'a+') as f:
        for child in root.findall('object'):  # 找到图片中的所有框

            sub = child.find('bndbox')  # 找到框的标注值并进行读取
            sub_label = child.find('name')
            xmin = float(sub[0].text)
            ymin = float(sub[1].text)
            xmax = float(sub[2].text)
            ymax = float(sub[3].text)
            try:  # 转换成yolov的标签格式，需要归一化到（0-1）的范围内
                x_center = Decimal(str(round(float((xmin + xmax) / (2 * width)), 6))).quantize(Decimal('0.000000'))
                y_center = Decimal(str(round(float((ymin + ymax) / (2 * height)), 6))).quantize(Decimal('0.000000'))
                w = Decimal(str(round(float((xmax - xmin) / width), 6))).quantize(Decimal('0.000000'))
                h = Decimal(str(round(float((ymax - ymin) / height), 6))).quantize(Decimal('0.000000'))
                print(str(x_center) + ' ' + str(y_center) + ' ' + str(w) + ' ' + str(h))
                # 读取需要的标签
                for idx in range(len(labels)):
                    if sub_label.text == labels[idx]:
                        f.write(' '.join([str(idx),str(x_center), str(y_center), str(w), str(h) + '\n']))
            except ZeroDivisionError:
                print(filename, '有问题')
'''
有其他标签选用
                if sub_label.text == 'xxx':
                    f.write(' '.join([str(1), str(x_center), str(y_center), str(w), str(h) + '\n']))
                if sub_label.text == 'xxx':
                    f.write(' '.join([str(2), str(x_center), str(y_center), str(w), str(h) + '\n']))
'''


# with open(os.path.join(newdir, fp.split('.')[0] + '.txt'), 'a+') as f:
#     f.write(' '.join([str(2), str(x_center), str(y_center), str(w), str(h) + '\n']))

2.2 images、labels文件划分

对于大部分公共数据集而言，训练验证测试早已划分好。但是如果我们手头只有未划分的数据集和标签则应该按照第一部分所列的结构进行划分。
下列程序只涉及图片操作，未对label进行操作，仅作参考。

import os
from shutil import copy, rmtree
import random


def mk_file(file_path: str):
    if os.path.exists(file_path):
        # 如果文件夹存在，则先删除原文件夹在重新创建
        rmtree(file_path)
    os.makedirs(file_path)


def main():
    # 保证随机可复现
    random.seed(0)

    # 将数据集中10%的数据划分到验证集中
    split_rate = 0.1

    # 指向你解压后的数据集文件夹
    cwd = os.getcwd()
    data_root = cwd
    origin_flower_path = os.path.join(data_root, "data")
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)

    classes = [cla for cla in os.listdir(origin_flower_path)
                    if os.path.isdir(os.path.join(origin_flower_path, cla))]

    # 建立保存训练集的文件夹
    train_root = os.path.join(data_root, "train")
    mk_file(train_root)
    for cla in classes:
        # 建立每个类别对应的文件夹
        mk_file(os.path.join(train_root, cla))

    # 建立保存验证集的文件夹
    val_root = os.path.join(data_root, "val")
    mk_file(val_root)
    for cla in classes:
        # 建立每个类别对应的文件夹
        mk_file(os.path.join(val_root, cla))

    for cla in classes:
        cla_path = os.path.join(origin_flower_path, cla)
        images = os.listdir(cla_path)
        num = len(images)
        # 随机采样验证集的索引
        eval_index = random.sample(images, k=int(num*split_rate))
        for index, image in enumerate(images):
            if image in eval_index:
                # 将分配至验证集中的文件复制到相应目录
                image_path = os.path.join(cla_path, image)
                new_path = os.path.join(val_root, cla)
                copy(image_path, new_path)
            else:
                # 将分配至训练集中的文件复制到相应目录
                image_path = os.path.join(cla_path, image)
                new_path = os.path.join(train_root, cla)
                copy(image_path, new_path)
            print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="")  # processing bar
        print()

    print("processing done!")


if __name__ == '__main__':
    main()

3 创建路径文件

import os
import sys



if __name__ == '__main__':
    TrainPath=r'E:\Dataset\SIMD\images\train2017'
    ValPath=r'E:\Dataset\SIMD\images\val2017'

    train_list=os.listdir(TrainPath)
    val_list=os.listdir(ValPath)


    # 将训练集写入train.txt
    with open(os.path.join('E:\Dataset\SIMD','train.txt'), 'a')as f:
        for img in train_list:
            f.write(os.path.join('./images/train2017',img)+'\n')
        print("训练集图片处理完毕")

    # 将验证集写入val.txt
    with open(os.path.join('E:\Dataset\SIMD','val.txt'), 'a')as f:
        for img in val_list:
            f.write(os.path.join('./images/val2017',img) +'\n')
    print("验证集图片处理完毕")

4 配置yaml文件

yolov5的数据配置使用yaml文件，所以需要将上述处理好的数据写入一个yaml文件
格式如下：

path: E:\Dataset\SIMD
train: train.txt  # train images (relative to 'path') 118287 images
val: val.txt  # train images (relative to 'path') 5000 images
#test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Classes
nc: 15  # number of classes
names: ['car', 'truck', 'van', 'long vehicle', 'bus', 'airliner', 'propeller aircraft', 'trainer aircraft', 'chartered aircraft', 'fighter aircraft',
        'others', 'stair truck', 'pushback truck', 'helicopter', 'boat', ]  # class names

5 修改yolov5源码data部分，指定为yaml文件路径

在这里插入图片描述

YOLOv5配置本地目标检测数据集

1 数据集文件相对位置

2.1 xml转txt

2.2 images、labels文件划分

3 创建路径文件

4 配置yaml文件

5 修改yolov5源码data部分，指定为yaml文件路径

猜你喜欢