常用数据集转换

voc转yolo
import xml.etree.ElementTree as ET
import os
from os.path import join, splitext
from shutil import copyfile
from tqdm import tqdm

def _has_extension(file_name, extend_name):
    """Return True when the text after the last '.' in file_name is in extend_name."""
    return file_name.split('.')[-1] in extend_name


def get_file_list(root_dir, file_list, extend_name=('jpg',)):
    """Recursively collect files whose extension is listed in extend_name.

    Args:
        root_dir: Directory to walk, or the path of a single file.
        file_list: List that matching paths are appended to (mutated in place).
        extend_name: Accepted extensions, without the leading dot. Matching is
            case-sensitive.

    Returns:
        The same ``file_list`` object that was passed in.
    """
    if os.path.isfile(root_dir):
        # A single file was given: test that file itself. The previous code
        # used loop variables here before they existed and raised NameError.
        if _has_extension(os.path.basename(root_dir), extend_name):
            file_list.append(root_dir)
        return file_list

    for item in os.listdir(root_dir):
        item_path = os.path.join(root_dir, item)
        if os.path.isdir(item_path):
            # Forward the extension filter so sub-directories obey it too.
            get_file_list(item_path, file_list, extend_name)
        elif os.path.isfile(item_path) and _has_extension(item, extend_name):
            file_list.append(item_path)
    return file_list

def clear_dir(dir):
    """Delete every file beneath ``dir``, recursing into sub-directories.

    Only files are removed; the directories themselves (including nested
    ones) are left in place.
    """
    names = os.listdir(dir)
    base = os.path.abspath(dir)
    for name in names:
        target = os.path.join(base, name)
        if not os.path.isfile(target):
            # Anything that is not a regular file is treated as a directory.
            clear_dir(target)
            continue
        os.remove(target)

def convert(size, box):
    """Turn a pixel bounding box into a normalised centre/size box.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each scaled by the reciprocal
        of the matching image dimension.
    """
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]

    x_center = (x_min + x_max) / 2.0 * inv_w
    y_center = (y_min + y_max) / 2.0 * inv_h
    box_w = (x_max - x_min) * inv_w
    box_h = (y_max - y_min) * inv_h
    return (x_center, y_center, box_w, box_h)

def convert_annotation(image_file_name, voc_annotation_dir, yolo_labels_dir):
    """Convert one VOC XML annotation into a YOLO label text file.

    Reads ``<voc_annotation_dir>/<image_file_name>.xml`` and writes
    ``<yolo_labels_dir>/<image_file_name>.txt`` with one line per object:
    ``class_id x_center y_center width height``. Objects whose name is not in
    the module-level ``CLASSES`` list are skipped.

    Args:
        image_file_name: Image base name without extension.
        voc_annotation_dir: Directory holding the VOC XML files (a trailing
            separator is accepted but no longer required).
        yolo_labels_dir: Directory the label file is written to.

    Returns:
        True when the label file was written; False when the annotation
        reports a zero width or height, in which case no label file is
        created.
    """
    xml_path = os.path.join(voc_annotation_dir, f'{image_file_name}.xml')
    # Context manager guarantees the handle is released even if parsing fails.
    with open(xml_path, encoding='UTF-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    if w == 0 or h == 0:
        # Validate before touching the output so a rejected annotation does
        # not leave an empty label file behind.
        print("{} width or height = 0".format(image_file_name))
        return False

    if w <= h:
        print("Waring {} width <= height ...".format(image_file_name))

    label_lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in CLASSES:
            continue
        cls_id = CLASSES.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects the box ordered as (xmin, xmax, ymin, ymax).
        b = (
            float(xmlbox.find('xmin').text),
            float(xmlbox.find('xmax').text),
            float(xmlbox.find('ymin').text),
            float(xmlbox.find('ymax').text),
        )
        bb = convert((w, h), b)
        label_lines.append(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')

    label_path = os.path.join(yolo_labels_dir, f'{image_file_name}.txt')
    with open(label_path, 'w') as out_file:
        out_file.writelines(label_lines)

    return True

def voc2yolo(voc_root_dir, voc_sub_dir, yolo_images_dir, yolo_labels_dir):
    """Convert one VOC sub-dataset into YOLO images/labels folders.

    For every entry in ``<voc_root_dir>/<voc_sub_dir>/JPEGImages/`` the
    matching XML in ``Annotations/`` is converted into
    ``<yolo_labels_dir>/<voc_sub_dir>/`` and the image is copied into
    ``<yolo_images_dir>/<voc_sub_dir>/``. Processing stops at the first image
    whose annotation file is missing or whose conversion fails.
    """
    voc_annotation_dir = join(voc_root_dir, voc_sub_dir, "Annotations/")
    voc_jpegimage_dir = join(voc_root_dir, voc_sub_dir, "JPEGImages/")
    label_out_dir = join(yolo_labels_dir, voc_sub_dir)
    image_out_dir = join(yolo_images_dir, voc_sub_dir)

    for image_name in tqdm(os.listdir(voc_jpegimage_dir)):
        stem = splitext(image_name)[0]
        annotation_path = join(voc_annotation_dir, stem + '.xml')

        if not os.path.exists(annotation_path):
            print("annotation file {} not exist !!!".format(annotation_path))
            break

        # Output folders are created lazily, only once there is something to
        # put in them.
        if not os.path.exists(label_out_dir):
            os.makedirs(label_out_dir)
        converted = convert_annotation(stem, voc_annotation_dir, label_out_dir)
        if not converted:
            print("label convert failed !!!")
            break

        if not os.path.exists(image_out_dir):
            os.makedirs(image_out_dir)
        source_image = join(voc_jpegimage_dir, image_name)
        copyfile(source_image, join(image_out_dir, image_name))

# Object names to export; an object's YOLO class id is its index in this list.
CLASSES = ['dog', 'car', 'cat']

if __name__ == '__main__':
    # Placeholder locations: VOC input root and YOLO output root.
    voc_root_dir = 'voc xml dir'
    yolo_root_dir = 'yolov txt dir'

    yolo_images_dir = os.path.join(yolo_root_dir, 'images')
    yolo_labels_dir = os.path.join(yolo_root_dir, 'labels')

    # Start from empty output folders: create them, or purge existing files.
    for out_dir in (yolo_images_dir, yolo_labels_dir):
        if os.path.exists(out_dir):
            clear_dir(out_dir)
        else:
            os.makedirs(out_dir)

    # Every immediate sub-directory of the VOC root is one dataset to convert.
    voc_sub_dirs = []
    for entry in os.listdir(voc_root_dir):
        if os.path.isdir(os.path.join(voc_root_dir, entry)):
            voc_sub_dirs.append(entry)
    print(voc_sub_dirs)

    for sub_dir in voc_sub_dirs:
        voc2yolo(voc_root_dir, sub_dir, yolo_images_dir, yolo_labels_dir)
猜你喜欢