Converting a PASCAL VOC dataset to the YOLO dataset format

1. PASCAL VOC data format

<?xml version='1.0' encoding='utf-8'?>
<annotation verified="no">
  <folder>JPEGImages</folder>
  <filename>2018_06_05_09_06_55_065</filename>
  <path>F:\receive\VOC2007\JPEGImages\2018_06_05_09_06_55_065.jpg</path>
  <source>
    <database>Unknown</database>
  </source>
  <size>
    <width>2048</width>
    <height>1536</height>
    <depth>3</depth>
  </size>
  <segmented>0</segmented>
  <object>
    <name>1</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>530</xmin>
      <ymin>752</ymin>
      <xmax>1498</xmax>
      <ymax>1326</ymax>
    </bndbox>
  </object>
</annotation>

2. yolo data format

Each line has the form: class x_center y_center w h. The center coordinates of the box and its width and height are normalized by dividing them by the width and height of the image.

0 0.947510 0.546224 0.049316 0.050781
0 0.434326 0.586263 0.217285 0.194661

3. The file layout is shown in the figure

Images are stored in the images directory, the original *.xml annotations are stored in the labels_voc directory, and the converted *.txt labels are stored in the labels directory.

classes.names stores the name of each category, one category name per line; the zero-based line number of a name is used as its class index.

The contents of pascalVOC2yolov3.py are as follows:

#coding:utf-8
from __future__ import print_function

import os
import random
import glob
import xml.etree.ElementTree as ET

def xml_reader(filename):
    """Read a PASCAL VOC xml annotation file.

    Returns a ``(width, height, objects)`` tuple. ``width`` and ``height``
    are the integers found under ``<size>``; ``objects`` holds one dict per
    ``<object>`` element with the keys ``'name'`` (the text of ``<name>``)
    and ``'bbox'`` (``[xmin, ymin, xmax, ymax]`` as integers).
    """
    doc = ET.parse(filename)

    size_node = doc.find('size')
    width, height = (int(size_node.find(tag).text)
                     for tag in ('width', 'height'))

    # Order of the corner tags fixes the order of the values in 'bbox'.
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    objects = []
    for node in doc.findall('object'):
        label = node.find('name').text
        box_node = node.find('bndbox')
        corners = [int(box_node.find(tag).text) for tag in corner_tags]
        objects.append({'name': label, 'bbox': corners})
    return width, height, objects


def voc2yolo(filename):
    """Convert one PASCAL VOC xml annotation into a YOLO label file.

    Class indices are the zero-based line numbers of the names listed in
    "classes.names", which is opened relative to the current working
    directory. Every object becomes one ``class cx cy w h`` line, where the
    box centre and box size are divided by the image width and height.

    The output path is the input path with its extension swapped for
    ".txt" and "labels_voc" replaced by "labels"; the output directory is
    created when it does not exist yet.

    Args:
        filename: path of the VOC xml annotation file.

    Raises:
        KeyError: if an object's name is not listed in classes.names.
    """
    classes_dict = {}
    with open("classes.names") as f:
        for idx, line in enumerate(f):
            classes_dict[line.strip()] = idx

    width, height, objects = xml_reader(filename)

    lines = []
    for obj in objects:
        x, y, x2, y2 = obj['bbox']
        class_name = obj['name']
        if class_name not in classes_dict:
            # Same exception type as a plain dict lookup, but say which
            # annotation file is the culprit.
            raise KeyError("class %r in %s is not listed in classes.names"
                           % (class_name, filename))
        label = classes_dict[class_name]
        cx = (x2 + x) * 0.5 / width
        cy = (y2 + y) * 0.5 / height
        w = (x2 - x) * 1. / width
        h = (y2 - y) * 1. / height
        lines.append("%s %.6f %.6f %.6f %.6f\n" % (label, cx, cy, w, h))

    # Swap only the real extension: a chained str.replace would also rewrite
    # ".xml" elsewhere in the path, and would leave e.g. "a.XML" untouched,
    # so the label file could overwrite the annotation itself.
    stem = os.path.splitext(filename)[0]
    txt_name = (stem + ".txt").replace("labels_voc", "labels")

    # The "labels" directory may not exist on a fresh data set.
    out_dir = os.path.dirname(txt_name)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with open(txt_name, "w") as f:
        f.writelines(lines)


def get_image_list(image_dir, suffix=('jpg', 'jpeg', 'JPG', 'JPEG', 'png'),
                   prefix="data/custom/"):
    """Collect the paths of all image files found under ``image_dir``.

    The directory tree is walked recursively. A file is kept when its
    extension (the part after the last dot, compared case-sensitively)
    is one of ``suffix``.

    Args:
        image_dir: root directory to search.
        suffix: accepted file extensions, without the leading dot.
        prefix: string prepended to every returned path.

    Returns:
        A list of strings of the form ``prefix + path + "\\n"``, ready to
        be passed to ``writelines``. An empty list is returned (and a
        message printed) when ``image_dir`` does not exist.
    """
    if not os.path.exists(image_dir):
        print("PATH:%s not exists" % image_dir)
        return []
    imglist = []
    for root, _, files in os.walk(image_dir):
        for filename in files:
            # splitext yields '' for extension-less names, so a file that is
            # literally called "jpg" is not mistaken for an image.
            ext = os.path.splitext(filename)[1][1:]
            if ext and ext in suffix:
                imglist.append(prefix + os.path.join(root, filename) + "\n")
    return imglist


def imglist2file(imglist, valid_count=100):
    """Randomly split ``imglist`` and write "train.txt" and "valid.txt".

    Both files are written to the current working directory. The entries
    are written as-is, so each one is expected to end with a newline.

    Args:
        imglist: list of image path lines. The list itself is not modified;
            a shuffled copy is used.
        valid_count: number of entries held out for validation when the
            list is larger than this value. For smaller lists one fifth of
            the entries (rounded down) is held out instead, so that the
            training list does not end up empty.
    """
    shuffled = list(imglist)
    random.shuffle(shuffled)

    total = len(shuffled)
    if total > valid_count:
        n_valid = max(valid_count, 0)
    else:
        # Too few images to hold out valid_count of them: slicing with
        # [:-valid_count] would leave nothing to train on.
        n_valid = total // 5

    # Split by an explicit index; a negative slice breaks for n_valid == 0.
    split = total - n_valid
    with open("train.txt", "w") as f:
        f.writelines(shuffled[:split])
    with open("valid.txt", "w") as f:
        f.writelines(shuffled[split:])


if __name__ == "__main__":
    # Step 1: convert every VOC annotation under labels_voc/ to a YOLO label.
    for annotation_path in glob.glob("labels_voc/*.xml"):
        voc2yolo(annotation_path)

    # Step 2: list the images under images/ and write train.txt / valid.txt.
    imglist2file(get_image_list("images"))

After the data conversion is completed, the detection model can be trained using the yolo series of algorithms.

PASCAL VOC data set is converted to yolo data set format

1. PASCAL VOC data format

2. yolo data format

3. The file layout is shown in the figure

Guess you like