[Tool] Script for converting VOC-format object-detection annotations to COCO format. Personally tested: no bugs, no errors!

Introduction: When doing object detection, you sometimes need to convert your own VOC-format data into COCO format. The corresponding script is provided here; it has been personally tested and runs without errors!

Instructions for use:

1) voc_clses: Set this to the category names of your own dataset.

2) voc2007xmls: Path of the directory that contains the VOC-format XML annotation files.

3) test_txt: A txt file listing the names of the XML files to convert. (The txt file contains only the XML file names; do not include the path or the file suffix!)

4) json_name: Name of the JSON file to generate.

Well, enough talk; here is the script itself!

# coding=utf-8
import xml.etree.ElementTree as ET
import os
import json

# Edit this list so that it matches the category names of your own dataset. <<----
voc_clses = ['none_of_the_above', 'chepai', 'chedeng', 'chebiao', 'person']


# COCO "categories" section: one record per class. The class's position in
# voc_clses is used as its id, and its own name doubles as the supercategory.
categories = [
    {'supercategory': label, 'name': label, 'id': position}
    for position, label in enumerate(voc_clses)
]

def _parse_number(text):
    """Convert XML numeric text to ``int`` when possible, otherwise ``float``.

    Replaces the former ``eval`` call: annotation files are external input and
    must never be executed as Python code.

    Raises:
        ValueError: If the text is missing or is not a number.
    """
    text = (text or '').strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def getimages(xmlname, id, classes=None):
    """Parse one Pascal VOC annotation file into COCO-style records.

    Args:
        xmlname: Path (or file object) of the VOC XML file; handed to
            ``ET.parse``.
        id: Image id stored in the image record and in every box of this file.
        classes: Optional list of class names; a name's index in the list is
            its category id. Defaults to the module-level ``voc_clses``.

    Returns:
        A tuple ``(images, boxes)``:
        * ``images`` is a dict with ``file_name``, ``width``, ``height``
          (each only when present in the XML) and ``id``.
        * ``boxes`` is a list with one entry per ``<bndbox>``, laid out as
          ``[xmin, ymin, box_width, box_height, image_id, category_id, area]``.

    Raises:
        ValueError: If an ``<object>`` has no class name, the class name is
            not in ``classes``, or a numeric field is not a number.
    """
    if classes is None:
        classes = voc_clses

    root = ET.parse(xmlname).getroot()

    images = {}
    name_node = root.find('filename')
    if name_node is not None:
        images['file_name'] = name_node.text  # e.g. 0001.jpg
    size_node = root.find('size')
    if size_node is not None:
        for key in ('width', 'height'):
            dim_node = size_node.find(key)
            if dim_node is not None:
                # Stored as numbers, not as the raw XML strings.
                images[key] = _parse_number(dim_node.text)
    images['id'] = id

    sig_xml_box = []
    for obj in root.findall('object'):
        # Resolve the class once per object, independent of the order in
        # which <name> and <bndbox> appear inside it.
        cls_node = obj.find('name')
        cls_name = (cls_node.text or '').strip() if cls_node is not None else ''
        if not cls_name:
            raise ValueError('object without a class name in %s' % (xmlname,))
        if cls_name not in classes:
            raise ValueError('unknown class %r in %s' % (cls_name, xmlname))
        # Category ids are 0-based (no "+ 1" offset).
        cat_id = classes.index(cls_name)

        for bnd in obj.findall('bndbox'):
            # A coordinate tag that is absent keeps the default of 0.
            coords = {'xmin': 0, 'ymin': 0, 'xmax': 0, 'ymax': 0}
            for node in bnd:
                if node.tag in coords:
                    coords[node.tag] = _parse_number(node.text)
            box_w = coords['xmax'] - coords['xmin']
            box_h = coords['ymax'] - coords['ymin']
            # Author's heuristic: COCO's "area" is the segmentation area and
            # is therefore smaller than w * h, so 10.0 is subtracted here.
            area = box_w * box_h - 10.0
            sig_xml_box.append(
                [coords['xmin'], coords['ymin'], box_w, box_h, id, cat_id, area]
            )

    return images, sig_xml_box



def txt2list(txtfile):
    """Read a text file and return its non-blank lines as a list of strings.

    Only the line terminator is removed from each line, so the last line is
    returned intact even when the file does not end with a newline. Blank
    lines are skipped so they do not turn into empty names.

    Args:
        txtfile: Path of the text file to read (one entry per line).

    Returns:
        List of the file's lines without their ``\\n`` / ``\\r\\n`` endings.
    """
    entries = []
    # The context manager guarantees the handle is closed after reading.
    with open(txtfile) as handle:
        for line in handle:
            entry = line.rstrip('\r\n')
            if entry.strip():
                entries.append(entry)
    return entries


# Directory that holds the VOC-format XML annotation files (e.g. 'anns').
voc2007xmls = '/data_1/script_file/model_test_script-master/mAp/input/Annotations'

# Text file listing the XML files to convert (e.g. 'voc2007/test.txt').
# Note: it contains bare file names only, without a path or a suffix.
test_txt = '/data_1/script_file/model_test_script-master/mAp/input/ImageSets/Main/test.txt'
xml_names = txt2list(test_txt)
xmls = []
bboxes = []
ann_js = {}
for xml_name in xml_names:
    xmls.append(os.path.join(voc2007xmls, xml_name + '.xml'))
json_name = 'instances_voc2007val.json'   # name of the JSON file to write

# Parse every annotation file; the position in the list becomes the image id.
images = []
for i_index, xml_file in enumerate(xmls):
    print(xml_file)
    image, sig_xml_bbox = getimages(xml_file, i_index)
    images.append(image)
    bboxes.extend(sig_xml_bbox)
ann_js['images'] = images
ann_js['categories'] = categories

# Each box is [x, y, w, h, image_id, category_id, area]; the position in the
# list becomes the annotation id.
annotations = []
for box_ind, box in enumerate(bboxes):
    anno = {}
    anno['image_id'] = box[-3]
    anno['category_id'] = box[-2]
    anno['bbox'] = box[:-3]
    anno['id'] = box_ind
    anno['area'] = box[-1]
    anno['iscrowd'] = 0
    annotations.append(anno)
ann_js['annotations'] = annotations

# Write through a context manager so the output file is flushed and closed.
with open(json_name, 'w') as json_file:
    json.dump(ann_js, json_file, indent=4)  # indent=4 for readable output

 

Published 72 original articles · won praise 17 · views 70000 +

Guess you like

Origin blog.csdn.net/chen1234520nnn/article/details/103937751