COCO to VOC code: convert the json file in coco format to the xml file in voc format


from pycocotools.coco import COCO
import os
from lxml import etree, objectify
import shutil
from tqdm import tqdm
import sys
import argparse


# 将类别名字和id建立索引
def catid2name(coco):
    classes = dict()
    for cat in coco.dataset['categories']:
        classes[cat['id']] = cat['name']
    return classes


# 将标签信息写入xml
def save_anno_to_xml(filename, size, objs, save_path):
    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder("DATA"),
        E.filename(filename),
        E.source(
            E.database("The VOC Database"),
            E.annotation("PASCAL VOC"),
            E.image("flickr")
        ),
        E.size(
            E.width(size['width']),
            E.height(size['height']),
            E.depth(size['depth'])
        ),
        E.segmented(0)
    )
    for obj in objs:
        E2 = objectify.ElementMaker(annotate=False)
        anno_tree2 = E2.object(
            E.name(obj[0]),
            E.pose("Unspecified"),
            E.truncated(0),
            E.difficult(0),
            E.bndbox(
                E.xmin(obj[1]),
                E.ymin(obj[2]),
                E.xmax(obj[3]),
                E.ymax(obj[4])
            )
        )
        anno_tree.append(anno_tree2)
    anno_path = os.path.join(save_path, filename[:-3] + "xml")
    etree.ElementTree(anno_tree).write(anno_path, pretty_print=True)


# 利用cocoAPI从json中加载信息
def load_coco(anno_file, xml_save_path):
    if os.path.exists(xml_save_path):
        shutil.rmtree(xml_save_path)
    os.makedirs(xml_save_path)

    coco = COCO(anno_file)
    classes = catid2name(coco)
    imgIds = coco.getImgIds()
    classesIds = coco.getCatIds()
    for imgId in tqdm(imgIds):
        size = {
    
    }
        img = coco.loadImgs(imgId)[0]
        filename = img['file_name']
        width = img['width']
        height = img['height']
        size['width'] = width
        size['height'] = height
        size['depth'] = 3
        annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
        anns = coco.loadAnns(annIds)
        objs = []
        for ann in anns:
            object_name = classes[ann['category_id']]
            # bbox:[x,y,w,h]
            bbox = list(map(int, ann['bbox']))
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[0] + bbox[2]
            ymax = bbox[1] + bbox[3]
            obj = [object_name, xmin, ymin, xmax, ymax]
            objs.append(obj)
        save_anno_to_xml(filename, size, objs, xml_save_path)


def parseJsonFile(data_dir, xmls_save_path):
    assert os.path.exists(data_dir), "data dir:{} does not exits".format(data_dir)

    if os.path.isdir(data_dir):
        # 这里注意修改
        data_types = ['train', 'val']
        for data_type in data_types:
            ann_file = 'instances_{}.json'.format(data_type)
            xmls_save_path = os.path.join(xmls_save_path, data_type)
            load_coco(ann_file, xmls_save_path)
    elif os.path.isfile(data_dir):
        anno_file = data_dir
        load_coco(anno_file, xmls_save_path)


if __name__ == '__main__':
    """
    脚本说明:
        该脚本用于将coco格式的json文件转换为voc格式的xml文件
    参数说明:
        data_dir:json文件的路径
        xml_save_path:xml输出路径
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--data-dir', type=str, default='./Task/cocome/annotations/instance_train.json', help='json path')
    parser.add_argument('-s', '--save-path', type=str, default='./Task/voc', help='xml save path')
    opt = parser.parse_args()
    print(opt)

    if len(sys.argv) > 1:
        parseJsonFile(opt.data_dir, opt.save_path)
    else:
    # 这里修改 coco的训练集json地址
        data_dir = './Task/cocome/annotations/instance_train.json'
    # 这里改成VOC xml文件的保存路径
        xml_save_path = './Task/voc'
        parseJsonFile(data_dir=data_dir, xmls_save_path=xml_save_path)

https://zhuanlan.zhihu.com/p/461488682

Guess you like

Origin blog.csdn.net/qq_39237205/article/details/129001777