Conversión de datos --- convierta la etiqueta xml de voc al formato json de labelme en lotes

Conversión de datos --- convierta la etiqueta xml de voc al formato json de labelme en lotes


Conversión de datos: convierta la etiqueta xml de voc al formato json de labelme en lotes

  Por necesidades de trabajo, hay que convertir las etiquetas en formato VOC XML disponibles al formato labelme. Tomando como referencia código encontrado en Internet, se escribió una herramienta de conversión de un solo archivo: voc_to_labelme.py.

El formato del conjunto de datos de formato VOC es el siguiente:

xml2json
    Annotations/
    JPEGImages/

inserte la descripción de la imagen aquí

Uso de la herramienta de línea de comandos:

python voc_to_labelme.py 
命令行参数解释:
--voc_dir  VOC数据集目录,默认xml2json
--labelme_version Labelme版本号,默认5.1.1
--labelme_shape   Labelme标记框形状,支持rectangle或polygon,默认rectangle
--image_data      Labelme的imageData节点是否输出数据,默认True
--out_dir         Labelme格式数据集的输出目录,默认labelme

Código fuente de voc_to_labelme.py:

# -*- coding:UTF-8 -*-
'''
VOC格式转换为labelme的json格式
'''

import argparse
import glob
import base64
import logging
import io
import os
import PIL
import PIL.Image
import xml.etree.ElementTree as ET
import json
import shutil
import numpy as np


def parse_opt(known=False):
    """Parse the command-line options of the VOC -> labelme converter.

    Args:
        known: When true, unrecognised command-line arguments are ignored
            (``parse_known_args``) instead of aborting with a usage error.

    Returns:
        argparse.Namespace with the attributes ``voc_dir``,
        ``labelme_version``, ``labelme_shape``, ``image_data`` and
        ``out_dir``.
    """

    def str2bool(value):
        # argparse's ``type=bool`` calls bool() on the raw string, so any
        # non-empty value -- including "False" -- would become True.
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', 'off'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)

    parser = argparse.ArgumentParser(description='xml2json')
    parser.add_argument('--voc_dir', default='xml2json', help='voc directory')
    parser.add_argument('--labelme_version', default='5.1.1', help='labelme version')
    parser.add_argument('--labelme_shape', default='rectangle', help='labelme shape')
    parser.add_argument('--image_data', default=True, type=str2bool, help='wether write image data to xml2json')
    parser.add_argument('--out_dir', default='labelme', help='the path of output directory')
    if known:
        return parser.parse_known_args()[0]
    return parser.parse_args()


def img_data_to_pil(img_data):
    """Open encoded image bytes as a PIL image.

    Args:
        img_data: Bytes of an encoded image file.

    Returns:
        The object returned by ``PIL.Image.open`` on an in-memory buffer
        holding ``img_data``.
    """
    buffer = io.BytesIO()
    buffer.write(img_data)
    return PIL.Image.open(buffer)


def img_data_to_arr(img_data):
    """Decode encoded image bytes into a numpy array.

    Args:
        img_data: Bytes of an encoded image file.

    Returns:
        ``np.array`` built from the PIL image decoded by ``img_data_to_pil``.
    """
    return np.array(img_data_to_pil(img_data))


def img_arr_to_b64(img_arr):
    """Encode an image array as base64-encoded PNG bytes.

    Args:
        img_arr: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        The base64 encoding (bytes) of the PNG rendering of ``img_arr``.
    """
    png_buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()

    # Use encodebytes when the base64 module offers it, otherwise fall back
    # to encodestring.
    if hasattr(base64, "encodebytes"):
        encode = base64.encodebytes
    else:
        encode = base64.encodestring
    return encode(png_bytes)


def apply_exif_orientation(image):
    """Return ``image`` transformed according to its EXIF ``Orientation`` tag.

    Args:
        image: A PIL image. Objects without a ``_getexif`` method, or whose
            ``_getexif()`` returns ``None``, are returned unchanged.

    Returns:
        The mirrored/rotated image for orientation values 2-8, otherwise the
        input image itself.
    """
    try:
        exif = image._getexif()
    except AttributeError:
        # Image classes without EXIF support do not define _getexif().
        exif = None

    if exif is None:
        return image

    # Import the submodules explicitly: the module-level imports visible in
    # this file only load ``PIL`` and ``PIL.Image``, so ``PIL.ExifTags`` and
    # ``PIL.ImageOps`` are not guaranteed to exist as attributes of ``PIL``.
    # Done lazily because they are only needed once EXIF data is present.
    from PIL import ExifTags, Image, ImageOps

    # Re-key the EXIF dict from numeric tag ids to tag names.
    named_exif = {
        ExifTags.TAGS[tag_id]: value
        for tag_id, value in exif.items()
        if tag_id in ExifTags.TAGS
    }
    orientation = named_exif.get("Orientation", None)

    if orientation == 2:
        # left-to-right mirror
        return ImageOps.mirror(image)
    if orientation == 3:
        # rotate 180
        return image.transpose(Image.ROTATE_180)
    if orientation == 4:
        # top-to-bottom mirror
        return ImageOps.flip(image)
    if orientation == 5:
        # top-to-left mirror
        return ImageOps.mirror(image.transpose(Image.ROTATE_270))
    if orientation == 6:
        # rotate 270
        return image.transpose(Image.ROTATE_270)
    if orientation == 7:
        # top-to-right mirror
        return ImageOps.mirror(image.transpose(Image.ROTATE_90))
    if orientation == 8:
        # rotate 90
        return image.transpose(Image.ROTATE_90)

    # Orientation 1, a missing tag, or an unknown value: nothing to do.
    return image


def load_image_file(filename):
    """Load an image file and return it re-encoded after EXIF orientation.

    Args:
        filename: Path of the image file to read.

    Returns:
        Bytes of the image after ``apply_exif_orientation``, encoded as JPEG
        when the file extension is ``.jpg``/``.jpeg`` and as PNG otherwise.
    """
    ext = os.path.splitext(filename)[1].lower()
    # Named save_format so the builtin ``format`` is not shadowed.
    save_format = "JPEG" if ext in (".jpg", ".jpeg") else "PNG"

    # The context manager closes the underlying file handle once the image
    # has been re-encoded; the original left it open.
    with PIL.Image.open(filename) as source:
        # apply orientation to image according to exif
        oriented = apply_exif_orientation(source)
        with io.BytesIO() as buffer:
            oriented.save(buffer, format=save_format)
            return buffer.getvalue()


def read_xml_gtbox_and_label(xml_path):
    """Read the image size and labelled bounding boxes from a VOC XML file.

    Only the tags that are actually returned are read, so annotation files
    that omit ``<depth>`` or ``<pose>`` are accepted.

    Args:
        xml_path: Path of the VOC annotation file.

    Returns:
        A tuple ``(points, width, height)`` where ``points`` is a list of
        ``[label, xmin, ymin, xmax, ymax]`` entries (coordinates as floats),
        one per ``<object>`` element, and ``width``/``height`` are the
        integers read from ``<size>``.
    """
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    points = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            float(bndbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        points.append([label, xmin, ymin, xmax, ymax])
    return points, width, height


def voc_bndbox_to_labelme(opt):
    """Convert every VOC XML annotation under ``opt.voc_dir`` to labelme JSON.

    For each ``Annotations/*.xml`` file, the matching ``JPEGImages/<stem>.jpg``
    image is copied into ``opt.out_dir`` and ``<stem>.json`` is written next
    to it.

    Args:
        opt: Options object providing ``voc_dir``, ``out_dir``,
            ``labelme_version``, ``labelme_shape`` and ``image_data``.
    """
    xml_dir = os.path.join(opt.voc_dir, 'Annotations')
    img_dir = os.path.join(opt.voc_dir, 'JPEGImages')
    os.makedirs(opt.out_dir, exist_ok=True)

    for xml_file in glob.glob(os.path.join(xml_dir, '*.xml')):
        # splitext removes exactly the extension. rstrip('.xml') strips any
        # trailing '.', 'x', 'm' or 'l' characters, so "max.xml" became "ma".
        filename = os.path.splitext(os.path.basename(xml_file))[0]
        img_name = filename + '.jpg'
        img_path = os.path.join(img_dir, img_name)
        points, width, height = read_xml_gtbox_and_label(xml_file)

        shapes = []
        for cls, xmin, ymin, xmax, ymax in points:
            if opt.labelme_shape == 'rectangle':
                # rectangle: two opposite corners
                shape_points = [[xmin, ymin], [xmax, ymax]]
            else:
                # polygon: the four corners, walked around the box
                shape_points = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
            shapes.append({
                'label': cls,
                'points': shape_points,
                'line_color': None,
                'fill_color': None,
                'shape_type': opt.labelme_shape,
                'flags': {},
            })

        if opt.image_data:
            # Embed the raw image file, base64-encoded, in the JSON.
            with open(img_path, "rb") as f:
                image_data = base64.b64encode(f.read()).decode("utf-8")
        else:
            image_data = None

        json_str = {
            'version': opt.labelme_version,
            'flags': {},
            'shapes': shapes,
            'imagePath': img_name,
            'imageData': image_data,
            'imageHeight': height,
            'imageWidth': width,
            'lineColor': [0, 255, 0, 128],
            'fillColor': [255, 0, 0, 128],
        }

        # imagePath is a bare file name, so the image is copied to sit beside
        # the JSON file.
        shutil.copy(img_path, os.path.join(opt.out_dir, img_name))
        json_file = os.path.join(opt.out_dir, filename + '.json')
        with open(json_file, 'w') as f:
            json.dump(json_str, f, indent=2)
        # Report success only after the file has actually been written.
        print(json_file, "convert success")


def main(opt):
    """Run the VOC -> labelme batch conversion with the given options."""
    voc_bndbox_to_labelme(opt)


if __name__ == '__main__':
    # Script entry point: parse the command line, then convert.
    main(parse_opt())

inserte la descripción de la imagen aquí

otro:

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET  # 读取xml。
import os
from PIL import Image, ImageDraw, ImageFont
import os
import json


def parse_rec(rootPath, file, out_dir="xml2json/json"):
    """Convert one VOC XML annotation file into a labelme JSON file.

    Args:
        rootPath: Directory containing the XML file.
        file: File name of the XML annotation inside ``rootPath``.
        out_dir: Directory the JSON file is written to; created when it does
            not exist. Defaults to the previously hard-coded location.

    The JSON file is named after the XML file, with a ``.json`` extension.
    """
    pathFile = os.path.join(rootPath, file)
    root = ET.parse(pathFile).getroot()  # parse the XML annotation

    # <folder> and <path> are only echoed to the console, so a file lacking
    # them must not abort the conversion; findtext yields None instead.
    folder = root.findtext('folder')
    imageName = root.find('filename').text
    path = root.findtext('path')
    print(folder, imageName, path)

    # Look the values up by tag name rather than by child position, so the
    # result does not depend on the order of the children inside <size>.
    sz = root.find('size')
    width = int(sz.find('width').text)
    height = int(sz.find('height').text)
    print(width, height)

    data = {
        'imagePath': imageName,
        'flags': {},
        'imageWidth': width,
        'imageHeight': height,
        'imageData': None,
        'version': "4.5.6",
        'shapes': [],
    }

    for child in root.findall('object'):  # every annotated box in the image
        sub = child.find('bndbox')  # coordinates of the box
        xmin = float(sub.find('xmin').text)
        ymin = float(sub.find('ymin').text)
        xmax = float(sub.find('xmax').text)
        ymax = float(sub.find('ymax').text)

        # The two extra corners (marked "added") are emitted on purpose:
        #
        #         xmin,ymin --------------- xmax,ymin (added)
        #                   -             -
        #                   -             -
        # (added) xmin,ymax --------------- xmax,ymax
        #
        # NOTE(review): labelme "rectangle" shapes normally carry only two
        # points -- confirm the consumer of these files expects four.
        points = [[xmin, ymin], [xmin, ymax], [xmax, ymin], [xmax, ymax]]
        itemData = {
            'points': points,
            # labelme's per-shape key is "flags"; the original wrote "flag".
            'flags': {},
            'group_id': None,
            'shape_type': "rectangle",
            'label': child.find("name").text,
        }
        data["shapes"].append(itemData)

    stem = os.path.splitext(file)[0]
    jsonName = ".".join([stem, "json"])
    print(rootPath, jsonName)

    # Create the output directory first; open() would otherwise fail with
    # FileNotFoundError on a fresh dataset.
    os.makedirs(out_dir, exist_ok=True)
    jsonPath = os.path.join(out_dir, jsonName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    print("加载入文件完成...")


if __name__ == '__main__':
    # Walk the dataset directory and convert every XML annotation found.
    for dirpath, _subdirs, names in os.walk("xml2json"):
        for name in names:
            if not name.endswith(".xml"):
                continue
            parse_rec(dirpath, name)

おすすめ

転載: blog.csdn.net/m0_46825740/article/details/129671377