Conversión de datos: convertir por lotes las etiquetas XML de VOC al formato JSON de labelme
Por necesidades de trabajo, tuve que convertir las etiquetas en formato VOC XML que tenía a mano al formato de labelme. Tomando como referencia código encontrado en la red, escribí una herramienta de conversión de un solo archivo: voc_to_labelme.py.
La estructura de directorios del conjunto de datos en formato VOC es la siguiente:
xml2json/
    Annotations/
    JPEGImages/
Uso de la herramienta de línea de comandos:
python voc_to_labelme.py
命令行参数解释:
--voc_dir VOC数据集目录,默认xml2json
--labelme_version Labelme版本号,默认5.1.1
--labelme_shape Labelme标记框形状,支持rectangle或polygon,默认rectangle
--image_data Labelme的imageData节点是否输出数据,默认True
--out_dir Labelme格式数据集的输出目录,默认labelme
Código fuente de voc_to_labelme.py:
# -*- coding:UTF-8 -*-
'''
VOC格式转换为labelme的json格式
'''
import argparse
import glob
import base64
import logging
import io
import os
import PIL
import PIL.Image
import xml.etree.ElementTree as ET
import json
import shutil
import numpy as np
def _str_to_bool(value):
    """Convert a command-line token such as ``true``/``False``/``0`` to a bool.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def parse_opt(known=False):
    """Parse the command-line options of the VOC -> labelme converter.

    Args:
        known: when true, unrecognised command-line arguments are ignored
            (``parse_known_args``) instead of aborting with a usage error.

    Returns:
        argparse.Namespace with the attributes ``voc_dir``,
        ``labelme_version``, ``labelme_shape``, ``image_data`` and
        ``out_dir``.
    """
    parser = argparse.ArgumentParser(description='xml2json')
    parser.add_argument('--voc_dir', default='xml2json', help='voc directory')
    parser.add_argument('--labelme_version', default='5.1.1', help='labelme version')
    parser.add_argument('--labelme_shape', default='rectangle', help='labelme shape')
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so the flag could never be switched off from the shell.
    parser.add_argument('--image_data', default=True, type=_str_to_bool,
                        help='wether write image data to xml2json')
    parser.add_argument('--out_dir', default='labelme', help='the path of output directory')
    if known:
        return parser.parse_known_args()[0]
    return parser.parse_args()
def img_data_to_pil(img_data):
    """Open encoded image bytes (``img_data``) as a PIL image.

    The bytes are written into an in-memory buffer which is handed to
    ``PIL.Image.open``; the resulting image object is returned.
    """
    buffer = io.BytesIO()
    buffer.write(img_data)
    return PIL.Image.open(buffer)
def img_data_to_arr(img_data):
    """Decode encoded image bytes and return them as a numpy array."""
    return np.array(img_data_to_pil(img_data))
def img_arr_to_b64(img_arr):
    """Encode an image array as PNG and return the base64 bytes of that PNG."""
    png_buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()
    # Use ``encodebytes`` when the base64 module offers it, otherwise fall
    # back to the older ``encodestring`` name.
    encoder = (
        base64.encodebytes
        if hasattr(base64, "encodebytes")
        else base64.encodestring
    )
    return encoder(png_bytes)
def apply_exif_orientation(image):
    """Return ``image`` transformed according to its EXIF ``Orientation`` tag.

    Args:
        image: an object exposing ``_getexif()`` (as PIL JPEG images do).
            Objects without that method, or whose ``_getexif()`` returns
            ``None``, are returned unchanged.

    Returns:
        The same image when no transformation is needed (no EXIF data, no
        orientation tag, orientation 1 or an unknown value), otherwise a new
        mirrored/transposed image.
    """
    try:
        exif = image._getexif()
    except AttributeError:
        exif = None
    if exif is None:
        return image

    # The file only imports ``PIL`` and ``PIL.Image``; ``PIL.ExifTags`` and
    # ``PIL.ImageOps`` are submodules that must be imported explicitly before
    # they can be reached as attributes of the package.
    import PIL.ExifTags
    import PIL.Image
    import PIL.ImageOps

    # Re-key the raw EXIF dict by tag name, dropping unknown numeric tags.
    named_exif = {
        PIL.ExifTags.TAGS[tag_id]: value
        for tag_id, value in exif.items()
        if tag_id in PIL.ExifTags.TAGS
    }
    orientation = named_exif.get("Orientation", None)

    if orientation == 2:
        # left-to-right mirror
        return PIL.ImageOps.mirror(image)
    if orientation == 3:
        # rotate 180
        return image.transpose(PIL.Image.ROTATE_180)
    if orientation == 4:
        # top-to-bottom mirror
        return PIL.ImageOps.flip(image)
    if orientation == 5:
        # transpose with ROTATE_270, then mirror left-to-right
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_270))
    if orientation == 6:
        # rotate 270
        return image.transpose(PIL.Image.ROTATE_270)
    if orientation == 7:
        # transpose with ROTATE_90, then mirror left-to-right
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_90))
    if orientation == 8:
        # rotate 90
        return image.transpose(PIL.Image.ROTATE_90)
    # orientation == 1, missing, or not a known value: nothing to do
    return image
def load_image_file(filename):
    """Load an image file and return its re-encoded bytes.

    The image is opened with PIL, passed through
    ``apply_exif_orientation`` and saved again into memory: as JPEG when
    the file extension is ``.jpg``/``.jpeg`` (case-insensitive), otherwise
    as PNG.

    Args:
        filename: path of the image file to read.

    Returns:
        bytes of the re-encoded image.
    """
    extension = os.path.splitext(filename)[1].lower()
    image_format = "JPEG" if extension in (".jpg", ".jpeg") else "PNG"
    # Open the image in a ``with`` block so the underlying file handle is
    # released deterministically, even if saving raises.
    with PIL.Image.open(filename) as image_pil:
        # apply orientation to image according to exif
        oriented = apply_exif_orientation(image_pil)
        with io.BytesIO() as buffer:
            oriented.save(buffer, format=image_format)
            return buffer.getvalue()
def read_xml_gtbox_and_label(xml_path):
    """Read the image size and all labelled boxes from a VOC XML annotation.

    Only the elements that are actually returned are read, so annotations
    lacking the optional ``<pose>`` or ``<depth>`` elements are accepted.

    Args:
        xml_path: path (or file object) of the VOC XML annotation.

    Returns:
        A tuple ``(points, width, height)`` where ``points`` is a list of
        ``[label, xmin, ymin, xmax, ymax]`` entries (coordinates as floats)
        and ``width``/``height`` are the integers from ``<size>``.
    """
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    points = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        xmlbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            float(xmlbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        points.append([cls, xmin, ymin, xmax, ymax])
    return points, width, height
def voc_bndbox_to_labelme(opt):
    """Convert every VOC XML annotation of a dataset to a labelme JSON file.

    For each ``*.xml`` in ``<voc_dir>/Annotations`` the matching
    ``<stem>.jpg`` from ``<voc_dir>/JPEGImages`` is copied to ``out_dir``
    and ``<stem>.json`` is written next to it.

    Args:
        opt: options object providing ``voc_dir``, ``out_dir``,
            ``labelme_version``, ``labelme_shape`` (``'rectangle'`` emits
            two corner points, any other value four) and ``image_data``
            (embed the base64-encoded image file when true).
    """
    xml_dir = os.path.join(opt.voc_dir, 'Annotations')
    img_dir = os.path.join(opt.voc_dir, 'JPEGImages')
    os.makedirs(opt.out_dir, exist_ok=True)

    for xml_file in glob.glob(os.path.join(xml_dir, '*.xml')):
        # ``splitext`` removes exactly the extension.  ``rstrip('.xml')``
        # would strip any trailing '.', 'x', 'm' or 'l' characters and turn
        # e.g. "box.xml" into "bo".
        filename = os.path.splitext(os.path.basename(xml_file))[0]
        img_name = filename + '.jpg'
        img_path = os.path.join(img_dir, img_name)
        points, width, height = read_xml_gtbox_and_label(xml_file)

        shapes = []
        for cls, xmin, ymin, xmax, ymax in points:
            if opt.labelme_shape == 'rectangle':
                # two points: top-left and bottom-right corner
                corners = [[xmin, ymin], [xmax, ymax]]
            else:
                # four points, walking around the box perimeter
                corners = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
            shapes.append({
                'label': cls,
                'points': corners,
                'line_color': None,
                'fill_color': None,
                'shape_type': opt.labelme_shape,
                'flags': {},
            })

        if opt.image_data:
            with open(img_path, "rb") as f:
                image_data = base64.b64encode(f.read()).decode("utf-8")
        else:
            image_data = None

        json_str = {
            'version': opt.labelme_version,
            'flags': {},
            'shapes': shapes,
            'imagePath': img_name,
            'imageData': image_data,
            'imageHeight': height,
            'imageWidth': width,
            'lineColor': [0, 255, 0, 128],
            'fillColor': [255, 0, 0, 128],
        }

        shutil.copy(img_path, os.path.join(opt.out_dir, img_name))
        json_file = os.path.join(opt.out_dir, filename + '.json')
        with open(json_file, 'w') as f:
            json.dump(json_str, f, indent=2)
        # Report success only once the file has actually been written.
        print(json_file, "convert success")
def main(opt):
    """Entry point: run the VOC -> labelme conversion with options ``opt``."""
    voc_bndbox_to_labelme(opt)
# Script entry point: parse the command-line options, then run the conversion.
if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
Otra versión del script (alternativa):
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET # 读取xml。
import os
from PIL import Image, ImageDraw, ImageFont
import os
import json
def _child_text(parent, tag):
    """Return the text of child ``tag`` of ``parent``, or None if it is absent."""
    node = parent.find(tag)
    return None if node is None else node.text


def parse_rec(rootPath, file, out_dir="xml2json/json"):
    """Convert one VOC XML annotation into a labelme-style JSON file.

    Args:
        rootPath: directory that contains the XML file.
        file: file name of the XML annotation inside ``rootPath``.
        out_dir: directory receiving ``<stem>.json``; it is created when
            missing.  Defaults to the previously hard-coded
            ``"xml2json/json"``.
    """
    pathFile = os.path.join(rootPath, file)
    root = ET.parse(pathFile).getroot()  # parse the XML annotation

    # <folder> and <path> are only echoed for progress output, so a file
    # that omits them must not abort the conversion.
    folder = _child_text(root, 'folder')
    filename = root.find('filename').text
    path = _child_text(root, 'path')
    print(folder, filename, path)

    # Look the size fields up by tag name instead of relying on child order.
    sz = root.find('size')
    width = int(sz.find('width').text)
    height = int(sz.find('height').text)
    print(width, height)

    data = {}
    data['imagePath'] = filename
    data['flags'] = {}
    data['imageWidth'] = width
    data['imageHeight'] = height
    data['imageData'] = None
    data['version'] = "4.5.6"
    data["shapes"] = []

    for child in root.findall('object'):  # every annotated box in the image
        sub = child.find('bndbox')
        # Read the coordinates by tag name; child order is not guaranteed.
        xmin = float(sub.find('xmin').text)
        ymin = float(sub.find('ymin').text)
        xmax = float(sub.find('xmax').text)
        ymax = float(sub.find('ymax').text)
        # All four corners are written: top-left, bottom-left, top-right,
        # bottom-right.
        # NOTE(review): four points are emitted with shape_type "rectangle";
        # labelme rectangles normally carry two points -- confirm that the
        # consumer of these files expects four.
        points = [[xmin, ymin], [xmin, ymax], [xmax, ymin], [xmax, ymax]]
        itemData = {'points': list(points)}
        itemData["flag"] = {}
        itemData["group_id"] = None
        itemData["shape_type"] = "rectangle"
        itemData["label"] = child.find("name").text
        data["shapes"].append(itemData)

    stem = os.path.splitext(file)[0]
    jsonName = ".".join([stem, "json"])
    print(rootPath, jsonName)
    # The output directory may not exist yet; open() would fail without it.
    os.makedirs(out_dir, exist_ok=True)
    jsonPath = os.path.join(out_dir, jsonName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    print("加载入文件完成...")
if __name__ == '__main__':
    # Walk the "xml2json" tree and convert every file whose name ends in
    # ".xml"; all other files are skipped.
    path = "xml2json"
    for root, dirs, files in os.walk(path):
        for file in files:
            if not file.endswith(".xml"):
                continue
            parse_rec(root, file)