数据处理:xml标注转换成csv

数据处理:xml标注转换成csv


xml数据:

<annotation>
<filename>8a0004.jpg</filename>
<size>
<width>960</width>
<height>1280</height>
<depth>3</depth>
</size>
<object>
<name>object</name>
<bndbox>
<xmin>314</xmin>
<ymin>366</ymin>
<xmax>427</xmax>
<ymax>430</ymax>
</bndbox>
</object>
<object>
<name>object</name>
<bndbox>
<xmin>805</xmin>
<ymin>9</ymin>
<xmax>960</xmax>
<ymax>277</ymax>
</bndbox>
</object>
</annotation>

想要转换成的csv形式(每一行对应一个bbox),各列依次为:
(图片名称,xmin,ymin,xmax,ymax,class,w,h),其中w、h为整张图片的宽和高:

test_0.jpg,1653,1290,1773,1535,object,2448,3264
test_0.jpg,1485,1221,1648,1544,object,2448,3264
test_0.jpg,1345,1295,1481,1540,object,2448,3264
test_0.jpg,1221,1290,1341,1543,object,2448,3264
test_0.jpg,1079,1332,1216,1537,object,2448,3264
test_0.jpg,927,1285,1069,1531,object,2448,3264
test_0.jpg,679,1279,845,1539,object,2448,3264
test_0.jpg,2187,2536,2276,2764,object,2448,3264
test_0.jpg,232,519,361,774,object,2448,3264
test_0.jpg,5,521,225,774,object,2448,3264
test_1.jpg,457,436,574,526,object,1920,2560
test_1.jpg,537,1949,612,2093,object,1920,2560
test_1.jpg,436,2020,534,2095,object,1920,2560
test_1.jpg,1774,1751,1870,1854,object,1920,2560
test_1.jpg,1679,1759,1769,1852,object,1920,2560
test_1.jpg,1578,1762,1674,1852,object,1920,2560
test_1.jpg,1470,1752,1566,1863,object,1920,2560
test_1.jpg,1403,1747,1465,1872,object,1920,2560
test_1.jpg,1231,1764,1330,1869,object,1920,2560
test_1.jpg,1130,1771,1224,1874,object,1920,2560
test_1.jpg,1028,1771,1121,1879,object,1920,2560
test_1.jpg,924,1774,1017,1872,object,1920,2560
test_1.jpg,847,1839,921,1881,object,1920,2560
test_1.jpg,844,1740,919,1833,object,1920,2560
test_1.jpg,759,1839,843,1890,object,1920,2560
test_1.jpg,764,1771,834,1838,object,1920,2560

转换代码:

# coding: utf-8

import xml.etree.ElementTree as ET
import os

# Map each class name (one per line of the class-list txt) to a consecutive
# integer id, in file order. A name that appears more than once keeps the id
# of its last occurrence.
names_dict = {}
cnt = 0
# Read through a context manager so the file handle is closed right away
# instead of being left open until garbage collection.
with open('/home/hub/wsy/YOLOv3_TensorFlow/misc/new_data/sku.txt', 'r') as sku_file:  # txt listing the classes
    f = sku_file.readlines()
for line in f:
    line = line.strip()
    names_dict[line] = cnt
    cnt += 1

# Root folder of the dataset split to convert.
pic_path = '/home/hub/wsy/YOLOv3_TensorFlow/new_data/val'

# Parallel single-element lists: entry i of each list belongs to the same split.
anno_path = [os.path.join(pic_path, 'Annotations')]  # folder with the xml files
img_path = [os.path.join(pic_path, 'JPEGImages')]  # folder with the images
val_path = [os.path.join(pic_path, 'val.txt')]  # txt listing the image names

def parse_xml(path,file):
    """Write one CSV row per annotated object of an annotation XML file.

    Each row has the form
    ``<image>.jpg,xmin,ymin,xmax,ymax,object,width,height`` and ends with a
    newline.

    Args:
        path: Path of the annotation XML file. The image name is the file's
            base name with its extension replaced by ``.jpg``.
        file: Writable text file object that receives the rows.

    Objects whose ``<difficult>`` element is ``1`` are skipped. The element
    is optional: objects without it are treated as not difficult.
    """
    tree = ET.parse(path)
    img_name = os.path.splitext(os.path.basename(path))[0]
    height = tree.findtext("./size/height")
    width = tree.findtext("./size/width")

    for obj in tree.findall('object'):
        # findtext returns None when <difficult> is missing, so annotations
        # that omit the tag are kept instead of raising AttributeError.
        if obj.findtext('difficult') == '1':
            continue

        bbox = obj.find('bndbox')
        coords = [bbox.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

        # The class column is the fixed label "object"; the <name> text of
        # the annotation is not consulted.
        fields = [img_name + ".jpg"] + coords + ["object", width, height]
        file.write(",".join(str(value) for value in fields) + '\n')


test_cnt = 0
def gen_test_txt(txt_path):
    """Convert every listed annotation to CSV rows in a single output file.

    For each entry of the module-level ``val_path`` list, the image names in
    that txt file are read one per line, and the matching ``<name>.xml`` in
    the same-index ``anno_path`` folder is converted with ``parse_xml``.

    Args:
        txt_path: Path of the output file. It is created, or truncated if it
            already exists.
    """
    # Context managers close both files even if parse_xml raises midway.
    with open(txt_path, 'w+') as out_file:
        for i, path in enumerate(val_path):
            with open(path, 'r') as name_file:
                img_names = name_file.readlines()
            for img_name in img_names:
                # strip() also drops a trailing '\r' left by Windows line endings
                img_name = img_name.strip()
                if not img_name:
                    # A blank line would otherwise produce the path '<dir>/.xml'.
                    continue
                xml_path = anno_path[i] + '/' + img_name + '.xml'
                parse_xml(xml_path, out_file)


# Run the conversion; the CSV rows are written to 'val_2.txt' (a relative
# path, so it resolves against the current working directory).
gen_test_txt('val_2.txt')

csv转xml的博客见:https://blog.csdn.net/poppyty/article/details/115342136

猜你喜欢

转载自blog.csdn.net/poppyty/article/details/115342231