小工具:检查PASCAL VOC数据

# coding: utf-8
# Check the format of a PASCAL VOC dataset
# and draw the annotated bounding boxes onto the images.

import os
try:
    # cElementTree was removed in Python 3.9; keep it for old interpreters
    # and fall back to the standard ElementTree module otherwise.
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

import cv2

folder = 'C:/Users/summer/Desktop/transformer/transformer1/'

# Every label seen in the kept objects; filled in when run as a script.
names = []

# Child tags of <bndbox>, in the order used for the box tuples.
BOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def image_name_for(xml_name):
    """Return the JPEG file name that belongs to an annotation file name.

    Only the extension is swapped, so an "xml" substring elsewhere in the
    name (e.g. ``xml_001.xml``) is left untouched.
    """
    return os.path.splitext(xml_name)[0] + '.jpg'


def clean_annotation(tree):
    """Normalise one parsed VOC annotation in place.

    The ``size/depth`` field (when present) is forced to ``'3'``, box
    coordinates are rewritten as integers, and objects whose box is at most
    one pixel wide or high are removed from the tree.

    Args:
        tree: an ``ElementTree`` whose root holds the ``object`` elements.

    Returns:
        ``(boxes, labels, dropped)`` where ``boxes`` is a list of
        ``(xmin, ymin, xmax, ymax)`` int tuples for the kept objects,
        ``labels`` is the matching list of ``name`` texts and ``dropped`` is
        the number of removed too-small objects; or ``None`` when the
        annotation is unusable (no objects, an object without a ``name``
        element, or no object left after dropping the too-small ones).
    """
    root = tree.getroot()
    objs = root.findall('object')
    if not objs:
        return None

    depth = root.find('size/depth')
    if depth is not None:
        depth.text = '3'

    boxes = []
    labels = []
    dropped = 0
    for obj in objs:
        name = obj.find('name')
        if name is None:
            # An unlabeled object makes the whole annotation unusable.
            return None

        bndbox = obj.find('bndbox')
        box = tuple(int(float(bndbox.find(tag).text)) for tag in BOX_TAGS)
        xmin, ymin, xmax, ymax = box

        # Remove the object if its bbox is too small; it is neither drawn
        # nor counted in the label summary.
        if xmax - xmin <= 1 or ymax - ymin <= 1:
            root.remove(obj)
            dropped += 1
            continue

        # Store the coordinates back as plain integers.
        for tag, value in zip(BOX_TAGS, box):
            bndbox.find(tag).text = str(value)

        boxes.append(box)
        labels.append(name.text)

    if not boxes:
        return None
    return boxes, labels, dropped


def check_dataset(root_dir=folder):
    """Clean every annotation under ``root_dir`` and render its boxes.

    Reads ``Annotations/*.xml`` and the matching ``JPEG/*.jpg``, deletes
    unusable annotation files, rewrites the cleaned ones in place and saves
    a copy of each image with its boxes drawn in red to ``bbox_JPEG/``.

    Args:
        root_dir: dataset directory containing ``Annotations`` and ``JPEG``.

    Returns:
        The list of labels of all kept objects (with repetitions).
    """
    ann_dir = os.path.join(root_dir, 'Annotations')
    img_dir = os.path.join(root_dir, 'JPEG')
    out_dir = os.path.join(root_dir, 'bbox_JPEG')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    seen = []
    for xml_name in sorted(os.listdir(ann_dir)):
        if not xml_name.lower().endswith('.xml'):
            continue

        xml_path = os.path.join(ann_dir, xml_name)
        tree = ET.parse(xml_path)
        cleaned = clean_annotation(tree)
        if cleaned is None:
            # Delete and move on; nothing is written back, so the file
            # stays deleted.
            os.remove(xml_path)
            print(xml_name)
            continue

        boxes, labels, dropped = cleaned
        if dropped:
            print(xml_name)
        seen.extend(labels)
        tree.write(xml_path)

        img_name = image_name_for(xml_name)
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:
            # imread signals a missing/unreadable file by returning None.
            print('cannot read image for ' + xml_name)
            continue

        # Draw bbox (cv2.rectangle modifies img in place).
        for xmin, ymin, xmax, ymax in boxes:
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 3)
        cv2.imwrite(os.path.join(out_dir, img_name), img)

    return seen


if __name__ == '__main__':
    names.extend(check_dataset(folder))
    print(set(names))

猜你喜欢

转载自blog.csdn.net/summermaoz/article/details/78982611