PASCAL VOC标注数据解析

PASCAL VOC标注数据解析

1 数据格式

PASCAL VOC标注数据文件(.xml)内容:
(原文此处为一张展示 .xml 标注文件内容的截图,图片未能随文本一并提取。)

2 实现

文件:pascal_voc_label_parser.py

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@file    :   pascal_voc_label_parser.py
@time    :   2020/01/06 10:09:43
@author  :   XiaoY
@version :   1.0
@contact :   [email protected]
@license :   (c)copyright XiaoY
@desc    :   PASCAL VOC
"""

__author__ = "XiaoY"

import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import os

from .config import ANNO_DIR, BBOX_COLUMNS, IMG_DIR

class PascalVocLabels(object):
    """Parser for PASCAL VOC annotation (.xml) files under a dataset root.

    An instance is callable: ``parser(filename)`` parses the annotation file
    ``filename`` located in the annotation directory and returns the image
    shape, the ``segmented`` flag and a table of bounding boxes.
    """

    def __init__(self, root):
        """Remember the dataset root and derive its sub-directories.

        Args:
            root: path of the dataset root; the annotation and image
                directories are ``root`` joined with ``ANNO_DIR`` and
                ``IMG_DIR`` respectively.
        """
        self._root = root
        self._anno_dir = os.path.join(root, ANNO_DIR)
        self._img_dir = os.path.join(root, IMG_DIR)

    def _anno_parser(self, filename):
        """Parse a single annotation file.

        Args:
            filename: name of the .xml file inside the annotation directory.

        Returns:
            A tuple ``(img_shape, segmented, bboxes)`` where

            * ``img_shape`` is ``(height, width, depth)`` as floats,
            * ``segmented`` is the integer value of the ``<segmented>`` tag,
            * ``bboxes`` is a ``pandas.DataFrame`` with columns
              ``BBOX_COLUMNS`` and one row per ``<object>`` element
              (empty when the image has no objects).

        Raises:
            AttributeError: if a tag read here is missing from the file
                (``find`` returns ``None``).
            ValueError: if a numeric tag does not hold a number.
        """
        # element tree
        tree = ET.parse(os.path.join(self._anno_dir, filename))
        root = tree.getroot()

        # segmented
        segmented = int(root.find("segmented").text)

        # image shape
        img_size = root.find("size")
        img_width = float(img_size.find("width").text)
        img_height = float(img_size.find("height").text)
        img_channel = float(img_size.find("depth").text)
        img_shape = (img_height, img_width, img_channel)

        # Rows are gathered in a plain list and turned into a DataFrame once
        # at the end: DataFrame.append was removed in pandas 2.0, and calling
        # it per object copied the whole frame on every iteration.
        rows = []
        # traverse objects in an image
        for obj in root.iter("object"):

            # the name of the object
            name = obj.find("name").text

            # pose
            pose = obj.find("pose").text

            # Indicates that the bounding box specified for the object does not
            # correspond to the full extent of the object.
            truncated = int(obj.find("truncated").text)

            # An object is marked as difficult when the object is considered
            # difficult to recognize.
            difficult = int(obj.find("difficult").text)

            # Axis-aligned rectangle specifying the extent of the object visible
            # in the image. Every coordinate is shifted by one (presumably to
            # turn 1-based pixel indices into 0-based ones -- confirm).
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text) - 1
            ymin = float(bbox.find("ymin").text) - 1
            xmax = float(bbox.find("xmax").text) - 1
            ymax = float(bbox.find("ymax").text) - 1

            rows.append({
                "name": name, "pose": pose,
                "truncated": truncated, "difficult": difficult,
                "xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax,
            })

        bboxes = pd.DataFrame(rows, columns=BBOX_COLUMNS)

        return img_shape, segmented, bboxes

    def __call__(self, filename):
        """Parse ``filename``; shorthand for ``_anno_parser(filename)``."""
        return self._anno_parser(filename)

3 测试

from pascal_voc import PascalVocLabels

if __name__ == "__main__":

    # Parse one annotation file from the dataset rooted at ./pascal_voc
    # and print everything the parser returns.
    voc_labels = PascalVocLabels("./pascal_voc")
    shape, seg_flag, boxes = voc_labels("000005.xml")

    print("image shape: {0[0]} x {0[1]} x {0[2]}".format(shape))
    # The flag is an integer; any non-zero value is reported as True.
    print("segmented: {}".format(seg_flag != 0))
    print("bounding boxes:")
    print(boxes)

image shape: 375.0 x 500.0 x 3.0
segmented: False
bounding boxes:
    name         pose truncated difficult   xmin   ymin   xmax   ymax
0  chair         Rear         0         0  262.0  210.0  323.0  338.0
1  chair  Unspecified         0         0  164.0  263.0  252.0  371.0
2  chair  Unspecified         1         1    4.0  243.0   66.0  373.0
3  chair  Unspecified         0         0  240.0  193.0  294.0  298.0
4  chair  Unspecified         1         1  276.0  185.0  311.0  219.0
发布了103 篇原创文章 · 获赞 162 · 访问量 5万+

猜你喜欢

转载自blog.csdn.net/zhaoyin214/article/details/103859168
今日推荐