Table of contents
VOC2012 dataset
Link: https://pan.baidu.com/s/1uV5j6BEkwd8yKLUhaUPzPQ?pwd=aaaa
Extraction code: aaaa
Dataset directory: (10 pictures in total)
Annotations holds the labels (one xml file per picture) for the 10 pictures, the txt file in ImageSets/Main lists the names of the 10 pictures, and JPEGImages holds the 10 pictures themselves.
image category
There are 21 categories (see CLASSES_NAME for the category names). The zip function pairs each category name with a serial number from 0 to 20, and the resulting pairs are converted into a dictionary.
# Class dictionary: maps each class name to its serial number (class_name -> id).
# Names carry no surrounding whitespace, because annotation names are stripped
# before they are looked up in name2id.
CLASSES_NAME = (
    "__background__",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
)
# Ids follow tuple order: "__background__" is 0 and "tvmonitor" is 20.
name2id = dict(zip(CLASSES_NAME, range(len(CLASSES_NAME))))
xml file reading
An xml file is traversed level by level, starting from the root node and descending into its child nodes. ET (xml.etree.ElementTree) is the element tree module: ET.parse reads the label file, and .getroot() returns the root node, called anno here.
tag: the element's name, used to identify what kind of data the element represents
attrib: the element's attributes, saved in dictionary form
text: the element's text content, a string; view the content of a child node through .find(node).text
code
import xml.etree.ElementTree as ET
import os
import numpy as np
# Class dictionary: maps each class name to its serial number (class_name -> id).
# Names carry no surrounding whitespace, because get_xml_label strips the
# annotation name before looking it up in name2id.
CLASSES_NAME = (
    "__background__",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
)
# Ids follow tuple order: "__background__" is 0 and "tvmonitor" is 20.
name2id = dict(zip(CLASSES_NAME, range(len(CLASSES_NAME))))
def get_xml_label(label_path):
    """Read bounding boxes and class ids from an xml annotation file.

    Objects whose ``difficult`` flag is 1 are skipped. Every coordinate is
    shifted by -1 so that pixel positions start at 0.

    Args:
        label_path: Annotation source handed to ``ET.parse`` (a file path or
            a file object).

    Returns:
        A ``(boxes, classes)`` pair. ``boxes`` is a float32 array of shape
        ``(N, 4)`` whose rows are ``(xmin, ymin, xmax, ymax)``; ``classes`` is
        a list with the ``name2id`` id of each kept object, in the same order.

    Raises:
        KeyError: If an object's class name is not a key of ``name2id``.
    """
    # Pixel positions in the file are shifted down by one (Python counts from 0).
    TO_REMOVE = 1
    coord_tags = ("xmin", "ymin", "xmax", "ymax")

    anno = ET.parse(label_path).getroot()  # .getroot() returns the root node
    boxes = []
    classes = []
    for obj in anno.iter("object"):  # every <object> element under the root
        # Skip objects flagged as difficult. A missing or empty <difficult>
        # tag is treated as "not difficult" instead of raising.
        if int(obj.findtext("difficult") or 0) == 1:
            continue

        # Bounding-box coordinates, read as floats and shifted to 0-based.
        _box = obj.find("bndbox")
        boxes.append(
            tuple(float(_box.find(tag).text) - TO_REMOVE for tag in coord_tags)
        )

        # Class name, lower-cased with surrounding whitespace/newlines removed,
        # then mapped to its serial number.
        name = obj.find("name").text.lower().strip()
        classes.append(name2id[name])

    # reshape keeps the (N, 4) layout even when no object was kept.
    boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
    return boxes, classes
# Path template for the annotation files; %s is filled in with the picture name.
label_path = os.path.join(r'D:\VOC2012\Annotations', '%s.xml')

# Parse one sample annotation and show its boxes, then its class ids.
boxes, classes = get_xml_label(label_path % '2008_000007')
print(boxes, classes, sep="\n")