Using Python to create, extend, and modify annotation files in XML format

Datasets used for deep-learning object detection often store their annotations as XML files (for example, datasets in the VOC format use XML annotation files). When we create XML annotation files ourselves, or convert other kinds of annotation files (such as txt or json) to XML, we need to know how to work with this format.

Below I will list the implementation code for creating, adding, and modifying functions of xml type files using python with the help of lxml.

1. Create an annotation file in xml format

The following shows the code for creating an XML file and the content of the file generated by running it. Here we simply create an XML file, set the root node, add several named child nodes to the root node, and set the text contained in each node.

The code is as follows:

from lxml.etree import Element, tostring, parse
from lxml.etree import SubElement as subElement
def xml_construct(save_path,folder,filename,path,width=800,height=600,depth = 3,segmented=0):
    """Create a new XML annotation file with a VOC-style skeleton.

    The file contains an ``<annotation>`` root with ``<folder>``,
    ``<filename>``, ``<path>``, ``<size>`` (holding ``<width>``,
    ``<height>`` and ``<depth>``) and ``<segmented>`` children. No
    ``<object>`` entries are written.

    Args:
        save_path: Destination file; it is created or overwritten.
        folder: Text for the ``<folder>`` node.
        filename: Text for the ``<filename>`` node.
        path: Text for the ``<path>`` node.
        width: Value for ``<size>/<width>``; passed through ``int()``.
        height: Value for ``<size>/<height>``; passed through ``int()``.
        depth: Value for ``<size>/<depth>``; passed through ``int()``.
        segmented: Value for ``<segmented>``; passed through ``int()``.

    Returns:
        None.
    """
    node_root = Element('annotation')  # root node

    # Plain-text children of the root, created in document order.
    for tag, text in (('folder', folder), ('filename', filename), ('path', path)):
        subElement(node_root, tag).text = text

    # Image dimensions live under <size>; node text has to be a string,
    # so every numeric value is normalised with int() and then stringified.
    node_size = subElement(node_root, 'size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        subElement(node_size, tag).text = str(int(value))

    subElement(node_root, 'segmented').text = str(int(segmented))

    # Serialise first, then open the file: tostring() returns bytes, hence 'wb'.
    xml = tostring(node_root, pretty_print=True)
    with open(save_path, 'wb') as f:
        f.write(xml)

# ---------- Try out the function above: create an XML file named test.xml ----------
xml_construct(
    save_path='test.xml',
    folder='test',
    filename='test',
    path='test',
    width=1600,
    height=1200,
)

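Running the above code produces a test.xml file with the following content:

<annotation>
  <folder>test</folder>
  <filename>test</filename>
  <path>test</path>
  <size>
    <width>1600</width>
    <height>1200</height>
    <depth>3</depth>
  </size>
  <segmented>0</segmented>
</annotation>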

2. Add nodes to existing xml files

Here we show how to add nodes to an existing XML file, using the test.xml generated in the previous step as the file to modify.

The code is as follows:
 

from lxml.etree import Element, tostring, parse
from lxml.etree import SubElement as subElement
def xml_add_object(xml_path,name,id,xmin,ymin,xmax,ymax,pose = 'Unspecified',truncated=0,difficult=0):
    """Append one ``<object>`` entry to an existing XML annotation file.

    The file at ``xml_path`` is parsed, a new ``<object>`` node is added as
    the last child of the root, and the file is overwritten with the result.
    The new node holds ``<name>``, ``<id>``, ``<pose>``, ``<truncated>``,
    ``<difficult>`` and a ``<bndbox>`` with ``<xmin>``, ``<ymin>``,
    ``<xmax>`` and ``<ymax>``.

    Args:
        xml_path: Annotation file to read and then overwrite.
        name: Text for ``<name>``.
        id: Value for ``<id>``; passed through ``int()``. The parameter name
            shadows the builtin but is kept so keyword callers keep working.
        xmin, ymin, xmax, ymax: Bounding-box values; each passed through
            ``int()``.
        pose: Text for ``<pose>``.
        truncated: Value for ``<truncated>``; passed through ``int()``.
        difficult: Value for ``<difficult>``; passed through ``int()``.

    Returns:
        None.
    """
    # Local import keeps this function self-contained; XMLParser is part of
    # lxml.etree, which this snippet already depends on.
    from lxml.etree import XMLParser

    # Discard whitespace-only text between elements while parsing. If the old
    # indentation is kept, the serialiser treats the root as mixed content and
    # pretty_print leaves the newly appended <object> subtree on a single line.
    parser = XMLParser(remove_blank_text=True)
    tree = parse(xml_path, parser=parser)
    node_root = tree.getroot()

    node_object = subElement(node_root, 'object')
    subElement(node_object, 'name').text = name
    # Node text has to be a string, so numeric values are normalised with
    # int() and then stringified.
    subElement(node_object, 'id').text = str(int(id))
    subElement(node_object, 'pose').text = pose
    subElement(node_object, 'truncated').text = str(int(truncated))
    subElement(node_object, 'difficult').text = str(int(difficult))

    # Bounding-box coordinates of the object.
    node_bndbox = subElement(node_object, 'bndbox')
    for tag, value in (('xmin', xmin), ('ymin', ymin), ('xmax', xmax), ('ymax', ymax)):
        subElement(node_bndbox, tag).text = str(int(value))

    # Serialise before opening the file so a serialisation error cannot
    # truncate the existing annotation.
    xml = tostring(node_root, pretty_print=True)
    with open(xml_path, 'wb') as f:
        f.write(xml)

# ---------- Experiment: add an object node to test.xml ----------
xml_add_object(
    xml_path='test.xml',
    name='test_name',
    id=999,
    xmin=1,
    ymin=2,
    xmax=3,
    ymax=4,
)

After running the above code, the corresponding child nodes are added to the original test.xml file. The result is as follows:

3. Modify the information of a specific node in an existing xml file

If we only want to modify specific nodes in the XML file, we can do so with the code below. Here we again operate on the test.xml file from above and change the width and height nodes under the size node:

from lxml.etree import Element, tostring, parse
from lxml.etree import SubElement as subElement
def modify_size(xml_path,size_width=800,size_height=600):
    """Rewrite the text of every ``width`` and ``height`` element in an XML file.

    The file at ``xml_path`` is parsed, each matching element gets the new
    value as its text, and the file is overwritten with the result.

    Args:
        xml_path: XML file to read and then overwrite.
        size_width: New value for ``width`` elements; passed through ``int()``.
        size_height: New value for ``height`` elements; passed through ``int()``.

    Returns:
        None.
    """
    document = parse(xml_path)

    # '//tag' selects every element with exactly that tag name anywhere in
    # the document, not only the ones under <size>.
    for query, new_value in (('//width', size_width), ('//height', size_height)):
        for element in document.xpath(query):
            # Assigning the number directly raises an error, so convert it
            # to a string first.
            element.text = str(int(new_value))

    serialized = tostring(document.getroot(), pretty_print=True)
    with open(xml_path, 'wb') as out_file:
        out_file.write(serialized)

# ---------- Experiment ----------
# Operate on the test.xml file from above and change the width and height
# nodes under its size node.
modify_size(xml_path='test.xml', size_width=800, size_height=600)

The effect is as follows:

 

Guess you like

Origin blog.csdn.net/qq_40641713/article/details/127537245