Learning in the competition (2) - Convert .txt files to .xml files in the production of voc datasets

Description

One difference from producing a standard VOC dataset is that the competition labels give 4 corner coordinates per object, which means the shape of the target may be a parallelogram rather than an axis-aligned rectangle. So the annotation is a bit different from the usual VOC dataset. The format of the .txt label file is shown below.

The final result is roughly like this (it does not correspond to the picture above)

Introduction

This code does two things:

(1) Rename the picture and change the picture position

(2) The .txt file becomes a .xml file

code

import os
import xml.dom
import xml.dom.minidom

import cv2
# xml file specification definition

# Directories where the input .txt label files and .jpg images are stored
_TXT_PATH = 'label'
_IMAGE_PATH = 'image'

# Formatting used when pretty-printing the XML: four spaces per nesting level
_INDENT = ' ' * 4
_NEW_LINE = '\n'

# Fixed values written into every annotation file
_FOLDER_NODE = 'VOC2007'
_ROOT_NODE = 'annotation'
_DATABASE_NAME = 'LOGODection'
_ANNOTATION = 'PASCAL VOC2007'
_AUTHOR = 'zhangyu'
_SEGMENTED = '0'
_DIFFICULT = '0'
_TRUNCATED = '0'
_POSE = 'Unspecified'

# Directories where the renamed images and the generated .xml files are saved
_IMAGE_COPY_PATH = 'JPEGImages'
_ANNOTATION_SAVE_PATH = 'Annotations'


def createElementNode(doc, tag, attr):
    """Build a detached element ``<tag>attr</tag>``.

    doc  -- DOM document used as the node factory
    tag  -- name of the element to create
    attr -- text placed inside the element as its only child

    Returns the new element; it is not attached to any parent.
    """
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(attr))
    return node

# Encapsulate the process of adding a child node
def createChildNode(doc, tag, attr, parent_node):
    """Create ``<tag>attr</tag>`` and append it to ``parent_node``.

    doc         -- DOM document used as the node factory
    tag         -- name of the new element
    attr        -- text content of the new element
    parent_node -- node that receives the new element as its last child

    Returns nothing; the parent is modified in place.
    """
    parent_node.appendChild(createElementNode(doc, tag, attr))

# object node is special

def createObjectNode(doc, attrs):
    """Build the ``<object>`` element describing one labelled target.

    doc   -- DOM document used as the node factory
    attrs -- mapping with the keys 'classification' and 'x1', 'y1', ...,
             'x4', 'y4'; every value is used as element text

    The element contains name / pose / truncated / difficult children
    followed by a ``<bndbox>`` holding the eight coordinate children in the
    order x1, y1, x2, y2, x3, y3, x4, y4.

    Returns the detached ``<object>`` element.
    """
    object_node = doc.createElement('object')

    # Descriptive children: the class name comes from the label, the rest
    # are the module-level defaults.
    descriptive_children = (
        ('name', attrs['classification']),
        ('pose', _POSE),
        ('truncated', _TRUNCATED),
        ('difficult', _DIFFICULT),
    )
    for tag, text in descriptive_children:
        createChildNode(doc, tag, text, object_node)

    # One child per coordinate, tagged with the same name as its attrs key.
    bndbox_node = doc.createElement('bndbox')
    for key in ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4'):
        createChildNode(doc, key, attrs[key], bndbox_node)

    object_node.appendChild(bndbox_node)
    return object_node

def writeXMLFile(doc, filename):
    """Write ``doc`` to ``filename`` as indented XML without the XML declaration.

    doc      -- DOM document to serialise
    filename -- path of the file to create or overwrite

    The document's top-level nodes are pretty-printed with four spaces per
    nesting level and one node per line; whitespace-only lines are dropped.
    Serialising the top-level nodes directly (rather than the document)
    means no ``<?xml ...?>`` line is produced, so nothing has to be stripped
    afterwards and no scratch file is needed.

    The file is written as UTF-8, which is what an XML file without a
    declaration is assumed to be.
    """
    xml_text = ''.join(
        node.toprettyxml(indent=' ' * 4, newl='\n')
        for node in doc.childNodes
    )

    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(
            line + '\n'
            for line in xml_text.split('\n')
            if line.strip()
        )

def getFileList(path):
    """Return the names of the regular files directly inside ``path``, sorted.

    path -- directory to scan (not recursive)

    Sub-directories are left out. The names are sorted because os.listdir
    returns entries in arbitrary order, and the caller numbers its output
    files by position in this list; sorting makes that numbering
    reproducible from run to run.

    Raises whatever os.listdir raises when ``path`` cannot be listed.
    """
    return sorted(
        name
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )


def _parseLabelLine(line):
    """Turn one label line into the attrs mapping expected by createObjectNode.

    A label line holds nine comma-separated fields: x1, y1, x2, y2, x3, y3,
    x4, y4 and the class name. Whitespace around each field is removed and
    any fields after the ninth are ignored.

    Returns None when the line has fewer than nine fields (this includes a
    blank line), so the caller can skip it instead of failing on an index.
    """
    fields = [field.strip() for field in line.strip().split(',')]
    if len(fields) < 9:
        return None
    keys = ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4', 'classification')
    return dict(zip(keys, fields))


def _buildAnnotationDocument(imageName, width, height, depth):
    """Create the annotation document with every node except the objects.

    imageName -- file name written into the <filename> node
    width, height, depth -- image size values written into the <size> node

    Returns the new document; its root element is <annotation>.
    """
    doc = xml.dom.getDOMImplementation().createDocument(None, _ROOT_NODE, None)
    root_node = doc.documentElement

    createChildNode(doc, 'folder', _FOLDER_NODE, root_node)
    createChildNode(doc, 'filename', imageName, root_node)

    # source node and its children
    source_node = doc.createElement('source')
    createChildNode(doc, 'database', _DATABASE_NAME, source_node)
    createChildNode(doc, 'annotation', _ANNOTATION, source_node)
    createChildNode(doc, 'image', 'flickr', source_node)
    createChildNode(doc, 'flickrid', 'NULL', source_node)
    root_node.appendChild(source_node)

    # owner node and its children
    owner_node = doc.createElement('owner')
    createChildNode(doc, 'flickrid', 'NULL', owner_node)
    createChildNode(doc, 'name', _AUTHOR, owner_node)
    root_node.appendChild(owner_node)

    # size node and its children
    size_node = doc.createElement('size')
    createChildNode(doc, 'width', str(width), size_node)
    createChildNode(doc, 'height', str(height), size_node)
    createChildNode(doc, 'depth', str(depth), size_node)
    root_node.appendChild(size_node)

    createChildNode(doc, 'segmented', _SEGMENTED, root_node)
    return doc


if __name__ == "__main__":

    fileList = getFileList(_TXT_PATH)
    if not fileList:
        # Nothing to convert: stop with a message and a non-zero exit status.
        raise SystemExit('no label files found in ' + _TXT_PATH)

    os.makedirs(_ANNOTATION_SAVE_PATH, exist_ok=True)
    os.makedirs(_IMAGE_COPY_PATH, exist_ok=True)

    for index, labelName in enumerate(fileList, start=1):

        # Output files are numbered by the label file's position in the list.
        saveName = "%05d" % index

        # Base name of the label file, shared with its image. A name with no
        # dot is used whole (rfind returns -1 in that case).
        pos = labelName.rfind(".")
        textName = labelName[:pos] if pos != -1 else labelName

        with open(os.path.join(_TXT_PATH, labelName)) as label_file:
            lines = label_file.readlines()

        xml_file_name = os.path.join(_ANNOTATION_SAVE_PATH, saveName + '.xml')
        source_image = os.path.join(_IMAGE_PATH, textName + '.jpg')
        target_image = os.path.join(_IMAGE_COPY_PATH, saveName + '.jpg')

        img = cv2.imread(source_image)
        # Some images in the dataset cannot be opened; cv2.imread then gives
        # back an object without .shape (None). Skip the whole sample so no
        # image or annotation is written for it. Its number stays unused.
        try:
            height, width, channel = img.shape
        except AttributeError:
            print('image can not open', source_image)
            continue

        print(source_image, '->', target_image)
        cv2.imwrite(target_image, img)

        doc = _buildAnnotationDocument(saveName + '.jpg', width, height, channel)
        root_node = doc.documentElement

        # One <object> node per valid label line.
        object_count = 0
        for line_number, line in enumerate(lines, start=1):
            if not line.strip():
                continue  # blank line, e.g. a trailing newline at end of file

            attrs = _parseLabelLine(line)
            if attrs is None:
                print('skipping malformed line %d in %s' % (line_number, labelName))
                continue

            root_node.appendChild(createObjectNode(doc, attrs))
            object_count += 1

        # Write the finished document once. A label file without any object
        # produces no .xml file.
        if object_count:
            print(xml_file_name)
            writeXMLFile(doc, xml_file_name)

hint

(1) Because the .txt file is separated by commas (,), the code is like this

array = s.split(',')

(2) Some pictures cannot be opened, which causes problems, so try/except is used to handle it (I have some doubt whether this really solves it)

# Some pictures in the dataset cannot be opened, so img has no .shape attribute
        try:
            height,width,channel=img.shape
        except AttributeError:
            print('image can not open')

(3) Do some small tests to understand these codes

# Test this code: saveName = "%05d" % (xText + 1)
# The index is zero-padded to five digits, starting from 1.
xText = 0
savename = "%05d" % (xText + 1)
print(savename)  # 00001

# Test this code: pos = fileList[xText].rfind(".")
# rfind locates the LAST dot, so everything before the extension is kept
# even when the name itself contains dots. The sample is not called `str`,
# because that would hide the builtin type.
fileName = 'sdasdjaklda.jpg'
pos = fileName.rfind('.')
textName = fileName[:pos]
print(textName)  # sdasdjaklda

Reference blog — its code is really good; I only changed a little, haha.

https://blog.csdn.net/yjl9122/article/details/56842098