Converting XML annotations to CSV when building your own TensorFlow data set

When building my own data set for TensorFlow, I fell into a pitfall while converting the XML annotations to CSV.

If your data set contains only one class, the xml_to_csv script commonly found online works without problems. Its source code is as follows:

# -*- coding: utf-8 -*-
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
 
def xml_to_csv(path, train_ratio=0.67, output_dir='data'):
    """Convert the XML annotations in ``path`` into train/eval CSV files.

    Every ``<object>`` element of every ``*.xml`` file directly inside
    ``path`` becomes one CSV row.  Rows are split in file order: the first
    ``int(len(rows) * train_ratio)`` rows are written to ``train.csv`` and
    all remaining rows to ``eval.csv``, so no row is lost at the boundary.

    Args:
        path: Directory containing the annotation ``*.xml`` files.
        train_ratio: Fraction of all rows written to the training set.
        output_dir: Directory that receives ``train.csv`` and ``eval.csv``;
            it is created when it does not exist yet.
    """
    column_name = ['filename', 'width', 'height', 'class',
                   'xmin', 'ymin', 'xmax', 'ymax']

    xml_list = []
    # Read the annotation files.  glob returns names in arbitrary order, so
    # sort them to make the train/eval split reproducible.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            # Children are addressed by position, not by tag name.
            # NOTE(review): this assumes size = (width, height, ...) and
            # object = (name, ..., bndbox at index 4 holding
            # xmin, ymin, xmax, ymax) -- confirm against your annotation tool.
            size = root.find('size')
            bndbox = member[4]
            xml_list.append((
                root.find('filename').text + '.jpg',
                int(size[0].text),
                int(size[1].text),
                member[0].text,
                int(bndbox[0].text),
                int(bndbox[1].text),
                int(bndbox[2].text),
                int(bndbox[3].text),
            ))

    # Split all rows into training and evaluation sets.  The evaluation set
    # starts exactly where the training set ends.
    split = int(len(xml_list) * train_ratio)
    train_list = xml_list[:split]
    eval_list = xml_list[split:]

    # Save in CSV format.
    os.makedirs(output_dir, exist_ok=True)
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv(os.path.join(output_dir, 'train.csv'), index=False)
    eval_df.to_csv(os.path.join(output_dir, 'eval.csv'), index=False)
 
 
def main():
    """Convert the annotations under ``./xml`` and report completion."""
    path = './xml'
    xml_to_csv(path)
    print('Successfully converted xml to csv.')


# Only run the conversion when executed as a script, not when imported.
if __name__ == '__main__':
    main()

  

If your data set has two or more classes, the script above splits the data set as a whole at 3:1 (nominally; the code uses a training fraction of 0.67) instead of splitting each class 3:1.

With a slight adjustment, the script below splits each class of the data set 9:1 into a training set and a test set. The source code is as follows:

# coding: utf-8
import glob
import pandas as pd
import xml.etree.ElementTree as ET
 
# Class label names to export.  xml_to_csv compares each entry with the text
# of the first child of every <object> element; replace these with the label
# names used in your own data set.
classes = ["20Km_h", "no_passing_35", "no_passing", "keep_left", "keep_right", "mandatory", "straight_or_left", "passing_limits",
           "bicycles", "pedestrians", "stop", "dangerous"]
 
def xml_to_csv(path, train_ratio=0.9, output_dir='data', class_names=None):
    """Convert XML annotations to train/eval CSV files, split per class.

    Every ``<object>`` element whose label is listed in ``class_names``
    becomes one CSV row; objects with other labels are skipped.  For each
    class, in the order given by ``class_names``, the first
    ``int(n * train_ratio)`` of its ``n`` rows go to ``train.csv`` and all
    remaining rows go to ``eval.csv``, so no row is lost at the boundary.

    Args:
        path: Directory containing the annotation ``*.xml`` files.
        train_ratio: Fraction of each class written to the training set.
        output_dir: Directory that receives ``train.csv`` and ``eval.csv``;
            it is created when it does not exist yet.
        class_names: Labels to export; defaults to the module-level
            ``classes`` list.
    """
    import os  # local import: this script does not import os at file level

    if class_names is None:
        class_names = classes

    column_name = ['filename', 'width', 'height', 'class',
                   'xmin', 'ymin', 'xmax', 'ymax']

    # Parse every annotation file once and group the rows by class, instead
    # of re-reading the whole folder for each class.  glob returns names in
    # arbitrary order, so sort them to make the split reproducible.
    rows_by_class = {cls: [] for cls in class_names}
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            label = member[0].text
            if label not in rows_by_class:
                continue
            # Children are addressed by position, not by tag name.
            # NOTE(review): this assumes size = (width, height, ...) and
            # object = (name, ..., bndbox at index 4 holding
            # xmin, ymin, xmax, ymax) -- confirm against your annotation tool.
            size = root.find('size')
            bndbox = member[4]
            rows_by_class[label].append((
                root.find('filename').text,
                int(size[0].text),
                int(size[1].text),
                label,
                int(bndbox[0].text),
                int(bndbox[1].text),
                int(bndbox[2].text),
                int(bndbox[3].text),
            ))

    train_list = []
    eval_list = []
    for cls in class_names:
        rows = rows_by_class[cls]
        # The evaluation part starts exactly where the training part ends.
        split = int(len(rows) * train_ratio)
        train_list.extend(rows[:split])
        eval_list.extend(rows[split:])

    # Save in CSV format.
    os.makedirs(output_dir, exist_ok=True)
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv(os.path.join(output_dir, 'train.csv'), index=False)
    eval_df.to_csv(os.path.join(output_dir, 'eval.csv'), index=False)
 
 
def main():
    """Convert the annotations in the configured folder and report completion."""
    # Change this to the folder that contains your own XML annotation files.
    path = r'D:\Work\PycharmPro\trafficsign\SSD_NET\data\xml_data'
    xml_to_csv(path)
    print('Successfully converted xml to csv.')


# Only run the conversion when executed as a script, not when imported.
if __name__ == '__main__':
    main()

  

classes = ["20Km_h", "no_passing_35", "no_passing", "keep_left", "keep_right", "mandatory", "straight_or_left", "passing_limits", "bicycles", "pedestrians", "stop", "dangerous"]

Change this list to the class label names of your own data set.


Original: https://blog.csdn.net/miao0967020148/article/details/90208139

Guess you like

Origin www.cnblogs.com/qbdj/p/11024547.html