生成训练txt文件

生成训练txt文件



一、根据文件夹名字生成

每个子文件夹的名称即为其中所有图片的类别标签(label)。

import os
import numpy as np

# Dataset root: every sub-folder is one class and its name is the label.
root = r"C:\Users\hq\Desktop\HoldingObject\pokemon"

# Build one "<image path>\t<label>" record per image file.
filename = []
dirs = os.listdir(root)
for label in dirs:
    dir_path = os.path.join(root, label)
    # Files lying directly in `root` are not class folders; os.listdir()
    # on them would raise NotADirectoryError, so skip them.
    if not os.path.isdir(dir_path):
        continue
    for n in os.listdir(dir_path):
        filename.append(os.path.join(dir_path, n) + '\t' + label)

# Shuffle in place so the train/test split is random.
np.random.shuffle(filename)

# Split train : test = 4 : 1; compute the cut index once so both slices
# are guaranteed to use the same boundary.
split_at = int(len(filename) * 0.8)
train = filename[:split_at]
test = filename[split_at:]

# Write one record per line. The encoding is fixed to UTF-8 so that paths
# with non-ASCII characters do not depend on the machine's locale; read
# the files back with the same encoding.
with open('train.txt', 'w', encoding='utf-8') as f1, \
        open('test.txt', 'w', encoding='utf-8') as f2:
    f1.writelines(record + '\n' for record in train)
    f2.writelines(record + '\n' for record in test)

print('成功!')

二、根据标注文件生成

两个文件夹分别为数据集(图片)文件夹与标注(xml)文件夹,图片与其标注文件同名、仅扩展名不同。

import os
import numpy as np

# Folder holding the images and folder holding the matching .xml annotations.
root = r"E:\python\honglvdeng\jiaotongdeng"
Annotations = r"E:\python\honglvdeng\Annotations"

# Build one "<image path>\t<annotation path>" record per image. The
# annotation is expected to share the image's base name with a .xml suffix.
filename = []
names = os.listdir(root)
for file in names:
    # splitext() strips only the last extension, so "a.b.jpg" maps to
    # "a.b.xml" (split('.')[0] would wrongly give "a.xml").
    stem = os.path.splitext(file)[0]
    image_path = os.path.join(root, file)
    xml_path = os.path.join(Annotations, stem + '.xml')
    filename.append(image_path + '\t' + xml_path)

# Shuffle in place so the train/test split is random.
np.random.shuffle(filename)

# Split train : test = 4 : 1 using a single cut index.
split_at = int(len(filename) * 0.8)
train = filename[:split_at]
test = filename[split_at:]

# Write one record per line. The encoding is fixed to UTF-8 so that paths
# with non-ASCII characters do not depend on the machine's locale; read
# the files back with the same encoding.
with open('train.txt', 'w', encoding='utf-8') as f1, \
        open('test.txt', 'w', encoding='utf-8') as f2:
    f1.writelines(record + '\n' for record in train)
    f2.writelines(record + '\n' for record in test)

print('成功!')

三、删除没有标注的图片(或删除多余的标注文件)

import os


#根据标签删除图片   标签少图片多
# Delete images that have no annotation (more images than annotations).
# Raw strings keep the backslashes literal; in a normal string "\d", "\h",
# "\j" and "\A" are invalid escape sequences and trigger a warning.
images_dir = r'E:\duixiangjiance\honglvdeng\jiaotongdeng\jiaotongdeng'
xml_dir = r'E:\duixiangjiance\honglvdeng\Annotations\Annotations'

# Base names (extension removed) of all annotation files, i.e. the names
# of the images that are annotated.
xmls = [os.path.splitext(xml)[0] for xml in os.listdir(xml_dir)]
print(xmls)

# A set gives constant-time membership tests for large datasets.
annotated = set(xmls)

# Walk all images and remove those whose base name has no annotation.
for image_name in os.listdir(images_dir):
    image_path = os.path.join(images_dir, image_name)
    # os.remove() cannot delete directories; leave sub-folders alone.
    if not os.path.isfile(image_path):
        continue
    if os.path.splitext(image_name)[0] not in annotated:
        # Delete the file that was actually listed instead of rebuilding
        # "<stem>.jpg": that failed for non-jpg images and, for names with
        # extra dots, could hit a different (annotated) file.
        print(image_name)
        os.remove(image_path)


'''
# Disabled variant: delete annotation files that have no matching image
# (more annotations than images). Remove the surrounding quotes to run it.
import os

xml_dir = './Annotations/'
images_dir = './JPEGImages/'

# Base names (extension removed) of all images.
images = [os.path.splitext(name)[0] for name in os.listdir(images_dir)]
print(images)

# A set gives constant-time membership tests for large datasets.
existing = set(images)

# Walk all annotation files and remove those whose image is missing.
for xml_name in os.listdir(xml_dir):
    if os.path.splitext(xml_name)[0] not in existing:
        print(xml_name)
        os.remove(os.path.join(xml_dir, xml_name))
'''

猜你喜欢

转载自blog.csdn.net/qq_40276082/article/details/130628201