生成训练txt文件
一、根据文件夹名字生成
每个子文件夹的名称即为其中图片的类别标签
import os
import numpy as np
# Dataset root: every sub-folder is one class and its name is used as the label.
root = r"C:\Users\hq\Desktop\HoldingObject\pokemon"

# Build one "<image path>\t<label>" entry per image.
filename = []
for label in os.listdir(root):
    dir_path = os.path.join(root, label)
    # Skip stray files lying next to the class folders; os.listdir() would
    # raise NotADirectoryError on them.
    if not os.path.isdir(dir_path):
        continue
    for n in os.listdir(dir_path):
        filename.append(os.path.join(dir_path, n) + '\t' + label)

# Shuffle in place so that both subsets contain a mix of all classes.
np.random.shuffle(filename)

# Split 4:1 into training and test entries; compute the cut index once.
split_at = int(len(filename) * 0.8)
train = filename[:split_at]
test = filename[split_at:]

# Write the two lists, one entry per line. The encoding is explicit so that
# non-ASCII folder or file names do not depend on the system locale.
with open('train.txt', 'w', encoding='utf-8') as f1, \
        open('test.txt', 'w', encoding='utf-8') as f2:
    f1.writelines(entry + '\n' for entry in train)
    f2.writelines(entry + '\n' for entry in test)
print('成功!')
二、根据标注文件生成
两个路径分别为图片数据集文件夹与标注(xml)文件夹
import os
import numpy as np
# Folder with the images and folder with the matching .xml annotation files.
root = r"E:\python\honglvdeng\jiaotongdeng"
Annotations = r"E:\python\honglvdeng\Annotations"

# Build one "<image path>\t<annotation path>" entry per image.
filename = []
names = os.listdir(root)
for file in names:
    # splitext() strips only the final extension, so a name such as
    # "a.b.jpg" maps to "a.b.xml" instead of being truncated to "a.xml".
    stem = os.path.splitext(file)[0]
    filename.append(root + '/' + file + '\t' + Annotations + '/' + stem + '.xml')

# Shuffle in place before splitting.
np.random.shuffle(filename)

# Split 4:1 into training and test entries; compute the cut index once.
split_at = int(len(filename) * 0.8)
train = filename[:split_at]
test = filename[split_at:]

# Write the two lists, one entry per line. The encoding is explicit so that
# non-ASCII file names do not depend on the system locale.
with open('train.txt', 'w', encoding='utf-8') as f1, \
        open('test.txt', 'w', encoding='utf-8') as f2:
    f1.writelines(entry + '\n' for entry in train)
    f2.writelines(entry + '\n' for entry in test)
print('成功!')
三、删除没有标注的图片(或反过来:删除没有对应图片的多余标注文件)
import os
# Delete images that have no annotation (for when there are fewer labels than images).
# Raw strings: the backslashes in these Windows paths are literal characters,
# not escape sequences.
images_dir = r'E:\duixiangjiance\honglvdeng\jiaotongdeng\jiaotongdeng'
xml_dir = r'E:\duixiangjiance\honglvdeng\Annotations\Annotations'

# Base names of the annotation files, i.e. the names of the annotated images.
# splitext() drops only the final extension, so names containing extra dots
# are compared in full.
xmls = [os.path.splitext(xml)[0] for xml in os.listdir(xml_dir)]
print(xmls)

# Set copy for constant-time membership tests in the loop below.
annotated = set(xmls)

# Walk over every entry of the image folder and remove the unannotated files.
for image_name in os.listdir(images_dir):
    image_path = os.path.join(images_dir, image_name)
    # os.remove() cannot delete directories, so leave sub-folders alone.
    if not os.path.isfile(image_path):
        continue
    if os.path.splitext(image_name)[0] not in annotated:
        # Remove the file under its real name; rebuilding it as
        # "<stem>.jpg" fails for images with any other extension.
        print(image_name)
        os.remove(image_path)
'''
# Disabled variant: delete annotation files that have no matching image.
import os
xml_dir = './Annotations/'
images_dir = './JPEGImages/'
# Base names of the images that are present.
images = set()
for image_name in os.listdir(images_dir):
    images.add(os.path.splitext(image_name)[0])
print(images)
# Remove every annotation file whose image is missing.
for xml_name in os.listdir(xml_dir):
    if os.path.splitext(xml_name)[0] not in images:
        print(xml_name)
        os.remove(os.path.join(xml_dir, xml_name))
'''