有一批标签混乱的数据文件:
每个文件的每行内容形如:20.98503304 -26.45740128 0.33661354 188 203 216 2.000000(最后一列为标签)
需要按标签将这批数据重命名,并另存到对应标签的目录下。
'''
将存放所有数据标签的txt文本,按照标签类别存放至新的文件夹路径。并以新的格式命名。
例如:原始数据tree txt/000001.txt-001000.txt
改为 tree data/[class1]/class1_000001.txt-class1_000002.txt
data/[class2]/class2_000001.txt-class2_000002.txt
'''
import os
import shutil
class ClassDataMake():
    """Sort labelled txt files into one folder per class.

    Every data line is expected to hold seven whitespace-separated fields,
    for example ``20.98503304 -26.45740128 0.33661354 188 203 216 2.000000``;
    the last field is the numeric class label.  The intended call order is
    ``remakeDir`` -> ``judge_data_format`` -> ``renameFile`` -> ``moveFile``.
    """

    # Numeric label found in the last column -> class (and folder) name.
    class_dict = {0: 'chemical_vehicle', 1: 'car', 2: 'truck', 3: 'bus'}
    # Number of fields every data line must contain.
    field_count = 7

    def __init__(self, path='./txt', root_new='./data'):
        """Remember where the raw files live and where the result goes.

        Args:
            path: Directory holding the original txt files.
            root_new: Root directory that receives one sub-folder per class.
        """
        self.path = path
        self.root_new = root_new

    def _label_from_line(self, line):
        """Return the class index encoded in one data line, or None if bad."""
        fields = line.split()
        if len(fields) != self.field_count:
            return None
        try:
            value = float(fields[-1])
        except ValueError:
            return None
        # Labels are written as floats ("2.000000"); only whole numbers that
        # map to a known class are accepted.
        if not value.is_integer() or int(value) not in self.class_dict:
            return None
        return int(value)

    def _file_label(self, file_path):
        """Return the single class index shared by all lines, else None."""
        labels = set()
        try:
            with open(file_path) as handle:
                for line in handle:
                    label = self._label_from_line(line)
                    if label is None:
                        return None
                    labels.add(label)
        except UnicodeDecodeError:
            # Not readable as text, so it cannot match the expected format.
            return None
        # Empty files (no label) and mixed-label files are both rejected.
        if len(labels) != 1:
            return None
        return labels.pop()

    def _next_free_name(self, cla, index, current):
        """Pick the next unused ``<cla>_NNNNNN.txt`` path in the source dir.

        A name counts as used when another file already carries it in the
        source directory or in the class folder under ``root_new``, so that
        neither the rename nor the later move can overwrite existing data.

        Returns:
            Tuple ``(new_path, next_index)``.
        """
        source_dir = os.path.abspath(self.path)
        target_dir = os.path.join(os.path.abspath(self.root_new), cla)
        while True:
            name = '%s_%06d.txt' % (cla, index)
            candidate = os.path.join(source_dir, name)
            index += 1
            used_here = candidate != current and os.path.exists(candidate)
            used_there = os.path.exists(os.path.join(target_dir, name))
            if not used_here and not used_there:
                return candidate, index

    def remakeDir(self):
        """Create one folder per class name below ``root_new``."""
        root = os.path.abspath(self.root_new)
        for class_name in self.class_dict.values():
            os.makedirs(os.path.join(root, class_name), exist_ok=True)

    def judge_data_format(self):
        """Delete every file in ``path`` that violates the data format.

        A file is kept only when it is non-empty, each line has exactly seven
        fields, and all lines carry the same label, which must be a key of
        ``class_dict``.  Directory entries that are not regular files are
        left alone.
        """
        source_dir = os.path.abspath(self.path)
        for i, item in enumerate(os.listdir(self.path)):
            file_path = os.path.join(source_dir, item)
            if not os.path.isfile(file_path):
                continue
            if self._file_label(file_path) is None:
                print('删除文件', i, item)
                os.remove(file_path)

    def renameFile(self):
        """Rename each file in ``path`` to ``<class>_<6-digit index>.txt``.

        The class is read from the label on the first line, so this should
        run after ``judge_data_format``.  Files whose label cannot be read
        are reported and skipped.  Indices start at 1 per class and skip
        names that are already taken, so an existing file is never replaced.
        """
        source_dir = os.path.abspath(self.path)
        # Sorted so the numbering is reproducible between runs.
        filelist = sorted(os.listdir(self.path))
        total_num = len(filelist)
        counters = dict.fromkeys(self.class_dict.values(), 1)
        i = 0
        for item in filelist:
            file_name = os.path.join(source_dir, item)
            if not os.path.isfile(file_name):
                continue
            try:
                with open(file_name) as handle:
                    label = self._label_from_line(handle.readline())
            except (OSError, UnicodeDecodeError) as err:
                print('skipping %s: %s' % (file_name, err))
                continue
            if label is None:
                print('skipping %s: unrecognised label' % file_name)
                continue
            cla = self.class_dict[label]
            file_name_new, counters[cla] = self._next_free_name(
                cla, counters[cla], file_name)
            try:
                if file_name_new != file_name:
                    os.rename(file_name, file_name_new)
            except OSError as err:
                print('failed to rename %s: %s' % (file_name, err))
                continue
            print('converting %s to %s ...' % (file_name, file_name_new))
            i = i + 1
        print('total %d to rename & converted %d' % (total_num, i))
        print('+++++++++++++++++++++++++++++++++++++++++++')

    def moveFile(self):
        """Move each renamed file into the folder of its class.

        The class is taken from the file-name prefix.  Entries whose class
        folder does not exist, or whose destination already exists, are
        reported and left in place instead of overwriting anything.
        """
        source_dir = os.path.abspath(self.path)
        root = os.path.abspath(self.root_new)
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        i = 0
        for item in filelist:
            # Known class names may contain '_' themselves, so match them as
            # a prefix before falling back to the text before the first '_'.
            cla = item.split('_', 1)[0]
            for class_name in self.class_dict.values():
                if item.startswith(class_name + '_'):
                    cla = class_name
                    break
            file_path = os.path.join(source_dir, item)
            file_new_path = os.path.join(root, cla, item)
            if not os.path.isdir(os.path.join(root, cla)):
                print('skipping %s: no folder for class %s' % (file_path, cla))
                continue
            if os.path.exists(file_new_path):
                print('skipping %s: %s already exists'
                      % (file_path, file_new_path))
                continue
            try:
                shutil.move(file_path, file_new_path)
            except OSError as err:
                print('failed to move %s: %s' % (file_path, err))
                continue
            print('converting %s to %s ...' % (file_path, file_new_path))
            i = i + 1
        print('total %d move %d' % (total_num, i))
if __name__ == '__main__':
    # Run the whole pipeline in order: create the class folders, drop the
    # malformed files, rename what is left by class, then move the files
    # into their class folders.
    pipeline = ClassDataMake()
    pipeline.remakeDir()
    pipeline.judge_data_format()
    pipeline.renameFile()
    pipeline.moveFile()