Small Python scripts that come in handy when building a dataset

Foreword
If you run into other batch-processing tasks while building a dataset, send me a private message and I will add a script for them here promptly (free of charge).

1. Rename the original picture

The file names of freshly taken pictures are usually a mess. Don't worry: with this little script you never have to rename pictures by hand again!


import os

class BatchRename():
    '''
    Batch-rename the image files in a folder to ``<prefix><index><ext>``.

    Only directory entries whose name ends with ``ext`` are touched. Entries
    are processed in sorted order so the numbering is reproducible, and an
    existing file is never overwritten: if the wanted target name is already
    taken by another file, the next free index is used instead.

    Args:
        path: folder containing the images to rename.
        prefix: text placed before the running index in the new name.
        ext: extension (including the dot) that selects the files to rename
            and is kept on the renamed files.
    '''
    def __init__(self, path='f:/1', prefix='0000cqh', ext='.jpg'):
        self.path = path  # folder whose images are renamed
        self.prefix = prefix  # customise to get the naming scheme you want
        self.ext = ext  # change to e.g. '.png' for other image formats

    def _target(self, folder, index):
        '''Return the full path of the new name for the given index.'''
        return os.path.join(folder, self.prefix + str(index) + self.ext)

    def rename(self):
        '''
        Rename every matching file in ``self.path``.

        Prints one line per renamed file and a final summary.

        Returns:
            The number of files that were renamed.
        '''
        folder = os.path.abspath(self.path)
        # Sorted: os.listdir() returns entries in arbitrary order.
        filelist = sorted(os.listdir(folder))
        total_num = len(filelist)  # number of entries in the folder
        index = 0  # numbering of the new names starts at 0
        renamed = 0
        for item in filelist:
            if not item.endswith(self.ext):
                continue
            src = os.path.join(folder, item)
            dst = self._target(folder, index)
            # os.rename() silently replaces an existing file on POSIX, which
            # would destroy another image; move on to the next free index.
            while dst != src and os.path.exists(dst):
                index += 1
                dst = self._target(folder, index)
            try:
                os.rename(src, dst)
            except OSError as err:
                # Best effort: report the failure and carry on with the rest.
                print('failed to rename %s: %s' % (src, err))
                continue
            print('converting %s to %s ...' % (src, dst))
            index += 1
            renamed += 1
        print('total %d to rename & converted %d jpgs' % (total_num, renamed))
        return renamed

if __name__ == '__main__':
    # Script entry point: build the renamer with its defaults and run it once.
    BatchRename().rename()

2. Modify the path of the xml file

When several people annotate the same batch of data, their folder layouts are often inconsistent, so the image path stored in each xml file differs. Don't worry: with this small script, the paths can be unified no matter how your teammates annotated the data.

import os
import os.path
from xml.etree.ElementTree import parse, Element

# Directory holding the .xml annotation files.
path = "F:/1"
# Directory prefix written into the <path> element of every annotation.
image_dir = 'D:\\data-view\\round-1\\'


def update_xml_paths(xml_dir, new_image_dir):
    """Rewrite the <path> element of every .xml file in ``xml_dir`` in place.

    The new value is ``new_image_dir`` followed by the xml file's base name
    with a ``.jpg`` extension. Sub-directories and files that are not ``.xml``
    (for example images stored next to the annotations) are ignored, and an
    annotation without a <path> element is reported and left untouched.

    Args:
        xml_dir: directory containing the annotation files.
        new_image_dir: prefix for the new image path; it is concatenated as
            is, so it must end with a path separator.

    Returns:
        The number of annotation files that were rewritten.
    """
    updated = 0
    for xml_file in sorted(os.listdir(xml_dir)):
        # Core step: build the concrete path of the entry.
        xml_path = os.path.join(xml_dir, xml_file)
        # Test the joined path; the bare name would be resolved against the
        # current working directory instead of xml_dir.
        if os.path.isdir(xml_path) or not xml_file.lower().endswith('.xml'):
            continue
        print(xml_file)
        dom = parse(xml_path)
        # The root of the document is the <annotation> element.
        node = dom.getroot().find('path')
        if node is None:
            print('no <path> element, skipped: %s' % xml_file)
            continue
        # File name without extension, then the image extension.
        stem = os.path.splitext(xml_file)[0]
        node.text = new_image_dir + stem + '.jpg'
        dom.write(xml_path)
        print('已经修改')
        updated += 1
    return updated


if __name__ == '__main__':
    update_xml_paths(path, image_dir)

3. Modify the folder of the xml file

If the path stored in the xml files was wrong, also check carefully whether the folder entry is correct, because it may be wrong as well. This small script modifies the folder entry of all xml files in one batch.

import os
from xml.etree.ElementTree import parse, Element

path = 'f:/1'  # directory holding the source .xml annotation files
sv_path = 'f:/2'  # directory that receives the modified .xml files


def update_xml_folders(xml_dir, out_dir, folder_name='round-1'):
    """Set the <folder> element of every .xml file and save a copy to ``out_dir``.

    The source files are left untouched; each modified document is written to
    ``out_dir`` under the same file name. Sub-directories and files that are
    not ``.xml`` are ignored, and an annotation without a <folder> element is
    reported and not written.

    Args:
        xml_dir: directory containing the annotation files.
        out_dir: directory for the modified files; created when missing.
        folder_name: new text of the <folder> element.

    Returns:
        The number of annotation files that were written.
    """
    # Writing into a missing directory would otherwise fail on the first file.
    os.makedirs(out_dir, exist_ok=True)
    written = 0
    for xml_file in sorted(os.listdir(xml_dir)):
        src = os.path.join(xml_dir, xml_file)
        # Test the joined path; the bare name would be resolved against the
        # current working directory instead of xml_dir.
        if os.path.isdir(src) or not xml_file.lower().endswith('.xml'):
            continue
        print(xml_file)
        dom = parse(src)
        # The root of the document is the <annotation> element.
        node = dom.getroot().find('folder')
        if node is None:
            print('no <folder> element, skipped: %s' % xml_file)
            continue
        node.text = folder_name
        dom.write(os.path.join(out_dir, xml_file))
        written += 1
    return written


if __name__ == '__main__':
    update_xml_folders(path, sv_path)

4. Extract training set and test set

Randomly extract training set and test set from the labeled data

## Deep learning workflows need a training set, a validation set and a test set.

import os, random, shutil


def moveFile(fileDir, target_dir=None, rate=0.20):
    """Move a random fraction of the files in ``fileDir`` to another folder.

    Args:
        fileDir: source folder containing the files.
        target_dir: destination folder, created when missing. When omitted,
            the module-level ``tarDir`` set by the script entry point is used.
        rate: fraction of the files to move, e.g. 0.1 moves 10 out of 100.
            The resulting count is truncated towards zero.

    Returns:
        The list of file names that were moved.

    Raises:
        ValueError: if ``rate`` is not between 0 and 1.
    """
    if target_dir is None:
        # Backward compatibility: older callers pass only fileDir and rely on
        # the global tarDir defined under the __main__ guard.
        target_dir = tarDir
    if not 0 <= rate <= 1:
        raise ValueError('rate must be between 0 and 1, got %r' % (rate,))
    # Only regular files are candidates; sub-directories stay where they are.
    candidates = [
        name for name in os.listdir(fileDir)
        if os.path.isfile(os.path.join(fileDir, name))
    ]
    picknumber = int(len(candidates) * rate)
    sample = random.sample(candidates, picknumber)
    print(sample)
    os.makedirs(target_dir, exist_ok=True)
    for name in sample:
        # os.path.join works whether or not the folders end with a separator.
        shutil.move(os.path.join(fileDir, name), os.path.join(target_dir, name))
    return sample


if __name__ == '__main__':
    # moveFile reads tarDir as a module-level global, so it must be bound
    # before the call below.
    tarDir = 'i:\\333\\test\\'  # folder the sampled pictures are moved to
    fileDir = "i:\\333\\train\\"  # folder holding the source pictures
    moveFile(fileDir)

Guess you like

Origin blog.csdn.net/ca___0/article/details/110249177