Converting YOLO labels into tensorflow_yolov3 labels: generating train.txt and test.txt while cleaning the data

Code:

# -*- encoding: utf-8 -*-
"""
@File    : convert.py
@Time    : 2019/10/22 9:26
@Author  : Dontla
@Email   : [email protected]
@Software: PyCharm
"""
import os
import re
import cv2
import random

def extract_content(content):
    """Extract the annotation rows from the text of one YOLO label file.

    Each row is expected to hold five space-separated fields
    (``class_id x_center y_center width height``).

    Args:
        content: Full text of a label file.

    Returns:
        A list of 5-tuples of strings, one per line that contains at least
        four spaces. Lines with fewer fields are ignored. If a line holds
        more than five fields, the surplus stays inside the fifth element.
    """
    # A row ends either at a newline or at the very end of the text, so the
    # last annotation is still returned when the file has no trailing newline.
    return re.findall(r'(.*?) (.*?) (.*?) (.*?) (.*?)(?:\n|\Z)', content)


if __name__ == '__main__':
    # Convert per-image YOLO label files (relative "class x y w h" rows) into
    # the one-line-per-image format
    #     <image path> x_min,y_min,x_max,y_max,class_id [x_min,...]
    # dropping invalid boxes on the way, and randomly split the resulting
    # lines between train.txt and test.txt.

    # Keep the trailing "/" on every directory below: some paths are built by
    # plain string concatenation. "../" means the parent directory.

    # Image directory exactly as it must appear inside train.txt / test.txt,
    # i.e. relative to the tensorflow-yolov3 training script that reads them,
    # e.g. '../Dontla_Dataset/20190822_Artificial_Flower/20191023_f_cotton_g/'.
    source_img_path_related_to_train_py = './dontla_source_img/'

    # The following three paths are relative to the current working directory.
    source_img_path = './source_img_path/'
    source_txt_path = './source_txt_path/'
    target_txt_path = './target_txt_path/'

    # Fraction of the kept images that goes to train.txt; the rest go to test.txt.
    train_scale = 0.75

    def natural_sort_key(name):
        """Return a key that orders embedded numbers numerically (1, 2, 10)."""
        # re.split with a capturing group puts the digit runs at odd indices,
        # so keys always compare str with str and int with int.
        return [int(token) if index % 2 else token.lower()
                for index, token in enumerate(re.split(r'(\d+)', name))]

    # List the label files directly inside source_txt_path (os.listdir does not
    # descend into sub-folders) and keep only regular *.txt files. The listing
    # order is arbitrary, so sort naturally to get 1, 2, 3, ... rather than
    # 1, 10, 100, ...
    filenames = sorted(
        (name for name in os.listdir(source_txt_path)
         if name.lower().endswith('.txt')
         and os.path.isfile(os.path.join(source_txt_path, name))),
        key=natural_sort_key,
    )

    # Output lines are collected first and written once at the end, so a
    # failure half-way through does not leave truncated output files behind.
    train_lines = []
    test_lines = []

    for filename in filenames:
        # e.g. '1.txt' -> '1'
        stem = os.path.splitext(filename)[0]

        # Read the label file and pull out its annotation rows, e.g.
        # [('0', '0.228125', '0.670833', '0.164063', '0.227778'), ...]
        with open(os.path.join(source_txt_path, filename), 'r', encoding='utf-8') as f:
            content_extract = extract_content(f.read())

        # Read the matching image to obtain its resolution, so images of any
        # size are converted correctly. cv2.imread returns None (it does not
        # raise) when the file is missing or cannot be decoded.
        img_file = '{}{}.jpg'.format(source_img_path, stem)
        img = cv2.imread(img_file)
        if img is None:
            print('Cannot read image, skipping: {}'.format(img_file))
            continue
        # img.shape is (height, width, channels), e.g. (720, 1280, 3).
        img_height, img_width = img.shape[:2]

        # Leading path part of the output line for this image.
        path_str = source_img_path_related_to_train_py + stem + '.jpg'

        # Formatted "x_min,y_min,x_max,y_max,class_id" entries for this image.
        obj_strs = []

        # Convert every box from relative centre/size to absolute corners.
        for obj_str in content_extract:
            try:
                # Parse with int/float rather than eval: label files are plain
                # data and must never be executed as code.
                class_id = int(obj_str[0])
                rel_x, rel_y, rel_w, rel_h = (float(value) for value in obj_str[1:])

                x, y = rel_x * img_width, rel_y * img_height
                w, h = rel_w * img_width, rel_h * img_height

                x_min, y_min = round(x - w / 2), round(y - h / 2)
                x_max, y_max = round(x + w / 2), round(y + h / 2)
            except (ValueError, OverflowError):
                # Non-numeric field, or nan/inf which cannot be rounded.
                print('错误标注:')
                print(filename)
                print(obj_str)
                continue

            # Data cleaning: drop boxes with an unexpected class id, boxes that
            # leave the image, and boxes that collapse to zero width or height.
            if class_id != 0 \
                    or x_min < 0 or x_max > img_width or x_min >= x_max \
                    or y_min < 0 or y_max > img_height or y_min >= y_max:
                print('错误标注:')
                print(filename)
                print([class_id, rel_x, rel_y, rel_w, rel_h])
                print('[{}, {}, {}, {}, {}]'.format(x_min, y_min, x_max, y_max, class_id))
                continue

            obj_strs.append('{},{},{},{},{}'.format(x_min, y_min, x_max, y_max, class_id))

        # An image left without any valid box after cleaning is written to
        # neither train.txt nor test.txt.
        if not obj_strs:
            print('空文件')
            continue

        # e.g. './dontla_source_img/1.jpg 275,138,374,226,0 669,36,782,153,0'
        write_strs = ' '.join([path_str] + obj_strs)

        # Randomly assign the image to the training or the test set.
        if random.random() < train_scale:
            train_lines.append(write_strs)
        else:
            test_lines.append(write_strs)

    # Write both files, one image per line and no trailing newline.
    os.makedirs(target_txt_path, exist_ok=True)
    with open(target_txt_path + 'train.txt', 'w', encoding='utf-8') as train_file:
        train_file.write('\n'.join(train_lines))
    with open(target_txt_path + 'test.txt', 'w', encoding='utf-8') as test_file:
        test_file.write('\n'.join(test_lines))

Generated results:
Here Insert Picture Description
Here Insert Picture Description

This code is an upgraded version of the earlier article "How to convert YOLO annotation .txt coordinates into tensorflow-yolov3 (YunYang1994) .txt annotation coordinates".

Published 781 original articles · won praise 37 · views 110 000 +

Guess you like

Origin blog.csdn.net/Dontla/article/details/104185403