Preprocesamiento de datos para detección de objetos: composición tipo rompecabezas de los recortes de cada categoría de componente en posiciones fijas, para reducir el espacio de aprendizaje

En primer lugar, se muestra el resultado, para que se pueda ver de manera más intuitiva lo que se va a hacer en este artículo:
inserte la descripción de la imagen aquí
Como se muestra en la figura, la imagen completa se divide en una cuadrícula de 4 × 4 y los recortes de las 4 clases de componentes se ensamblan en posiciones fijas. Las imágenes grandes de cabeza y cuerpo ocupan una celda cada una, mientras que las imágenes pequeñas de manos y pies se colocan en mosaicos de 2 × 2 dentro de cada celda (porque los recortes de manos y pies suelen ser pequeños, y así se evita ampliarlos demasiado y que queden borrosos).
Las imágenes pequeñas de cada categoría de componentes se agrupan para comprobar si el algoritmo de detección de objetos se centra en un objetivo específico dentro de un área específica, con el fin de reducir el espacio de aprendizaje (para controlar las variables, deben desactivarse los aumentos de datos del propio algoritmo que transforman la posición).
Aquí, un componente se refiere a una clase de objetivos; por ejemplo, la cabeza incluye los objetivos de la zona de la cabeza, como la cabeza y el sombrero. La imagen de cada tipo de componente se obtiene recortando el componente correspondiente.

Preparar

Primero, el script json2txt.py convierte las anotaciones en formato json a formato txt:

import json
import os
import cv2

# Print the OpenCV version at module level (runs on import as well as on direct execution).
print(cv2.__version__)


def getBoundingBox(points):
    """Return the axis-aligned bounding box enclosing *points*.

    Args:
        points: non-empty indexable sequence of points; element 0 of each
            point is read as x and element 1 as y.

    Returns:
        ``[xmin, xmax, ymin, ymax]``, each value truncated with ``int()``.

    Raises:
        IndexError: if *points* is empty.
    """
    # Seed with the first point so an empty sequence fails on this lookup.
    seed_x, seed_y = points[0][0], points[0][1]
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    return [
        int(min(seed_x, *xs)),
        int(max(seed_x, *xs)),
        int(min(seed_y, *ys)),
        int(max(seed_y, *ys)),
    ]


def json2txt(json_path, txt_path):
    """Convert one JSON annotation file into a plain-text label file.

    The JSON document must provide ``imageHeight``, ``imageWidth`` and a
    ``shapes`` list whose items carry a ``label`` and a ``points`` list.
    For every shape one line ``<image>.jpg <label> xmin ymin xmax ymax`` is
    written, where the box is the bounding box of the points.  ``xmin`` and
    ``ymin`` are raised to 0 when negative; ``xmax`` and ``ymax`` are lowered
    to ``width - 1`` / ``height - 1`` when they reach the image border.

    Args:
        json_path: path of the ``.json`` annotation file.
        txt_path: existing directory that receives ``<name>.txt``.
    """
    # Use a context manager so the annotation file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(json_path) as json_file:
        json_data = json.load(json_file)
    img_h = json_data["imageHeight"]
    img_w = json_data["imageWidth"]

    # The image name is derived from the annotation name; the image is
    # assumed to be a .jpg next to it.
    img_name = os.path.basename(json_path).split(".json")[0]
    name = img_name + '.jpg'

    rows = []
    for shape in json_data["shapes"]:
        label_name = shape["label"]
        xmin, xmax, ymin, ymax = getBoundingBox(shape["points"])
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, img_w - 1)
        ymax = min(ymax, img_h - 1)
        rows.append(name + ' ' + label_name + ' ' + str(xmin) + ' ' + str(ymin)
                    + ' ' + str(xmax) + ' ' + str(ymax) + '\n')

    out_file = os.path.join(txt_path, img_name + ".txt")
    with open(out_file, 'w', encoding='utf-8') as f:
        f.write(''.join(rows))

if __name__ == "__main__":
    # Hard-coded dataset locations used when this script is run directly.
    json_path = "/data/cch/yolov5-augment/train/json"
    saveTxt_path = "/data/cch/yolov5-augment/train/txt"

    # Convert every regular *.json file found directly under json_path.
    for file in os.listdir(json_path):
        old_dir = os.path.join(json_path, file)
        if os.path.isdir(old_dir) or os.path.splitext(file)[1] != ".json":
            continue
        json2txt(old_dir, saveTxt_path)

def main_import(json_path, txt_path):
    """Run json2txt on every ``.json`` file directly inside *json_path*.

    Sub-directories and files with any other extension are ignored.

    Args:
        json_path: directory that is scanned (not recursively).
        txt_path: output directory handed to json2txt for each file.
    """
    for entry in os.listdir(json_path):
        candidate = os.path.join(json_path, entry)
        if os.path.isdir(candidate) or os.path.splitext(entry)[1] != ".json":
            continue
        json2txt(candidate, txt_path)

Tome un archivo txt aleatorio y verifique su formato:

body_21.jpg cloth 51 12 255 270
body_21.jpg hand 50 206 79 257
body_21.jpg hand 195 217 228 269
body_21.jpg other 112 0 194 1

Formato: nombre de la imagen, nombre de la clase, x1 y1 x2 y2 (las coordenadas de las esquinas superior izquierda e inferior derecha del cuadro delimitador; este formato txt no es el formato darknet que usa yolo para entrenar).
A continuación, el script modeTxt.py convierte los datos en formato txt al formato darknet:

import os
from numpy.lib.twodim_base import triu_indices_from
import pandas as pd
from glob import glob
import cv2
import codecs


def txt2darknet(txt_path, img_path, saved_path, class_ids=None, skip_labels=None):
    """Convert ``name label x1 y1 x2 y2`` label files to darknet format.

    For every ``.txt`` file in *txt_path* the image ``<stem>.jpg`` is read
    from *img_path* to obtain its size, and ``<stem>.txt`` is written to
    *saved_path* with one ``class cx cy w h`` line per kept box, where the
    centre and size are divided by the image width / height.  The converted
    text of each file is also printed.

    Args:
        txt_path: directory containing the source label files.
        img_path: directory containing the matching ``.jpg`` images.
        saved_path: output directory; created when it does not exist.
        class_ids: optional mapping from label name to the class id to
            write.  Defaults to head=0, hat=1, helmet=2, eye=3,
            glasses/glass=4.  Labels missing from the mapping are written
            unchanged.
        skip_labels: optional collection of labels whose boxes are dropped.
            Defaults to ``other``, ``mask`` and ``del``.

    Raises:
        FileNotFoundError: if the image belonging to a label file cannot be
            read.
    """
    if class_ids is None:
        # Adjust this mapping (or pass class_ids) to match your own class
        # names and their order.
        class_ids = {
            'head': '0',
            'hat': '1',
            'helmet': '2',
            'eye': '3',
            'glasses': '4',
            'glass': '4',
        }
    if skip_labels is None:
        skip_labels = ('other', 'mask', 'del')

    filelist = os.listdir(txt_path)
    os.makedirs(saved_path, exist_ok=True)
    for file in filelist:
        if os.path.splitext(file)[-1] != ".txt":
            continue
        file_path = os.path.join(txt_path, file)
        filename = os.path.splitext(file)[0]
        imgPath = os.path.join(img_path, filename + '.jpg')

        # cv2.imread does not raise on a missing/corrupt file, it returns
        # None; fail with a message that names the offending image.
        img = cv2.imread(imgPath)
        if img is None:
            raise FileNotFoundError(
                "cannot read image %s required by label file %s" % (imgPath, file_path))
        img_h, img_w = img.shape[:2]

        rows = []
        with codecs.open(file_path, 'r', encoding='utf-8', errors='ignore') as f1:
            for line in f1:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                a = line.split(' ')
                label = a[1]
                if label in skip_labels:
                    continue
                class_id = str(class_ids.get(label, label))

                x1 = float(a[2])
                y1 = float(a[3])
                w = float(a[4]) - x1
                h = float(a[5]) - y1
                center_x = x1 + w / 2
                center_y = y1 + h / 2

                rows.append(' '.join([
                    class_id,
                    str(center_x / img_w),
                    str(center_y / img_h),
                    str(w / img_w),
                    str(h / img_h),
                ]))

        data = ''.join(row + '\n' for row in rows)
        with open(os.path.join(saved_path, filename + ".txt"), 'w', encoding='utf-8') as f2:
            f2.write(data)
        print(data)
                          
# Module-level locations passed to txt2darknet when this script is run directly.
# Directory with the source "name label x1 y1 x2 y2" txt files.
txt_path = '/data/cch/yolov5/runs/detect/hand_head_resize/labels'
# Directory that receives the converted darknet-format txt files.
saved_path = '/data/cch/yolov5/runs/detect/hand_head_resize/dr'
# Directory with the .jpg images whose sizes are used for normalisation.
img_path = '/data/cch/data/pintu/test/hand_head_resize/images'

if __name__ == '__main__':    
    txt2darknet(txt_path, img_path, saved_path)

Los dos scripts de conversión anteriores se invocan desde el código del rompecabezas.

rompecabezas

Comencemos nuestro código de rompecabezas:

'''
4*4 grid
Top-left five cells 1 2 3 5 6: head
Bottom-left five cells 9 10 11 13 14: body
Top-right three cells 4 7 8, each split into a 4-cell sub-grid: hand
Bottom-right three cells 12 15 16, each split into a 4-cell sub-grid: foot

For the component puzzle, every component has its own folder; the image and json paths both point to the common parent directory.
'''
import sys
import codecs
import random
import PIL.Image as Image
import os
import cv2
sys.path.append("/data/cch/拼图代码/format_transform")
import json2txt
import modeTxt
import shutil

# Image stitching helper
def image_compose(imgsize, idx, ori_tmp, num, save_path, gt_resized_path, flag):
    """Paste up to ``idx * idx`` tiles onto a square canvas and save it.

    Tiles are placed row by row; tile *i* goes to row ``i // idx`` and
    column ``i % idx`` of an ``idx`` x ``idx`` grid.  Each tile file is looked
    up in *gt_resized_path* first and then in the module-level directories
    ``small_pintu_foot`` and ``small_pintu_hand``; a tile found in none of
    them is left out.

    Args:
        imgsize: side length in pixels of the square output image.
        idx: number of grid cells per row and per column.
        ori_tmp: tile file names, in placement order.
        num: running number used in the output file name.
        save_path: directory the composed image is written to.
        gt_resized_path: first directory searched for tile files.
        flag: prefix of the output file name.

    Returns:
        Path of the saved image, ``<save_path>/<flag><num>.jpg``.
    """
    canvas = Image.new('RGB', (imgsize, imgsize))  # new RGB canvas

    # Row-major cell coordinates; zip() stops once the tiles run out.
    cells = ((row, col) for row in range(idx) for col in range(idx))
    for (row, col), tile_name in zip(cells, ori_tmp):
        search_dirs = [gt_resized_path, small_pintu_foot, small_pintu_hand]
        tile_file = next(
            (os.path.join(folder, tile_name) for folder in search_dirs
             if os.path.exists(os.path.join(folder, tile_name))),
            None,
        )
        if tile_file is None:
            continue
        cell = imgsize / idx
        canvas.paste(Image.open(tile_file), (int(col * cell), int(row * cell)))

    new_name = os.path.join(save_path, flag + str(num) + ".jpg")
    canvas.save(new_name)  # save the composed image
    return new_name

def labels_merge(imgsize, idx, ori_tmp, new_name, txt_resized_path, txt_pintu_path,
                 txt_fallback_path=None):
    """Merge the label files of the tiles of one puzzle image into one file.

    Tile *i* of *ori_tmp* sits at row ``i // idx`` and column ``i % idx`` of
    an ``idx`` x ``idx`` grid over an ``imgsize`` square image.  Each label
    line ``name label x1 y1 x2 y2`` of a tile is shifted by the pixel offset
    of its cell and appended to ``<basename of new_name>.txt`` inside
    *txt_pintu_path*.

    Args:
        imgsize: side length in pixels of the composed image.
        idx: number of grid cells per row and per column.
        ori_tmp: tile file names, in placement order.
        new_name: path of the composed image; only its base name is used.
        txt_resized_path: directory searched first for ``<tile stem>.txt``.
        txt_pintu_path: directory the merged label file is written to.
        txt_fallback_path: directory searched second.  When omitted, the
            module-level ``txt_pintu_path_small`` is used if it is defined.
    """
    if txt_fallback_path is None:
        # The script defines this global only in its __main__ section, so it
        # is absent when the module is imported; look it up defensively.
        txt_fallback_path = globals().get("txt_pintu_path_small")

    merged = []
    # Row-major cell coordinates; zip() stops once the tiles run out.
    cells = ((row, col) for row in range(idx) for col in range(idx))
    for (row, col), tile_name in zip(cells, ori_tmp):
        stem = tile_name.split(".")[0]
        txt_path = os.path.join(txt_resized_path, stem + ".txt")
        if not os.path.exists(txt_path) and txt_fallback_path is not None:
            txt_path = os.path.join(txt_fallback_path, stem + ".txt")
        if not os.path.exists(txt_path):
            # os.path.exists never raises, so the former try/except around
            # it could not report anything; report the missing file here.
            print(txt_path, "file not exists!")
            continue

        x_offset = col * (imgsize / idx)
        y_offset = row * (imgsize / idx)
        with codecs.open(txt_path, 'r', encoding='utf-8', errors='ignore') as f1:
            for line in f1:
                line = line.strip('\n')
                if not line.strip():
                    # Tolerate blank lines instead of failing on a[2].
                    continue
                a = line.split(' ')
                a[2] = str(float(a[2]) + x_offset)
                a[3] = str(float(a[3]) + y_offset)
                a[4] = str(float(a[4]) + x_offset)
                a[5] = str(float(a[5]) + y_offset)
                merged.append(' '.join(a[:6]) + "\n")

    out_stem = os.path.basename(os.path.splitext(new_name)[0])
    write_path = os.path.join(txt_pintu_path, out_stem + ".txt")
    with open(write_path, 'w', encoding='utf-8') as f2:
        f2.write(''.join(merged))
 
def pintu2black(txt_pintu_path, save_path, to_black_num, to_black_min_num, label_black):
    """Black out unwanted boxes in the puzzle images and drop their labels.

    For every label file in *txt_pintu_path* the image ``<stem>.jpg`` in
    *save_path* is loaded.  A box is painted black in the image and removed
    from the label file when both its sides are shorter than
    *to_black_num*, or either side is shorter than *to_black_min_num*, or
    its label is in *label_black*.  The image is rewritten only if at least
    one box was painted; the label file is always rewritten with the
    remaining lines.

    Args:
        txt_pintu_path: directory with ``name label x1 y1 x2 y2`` files.
        save_path: directory with the matching ``.jpg`` images.
        to_black_num: size limit applied when both sides are below it.
        to_black_min_num: size limit applied when either side is below it.
        label_black: collection of labels that are always blacked out.
    """
    for file in os.listdir(txt_pintu_path):
        label_file = os.path.join(txt_pintu_path, file)
        img_path = os.path.join(save_path, os.path.splitext(file)[0] + ".jpg")
        img_origal = cv2.imread(img_path)
        if img_origal is None:
            # cv2.imread returns None for a missing/corrupt file; there is
            # nothing to paint, so leave this label file untouched.
            print(img_path, "image not readable, skipped")
            continue

        kept = []
        painted = False
        with codecs.open(label_file, encoding="utf-8", errors="ignore") as f1:
            for line in f1:
                line = line.strip("\n")
                if not line.strip():
                    continue
                a = line.split(" ")
                # Parse the numbers directly; eval() would execute arbitrary
                # text taken from the label file.
                xmin, ymin, xmax, ymax = (int(float(v)) for v in a[2:6])
                box_w = xmax - xmin
                box_h = ymax - ymin
                too_small = (
                    (box_w < to_black_num and box_h < to_black_num)
                    or box_w < to_black_min_num
                    or box_h < to_black_min_num
                )
                if too_small or a[1] in label_black:
                    # Clamp at 0 so a negative coordinate cannot wrap around
                    # to the opposite edge through negative slicing.
                    img_origal[max(ymin, 0):max(ymax, 0),
                               max(xmin, 0):max(xmax, 0), :] = (0, 0, 0)
                    painted = True
                else:
                    kept.append(line + "\n")

        if painted:
            # One write per image instead of one per painted box.
            cv2.imwrite(img_path, img_origal)
        with open(label_file, 'w', encoding='utf-8') as f2:
            f2.write(''.join(kept))

def gt_distribute(images_path, ori, gt_resized_path, txt_path, gt_range):
    """Resize every ``.jpg`` in *images_path* and collect the tile names.

    Each image is passed to gt_resize with target size *gt_range* and file
    name suffix ``2``; the file name returned by gt_resize is appended to
    *ori* (the list is modified in place).  Files with another extension
    are ignored, and images OpenCV cannot read are reported and skipped.

    Args:
        images_path: directory scanned for ``.jpg`` files (not recursive).
        ori: list that receives the resized tile file names.
        gt_resized_path: output directory handed to gt_resize.
        txt_path: directory with the label files handed to gt_resize.
        gt_range: target size in pixels handed to gt_resize.
    """
    for image_name in os.listdir(images_path):
        if os.path.splitext(image_name)[-1] != ".jpg":
            continue
        imgPath = os.path.join(images_path, image_name)
        img = cv2.imread(imgPath)
        if img is None:
            # cv2.imread returns None instead of raising; without this
            # check gt_resize would fail on img.shape.
            print(imgPath, "image not readable, skipped")
            continue
        ori.append(gt_resize(gt_resized_path, txt_path, image_name, img, gt_range, 2))

def gt_resize(gt_resized_path, txt_path, image_name, img, img_size, x):
    """Resize *img* so its longer side equals *img_size* and scale its labels.

    The resized image is written to ``<gt_resized_path>/<stem>_<x>.jpg``,
    where ``<stem>`` is the part of *image_name* before its first dot.  If
    ``<txt_path>/<stem>.txt`` exists, the four coordinates of every
    ``name label x1 y1 x2 y2`` line are multiplied by the same scale factor
    and written to ``<gt_resized_path>/<stem>_<x>.txt``.

    Args:
        gt_resized_path: output directory; created when missing.
        txt_path: directory holding the label file of the image.
        image_name: file name of the source image.
        img: image array with a three-element ``shape`` (height, width, channels).
        img_size: target length in pixels of the longer side.
        x: suffix appended to the output file names.

    Returns:
        File name (without directory) of the resized image.
    """
    if not os.path.exists(gt_resized_path):
        os.mkdir(gt_resized_path)

    img_h, img_w, _ = img.shape
    # Scale by the longer side so the aspect ratio is kept.
    if img_h < img_w:
        scale = img_size / img_w
        target = (img_size, int(img_h * scale))
    else:
        scale = img_size / img_h
        target = (int(img_w * scale), img_size)
    resized = cv2.resize(img, target, interpolation=cv2.INTER_CUBIC)

    stem = image_name.split(".")[0]
    tile_base = stem + "_" + str(x)
    img_resized = gt_resized_path + "/" + tile_base + ".jpg"
    cv2.imwrite(img_resized, resized)

    txt_name = txt_path + "/" + stem + ".txt"
    txt_resized_name = gt_resized_path + "/" + tile_base + ".txt"
    if os.path.exists(txt_name):
        scaled_lines = []
        with codecs.open(txt_name, 'r', encoding='utf-8', errors='ignore') as src:
            for raw in src.readlines():
                fields = raw.strip('\n').split(' ')
                coords = [str(float(fields[k]) * scale) for k in (2, 3, 4, 5)]
                scaled_lines.append(' '.join([fields[0], fields[1]] + coords) + "\n")
        with open(txt_resized_name, 'w', encoding='utf-8') as dst:
            dst.write(''.join(scaled_lines))

    return img_resized.split("/")[-1]

def pintu(idx, ori, img_threshold, imgsize, save_path, gt_resized_path, txt_pintu_path, flag):
    """Build ``int(img_threshold)`` puzzle images from consecutive tiles of *ori*.

    Each image takes the next ``idx * idx`` names from *ori*, is composed
    with image_compose and gets its labels merged with labels_merge.  Unless
    *flag* is ``"wear_"``, *ori* is shuffled in place before starting and
    again every time all names have been consumed; with ``"wear_"`` the
    order is kept and no reshuffling takes place.  The flag, the number of
    images produced and ``len(ori)`` are printed at the end.

    Args:
        idx: number of grid cells per row and per column.
        ori: list of tile file names (shuffled in place, see above).
        img_threshold: number of puzzle images to create.
        imgsize: side length in pixels of each puzzle image.
        save_path: directory the images are written to.
        gt_resized_path: tile directory, also used as the label directory.
        txt_pintu_path: directory the merged label files are written to.
        flag: output name prefix; ``"wear_"`` disables shuffling.
    """
    may_shuffle = flag != "wear_"
    if may_shuffle:
        random.shuffle(ori)

    tiles_per_image = idx * idx
    cursor = 0
    produced = 0
    for _ in range(int(img_threshold)):
        if may_shuffle and cursor >= len(ori):
            # All tiles used once: start another shuffled pass.
            random.shuffle(ori)
            cursor = 0
        batch = ori[cursor:cursor + tiles_per_image]
        cursor += tiles_per_image
        composed = image_compose(imgsize, idx, batch, produced, save_path, gt_resized_path, flag)
        labels_merge(imgsize, idx, batch, composed, gt_resized_path, txt_pintu_path)
        produced += 1
    print(flag, produced, len(ori))

# Script entry point: builds the 4x4 component puzzle data set.
# The names assigned here are module-level globals; image_compose reads
# small_pintu_foot / small_pintu_hand and labels_merge reads
# txt_pintu_path_small.
if __name__ == "__main__":
    images_path = '/data/cch/test'  # root directory of the image set
    json_path = "/data/cch/test"
    save_path = '/data/cch/save'
    # Start from an empty output directory.
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    else:
        shutil.rmtree(save_path)
        os.mkdir(save_path)

    # Working directory for intermediate files; removed at the end of the run.
    tmp = "/data/cch/pintu_data/save/tmp"
    if not os.path.exists(tmp):
        os.mkdir(tmp)
    else:
        shutil.rmtree(tmp)
        os.mkdir(tmp)
    gt_resized_path = os.path.join(tmp, "gt_resized")
    txt_path = os.path.join(tmp, "txt")        # txt labels of the original data
    txt_pintu_path = os.path.join(tmp, "txt_pintu")
    txt_pintu_path_small = os.path.join(tmp, "txt_pintu_small")
    small_pintu_foot = os.path.join(tmp, "pintu_foot")
    small_pintu_hand = os.path.join(tmp, "pintu_hand")

    # gt_resized_path is not created here; gt_resize creates it on first use.
    os.mkdir(txt_path)
    os.mkdir(txt_pintu_path)
    os.mkdir(txt_pintu_path_small)
    os.mkdir(small_pintu_foot)
    os.mkdir(small_pintu_hand)

    # Labels that pintu2black always paints black.
    label_black = ["other"]
    # Side length in pixels of the final puzzle image.
    imgsize = 416

    # Size limits passed to pintu2black (both sides below / either side below).
    to_black_num = 15        
    to_black_min_num = 5    

    # Target tile sizes: a quarter of the image for head/body tiles,
    # an eighth for hand/foot tiles.
    gt_range_large = int(imgsize / 4)
    gt_range_small = int(imgsize / 8)

    # Convert the JSON annotations of every entry of json_path into
    # txt_path/<entry name>.
    json_dirs = os.listdir(json_path)
    for json_dir in json_dirs:
        json_ori_dir = os.path.join(json_path, json_dir)
        txt_dir = os.path.join(txt_path, json_dir)
        os.mkdir(txt_dir)
        json2txt.main_import(json_ori_dir, txt_dir)

    # foot
    ori_foot = []
    foot_images = os.path.join(images_path, "foot")
    foot_txt = os.path.join(txt_path, "foot")
    gt_distribute(foot_images, ori_foot, gt_resized_path, foot_txt, gt_range_small)

    # Build 2x2 foot mosaics of size imgsize/4; their labels go to
    # txt_pintu_path_small.
    img_threshold = int(len(ori_foot) / 4 * 1.6)
    idx = 2
    pintu(idx, ori_foot, img_threshold, int(imgsize/4), small_pintu_foot, gt_resized_path,\
        txt_pintu_path_small, "foot_")

    # hand
    ori_hand = []
    hand_images = os.path.join(images_path, "hand")
    hand_txt = os.path.join(txt_path, "hand")
    gt_distribute(hand_images, ori_hand, gt_resized_path, hand_txt, gt_range_small)

    # Build 2x2 hand mosaics, same procedure as for the feet.
    img_threshold = int(len(ori_hand) / 4 * 1.6)
    idx = 2
    pintu(idx, ori_hand, img_threshold, int(imgsize/4), small_pintu_hand, gt_resized_path,\
        txt_pintu_path_small, "hand_")

    # head
    ori_head = []
    head_images = os.path.join(images_path, "head")
    head_txt = os.path.join(txt_path, "head")
    gt_distribute(head_images, ori_head, gt_resized_path, head_txt, gt_range_large)

    # body
    ori_body = []
    body_images = os.path.join(images_path, "body")
    body_txt = os.path.join(txt_path, "body")
    gt_distribute(body_images, ori_body, gt_resized_path, body_txt, gt_range_large)
    

    # pintu
    ori = []
    idx = 4
    # From here on the foot/hand lists hold the 2x2 mosaics, not the single tiles.
    ori_foot = os.listdir(small_pintu_foot)
    ori_hand = os.listdir(small_pintu_hand)
    random.shuffle(ori_foot)
    random.shuffle(ori_hand)
    random.shuffle(ori_head)
    random.shuffle(ori_body)

    
    # Next-item cursor for each component list.
    [idx_hand, idx_foot, idx_head, idx_body] = [0, 0, 0, 0]
    img_threshold = int((len(ori_hand) + len(ori_foot) + len(ori_head) + len(ori_body)) / (idx*idx) * 1.5)
    # Each pass appends the 16 tile names of one puzzle image in row-major
    # order, choosing the component by cell position.  A list that has been
    # used up is reshuffled and restarted.  The loop stops once the number
    # of complete images exceeds img_threshold.
    while True:
        for i in range(idx*idx):
            if i in [0,1,2,4,5]:
                # head cells
                if idx_head >= len(ori_head):
                    random.shuffle(ori_head)
                    idx_head = 0
                ori.append(ori_head[idx_head])
                idx_head += 1
            elif i in [3,6,7]:
                # hand cells
                if idx_hand >= len(ori_hand):
                    random.shuffle(ori_hand)
                    idx_hand = 0
                ori.append(ori_hand[idx_hand])
                idx_hand += 1
            elif i in [8,9,10,12,13]:
                # body cells
                if idx_body >= len(ori_body):
                    random.shuffle(ori_body)
                    idx_body = 0
                ori.append(ori_body[idx_body])
                idx_body += 1
            elif i in [11,14,15]:
                # foot cells
                if idx_foot >= len(ori_foot):
                    random.shuffle(ori_foot)
                    idx_foot = 0
                ori.append(ori_foot[idx_foot])
                idx_foot += 1

        if int(len(ori)/(idx*idx)) > img_threshold:
            break
    # The "wear_" flag makes pintu keep the order of ori, so every tile stays
    # in the cell chosen above.
    pintu(idx, ori, int(len(ori)/(idx*idx)), imgsize, save_path, gt_resized_path,\
        txt_pintu_path, "wear_")

    # Paint unwanted boxes black in the final images and drop their labels.
    pintu2black(txt_pintu_path, save_path, to_black_num, to_black_min_num, label_black)

    # input()
    # Convert the merged labels to darknet format, written next to the images.
    modeTxt.txt2darknet(txt_pintu_path, save_path, save_path)
    shutil.rmtree(tmp)

La ruta de entrada aquí es el directorio raíz que contiene las carpetas de los cuatro componentes, como se muestra en la figura:
inserte la descripción de la imagen aquí

Supongo que te gusta

Origin blog.csdn.net/weixin_45354497/article/details/130807002
Recomendado
Clasificación