Notes: converting a JSON annotation file to the txt formats required by YOLO, and adapting the yolov4 anchor-box (prior-box) clustering script

Scenes:

I downloaded the image recognition data set of the Alibaba Cloud Algorithm Competition from the Internet, and provided it in json format. I plan to use the yolo algorithm to practice, so I need to modify the label file format and record it for future use.
The original json format data set is as follows:

[
    {
        "name": "223_89_t20201125085855802_CAM3.jpg",
        "image_height": 3500,
        "image_width": 4096,
        "category": 4,
        "bbox": [
            1702.79,
            2826.53,
            1730.79,
            2844.53
        ]
    },
    {
        "name": "235_2_t20201127123021723_CAM2.jpg",
        "image_height": 6000,
        "image_width": 8192,
        "category": 5,
        "bbox": [
            1876.06,
            998.04,
            1883.06,
            1004.04
        ]
    },
    {
        "name": "235_2_t20201127123021723_CAM2.jpg",
        "image_height": 6000,
        "image_width": 8192,
        "category": 5,
        "bbox": [
            1655.06,
            1094.04,
            1663.06,
            1102.04
        ]
    },
    ......

1. Transform to the txt format required by yolov5

The txt labels consumed by yolov5 use relative (normalized) coordinates, which need to be computed. Step 1: convert each image's annotations in the json file into a separate txt file named after the image. The code is as follows:

#json 转 yolov5需要的txt

import os
import json


json_dir = 'train_annos.json'  # path to the input json annotation file
out_dir = 'output/'  # directory that receives the generated txt label files


def main(json_path='train_annos.json', output_dir='output/'):
    """Convert a json annotation file into per-image yolov5 txt label files.

    For every record, appends one line
    ``<category> <cx> <cy> <w> <h>`` to ``<output_dir>/<image-stem>.txt``,
    where center and size are normalized by the image width/height.

    Parameters default to the original module-level paths, so a plain
    ``main()`` call behaves exactly as before.
    """
    with open(json_path, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = output_dir + stem + '.txt'

        # scale factors that turn pixel values into relative coordinates
        dw = 1. / t['image_width']
        dh = 1. / t['image_height']
        # NOTE(review): the "-1" offset is copied from darknet's
        # voc_label.py, which corrects VOC's 1-based pixel coordinates;
        # confirm these json bboxes are really 1-based before keeping it.
        x = (t['bbox'][0] + t['bbox'][2]) / 2.0 - 1
        y = (t['bbox'][1] + t['bbox'][3]) / 2.0 - 1
        w = t['bbox'][2] - t['bbox'][0]
        h = t['bbox'][3] - t['bbox'][1]
        x = x * dw
        w = w * dw  # bugfix: the original wrote "w = x * dw", corrupting every width
        y = y * dh
        h = h * dh

        file_str = (str(t['category']) + ' ' + str(x) + ' ' + str(y) +
                    ' ' + str(round(w, 6)) + ' ' + str(h))
        # append, separating multiple boxes of the same image with newlines
        is_first_box = (not os.path.exists(filename)
                        or os.path.getsize(filename) == 0)
        with open(filename, mode='a', encoding='utf-8') as fp:
            fp.write(file_str if is_first_box else '\n' + file_str)


if __name__ == '__main__':
    main()
The second step: split the data into training and validation sets. First, split the label files:
#拆分lables

import os
import random 
import glob
import shutil 


xmlfilepath='/home/conda_work/conda_hsz/yolov5-master-aliyun/output/'
saveBasePath="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/train/"
saveBasePath1="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/val/"

# fraction of files kept at all, and the train share within that
trainval_percent = 1
train_percent = 0.8

# create the destination directories up front so shutil.copy cannot fail
os.makedirs(saveBasePath, exist_ok=True)
os.makedirs(saveBasePath1, exist_ok=True)

# collect every generated label file
total_xml = [name for name in os.listdir(xmlfilepath) if name.endswith(".txt")]

num = len(total_xml)
# NOTE: 'list' shadows the builtin; the name is kept because the
# image-split snippet below reads it
list = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list, tv)
train = random.sample(trainval, tr)

train_ids = set(train)  # O(1) membership instead of O(n) per file
for i in list:
    name = total_xml[i]
    dest = saveBasePath if i in train_ids else saveBasePath1
    shutil.copy(xmlfilepath + name, dest + name)

Then split the images the same way:

# Split the images so they mirror the label train/val split above.

xmlfilepath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/train_imgs/"
saveBasePath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/train/"
saveBasePath3="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/val/"

# create destinations so shutil.copy does not fail on a fresh checkout
os.makedirs(saveBasePath2, exist_ok=True)
os.makedirs(saveBasePath3, exist_ok=True)

train_index_set = set(train)  # reuse the label split so images and labels stay paired
for i in list:
    name = total_xml[i].split(".")[0] + ".jpg"
    dest = saveBasePath2 if i in train_index_set else saveBasePath3
    shutil.copy(xmlfilepath2 + name, dest + name)

2. Transform to the txt format required by yolov4

The txt required by yolov4 uses absolute (pixel) coordinates. The first step is to split the json into one txt file per image. The code is as follows:

import codecs
import json
import glob
# import cv2
import shutil
from sklearn.model_selection import train_test_split
import os

#转换坐标

def conver(json_dir, out_dir):
    """Split a json annotation file into per-image txt files for yolov4.

    For each record, appends ``xmin,ymin,xmax,ymax,category `` (absolute
    pixel coordinates, note the trailing space between boxes) to
    ``<out_dir>/<image-stem>.txt``. All boxes of one image end up on a
    single line, which is what the integration step below expects.
    """
    with open(json_dir, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = out_dir + stem + '.txt'

        # yolov4 wants the raw corner coordinates, not normalized values
        xmin = float(t['bbox'][0])
        ymin = float(t['bbox'][1])
        xmax = float(t['bbox'][2])
        ymax = float(t['bbox'][3])

        box_str = (str(xmin) + ',' + str(ymin) + ',' + str(xmax) +
                   ',' + str(ymax) + ',' + str(t['category']) + ' ')
        # append mode creates the file on first use; the original's
        # if/else wrote the identical string in both branches, so no
        # separator logic is needed at all
        with open(filename, mode='a', encoding='utf-8') as fp:
            fp.write(box_str)

# run the conversion
json_dir = "/home/conda_work/conda_hsz/yolov4-keras-master_416/train_annos.json"   # json annotation file
out_dir = "VOCdevkit/VOC2007/Annotations/"
os.makedirs(out_dir, exist_ok=True)  # conver() assumes the directory exists
conver(json_dir, out_dir)

The second step is to integrate into a txt file.

#整合到txt
import glob
out_dir = "VOCdevkit/VOC2007/Annotations/"

org_txt_files = sorted(glob.glob(os.path.join(out_dir, '*.txt')))
# basename/splitext handle both path separators, unlike the original
# split("\\") which only worked on Windows and needed extra split("/") fixes
org_txt_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_txt_files]

img_path = "/home/conda_work/conda_hsz/yolov4-keras-master-aliyun/train_imgs/"

org_img_files = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
org_img_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_img_files]
img_name_set = set(org_img_file_names)  # O(1) membership test per label file

# one line per image: "<image path> x1,y1,x2,y2,c x1,y1,x2,y2,c ..."
with open('./2007_train.txt', 'w') as list_file:
    for i, txt_file in enumerate(org_txt_files):
        stem = org_txt_file_names[i]
        # conver() wrote all boxes of an image on a single line
        with open(txt_file, "r", encoding="utf-8") as fp:
            box_txt = fp.readline()
        if stem in img_name_set:
            image_path = os.path.join(img_path, stem + '.jpg')
            list_file.write(image_path + ' ' + box_txt)
            list_file.write('\n')

3. Transform the kmeans_for_anchors.py file in yolov4

The main thing is to modify the load_data method in the file. It was originally based on xml to read the annotation data, and after the transformation, it is based on the json format to read the annotation data. The original and modified functions are as follows:

#原始方法
def load_data1(path):
    data = []
    # 对于每一个xml都寻找box
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # 对于每一个目标都获得它的宽高
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # 得到宽高
            data.append([xmax-xmin,ymax-ymin])
    return np.array(data)

After transformation

def load_data(path):
    """Read json annotations and return an array of normalized (width, height) box sizes."""
    with open(path, 'r') as handle:
        records = json.load(handle)
    sizes = []
    for record in records:
        img_h = record['image_height']
        img_w = record['image_width']
        # one box per record: normalize the corners by the image size
        x0 = np.float64(float(record['bbox'][0]) / img_w)
        y0 = np.float64(float(record['bbox'][1]) / img_h)
        x1 = np.float64(float(record['bbox'][2]) / img_w)
        y1 = np.float64(float(record['bbox'][3]) / img_h)
        # keep only the relative width/height for clustering
        sizes.append([x1 - x0, y1 - y0])
    return np.array(sizes)

Guess you like

Origin blog.csdn.net/h363924219/article/details/113561818