[Target Detection] Visdrone data set and CARPK data set preprocessing

Previous blog post[Target Detection] YOLOv5 ran through the VisDrone data set I have introduced the Visdrone data set and will not repeat it here. This article mainly focuses on Visdrone dataset and CARPK dataset for target extraction and filtering.

Description of Requirement

This article needs to extract and merge the data sets about cars and people in the Visdrone data set. Cars are marked as category 0 and people are marked as category 1, and converted into txt format supported by YOLO.

Visdrone dataset

Convert Visdrone data set to YOLO txt format

First, perform a format conversion on the original data set. The following code continues to use the official conversion script.

from utils.general import download, os, Path


def visdrone2yolo(dir):
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box to YOLO xywh box
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
    pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {
      
      dir}')
    for f in pbar:
        img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
        lines = []
        with open(f, 'r') as file:  # read annotation.txt
            for row in [x.split(',') for x in file.read().strip().splitlines()]:
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1  # 类别号-1
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{
      
      cls} {
      
      ' '.join(f'{ 
        x:.6f}' for x in box)}\n")
                with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
                    fl.writelines(lines)  # write label.txt


dir = Path(r'E:\Dataset\VisDrone')  # datasets文件夹下Visdrone2019文件夹目录
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels

Label visualization

Visualize the txt tag and see the effect before filtering.

import os
import numpy as np
import cv2

# 修改输入图片文件夹
img_folder = "image"
img_list = os.listdir(img_folder)
img_list.sort()
# 修改输入标签文件夹
label_folder = "labels2"
label_list = os.listdir(label_folder)
label_list.sort()
# 输出图片文件夹位置
path = os.getcwd()
output_folder = path + '/' + str("output")
os.mkdir(output_folder)

# 坐标转换
def xywh2xyxy(x, w1, h1, img):
    label, x, y, w, h = x
    # print("原图宽高:\nw1={}\nh1={}".format(w1, h1))
    # 边界框反归一化
    x_t = x * w1
    y_t = y * h1
    w_t = w * w1
    h_t = h * h1
    # print("反归一化后输出：\n第一个:{}\t第二个:{}\t第三个:{}\t第四个:{}\t\n\n".format(x_t, y_t, w_t, h_t))
    # 计算坐标
    top_left_x = x_t - w_t / 2
    top_left_y = y_t - h_t / 2
    bottom_right_x = x_t + w_t / 2
    bottom_right_y = y_t + h_t / 2

    # print('标签:{}'.format(labels[int(label)]))
    # print("左上x坐标:{}".format(top_left_x))
    # print("左上y坐标:{}".format(top_left_y))
    # print("右下x坐标:{}".format(bottom_right_x))
    # print("右下y坐标:{}".format(bottom_right_y))
    # 绘制矩形框
    # cv2.rectangle(img, (int(top_left_x), int(top_left_y)), (int(bottom_right_x), int(bottom_right_y)), colormap[1], 2)
    # (可选)给不同目标绘制不同的颜色框
    if int(label) == 0:
        cv2.rectangle(img, (int(top_left_x), int(top_left_y)), (int(bottom_right_x), int(bottom_right_y)), (0, 255, 0), 2)
    elif int(label) == 1:
        cv2.rectangle(img, (int(top_left_x), int(top_left_y)), (int(bottom_right_x), int(bottom_right_y)), (255, 0, 0), 2)
    else:
        cv2.rectangle(img, (int(top_left_x), int(top_left_y)), (int(bottom_right_x), int(bottom_right_y)), (0, 0, 0), 2)

    return img


if __name__ == '__main__':
    for i in range(len(img_list)):
        image_path = img_folder + "/" + img_list[i]
        label_path = label_folder + "/" + label_list[i]
        # 读取图像文件
        img = cv2.imread(str(image_path))
        h, w = img.shape[:2]
        # 读取 labels
        with open(label_path, 'r') as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)
        # 绘制每一个目标
        for x in lb:
            # 反归一化并得到左上和右下坐标，画出矩形框
            img = xywh2xyxy(x, w, h, img)
        """
        # 直接查看生成结果图
        cv2.imshow('show', img)
        cv2.waitKey(0)
        """
        cv2.imwrite(output_folder + '/' + '{}.png'.format(image_path.split('/')[-1][:-4]), img)

The visualization effect is as shown in the figure:
Note: This data set also distinguishes human postures. People in walking state are classified as pedestrian, and other postures (such as lying down or sitting) Below) labeled people.

Insert image description here

filter tags

Specific filtering rules:

Merge car, van, truck, bus into car(0)
Merge pedestrian, people is person(1)
Discard other categories

import os
import numpy as np
from tqdm import tqdm

# Visdrone类别
# names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]

# 修改输入标签文件夹
label_folder = "labels"
label_list = os.listdir(label_folder)

# 标签输出文件夹
label_output = "labels2"

# class_set
car_set = [3, 4, 5, 8]
person_set = [0, 1]

if __name__ == '__main__':
    for label_file in tqdm(os.listdir(label_folder)):
        # 读取 labels
        with open(os.path.join(label_folder, label_file), 'r') as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)
        # 写入 labels
        with open(os.path.join(label_output, label_file), 'a') as f:
            for obj in lb:
                # 若是行人，修改类别为1
                if int(obj[0]) in person_set:
                    obj[0] = 1
                    f.write(('%g ' * 5).rstrip() % tuple(obj) + '\n')
                # 若是车辆，修改类别为0
                elif int(obj[0]) in car_set:
                    obj[0] = 0
                    f.write(('%g ' * 5).rstrip() % tuple(obj) + '\n')

The effect after filtering is shown in the figure:

Insert image description here

CARPK data set

The CARPK data set is a car data set captured by a drone at an altitude of 40 meters, which only contains a single target of a car.

Download address:https://github.com/zstar1003/Dataset

Original label format:

1019 521 1129 571 1
1013 583 1120 634 1

The corresponding meanings are: xmin, ymin, xmax, ymax, cls

Processing script:

import os
import numpy as np
from tqdm import tqdm

# 修改输入标签文件夹
# label_folder = r"E:\Dataset\CARPK_devkit\data\Annotations"
label_folder = r"annotations"
label_list = os.listdir(label_folder)

# 标签输出文件夹
label_output = r"labels"

# 图像宽高
img_width = 1280
img_height = 720

if __name__ == '__main__':
    for label_file in tqdm(os.listdir(label_folder)):
        # 读取 labels
        with open(os.path.join(label_folder, label_file), 'r') as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=int)
        for obj in lb:
            class_index = obj[4]
            xmin, ymin, xmax, ymax = obj[0], obj[1], obj[2], obj[3]
            # 将box信息转换到yolo格式
            xcenter = xmin + (xmax - xmin) / 2
            ycenter = ymin + (ymax - ymin) / 2
            w = xmax - xmin
            h = ymax - ymin
            # 绝对坐标转相对坐标，保存6位小数
            xcenter = round(xcenter / img_width, 6)
            ycenter = round(ycenter / img_height, 6)
            w = round(w / img_width, 6)
            h = round(h / img_height, 6)
            info = [str(i) for i in [class_index, xcenter, ycenter, w, h]]
            # 写入 labels
            with open(os.path.join(label_output, label_file), 'a') as f:
                # 若文件不为空，添加换行
                if os.path.getsize(os.path.join(label_output, label_file)):
                    f.write("\n" + " ".join(info))
                else:
                    f.write(" ".join(info))

Visually verify the conversion effect:

Insert image description here