Adapting the public EgoHands dataset to train a general-purpose YOLOv5 hand-detection model

〇. Background:

This project requires real-time monitoring of human hands with a camera. The easy-to-use and efficient YOLOv5 was my first choice of detector, and I soon found a suitable public dataset: EgoHands.

EgoHands: A Dataset for Hands in Complex Egocentric Interactions | IU Computer Vision Lab

1. Introduction of Egohands:

The EgoHands dataset contains 48 Google Glass videos of complex first-person interactions between two people. The main purpose of this dataset is to provide a better data-driven approach to understanding hands in first-person computer vision.

I downloaded and used the [Labeled Data] archive of this dataset. It contains all the labeled frames as JPEG files (720x1280 px, i.e. 1280 pixels wide by 720 pixels high). Each of the 48 videos has 100 labeled frames, for a total of 4,800 frames. The ground-truth labels consist of pixel-wise masks for each hand shape and are provided as MATLAB files.

Because the labels are stored as MATLAB files, they have to be converted to the label format that YOLOv5 expects.

2. Dataset conversion:

The following programs are mainly for reference; you will need to adapt them (the paths in particular) to your own setup before using them.

1. Automatically download, extract and organize the dataset

import scipy.io as sio
import numpy as np
import os
import gc
import six.moves.urllib as urllib
import cv2
import time
import xml.etree.cElementTree as ET
import random
import shutil as sh
from shutil import copyfile
import zipfile

import csv


def save_csv(csv_path, csv_content):
    """Write rows to a CSV file, replacing the file if it already exists.

    Args:
        csv_path: Path of the CSV file to write.
        csv_content: Iterable of rows; each row is an iterable of cell values.
    """
    # newline='' hands line-ending control to the csv module; without it
    # every row is followed by a spurious blank line on Windows.
    with open(csv_path, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(csv_content)


def get_bbox_visualize(base_path, dir):
    """Compute hand bounding boxes for one folder and save them as CSV files.

    Loads ``polygons.mat`` from the folder ``base_path``/``dir``, pairs its
    entries with the folder's ``.jpg`` files in sorted order, and for every
    image writes ``<image name>.csv`` next to it with one row per bounding
    box: ``filename, width, height, "hand", xmin, ymin, xmax, ymax``.
    An image whose CSV already exists is left untouched. Each newly processed
    frame is also shown, with polygons and boxes drawn on it, in an OpenCV
    window so the annotation can be checked by eye.

    Args:
        base_path: Directory that contains the per-video folders.
        dir: Name of the folder to process.

    Raises:
        ValueError: If ``polygons.mat`` has more entries than there are images.
        IOError: If an image cannot be read.
    """
    folder = os.path.join(base_path, dir)

    # splitext (instead of split(".")[1]) copes with names that contain no
    # dot or more than one dot. Sorting gives the low-to-high frame order
    # that the entries of polygons.mat are expected to follow.
    image_paths = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if os.path.splitext(name)[1] == ".jpg"
    )

    # One entry per labeled frame (100 per folder in the egohands dataset).
    polygons = sio.loadmat(os.path.join(folder, "polygons.mat"))["polygons"][0]
    if len(polygons) > len(image_paths):
        raise ValueError(
            "polygons.mat in %s has %d entries but only %d images were found"
            % (folder, len(polygons), len(image_paths)))

    font = cv2.FONT_HERSHEY_SIMPLEX

    for img_path, frame_polygons in zip(image_paths, polygons):
        img = cv2.imread(img_path)
        if img is None:
            raise IOError("Could not read image: " + img_path)
        img_height, img_width = img.shape[:2]
        tail = os.path.basename(img_path)

        rows = []
        for pointlist in frame_polygons:
            # Entries that are not (x, y) pairs are ignored.
            points = [(int(point[0]), int(point[1]))
                      for point in pointlist if len(point) == 2]

            if points:
                xs, ys = zip(*points)
                min_x, max_x = min(xs), max(xs)
                min_y, max_y = min(ys), max(ys)
            else:
                # Polygon without points: all-zero box, filtered out below.
                min_x = min_y = max_x = max_y = 0

            for x, y in points:
                cv2.putText(img, ".", (x, y), font, 0.7,
                            (255, 255, 255), 2, cv2.LINE_AA)

            # Keeps only boxes whose corners are all strictly positive.
            # NOTE(review): this also drops boxes that touch the left/top
            # image edge (coordinate 0) - confirm that is intended.
            if min_x > 0 and min_y > 0 and max_x > 0 and max_y > 0:
                rows.append([tail, img_width, img_height, "hand",
                             min_x, min_y, max_x, max_y])

            pst = np.array(points, dtype=int).reshape(-1, 2)
            cv2.polylines(img, [pst], True, (0, 255, 255), 1)
            cv2.rectangle(img, (min_x, max_y),
                          (max_x, min_y), (0, 255, 0), 1)

        # splitext strips only the final extension, so a dot elsewhere in the
        # path (e.g. a leading "./") no longer truncates the CSV path.
        csv_path = os.path.splitext(img_path)[0] + ".csv"
        if not os.path.exists(csv_path):
            cv2.putText(img, "DIR : " + dir + " - " + tail, (20, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
            cv2.imshow('Verifying annotation ', img)
            save_csv(csv_path, rows)
            print("===== saving csv file for ", tail)
        # Waits 2 ms for a key event, which also lets the preview window
        # repaint; it does not close the window.
        cv2.waitKey(2)


def create_directory(dir_path):
    """Create ``dir_path`` (including missing parents) unless it already exists.

    Args:
        dir_path: Directory path to create.
    """
    # exist_ok=True removes the race between a separate exists() check and
    # the creation itself.
    os.makedirs(dir_path, exist_ok=True)

# combine all individual csv files for each image into a single csv file per folder.


def generate_label_files(image_dir):
    """Merge the per-image CSV files of each sub-folder into one label CSV.

    For every direct sub-folder ``<name>`` of ``image_dir`` the rows of all
    ``.csv`` files in it are collected under a single header row, the source
    files are deleted, and the result is written to
    ``<name>/<name>_labels.csv``.

    Args:
        image_dir: Parent directory whose sub-folders are processed,
            e.g. ``"images/"``.
    """
    header = ['filename', 'width', 'height',
              'class', 'xmin', 'ymin', 'xmax', 'ymax']

    # Only the direct sub-folders are wanted. Taking the first os.walk entry
    # (with an empty default) keeps a missing directory a silent no-op.
    _, subdirs, _ = next(os.walk(image_dir), (image_dir, [], []))

    for subdir in subdirs:
        folder = os.path.join(image_dir, subdir)
        rows = [header]
        for name in sorted(os.listdir(folder)):
            if os.path.splitext(name)[1] != ".csv":
                continue
            source_csv = os.path.join(folder, name)
            with open(source_csv, 'r', newline='') as csv_file:
                # Blank rows and header rows are skipped, so a labels file
                # left over from an earlier run is folded back in without
                # duplicating its header.
                rows.extend(row for row in csv.reader(csv_file)
                            if row and row != header)
            os.remove(source_csv)

        labels_path = os.path.join(folder, subdir + "_labels.csv")
        save_csv(labels_path, rows)
        print("Saved label csv for ", subdir, labels_path)


# Split data, copy to train/test folders
def split_data_test_eval_train(image_dir, test_fraction=0.1):
    """Move every image and its CSV into ``images/train`` or ``images/test``.

    All ``.jpg`` files in the direct sub-folders of ``image_dir`` are listed
    first, a random ``test_fraction`` of them is selected for the test set,
    and each image is moved together with its same-named ``.csv`` file.
    Afterwards ``polygons.mat`` and the emptied sub-folder are removed and the
    per-split label files are generated with ``generate_label_files``.

    Args:
        image_dir: Directory that contains the per-video folders.
        test_fraction: Share of the images (0..1) moved to ``images/test``.

    Raises:
        ValueError: If ``test_fraction`` is outside the range 0..1.
    """
    if not 0 <= test_fraction <= 1:
        raise ValueError("test_fraction must be between 0 and 1")

    create_directory("images")
    create_directory("images/train")
    create_directory("images/test")

    _, subdirs, _ = next(os.walk(image_dir), (image_dir, [], []))
    subdirs = sorted(subdirs)

    # List the images up front so the sample is drawn from the real image
    # count; a hard-coded size would leave any image beyond it out of the
    # test set.
    image_files = []
    for subdir in subdirs:
        folder = os.path.join(image_dir, subdir)
        for name in sorted(os.listdir(folder)):
            if os.path.splitext(name)[1] == ".jpg":
                image_files.append((folder, name))

    test_count = int(test_fraction * len(image_files))
    # A set gives O(1) membership tests in the loop below.
    test_indices = set(random.sample(range(len(image_files)), k=test_count))

    for index, (folder, name) in enumerate(image_files):
        target_dir = "images/test/" if index in test_indices else "images/train/"
        csv_name = os.path.splitext(name)[0] + ".csv"
        os.rename(os.path.join(folder, name), target_dir + name)
        os.rename(os.path.join(folder, csv_name), target_dir + csv_name)
        print(index + 1, name)

    for subdir in subdirs:
        folder = os.path.join(image_dir, subdir)
        print(">   done scanning director ", subdir)
        os.remove(os.path.join(folder, "polygons.mat"))
        os.rmdir(folder)

    print("Train/test content generation complete!")
    generate_label_files("images/")


def generate_csv_files(image_dir):
    """Create the per-image CSV files for every folder, then split the data.

    Calls ``get_bbox_visualize`` for each direct sub-folder of ``image_dir``
    and afterwards hands the same directory to
    ``split_data_test_eval_train``.

    Args:
        image_dir: Directory that contains the per-video folders.
    """
    # Only the direct sub-folders hold frames; a missing directory yields an
    # empty list instead of an error.
    _, subdirs, _ = next(os.walk(image_dir), (image_dir, [], []))
    for subdir in sorted(subdirs):
        get_bbox_visualize(image_dir, subdir)

    print("CSV generation complete!\nGenerating train/test/eval folders")
    # Use the directory that was passed in rather than a hard-coded path.
    split_data_test_eval_train(image_dir)


# rename image files so we can have them all in a train/test/eval folder.
def rename_files(image_dir):
    """Prefix every image with its folder name, then generate the CSV files.

    Each ``.jpg`` in a direct sub-folder ``<dir>`` of ``image_dir`` is renamed
    to ``<dir>_<original name>`` so that file names stay unique once all
    images are moved into shared folders. Files that already carry the prefix
    are skipped. Finally ``generate_csv_files`` is called on ``image_dir``.

    Args:
        image_dir: Directory that contains the per-video folders.
    """
    print("Renaming files")
    _, subdirs, _ = next(os.walk(image_dir), (image_dir, [], []))
    for subdir in subdirs:
        folder = os.path.join(image_dir, subdir)
        prefix = subdir + "_"
        for name in os.listdir(folder):
            if os.path.splitext(name)[1] != ".jpg":
                continue
            # Skip only this file (not the rest of the folder) when it is
            # already prefixed, so a partially renamed folder gets finished.
            if name.startswith(prefix):
                continue
            os.rename(os.path.join(folder, name),
                      os.path.join(folder, prefix + name))

    generate_csv_files(image_dir)

def extract_folder(dataset_path):
    """Extract the dataset archive and start the conversion pipeline.

    The archive is extracted into ``egohands`` and ``rename_files`` is called
    on ``egohands/_LABELLED_SAMPLES/`` only when no ``egohands`` entry exists
    yet; otherwise nothing is extracted or processed.

    Args:
        dataset_path: Path of the downloaded zip archive.
    """
    print("Egohands dataset already downloaded.\nGenerating CSV files")
    if os.path.exists("egohands"):
        # Say so explicitly; otherwise the run ends without any explanation.
        print("> 'egohands' folder already exists, skipping extraction")
        return

    print("> Extracting Dataset files")
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
        zip_ref.extractall("egohands")
    print("> Extraction complete")
    rename_files("egohands/_LABELLED_SAMPLES/")

def download_egohands_dataset(dataset_url, dataset_path):
    """Download the dataset archive if needed, then extract and process it.

    Args:
        dataset_url: URL of the zip archive.
        dataset_path: Local path where the archive is stored.
    """
    if not os.path.exists(dataset_path):
        print(
            "> downloading egohands dataset. This may take a while (1.3GB, say 3-5mins). Coffee break?")
        # Download under a temporary name and rename on success, so an
        # interrupted transfer is never mistaken for a complete archive on
        # the next run.
        partial_path = dataset_path + ".part"
        # urlretrieve replaces the deprecated URLopener class.
        urllib.request.urlretrieve(dataset_url, partial_path)
        os.replace(partial_path, dataset_path)
        print("> download complete")

    extract_folder(dataset_path)


# Location of the labeled EgoHands archive and the local file it is saved to.
EGOHANDS_DATASET_URL = "http://vision.soic.indiana.edu/egohands_files/egohands_data.zip"
EGO_HANDS_FILE = "egohands_data.zip"


# Run the pipeline only when executed as a script, so importing this module
# does not trigger the download.
if __name__ == "__main__":
    download_egohands_dataset(EGOHANDS_DATASET_URL, EGO_HANDS_FILE)

In the data_deal folder, train_labels.csv and test_labels.csv are the label files produced by the script above; these CSV files are converted in the next step.

2. Convert csv file to txt file

# -*- coding: utf-8 -*-
import os

import pandas as pd


def convert_labels_csv_to_txt(csv_path, txt_path):
    """Convert a merged label CSV into a tab-separated txt file.

    The first CSV row is treated as the header. Every following row
    ``filename,width,height,class,xmin,ymin,xmax,ymax`` becomes one line
    ``filename<TAB>width,height<TAB>xmin,ymin,xmax,ymax<TAB>class``.

    Args:
        csv_path: Path of the label CSV to read.
        txt_path: Path of the txt file to write (overwritten if present).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    data = pd.read_csv(csv_path, encoding='utf-8')

    # 'w' instead of 'a+': running the conversion again must not append a
    # second copy of every line.
    with open(txt_path, 'w', encoding='utf-8') as f:
        for line in data.values:
            f.write(f"{line[0]}\t{line[1]},{line[2]}\t"
                    f"{line[4]},{line[5]},{line[6]},{line[7]}\t{line[3]}\n")


path_dir = './'

if __name__ == '__main__':
    # Convert both splits; the next step reads the train file as well.
    for split in ('train', 'test'):
        csvPath = path_dir + split + '_labels.csv'
        txtPath = path_dir + split + '_labels.txt'
        if not os.path.exists(csvPath):
            print('No such file: %s' % csvPath)
            continue
        convert_labels_csv_to_txt(csvPath, txtPath)

Once the conversion is complete there are two txt files (one for the training set and one for the test set); the next step is to split each of them into per-image files.

3. Split the txt file

import os


def split_labels_by_image(labels_txt, out_dir):
    """Split a combined label txt file into one label file per image.

    Every non-empty input line has the tab-separated fields
    ``file name, image size, coordinates, label``. For each image a file
    named after it (extension replaced by ``.txt``) is written to ``out_dir``
    with one ``coordinates,label`` line per box, in input order.

    Args:
        labels_txt: Path of the combined label txt file.
        out_dir: Directory for the per-image files (created if missing).
    """
    os.makedirs(out_dir, exist_ok=True)

    # Group first and write each file once with 'w', so running the split
    # again does not append duplicate boxes.
    boxes_by_file = {}
    with open(labels_txt, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            words = line.split('\t')
            file_name = words[0]
            coordinate = words[2]
            label = words[3]
            # splitext handles extensions of any length, unlike [:-3].
            out_name = os.path.splitext(file_name)[0] + '.txt'
            boxes_by_file.setdefault(out_name, []).append(
                coordinate + ',' + label + '\n')

    for out_name, box_lines in boxes_by_file.items():
        with open(os.path.join(out_dir, out_name), 'w', encoding='utf-8') as f1:
            f1.writelines(box_lines)


if __name__ == '__main__':
    for labels_txt, txt_path in (('./train_labels.txt', './train_txt/'),
                                 ('./test_labels.txt', './test_txt/')):
        if not os.path.exists(labels_txt):
            print('No such file: %s' % labels_txt)
            continue
        split_labels_by_image(labels_txt, txt_path)

After the split is complete, the corresponding per-image txt label files have been generated in the two folders. Their format is still not the one YOLOv5 requires, so one more conversion is needed.

4. Convert txt file

import os

input_folder = "./labels/train/"  # replace with the folder that contains the txt files
output_folder = "./labels_yolo/train/"  # replace with the folder for the converted labels

image_width = 1280
image_height = 720


def to_yolo_line(line, width=1280, height=720):
    """Convert one ``xmin,ymin,xmax,ymax,class`` line to a YOLO label line.

    Args:
        line: Comma-separated box in pixel coordinates followed by the class
            name.
        width: Image width in pixels used for normalization.
        height: Image height in pixels used for normalization.

    Returns:
        ``"0 <center_x> <center_y> <width> <height>"`` with the four values
        normalized by the image size and printed with six decimals. The class
        index is always 0; the class name in the input is not used.
    """
    xmin, ymin, xmax, ymax, _class_name = line.strip().split(",")
    xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)

    center_x = (xmin + xmax) / (2.0 * width)
    center_y = (ymin + ymax) / (2.0 * height)
    box_width = (xmax - xmin) / width
    box_height = (ymax - ymin) / height

    return f"0 {center_x:.6f} {center_y:.6f} {box_width:.6f} {box_height:.6f}"


def convert_folder_to_yolo(src_folder, dst_folder, width=1280, height=720):
    """Convert every ``.txt`` label file in a folder to YOLO format.

    Args:
        src_folder: Folder with ``xmin,ymin,xmax,ymax,class`` label files.
        dst_folder: Folder for the converted files (created if missing);
            file names are kept.
        width: Image width in pixels.
        height: Image height in pixels.
    """
    os.makedirs(dst_folder, exist_ok=True)

    for filename in os.listdir(src_folder):
        if not filename.endswith(".txt"):
            continue
        with open(os.path.join(src_folder, filename), "r") as input_file:
            # Blank lines are skipped; they would otherwise break the
            # five-field unpacking.
            output_lines = [to_yolo_line(line, width, height)
                            for line in input_file if line.strip()]

        with open(os.path.join(dst_folder, filename), "w") as output_file:
            output_file.write("\n".join(output_lines))


if __name__ == "__main__":
    convert_folder_to_yolo(input_folder, output_folder,
                           image_width, image_height)

At this point the conversion is finished: we have the complete set of 4,800 images together with their txt label files, which can now be used for training.

3. YOLOv5 training

Rather than using the server, I trained for half a night on an idle 2060 GPU; the results are as follows:

 

 

 

 

 

4. Directly download the modified data set, labels and trained model files

I originally wanted to upload it as a CSDN resource, but the compressed file is 1.3 GB and CSDN only accepts uploads of up to 1 GB, so I cannot earn any download points from it. Below are links on 123pan and Baidu Netdisk; feel free to grab the files:

123pan: https://www.123pan.com/s/wTqA-j00ph.html
Extraction code: oBH3
Baidu Netdisk: https://pan.baidu.com/s/1wT8K4xTutfqE3WXLanAxiA?pwd=nmdj 
Extraction code: nmdj

 

Guess you like

Origin blog.csdn.net/weixin_45498383/article/details/132224006