YOLOv3 + Citypersons Dataset

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_33614902/article/details/83313898

1、citypersons2yolo.py

#coding=utf-8
import scipy.io
import os

#Function:将训练集的annotations转换为YOLOv3训练所需的label/train/XXX.txt格式
#How to run? ###python citypersons2yolo.py
def convert(size, box):
    """Turn a corner-style box into a normalized center/size box.

    size -- (image_width, image_height)
    box  -- (xmin, xmax, ymin, ymax) in pixels

    Returns (x_center, y_center, width, height), where the x terms are
    scaled by 1/image_width and the y terms by 1/image_height.
    """
    # Reciprocals of the image dimensions, used as scale factors.
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]

    xmin, xmax = box[0], box[1]
    ymin, ymax = box[2], box[3]

    center_x = (xmin + xmax) / 2.0
    center_y = (ymin + ymax) / 2.0
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h)


# anno_train.mat can be downloaded from
# "https://bitbucket.org/shanshanzhang/citypersons/src/f44d4e585d51d0c3fd7992c8fb913515b26d4b5a/annotations/".
data = scipy.io.loadmat('annotations/anno_train.mat')
data = data['anno_train_aligned'][0]

# Output directory for the per-image label files.
if not os.path.exists('labels/train/'):
    os.makedirs('labels/train/')


# One record per image: write labels/train/<image name without extension>.txt
for record in data:
    im_name = record['im_name'][0][0][0]
    bboxes = record['bbs'][0][0]
    (shot_name, extension) = os.path.splitext(im_name)
    txt_name = os.path.join('labels/train', shot_name + '.txt')
    #im_name = os.path.join('train', im_name.split('_', 1)[0], im_name)

    # `with` guarantees the label file is closed even if a row below raises.
    with open(txt_name, 'w') as f:
        for bbox in bboxes:
            class_label, x1, y1, w, h, instance_id, x1_vis, y1_vis, w_vis, h_vis = bbox
            # Rows with class_label 0 are not exported
            # (presumably "ignore" regions -- confirm against the dataset docs).
            if class_label == 0:
                continue
            # Convert to Python floats before adding so the corner sums are
            # not computed in the array's own dtype.
            left, top = float(x1), float(y1)
            b = (left, left + float(w), top, top + float(h))  # (xmin, xmax, ymin, ymax)
            # Normalize by a fixed 2048x1024 image size.
            bb = convert((int(2048), int(1024)), b)
            # Every exported box is written with class id 0.
            f.write('0 ' + ' '.join([str(a) for a in bb]) + '\n')

2、k-means聚类YOLOv3中使用的anchor

直接使用上一步生成的YOLOv3训练标签(labels/train/*.txt)作为聚类输入即可。

原代码地址:https://github.com/lars76/kmeans-anchor-boxes

修改了example.py文件如下。

运行:python3 example.py。注意:解释器是python3

# coding=utf-8
import glob
import xml.etree.ElementTree as ET

import numpy as np

from kmeans import kmeans, avg_iou


# Directory of label files handed to load_dataset() in the __main__ section.
ANNOTATIONS_PATH = "./citypersons/labels/train/"
#ANNOTATIONS_PATH = "./voc/labels/"
# Number of clusters requested from kmeans() (passed as k).
CLUSTERS = 9
# Width/height the normalized cluster sizes are multiplied by when the
# "Boxes" line is printed; presumably the network input size -- confirm.
# The trailing comments look like alternative values that were tried.
W = 512 #1024
H = 256	#512

def load_dataset(path):
    """Load normalized box sizes directly from YOLO-format label files.

    Every file matched by ``<path>/*txt`` is read line by line. Each
    non-blank line is split on whitespace, and fields 3 and 4 (zero-based)
    are taken as ``[bbox_w / img_w, bbox_h / img_h]``.

    Args:
        path: Directory that contains the label files.

    Returns:
        A float64 ``np.ndarray`` of shape ``(N, 2)`` with one row per box;
        shape ``(0, 2)`` when no boxes were found.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If the width or height field is not a number.
    """
    dataset = []
    for txt_file in glob.glob("{}/*txt".format(path)):
        # The context manager closes the file even when a line fails to parse.
        with open(txt_file, 'r') as f:
            for line in f:
                # split() with no argument tolerates repeated spaces, tabs
                # and trailing newline / carriage-return characters.
                fields = line.split()
                if not fields:
                    # Skip blank lines instead of failing on fields[3].
                    continue
                dataset.append([np.float64(fields[3]), np.float64(fields[4])])

    # reshape keeps the (N, 2) layout even when the list is empty.
    return np.array(dataset, dtype=np.float64).reshape(-1, 2)


if __name__ == '__main__':
    # Cluster the (width, height) pairs loaded from the label files.
    data = load_dataset(ANNOTATIONS_PATH)
    out = kmeans(data, k=CLUSTERS)
    # To score a fixed anchor set instead of the clustering result:
    #clusters = [[10,13],[16,30],[33,23],[30,61],[62,45],[59,119],[116,90],[156,198],[373,326]]
    #out= np.array(clusters)/416.0
    print(out)

    # Report avg_iou between the data and the clusters as a percentage.
    mean_iou = avg_iou(data, out)
    print("Accuracy: {:.2f}%".format(mean_iou * 100))

    # Scale the cluster sizes by W and H before printing them.
    scaled_w = out[:, 0] * W
    scaled_h = out[:, 1] * H
    print("Boxes:\n {}-{}".format(scaled_w, scaled_h))

    # Width/height ratio per cluster, rounded to two decimals, printed sorted.
    ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
    print("Ratios:\n {}".format(sorted(ratios)))

 

猜你喜欢

转载自blog.csdn.net/qq_33614902/article/details/83313898