将widerface标注转换为VOC格式
原文:https://blog.csdn.net/minstyrain/article/details/77986262
widerface是一个包含3万多张图片、总计近40万张人脸的人脸检测数据集,里面包含了大大小小、各式各样的人脸,是不可多得的素材。
请将下面的代码保存为widerface.py,并置于下图所示的eval_tools文件夹下,其他的文件结构也如图所示。
Update:
由于widerface里包含很多小脸,用SSD训练不一定能收敛;此外SSD要求输入为方形,否则会挤压图片造成变形,因此需要对此做些处理(下面的代码只保留宽高均不小于20像素的人脸,并把图片补齐为方形)。
-
import os,h5py,cv2,sys,shutil
-
import numpy as np
-
from xml.dom.minidom import Document
-
# Base directory: every input path and every output path below is built from it.
rootdir = "../"

# When true, convertimgset writes YOLO-style label .txt files.
# (The spelling of this identifier is kept because other code reads it by this name.)
convet2yoloformat = True

# When true, convertimgset writes PASCAL VOC style annotation .xml files.
convert2vocformat = True

# Target size handed to cv2.resize for each face crop in gen_hdf5.
resized_dim = (48, 48)

# Only faces of at least size 20 are selected, and the image is padded.
minsize2select = 20
usepadding = True

# Prefix written in front of each image path by generatetxt.
datasetprefix = "/home/yanhe/data/widerface"
-
def gen_hdf5():
    """Crop every annotated training face, resize it and store the crops in ``train.h5``.

    Reads ``wider_face_train_bbx_gt.txt`` under ``rootdir``. Each record is an
    image path line, a box-count line, then that many box lines whose first
    four fields are ``x y width height``. Every box with positive width and
    height is cut out of the image, resized to ``resized_dim`` and appended
    to the ``data`` dataset; ``label`` holds a ``1`` per crop. Both datasets
    are written as float32.

    Images that ``cv2.imread`` cannot load, and boxes whose crop is empty,
    are skipped instead of aborting the run.
    """
    imgdir = rootdir + "/WIDER_train/images"
    gtfilepath = rootdir + "/wider_face_split/wider_face_train_bbx_gt.txt"
    faces = []
    labels = []
    index = 0
    with open(gtfilepath, 'r') as gtfile:
        while True:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            imgpath = gtfile.readline().strip()
            if imgpath == "":
                break
            print("%d %s" % (index, imgpath))
            img = cv2.imread(imgdir + "/" + imgpath)
            numbbox = int(gtfile.readline())
            for _ in range(numbbox):
                # Always consume the box line so the parser stays in step,
                # even when the image itself could not be loaded.
                fields = gtfile.readline().split()[0:4]
                if img is None:
                    continue
                x, y, w, h = [int(v) for v in fields]
                if w <= 0 or h <= 0:
                    continue
                face = img[y:y + h, x:x + w]
                if face.size == 0:
                    # Box lies outside the image; cv2.resize rejects empty input.
                    continue
                # No rectangle is drawn on img here: drawing on the source
                # image would leak outline pixels into later, overlapping crops.
                faces.append(cv2.resize(face, resized_dim))
                labels.append(1)
            index = index + 1
    faces = np.asarray(faces)
    labels = np.asarray(labels)
    with h5py.File('train.h5', 'w') as f:
        f['data'] = faces.astype(np.float32)
        f['label'] = labels.astype(np.float32)
-
def viewginhdf5():
    """Show each face stored in the ``data`` dataset of ``train.h5`` in an OpenCV window.

    The whole dataset is read into memory first, so the HDF5 file is already
    closed while the images are displayed (and is closed even if reading
    fails). Each face is cast to uint8 and shown in the window named "img",
    followed by ``cv2.waitKey(1)``.
    """
    with h5py.File('train.h5', 'r') as f:
        faces = f['data'][:]
    for face in faces:
        cv2.imshow("img", face.astype(np.uint8))
        cv2.waitKey(1)
-
def _append_xml_node(doc, parent, tag, text=None):
    """Create ``<tag>`` (optionally holding ``text``), attach it to ``parent`` and return it."""
    node = doc.createElement(tag)
    if text is not None:
        node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def _write_yolo_labels(txtpath, bboxes, imgwidth, imgheight):
    """Write one ``0 xcenter ycenter w h`` line per box, each value divided by the image size."""
    with open(txtpath, 'w') as ftxt:
        for (x, y, w, h) in bboxes:
            xcenter = (x + w * 0.5) / imgwidth
            ycenter = (y + h * 0.5) / imgheight
            wr = w * 1.0 / imgwidth
            hr = h * 1.0 / imgheight
            ftxt.write("0 " + str(xcenter) + " " + str(ycenter) + " " + str(wr) + " " + str(hr) + "\n")


def _write_voc_annotation(xmlpath, filename, bboxes, imgshape):
    """Write the VOC-style annotation XML for one image with one ``face`` object per box."""
    doc = Document()
    annotation = _append_xml_node(doc, doc, 'annotation')
    _append_xml_node(doc, annotation, 'folder', 'widerface')
    _append_xml_node(doc, annotation, 'filename', filename)

    source = _append_xml_node(doc, annotation, 'source')
    _append_xml_node(doc, source, 'database', 'wider face Database')
    _append_xml_node(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_xml_node(doc, source, 'image', 'flickr')
    _append_xml_node(doc, source, 'flickrid', '-1')

    owner = _append_xml_node(doc, annotation, 'owner')
    _append_xml_node(doc, owner, 'flickrid', 'yanyu')
    _append_xml_node(doc, owner, 'name', 'yanyu')

    size = _append_xml_node(doc, annotation, 'size')
    _append_xml_node(doc, size, 'width', str(imgshape[1]))
    _append_xml_node(doc, size, 'height', str(imgshape[0]))
    _append_xml_node(doc, size, 'depth', str(imgshape[2]))

    _append_xml_node(doc, annotation, 'segmented', '0')

    for (x, y, w, h) in bboxes:
        obj = _append_xml_node(doc, annotation, 'object')
        _append_xml_node(doc, obj, 'name', 'face')
        _append_xml_node(doc, obj, 'pose', 'Unspecified')
        _append_xml_node(doc, obj, 'truncated', '1')
        _append_xml_node(doc, obj, 'difficult', '0')
        bndbox = _append_xml_node(doc, obj, 'bndbox')
        _append_xml_node(doc, bndbox, 'xmin', str(x))
        _append_xml_node(doc, bndbox, 'ymin', str(y))
        _append_xml_node(doc, bndbox, 'xmax', str(x + w))
        _append_xml_node(doc, bndbox, 'ymax', str(y + h))

    with open(xmlpath, "w") as f:
        f.write(doc.toprettyxml(indent=''))


def convertimgset(img_set="train"):
    """Convert one WIDER FACE split into padded images plus YOLO and/or VOC labels.

    For every record of ``wider_face_<img_set>_bbx_gt.txt`` (image path line,
    box-count line, then that many ``x y width height ...`` lines) the image
    is loaded, padded with black borders to a square when ``usepadding`` is
    true, and written to ``rootdir/images`` with ``/`` in its relative path
    replaced by ``_``. Boxes are shifted by the padding offset; only boxes
    whose width and height are both at least ``minsize2select`` are kept.
    Images without any kept box are reported and not written.

    Depending on ``convet2yoloformat`` / ``convert2vocformat``, a label file
    is written to ``rootdir/labels`` and/or ``rootdir/Annotations``.

    Args:
        img_set: Split name used to build the input paths, e.g. "train" or "val".

    Processing stops at the first image that cannot be read.
    """
    imgdir = rootdir + "/WIDER_" + img_set + "/images"
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    imagesdir = rootdir + "/images"
    vocannotationdir = rootdir + "/Annotations"
    labelsdir = rootdir + "/labels"

    outdirs = [imagesdir]
    if convet2yoloformat:
        outdirs.append(labelsdir)
    if convert2vocformat:
        outdirs.append(vocannotationdir)
    for outdir in outdirs:
        if not os.path.exists(outdir):
            os.mkdir(outdir)

    index = 0
    with open(gtfilepath, 'r') as gtfile:
        while True:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            filename = gtfile.readline().strip()
            if filename == "":
                break
            sys.stdout.write("\r" + str(index) + ":" + filename + "\t\t\t")
            sys.stdout.flush()

            imgpath = imgdir + "/" + filename
            img = cv2.imread(imgpath)
            # cv2.imread signals failure by returning None, so the check must
            # not touch attributes of the result before testing for it.
            if img is None or img.size == 0:
                sys.stderr.write("\nwarning: cannot read " + imgpath + ", stopping\n")
                break

            imgheight = img.shape[0]
            imgwidth = img.shape[1]
            if usepadding:
                maxl = max(imgheight, imgwidth)
                # The second border of each pair takes the remainder, so an
                # odd difference still yields an exactly square image.
                paddingleft = (maxl - imgwidth) >> 1
                paddingright = (maxl - imgwidth) - paddingleft
                paddingtop = (maxl - imgheight) >> 1
                paddingbottom = (maxl - imgheight) - paddingtop
                saveimg = cv2.copyMakeBorder(img, paddingtop, paddingbottom, paddingleft, paddingright,
                                             cv2.BORDER_CONSTANT, value=0)
            else:
                paddingleft = 0
                paddingtop = 0
                saveimg = img

            # NOTE(review): some releases of the ground-truth file reportedly
            # still carry one placeholder box line when the count is 0; this
            # parser reads exactly `numbbox` lines - confirm against the file in use.
            numbbox = int(gtfile.readline())
            bboxes = []
            for _ in range(numbbox):
                fields = gtfile.readline().split()[0:4]
                width = int(fields[2])
                height = int(fields[3])
                if height <= 0 or width <= 0:
                    continue
                if width >= minsize2select and height >= minsize2select:
                    x = int(fields[0]) + paddingleft
                    y = int(fields[1]) + paddingtop
                    bboxes.append((x, y, width, height))

            filename = filename.replace("/", "_")
            if len(bboxes) == 0:
                print("warrning: no face")
                continue

            cv2.imwrite(imagesdir + "/" + filename, saveimg)
            # splitext copes with extensions of any length, unlike slicing
            # off a fixed number of characters.
            stem = os.path.splitext(filename)[0]
            if convet2yoloformat:
                _write_yolo_labels(labelsdir + "/" + stem + ".txt", bboxes,
                                   saveimg.shape[1], saveimg.shape[0])
            if convert2vocformat:
                _write_voc_annotation(vocannotationdir + "/" + stem + ".xml", filename,
                                      bboxes, saveimg.shape)
            index = index + 1
-
def generatetxt(img_set="train"):
    """Write ``rootdir/<img_set>.txt`` listing the converted image paths of one split.

    Each output line is ``datasetprefix + "/images/" + name`` where ``name``
    is the image path from the ground-truth file with ``/`` replaced by ``_``.

    Only images with at least one box that convertimgset would keep (positive
    width and height, both at least ``minsize2select``) are listed, because
    convertimgset writes no image or label for the others and the list would
    otherwise point at files that do not exist.

    Args:
        img_set: Split name used to build the input and output paths.
    """
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    listpath = rootdir + "/" + img_set + ".txt"
    with open(gtfilepath, 'r') as gtfile, open(listpath, "w") as f:
        while True:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            filename = gtfile.readline().strip()
            if filename == "":
                break
            numbbox = int(gtfile.readline())
            selected = 0
            for _ in range(numbbox):
                fields = gtfile.readline().split()[0:4]
                width = int(fields[2])
                height = int(fields[3])
                if width <= 0 or height <= 0:
                    continue
                if width >= minsize2select and height >= minsize2select:
                    selected += 1
            if selected == 0:
                continue
            f.write(datasetprefix + "/images/" + filename.replace("/", "_") + '\n')
-
def generatevocsets(img_set="train"):
    """Write ``rootdir/ImageSets/Main/<img_set>.txt`` with one image id per line.

    An image id is the ground-truth image path with ``/`` replaced by ``_``
    and the file extension removed. The ``ImageSets/Main`` directory is
    created when missing.

    Only images with at least one box that convertimgset would keep (positive
    width and height, both at least ``minsize2select``) are listed, because
    convertimgset writes no annotation for the others.

    Args:
        img_set: Split name used to build the input and output paths.
    """
    setsdir = rootdir + "/ImageSets/Main"
    if not os.path.exists(setsdir):
        os.makedirs(setsdir)
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    with open(gtfilepath, 'r') as gtfile, open(setsdir + "/" + img_set + ".txt", 'w') as f:
        while True:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            filename = gtfile.readline().strip()
            if filename == "":
                break
            numbbox = int(gtfile.readline())
            selected = 0
            for _ in range(numbbox):
                fields = gtfile.readline().split()[0:4]
                width = int(fields[2])
                height = int(fields[3])
                if width <= 0 or height <= 0:
                    continue
                if width >= minsize2select and height >= minsize2select:
                    selected += 1
            if selected == 0:
                continue
            # splitext removes the extension whatever its length.
            imgid = os.path.splitext(filename.replace("/", "_"))[0]
            f.write(imgid + '\n')
-
def convertdataset():
    """Run image conversion and both list generators for "train", then for "val"."""
    for split in ("train", "val"):
        convertimgset(split)
        generatetxt(split)
        generatevocsets(split)
-
if __name__ == "__main__":
    convertdataset()
    # Rename the generated lists: train -> trainval and val -> test, first
    # directly under rootdir, then under ImageSets/Main.
    renames = (("train.txt", "trainval.txt"), ("val.txt", "test.txt"))
    for folder in (rootdir + "/", rootdir + "/ImageSets/Main/"):
        for oldname, newname in renames:
            shutil.move(folder + oldname, folder + newname)
如果没有时间自己转换,也可以下载已经转换好的文件,百度网盘,密码:xsdt