将widerface标注转换为VOC格式

将widerface标注转换为VOC格式

原文:https://blog.csdn.net/minstyrain/article/details/77986262

widerface是包含了3万多张总计近40万张人脸的人脸检测库,里面包含了大大小小各式各样的人脸,是不可多得的素材。

请将下面的代码保存为 widerface.py,并置于下图所示的 eval_tools 文件夹下,其他文件的目录结构也请参照下图。

Update:

由于 widerface 里包含很多小脸,用 SSD 训练不一定能收敛;此外 SSD 要求输入为方形,否则会挤压图片造成变形。因此下面的代码做了两点处理:只保留宽、高均不小于 20 像素的人脸,并用黑边把图片补齐为方形。

 
  1. import os,h5py,cv2,sys,shutil

  2. import numpy as np

  3. from xml.dom.minidom import Document

  4. rootdir="../"

  5. convet2yoloformat=True

  6. convert2vocformat=True

  7. resized_dim=(48, 48)

  8.  
  9. #最小取20大小的脸,并且补齐

  10. minsize2select=20

  11. usepadding=True

  12.  
  13. datasetprefix="/home/yanhe/data/widerface"#

  14. def gen_hdf5():

  15. imgdir=rootdir+"/WIDER_train/images"

  16. gtfilepath=rootdir+"/wider_face_split/wider_face_train_bbx_gt.txt"

  17. index =0

  18. with open(gtfilepath,'r') as gtfile:

  19. faces=[]

  20. labels=[]

  21. while(True ):#and len(faces)<10

  22. imgpath=gtfile.readline()[:-1]

  23. if(imgpath==""):

  24. break;

  25. print index,imgpath

  26. img=cv2.imread(imgdir+"/"+imgpath)

  27. numbbox=int(gtfile.readline())

  28. bbox=[]

  29. for i in range(numbbox):

  30. line=gtfile.readline()

  31. line=line.split()

  32. line=line[0:4]

  33. if(int(line[3])<=0 or int(line[2])<=0):

  34. continue

  35. bbox=(int(line[0]),int(line[1]),int(line[2]),int(line[3]))

  36. face=img[int(line[1]):int(line[1])+int(line[3]),int(line[0]):int(line[0])+int(line[2])]

  37. face=cv2.resize(face, resized_dim)

  38. faces.append(face)

  39. labels.append(1)

  40. cv2.rectangle(img,(int(line[0]),int(line[1])),(int(line[0])+int(line[2]),int(line[1])+int(line[3])),(255,0,0))

  41. #cv2.imshow("img",img)

  42. #cv2.waitKey(1)

  43. index=index+1

  44. faces=np.asarray(faces)

  45. labels=np.asarray(labels)

  46. f=h5py.File('train.h5','w')

  47. f['data']=faces.astype(np.float32)

  48. f['label']=labels.astype(np.float32)

  49. f.close()

  50. def viewginhdf5():

  51. f = h5py.File('train.h5','r')

  52. f.keys()

  53. faces=f['data'][:]

  54. for face in faces:

  55. face=face.astype(np.uint8)

  56. cv2.imshow("img",face)

  57. cv2.waitKey(1)

  58. f.close()

  59.  
  60. def convertimgset(img_set="train"):

  61. imgdir=rootdir+"/WIDER_"+img_set+"/images"

  62. gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"

  63. imagesdir=rootdir+"/images"

  64. vocannotationdir=rootdir+"/Annotations"

  65. labelsdir=rootdir+"/labels"

  66. if not os.path.exists(imagesdir):

  67. os.mkdir(imagesdir)

  68. if convet2yoloformat:

  69. if not os.path.exists(labelsdir):

  70. os.mkdir(labelsdir)

  71. if convert2vocformat:

  72. if not os.path.exists(vocannotationdir):

  73. os.mkdir(vocannotationdir)

  74. index=0

  75. with open(gtfilepath,'r') as gtfile:

  76. while(True ):#and len(faces)<10

  77. filename=gtfile.readline()[:-1]

  78. if(filename==""):

  79. break;

  80. sys.stdout.write("\r"+str(index)+":"+filename+"\t\t\t")

  81. sys.stdout.flush()

  82. imgpath=imgdir+"/"+filename

  83. img=cv2.imread(imgpath)

  84. if not img.data:

  85. break;

  86. imgheight=img.shape[0]

  87. imgwidth=img.shape[1]

  88. maxl=max(imgheight,imgwidth)

  89. paddingleft=(maxl-imgwidth)>>1

  90. paddingright=(maxl-imgwidth)>>1

  91. paddingbottom=(maxl-imgheight)>>1

  92. paddingtop=(maxl-imgheight)>>1

  93. saveimg=cv2.copyMakeBorder(img,paddingtop,paddingbottom,paddingleft,paddingright,cv2.BORDER_CONSTANT,value=0)

  94. showimg=saveimg.copy()

  95. numbbox=int(gtfile.readline())

  96. bboxes=[]

  97. for i in range(numbbox):

  98. line=gtfile.readline()

  99. line=line.split()

  100. line=line[0:4]

  101. if(int(line[3])<=0 or int(line[2])<=0):

  102. continue

  103. x=int(line[0])+paddingleft

  104. y=int(line[1])+paddingtop

  105. width=int(line[2])

  106. height=int(line[3])

  107. bbox=(x,y,width,height)

  108. x2=x+width

  109. y2=y+height

  110. #face=img[x:x2,y:y2]

  111. if width>=minsize2select and height>=minsize2select:

  112. bboxes.append(bbox)

  113. cv2.rectangle(showimg,(x,y),(x2,y2),(0,255,0))

  114. #maxl=max(width,height)

  115. #x3=(int)(x+(width-maxl)*0.5)

  116. #y3=(int)(y+(height-maxl)*0.5)

  117. #x4=(int)(x3+maxl)

  118. #y4=(int)(y3+maxl)

  119. #cv2.rectangle(img,(x3,y3),(x4,y4),(255,0,0))

  120. else:

  121. cv2.rectangle(showimg,(x,y),(x2,y2),(0,0,255))

  122. filename=filename.replace("/","_")

  123. if len(bboxes)==0:

  124. print "warrning: no face"

  125. continue

  126. cv2.imwrite(imagesdir+"/"+filename,saveimg)

  127. if convet2yoloformat:

  128. height=saveimg.shape[0]

  129. width=saveimg.shape[1]

  130. txtpath=labelsdir+"/"+filename

  131. txtpath=txtpath[:-3]+"txt"

  132. ftxt=open(txtpath,'w')

  133. for i in range(len(bboxes)):

  134. bbox=bboxes[i]

  135. xcenter=(bbox[0]+bbox[2]*0.5)/width

  136. ycenter=(bbox[1]+bbox[3]*0.5)/height

  137. wr=bbox[2]*1.0/width

  138. hr=bbox[3]*1.0/height

  139. txtline="0 "+str(xcenter)+" "+str(ycenter)+" "+str(wr)+" "+str(hr)+"\n"

  140. ftxt.write(txtline)

  141. ftxt.close()

  142. if convert2vocformat:

  143. xmlpath=vocannotationdir+"/"+filename

  144. xmlpath=xmlpath[:-3]+"xml"

  145. doc = Document()

  146. annotation = doc.createElement('annotation')

  147. doc.appendChild(annotation)

  148. folder = doc.createElement('folder')

  149. folder_name = doc.createTextNode('widerface')

  150. folder.appendChild(folder_name)

  151. annotation.appendChild(folder)

  152. filenamenode = doc.createElement('filename')

  153. filename_name = doc.createTextNode(filename)

  154. filenamenode.appendChild(filename_name)

  155. annotation.appendChild(filenamenode)

  156. source = doc.createElement('source')

  157. annotation.appendChild(source)

  158. database = doc.createElement('database')

  159. database.appendChild(doc.createTextNode('wider face Database'))

  160. source.appendChild(database)

  161. annotation_s = doc.createElement('annotation')

  162. annotation_s.appendChild(doc.createTextNode('PASCAL VOC2007'))

  163. source.appendChild(annotation_s)

  164. image = doc.createElement('image')

  165. image.appendChild(doc.createTextNode('flickr'))

  166. source.appendChild(image)

  167. flickrid = doc.createElement('flickrid')

  168. flickrid.appendChild(doc.createTextNode('-1'))

  169. source.appendChild(flickrid)

  170. owner = doc.createElement('owner')

  171. annotation.appendChild(owner)

  172. flickrid_o = doc.createElement('flickrid')

  173. flickrid_o.appendChild(doc.createTextNode('yanyu'))

  174. owner.appendChild(flickrid_o)

  175. name_o = doc.createElement('name')

  176. name_o.appendChild(doc.createTextNode('yanyu'))

  177. owner.appendChild(name_o)

  178. size = doc.createElement('size')

  179. annotation.appendChild(size)

  180. width = doc.createElement('width')

  181. width.appendChild(doc.createTextNode(str(saveimg.shape[1])))

  182. height = doc.createElement('height')

  183. height.appendChild(doc.createTextNode(str(saveimg.shape[0])))

  184. depth = doc.createElement('depth')

  185. depth.appendChild(doc.createTextNode(str(saveimg.shape[2])))

  186. size.appendChild(width)

  187. size.appendChild(height)

  188. size.appendChild(depth)

  189. segmented = doc.createElement('segmented')

  190. segmented.appendChild(doc.createTextNode('0'))

  191. annotation.appendChild(segmented)

  192. for i in range(len(bboxes)):

  193. bbox=bboxes[i]

  194. objects = doc.createElement('object')

  195. annotation.appendChild(objects)

  196. object_name = doc.createElement('name')

  197. object_name.appendChild(doc.createTextNode('face'))

  198. objects.appendChild(object_name)

  199. pose = doc.createElement('pose')

  200. pose.appendChild(doc.createTextNode('Unspecified'))

  201. objects.appendChild(pose)

  202. truncated = doc.createElement('truncated')

  203. truncated.appendChild(doc.createTextNode('1'))

  204. objects.appendChild(truncated)

  205. difficult = doc.createElement('difficult')

  206. difficult.appendChild(doc.createTextNode('0'))

  207. objects.appendChild(difficult)

  208. bndbox = doc.createElement('bndbox')

  209. objects.appendChild(bndbox)

  210. xmin = doc.createElement('xmin')

  211. xmin.appendChild(doc.createTextNode(str(bbox[0])))

  212. bndbox.appendChild(xmin)

  213. ymin = doc.createElement('ymin')

  214. ymin.appendChild(doc.createTextNode(str(bbox[1])))

  215. bndbox.appendChild(ymin)

  216. xmax = doc.createElement('xmax')

  217. xmax.appendChild(doc.createTextNode(str(bbox[0]+bbox[2])))

  218. bndbox.appendChild(xmax)

  219. ymax = doc.createElement('ymax')

  220. ymax.appendChild(doc.createTextNode(str(bbox[1]+bbox[3])))

  221. bndbox.appendChild(ymax)

  222. f=open(xmlpath,"w")

  223. f.write(doc.toprettyxml(indent = ''))

  224. f.close()

  225. #cv2.imshow("img",showimg)

  226. #cv2.waitKey()

  227. index=index+1

  228.  
  229. def generatetxt(img_set="train"):

  230. gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"

  231. f=open(rootdir+"/"+img_set+".txt","w")

  232. with open(gtfilepath,'r') as gtfile:

  233. while(True ):#and len(faces)<10

  234. filename=gtfile.readline()[:-1]

  235. if(filename==""):

  236. break;

  237. filename=filename.replace("/","_")

  238. imgfilepath=datasetprefix+"/images/"+filename

  239. f.write(imgfilepath+'\n')

  240. numbbox=int(gtfile.readline())

  241. for i in range(numbbox):

  242. line=gtfile.readline()

  243. f.close()

  244.  
  245. def generatevocsets(img_set="train"):

  246. if not os.path.exists(rootdir+"/ImageSets"):

  247. os.mkdir(rootdir+"/ImageSets")

  248. if not os.path.exists(rootdir+"/ImageSets/Main"):

  249. os.mkdir(rootdir+"/ImageSets/Main")

  250. gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"

  251. f=open(rootdir+"/ImageSets/Main/"+img_set+".txt",'w')

  252. with open(gtfilepath,'r') as gtfile:

  253. while(True ):#and len(faces)<10

  254. filename=gtfile.readline()[:-1]

  255. if(filename==""):

  256. break;

  257. filename=filename.replace("/","_")

  258. imgfilepath=filename[:-4]

  259. f.write(imgfilepath+'\n')

  260. numbbox=int(gtfile.readline())

  261. for i in range(numbbox):

  262. line=gtfile.readline()

  263. f.close()

  264.  
  265. def convertdataset():

  266. img_sets=["train","val"]

  267. for img_set in img_sets:

  268. convertimgset(img_set)

  269. generatetxt(img_set)

  270. generatevocsets(img_set)

  271.  
  272. if __name__=="__main__":

  273. convertdataset()

  274. shutil.move(rootdir+"/"+"train.txt",rootdir+"/"+"trainval.txt")

  275. shutil.move(rootdir+"/"+"val.txt",rootdir+"/"+"test.txt")

  276. shutil.move(rootdir+"/ImageSets/Main/"+"train.txt",rootdir+"/ImageSets/Main/"+"trainval.txt")

  277. shutil.move(rootdir+"/ImageSets/Main/"+"val.txt",rootdir+"/ImageSets/Main/"+"test.txt")


如果没有时间自己转换,也可以下载已经转换好的文件,百度网盘,密码:xsdt

猜你喜欢

转载自blog.csdn.net/jacke121/article/details/83828311