1、说明
计算VOC数据集中各个类别(以我们自己的数据集为例:'car','cottage','town house','apartment','person','bird nest','honeycomb')的目标数量,并统计每个类别中目标框面积(像素)小于16、32、64、128、256、512、1024……的目标数量,同时绘制对应的直方图。
import os
from PIL import Image
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import numpy as np
def calculateObjectArea(xml, objectname, im=None, outputPath=None):
    """Collect the bounding-box areas of one category from an annotation file.

    Every direct child of the XML root that carries a ``<name>`` child whose
    stripped text equals ``objectname`` is treated as a match, and the area
    ``(xmax - xmin) * (ymax - ymin)`` is computed for each ``<bndbox>`` found
    anywhere beneath it.

    Args:
        xml: Annotation source handed to ``ET.parse`` (a path or file object).
        objectname: Category label to look for; compared for equality, so
            'car' does not match 'carpet'.
        im: Unused; kept so existing callers keep working.
        outputPath: Unused; kept so existing callers keep working.

    Returns:
        A list of integer areas sorted in ascending order (empty when the
        category does not occur in the file).

    Raises:
        TypeError: If a matching ``<bndbox>`` lacks one of the four
            coordinate tags (``int(None)``).
        ValueError: If a coordinate is not an integer literal.
    """
    areas = []
    root = ET.parse(xml).getroot()
    for element in root:
        # Only the <name> tag identifies the category. Elements without text
        # (text is None) are skipped instead of crashing the membership test.
        labels = [
            child.text.strip()
            for child in element
            if child.tag == 'name' and child.text
        ]
        if objectname not in labels:
            continue
        # iter() also visits nested boxes below the matching element, which
        # is what the original traversal did.
        for box in element.iter('bndbox'):
            # Read the coordinates from this box only, so a missing tag can
            # never silently reuse a value left over from a previous box.
            xmin = int(box.findtext('xmin'))
            xmax = int(box.findtext('xmax'))
            ymin = int(box.findtext('ymin'))
            ymax = int(box.findtext('ymax'))
            areas.append((xmax - xmin) * (ymax - ymin))
    areas.sort()
    return areas
def drawHist(tup, objectname, width):
    """Draw a histogram of object areas on the current pyplot axes.

    Args:
        tup: Sequence of area values to plot. It is left unmodified.
        objectname: Text used as the plot title.
        width: Forwarded to ``plt.hist`` as its ``bins`` argument.

    The figure is neither shown nor saved here; the caller decides when to
    call ``plt.show()`` or ``plt.savefig()``.
    """
    # `normed` was removed from hist() in Matplotlib 3.1; `density=False`
    # is the supported way to request raw counts on the y axis.
    plt.hist(tup, width, density=False, facecolor='g')
    plt.xlabel('Object Area')
    plt.ylabel('Number')
    plt.title(objectname)
    plt.grid(True, linewidth=1)
# (exclusive upper bound on the area, number of histogram bins) for every
# cumulative bucket, in the order in which they are reported.
_AREA_BUCKETS = (
    (16, 30),
    (32, 30),
    (64, 30),
    (128, 30),
    (256, 30),
    (384, 30),
    (512, 30),
    (640, 30),
    (768, 50),
    (1024, 50),
    (1152, 60),
    (1280, 80),
    (1536, 80),
    (1792, 100),
    (2000, 100),
    (4000, 100),
    (6000, 100),
)


def searchxml(xmlpath, objectname):
    """Report and plot the area distribution of one category.

    Walks ``xmlpath`` recursively, gathers the areas returned by
    ``calculateObjectArea`` for every ``.xml`` file whose path does not
    contain '.idea', prints the total count and draws a histogram of all
    areas. Then, for each bound in ``_AREA_BUCKETS``, prints the number of
    areas with ``0 < area < bound`` and draws a histogram of them; buckets
    with no members are skipped.

    Args:
        xmlpath: Root directory that holds the annotation files.
        objectname: Category label passed through to ``calculateObjectArea``.
    """
    areas = []
    for dirpath, _dirnames, filenames in os.walk(xmlpath):
        for filename in filenames:
            fullpath = os.path.join(dirpath, filename)
            if os.path.splitext(filename)[1] == '.xml' and '.idea' not in fullpath:
                # extend() appends in place instead of re-copying the whole
                # accumulated list for every file.
                areas.extend(calculateObjectArea(fullpath, objectname))

    print(objectname + ' total:', len(areas))
    drawHist(areas, objectname + '\'s number:' + str(len(areas)), 200)

    for limit, bins in _AREA_BUCKETS:
        # Buckets are cumulative: each one holds every positive area below
        # its bound, so a small box appears in all larger buckets as well.
        bucket = [area for area in areas if 0 < area < limit]
        if not bucket:
            continue
        label = '\'area<' + str(limit) + '\' pixel'
        print('the number of ' + objectname + ' (' + label + '):', len(bucket))
        drawHist(bucket, objectname + '--the number of ' + label + ':' + str(len(bucket)), bins)
if __name__ == '__main__':
    # Directory that is searched recursively for annotation XML files.
    xmlpath = './Annotations'
    # Categories of the custom dataset; each one is analysed in turn.
    objectnameList = [
        'car',
        'cottage',
        'town house',
        'apartment',
        'person',
        'bird nest',
        'honeycomb',
    ]
    for objectname in objectnameList:
        searchxml(xmlpath, objectname)
2、结果
直接上图