为了验证我们训练的MTCNN的O-Net在训练集上的表现,我们写了一个测试代码,来测试训练图片的landmark的mean error。
landmark标签格式如下所示:
48/landmark/0.jpg -1 -1 -1 -1 -1 0.224199 0.505338 0.334520 0.327402 0.583630 0.364769 0.336299 0.596085 0.674377 0.635231
48/landmark/1.jpg -1 -1 -1 -1 -1 0.494662 0.775801 0.665480 0.416370 0.672598 0.336299 0.364769 0.596085 0.635231 0.674377
48/landmark/2.jpg -1 -1 -1 -1 -1 0.283217 0.559441 0.391608 0.384615 0.636364 0.435315 0.407343 0.662587 0.739510 0.701049
48/landmark/3.jpg -1 -1 -1 -1 -1 0.440559 0.716783 0.608392 0.363636 0.615385 0.407343 0.435315 0.662587 0.701049 0.739510
48/landmark/4.jpg -1 -1 -1 -1 -1 0.153846 0.457692 0.273077 0.265385 0.542308 0.505769 0.475000 0.755769 0.840385 0.798077
48/landmark/5.jpg -1 -1 -1 -1 -1 0.542308 0.846154 0.726923 0.457692 0.734615 0.475000 0.505769 0.755769 0.798077 0.840385
48/landmark/6.jpg -1 -1 -1 -1 -1 0.110092 0.472477 0.252294 0.243119 0.573394 0.392202 0.355505 0.690367 0.791284 0.740826
48/landmark/7.jpg -1 -1 -1 -1 -1 0.527523 0.889908 0.747706 0.426606 0.756881 0.355505 0.392202 0.690367 0.740826 0.791284
第1列表示:图片名;
第2列表示:是不是人脸,是人脸为1,不是人脸为0, ignore为-1;
第3-6列表示: 人脸框的标签,即人脸框相对于ground truth人脸框左上角的偏移量;
第7-16列表示: 人脸landmark标签,即5个关键点相对于crop区域的归一化坐标,先是5个x坐标(第7-11列),再是5个y坐标(第12-16列)。
训练样本如下图所示:
将48*48的训练样本输入到O-Net中,预测landmark位置, 测试代码如下所示:
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append('D:\\Anaconda2\\libs') import _init_paths import caffe import cv2 import math import numpy as np # from python_wrapper import * import os # 计算pts_gt, pts_pre的mean error def computer_meanerror(pts_gt, pts_pre): mean_error = [] d_outer = math.sqrt((pts_gt[2] - pts_gt[0]) * (pts_gt[2] - pts_gt[0]) + (pts_gt[3] - pts_gt[1]) * (pts_gt[3] - pts_gt[1])) for j in range(5): error = math.sqrt((pts_gt[2 * j] - pts_pre[2 * j]) * (pts_gt[2 * j] - pts_pre[2 * j]) + (pts_gt[2 * j + 1] - pts_pre[2 * j + 1]) * (pts_gt[2 * j + 1] - pts_pre[2 * j + 1])) error = error / d_outer mean_error.append(error) return mean_error # 画关键点 def drawlandmark(im, points): for i in range(points.shape[0]): for j in range(5): cv2.circle(im, (int(points[i][j]), int(points[i][j + 5])), 2, (255, 0, 0), -1) return im # landmark检测程序 def detect_face(img, ONet): # 定义temping数据格式和blob相同,即: num*channel*height*width temping = np.zeros((1, 48, 48, 3)) temping[0,:,:,:] = img tempimg = (temping - 127.5) * 0.0078125 # [0,255] -> [-1,1] , 归一化 # 对temping做轴变换,opencv读取img格式为:height*width*channel,变换后变为*channel*height*width tempimg = np.swapaxes(tempimg, 1, 3) tempimg = np.swapaxes(tempimg, 2, 3) ONet.blobs['data'].reshape(1, 3, 48, 48) ONet.blobs['data'].data[...] 
= tempimg out = ONet.forward() score = out['prob1'][:, 1] # 预测得分 points = out['conv6-3'] # landmark预测偏移量 point = [] for i in range(5): point.append( points[0][i] * 48 ) point.append( points[0][i+5] * 48 ) point = np.array(point) return point # 判断这是否为一个主程序,其他python程序无法调用 if __name__ == '__main__': infile_list = open('C:/mtcnn/48/landmark_48_1000.txt', 'r') img_dir = "C:/mtcnn/" caffe_model_path = "E:/mtcnn_DuinoDu/model" threshold = [0.6, 0.7, 0.7] caffe.set_mode_gpu() # ONet = caffe.Net(caffe_model_path+"/det3.prototxt", caffe_model_path+"/48net_v5_7.caffemodel", caffe.TEST) ONet = caffe.Net(caffe_model_path + "/det3.prototxt", "C:/mtcnn/train_V11_64/models_48_31" + "/_iter_140000.caffemodel", caffe.TEST) landmark_num = 0 landmark_pos = 0 mean_error = [0, 0, 0, 0, 0] mean_error = np.array(mean_error) for name_list in infile_list.readlines(): landmark_num = landmark_num + 1 print landmark_num align_gt = [] # landmark真实坐标位置 # align_pre = [] name_list = name_list.strip().split(' ') image_name = img_dir = "C:/mtcnn/" + name_list[0] img = cv2.imread(image_name) for i in range(5): align_gt.append(float(name_list[i + 6])) align_gt.append(float(name_list[i + 11])) align_gt = np.array(align_gt) # 标签是相对于48*48的crop图像的归一化位置,进行反变换得到真实位置 for j in range(10): align_gt[j] = align_gt[j] * 48 # forward fitch key point align_pre = detect_face(img, ONet) # 预测的landmark坐标位置 #for i in range(5): # cv2.circle(img, (int(align_gt[i]), int(align_gt[i+1])), 2, (255,0,0), -1) cv2.circle(img, (int(align_pre[0]), int(align_pre[1])), 2, (255, 0, 0), -1) cv2.circle(img, (int(align_pre[2]), int(align_pre[3])), 2, (255, 0, 0), -1) cv2.circle(img, (int(align_pre[4]), int(align_pre[5])), 2, (255, 0, 0), -1) cv2.circle(img, (int(align_pre[6]), int(align_pre[7])), 2, (255, 0, 0), -1) cv2.circle(img, (int(align_pre[8]), int(align_pre[9])), 2, (255, 0, 0), -1) #cv2.imshow("image", img) #cv2.waitKey(1000) error = [] # 计算landmark的 mean error error = computer_meanerror(align_gt, align_pre) error = 
np.array(error) mean_error = mean_error + error landmark_pos = landmark_pos + 1 mean_error = mean_error / float(landmark_pos) print "left eye mean error:", mean_error[0] print "right eye mean error:" , mean_error[1] print "nose mean error:" , mean_error[2] print "left mouth mean error:" , mean_error[3] print "right mouth mean error:" , mean_error[4] print "eye and mouth mean error:" ,(mean_error[0] + mean_error[1] + mean_error[3] + mean_error[4] ) / 4.0 infile_list.close()
顺便说一句:linux上pycaffe的测试代码,可以直接移植到windows上,只要将对应平台依赖的库替换即可,代码不需要修改。