ResNet：用dlib实现人脸识别—C++，包括人脸检测和人脸矫正(附代码)

前言

前段时间用dlib库做了一个人脸识别的小设计，这里做一个简单的笔记。该设计实现了人脸检测、人脸截取、人脸矫正和人脸识别：通过命令行输入人脸库图片所在的路径和测试图片的路径，用ResNet对每张人脸进行编码，生成一个128维向量，然后通过计算两个向量之间的距离来实现识别。模型都是别人训练好的，可以直接使用。

准备

VS2015 https://pan.baidu.com/s/19PdBDoX7hpfVMlsrYp8hwQ 密码：rh46
dlib http://dlib.net/files/dlib-19.16.zip
opencv https://sourceforge.net/projects/opencvlibrary/files/opencv-win/2.4.13/opencv-2.4.13.3-vc14.exe/download
cmake

环境搭建这里不细说，网上有很多教程。这里选用opencv2而不用opencv3，是因为opencv3里没有contrib模块（这也是当时用血换来的教训），而本程序要用opencv的contrib来进行文件操作。建议把dlib和opencv放在同一个文件夹里，这样移到其他电脑上时，只需要配置环境变量就可以直接使用。

执行

以下是我参考dlib示例代码改出来的代码，代码中我加了注释，很容易看懂。运行之前要先把图片准备好，并下载以下两个模型文件：

shape_predictor_68_face_landmarks.dat

dlib_face_recognition_resnet_model_v1.dat

程序先对图片库里的所有图片进行编码，然后对测试图片进行编码，最后计算测试图片的编码与人脸库中每张图片编码之间的距离，输出距离最小的那个人的标签，从而实现简单的人脸识别。


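/*-------------------------------------------------------------------------------------

This is an example illustrating the use of the deep learning tools from the dlib C++
library. It shows how to do face recognition. The example uses the pretrained
dlib_face_recognition_resnet_model_v1 model, which can be downloaded from the dlib web
site. The model has 99.38% accuracy on the standard LFW face recognition benchmark,
which is comparable to other state-of-the-art face recognition methods as of
February 2017.

To learn how such a model is trained, see the dnn_metric_learning_on_images_ex.cpp example.
------------------------------------------------------------------------------------*/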
#include <cstdio>
#include <vector>
#include<algorithm>
#include<cstdio>

#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <algorithm>

#include "opencv\cv.h"
#include "opencv2\highgui\highgui.hpp"
#include "opencv2\imgproc\imgproc.hpp"
#include "opencv2\contrib\contrib.hpp"


#include <dlib/dnn.h>
#include <dlib/gui_widgets.h>
#include <dlib/clustering.h>
#include <dlib/string.h>
#include <dlib/image_io.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib\opencv.h>


using namespace dlib;
using namespace std;
using namespace cv;

/*-----------------------------------------------------------------------------------*/

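// The next bit of code defines a ResNet network. It is basically copied and pasted
// from the dnn_imagenet_ex.cpp example, except that the loss layer was replaced with
// loss_metric and the network was made somewhat smaller. Read the introductory dlib
// DNN examples to learn what all of this means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this
// network. The dlib_face_recognition_resnet_model_v1 model used here was trained with
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp, except that the
// mini-batches were larger (35x15 instead of 5x5), the "iterations without progress"
// threshold was set to 10000, and the training dataset consisted of about 3 million
// images instead of 55. In addition, the input layer was locked to images of size 150.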
/*------------------------------------------------------------------------------------*/


/*------------------------------------------------------------------------------------*/
// Residual unit without down-sampling: runs `block` with stride 1 on the tag1-tagged
// SUBNET and, via dlib's add_prev1 layer, adds the tag1 output (the unit's input) back
// to the block's output.
template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

// Residual unit with down-sampling: runs `block` with stride 2 and tags its output as
// tag2; skip1 then jumps back to the tag1 output (the unit's input), which is average
// pooled with a 2x2 window and stride 2; add_prev2 adds the tag2 output to that pooled
// shortcut.
template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

// Basic two-convolution block. Reading from the innermost layer outwards:
// 3x3 convolution with N filters and the given stride -> BN -> relu ->
// 3x3 convolution with N filters and stride 1 -> BN.
// BN is a template parameter so the caller chooses the normalisation layer.
template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

// Residual units followed by a relu, with dlib's `affine` layer plugged into the BN slot
// (presumably because the network is only used for inference here -- confirm against the
// dlib examples). ares keeps the spatial size; ares_down uses the down-sampling unit.
template <int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

// Network stages, numbered from the output side (alevel0) to the input side (alevel4).
// alevel0: one down-sampling unit with 256 filters.
template <typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
// alevel1: down-sampling unit followed by two plain units, 256 filters.
template <typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
// alevel2: down-sampling unit followed by two plain units, 128 filters.
template <typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
// alevel3: down-sampling unit followed by three plain units, 64 filters.
template <typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
// alevel4: three plain units with 32 filters, no down-sampling.
template <typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

// Complete face-embedding network. Reading from the input (bottom) to the output (top):
// 150x150 RGB input -> 7x7 convolution with 32 filters, stride 2 -> affine -> relu ->
// 3x3 max pooling with stride 2 -> alevel4 ... alevel0 -> global average pooling ->
// bias-free fully connected layer with 128 outputs -> loss_metric.
// The layout has to stay exactly as written: it is deserialized from
// dlib_face_recognition_resnet_model_v1.dat in main().
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
	alevel0<
	alevel1<
	alevel2<
	alevel3<
	alevel4<
	max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
	input_rgb_image_sized<150>
	>>>>>>>>>>>>;

/*------------------------------------------------------------------------------------*/




int main()
{

	cv::Mat II;                                                       
	std::vector<matrix<float, 0, 1>> vec;        //定义一个向量组，用于存放每一个人脸的编码；
	float vec_error[30];                         //定义一个浮点型的数组，用于存放一个人脸编码与人脸库的每一个人脸编码的差值；

	cout << "Enter the path of picture set：";
	string dir_path;
	cin >> dir_path;
	string test_path;
	cv::Directory dir;
	std::vector<string> fileNames = dir.GetListFiles(dir_path, "*.jpg", false);//统计文件夹里jpg格式文件的个数，并将每个文件的名字保存
	cout << "The number of picture is:" << fileNames.size() << endl;
	/*



	for (int i = 0; i < fileNames.size(); i++)
	{
	string fileName = fileNames[i];
	string fileFullName = dir_path + fileName;
	cout << "file name:" << fileName << endl;
	cout << "file paht:" << fileFullName << endl << endl;

	//Image processing
	Mat pScr;
	pScr = imread(fileFullName, 1); //以文件名命名窗口
	imshow(fileName, pScr);
	}*/


	//我们要做的第一件事是加载所有模型。首先，因为我们需要在图像中查找人脸我们需要人脸检测器：

	frontal_face_detector detector = get_frontal_face_detector();

	//我们还将使用人脸标记模型将人脸与标准姿势对齐：（有关介绍，请参见Face_Landmark_Detection_ex.cpp）
	shape_predictor sp;
	deserialize("G://dlib//shape_predictor_68_face_landmarks.dat") >> sp;

	//终于我们加载Resnet模型进行人脸识别
	anet_type net;
	deserialize("G://dlib//dlib_face_recognition_resnet_model_v1.dat") >> net;

	matrix<rgb_pixel> img, img1, img3;            //定义dlib型图片，彩色


 /*-------------------------------------------------------------------------*/
//此下为建立人脸编码库代码
	for (int k = 0; k < fileNames.size(); k++)  //依次加载完图片库里的文件
	{

		string fileFullName = dir_path + "//" + fileNames[k];//图片地址+文件名
		load_image(img, fileFullName);//load picture      //加载图片
									  // Display the raw image on the screen

		std::vector<dlib::rectangle> dets = detector(img);  //用dlib自带的人脸检测器检测人脸，然后将人脸位置大小信息存放到dets中
		img1 = img;
		cv::Mat I = dlib::toMat(img1);                     //dlib->opencv
		std::vector<full_object_detection> shapes;
		if (dets.size()<1)
			cout << "There is no face" << endl;
		else if (dets.size()>1)
			cout << "There is to many face" << endl;
		else
		{
			shapes.push_back(sp(img, dets[0]));             //画人脸轮廓，68点

			if (!shapes.empty()) {
				for (int j = 0; j < 68; j++) {
					circle(I, cvPoint(shapes[0].part(j).x(), shapes[0].part(j).y()), 3, cv::Scalar(255, 0, 0), -1);

					//	shapes[0].part(i).x();//68¸ö
				}
			}

			dlib::cv_image<rgb_pixel> dlib_img(I);//dlib<-opencv


												  // Run the face detector on the image of our action heroes, and for each face extract a
												  // copy that has been normalized to 150x150 pixels in size and appropriately rotated
												  // and centered.


												  //复制已规格化为150x150像素并适当旋转的

												  //居中。
			std::vector<matrix<rgb_pixel>> faces;//定义存放截取人脸数据组

			auto shape = sp(img, dets[0]);
			matrix<rgb_pixel> face_chip;
			extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);//截取人脸部分，并将大小调为150*150
			faces.push_back(move(face_chip));
			image_window win1(img); //显示原图

			win1.add_overlay(dets[0]);//在原图上框出人脸
			image_window win2(dlib_img);  //显示68点图

			image_window win3(faces[0]);//显示截取的人脸图像
										// Also put some boxes on the faces so we can see that the detector is finding
										// them.
										//同时在表面放置一些盒子，这样我们可以看到探测器正在寻找他们。

										// This call asks the DNN to convert each face image in faces into a 128D vector.
										// In this 128D vector space, images from the same person will be close to each other
										// but vectors from different people will be far apart.  So we can use these vectors to
										// identify if a pair of images are from the same person or from different people.  
										//此调用要求dnn将面中的每个面图像转换为128d矢量。

										//在这个128d向量空间中，同一个人的图像会彼此靠近

										//但是来自不同人群的向量会相差很远。所以我们可以用这些向量

										//标识一对图像是来自同一个人还是来自不同的人。
			std::vector<matrix<float, 0, 1>> face_descriptors = net(faces);//将150*150人脸图像载入Resnet残差网络，返回128D人脸特征存于face_descriptors

																		   //sprintf(vec, "%f", (double)length(face_descriptors[0]);
																		   //printf("%f\n", length(face_descriptors[0]));
																		   //vec[0] = face_descriptors[0];                              
			vec.push_back(face_descriptors[0]);                       //保存这一个人脸的特征向量到vec向量的对应位置
			cout << "The vector of picture " << fileNames[k] << "is:" << trans(face_descriptors[0]) << endl;//打印该人脸的标签和特征向量

																											/*-----------------------------------------------------------------------------------*/

		}
	}

/*---------------------------------------------------------------------------------*/
   //此下为识别部分代码

	while (1) {

		cout << "input the path of test picture:";
		cin >> test_path;
		cout << test_path << endl;
		load_image(img3, test_path);
		//image_window win4(img4);
		std::vector<matrix<rgb_pixel>> faces_test;
		for (auto face_test : detector(img3))
		{
			auto shape_test = sp(img3, face_test);
			matrix<rgb_pixel> face_chip_test;
			extract_image_chip(img3, get_face_chip_details(shape_test, 150, 0.25), face_chip_test);
			faces_test.push_back(move(face_chip_test));
			// Also put some boxes on the faces so we can see that the detector is finding
			// them.

		}

		std::vector<dlib::rectangle> dets_test = detector(img3);
		std::vector<matrix<float, 0, 1>> face_test_descriptors = net(faces_test);


		// In particular, one simple thing we can do is face clustering.  This next bit of code
		// creates a graph of connected faces and then uses the Chinese whispers graph clustering
		// algorithm to identify how many people there are and which faces belong to whom.
		std::vector<sample_pair> edges;
		for (size_t i = 0; i < face_test_descriptors.size(); ++i)                 //比对，识别
		{
			size_t m = 100;
			float error_min = 100.0;
			for (size_t j = 0; j < vec.size(); ++j)
			{

				// Faces are connected in the graph if they are close enough.  Here we check if
				// the distance between two face descriptors is less than 0.6, which is the
				// decision threshold the network was trained to use.  Although you can
				// certainly use any other threshold you find useful.

				vec_error[j] = (double)length(face_test_descriptors[i] - vec[j]);
				cout << "The error of two picture is:" << vec_error[j] << endl;

				//if (length(face_descriptors[i] - face_descriptors[j]) < 0.6)
				if (vec_error[j] < error_min)
				{
					error_min = vec_error[j];
					m = j;

				}

			}
			cout << "min error of two face:" << error_min << endl;
			II = dlib::toMat(img3);//½«dlibÍ¼Ïñ×ªµ½opencv
			std::string text = "Other face";
			if ((error_min < 0.4) && (m <= 27))
				text = fileNames[m];  //通过m定位文件，得到文件名


			int font_face = cv::FONT_HERSHEY_COMPLEX;
			double font_scale = 1;
			int thickness = 2;
			int baseline;
			//获取文本框的长宽
			cv::Size text_size = cv::getTextSize(text, font_face, font_scale, thickness, &baseline);

			//将文本框居中绘制
			cv::Point origin;


			cv::rectangle(II, cv::Rect(dets_test[i].left(), dets_test[i].top(), dets_test[i].width(), dets_test[i].width()), cv::Scalar(0, 0, 255), 1, 1, 0);//画矩形框
			origin.x = dets_test[i].left();
			origin.y = dets_test[i].top();
			cv::putText(II, text, origin, font_face, font_scale, cv::Scalar(255, 0, 0), thickness, 2, 0);//给图片加文字


		}
		dlib::cv_image<rgb_pixel> img4(II);
		image_window win4(img4);
		system("pause");
		if (cv::waitKey(100) == 27)break;
	}

}


// -------------------------------------------------------------------------------------