OpenCV+深度学习的人脸检测

        本文使用 OpenCV 的 dnn 模块，编写一个基于深度学习的图片人脸检测程序。

需要用到两个文件：deploy.prototxt 和 res10_300x300_ssd_iter_140000.caffemodel。deploy.prototxt 位于 OpenCV 安装目录的 opencv-3.4.1\samples\dnn\face_detector 下；res10_300x300_ssd_iter_140000.caffemodel 需要自行下载。下面的代码通过相对路径加载这两个文件，因此运行时需要把它们放在程序的工作目录下。


#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Network input configuration used when building the input blob:
// the spatial size the image is resized to, the multiplier applied to
// pixel values, and the per-channel mean subtracted from the image.
const size_t inWidth{300};
const size_t inHeight{300};
const double inScaleFactor{1.0};
const Scalar meanVal = Scalar(104.0, 177.0, 123.0);

int main(int argc, char** argv)
{
	// Load image
	Mat img;
	// Use commandline
#if 0
	if (argc < 2)
	{
		cerr << "please input " << endl;
		cerr << "[Format]face_detector_img.exe image.jpg" << endl;
		return -1;
	}
	img = imread(argv[1]);
#else
	// Not use commandline
	img = imread("E:\\faceRec\\facePictures\\face_dec\\newpic\\5.jpg");
#endif

	// Initialize Caffe network
	float min_confidence = 0.5;
	String modelConfiguration = "deploy.prototxt";
	String modelBinary = "res10_300x300_ssd_iter_140000.caffemodel";
	dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);//建立神经网络
	if (net.empty())//Returns true if there are no layers in the network.
	{
		cerr << "Can't load network by using the following files: " << endl;
		cerr << "prototxt:   " << modelConfiguration << endl;
		cerr << "caffemodel: " << modelBinary << endl;
		cerr << "Models are available here:" << endl;
		cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl;
		cerr << "or here:" << endl;
		cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
		exit(-1);
	}

	// Prepare blob
	/*
	Parameters
		image	input image (with 1-, 3- or 4-channels).
		size	spatial size for output image
		mean	scalar with mean values which are subtracted from channels. Values are intended to be in (mean-R, mean-G, mean-B) order if image has BGR ordering and swapRB is true.
		scalefactor	multiplier for image values.
		swapRB	flag which indicates that swap first and last channels in 3-channel image is necessary.
		crop	flag which indicates whether image will be cropped after resize or not
	if crop is true, input image is resized so one side after resize is equal to corresponing dimension in size and another one is equal or larger. Then, crop from the center is performed. If crop is false, direct resize without cropping and preserving aspect ratio is performed.

		Returns
			4-dimansional Mat with NCHW dimensions order.
	*/
	Mat inputBlob = blobFromImage(img, inScaleFactor, Size(inWidth, inHeight), meanVal, false, true);
	net.setInput(inputBlob, "data");	// set the network input
	Mat detection = net.forward("detection_out");	// compute output

													// Calculate and display time and frame rate
	vector<double> layersTimings;
	double freq = getTickFrequency() / 1000;//执行时间
	double time = net.getPerfProfile(layersTimings) / freq;

	Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

	ostringstream ss;
	ss << "FPS: " << 1000 / time << " ; time: " << time << "ms" << endl;
	putText(img, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));//在图像中呈现指定的文本字符串,point文本的位置,Scalar(0, 0, 255)颜色

	// 
	float confidenceThreshold = min_confidence;
	for (int i = 0; i < detectionMat.rows; ++i)
	{
		// judge confidence
		float confidence = detectionMat.at<float>(i, 2);
		if (confidence > confidenceThreshold)
		{
			int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * img.cols);
			int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * img.rows);
			int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * img.cols);
			int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * img.rows);

			Rect object((int)xLeftBottom, (int)yLeftBottom,
				(int)(xRightTop - xLeftBottom),
				(int)(yRightTop - yLeftBottom));//

			rectangle(img, object, Scalar(0, 255, 0));

			ss.str("");
			ss << confidence;
			String conf(ss.str());
			String label = "Face: " + conf;
			int baseLine = 0;
			Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);//getTextSize计算并返回包含指定文本的框的大小。
			rectangle(img, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
				Size(labelSize.width, labelSize.height + baseLine)),
				Scalar(255, 255, 255), CV_FILLED);
			putText(img, label, Point(xLeftBottom, yLeftBottom),
				FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));


		}
	}

	namedWindow("Face Detection", WINDOW_NORMAL);
	imshow("Face Detection", img);
	waitKey(0);

	return 0;
}

运行效果: 

猜你喜欢

转载自blog.csdn.net/sinat_25373795/article/details/81560229