Optimization of face detection in opencv video stream (camera)

Note: The code in this post works with both OpenCV 2.4 and OpenCV 3.4; define the macro VERSION_2_4 to build against 2.4 (leave it undefined for 3.4).


The previous article implemented basic face detection for still images and video streams, but only in a simple, crude way. In practice the video case suffers the most: detection is slow enough that the video visibly stutters. This post focuses on optimizing that case.


1. Unoptimized version of the program

First, here is the simple, crude OpenCV code that opens the camera and detects faces:

#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;  
using namespace std;
  
// control the compiled version macro
//#define VERSION_2_4

intmain()  
{  
	CascadeClassifier faceCascade;
    double t = 0;
	int		nRet = 0;

	VideoCapture capture;
	capture.open(0);
//  capture.open("video.avi");
	if(!capture.isOpened())
	{
	  cout << "open camera failed. " << endl;
	  return -1;
	}

	/* load classifier */
#ifdef VERSION_2_4	
		nRet = faceCascade.load("/root/library/opencv/opencv-2.4.13.6/data/haarcascades/haarcascade_frontalface_alt.xml");
#else
		nRet = faceCascade.load("/root/library/opencv/opencv-3.4.0/data/haarcascades/haarcascade_frontalface_alt.xml");
#endif

	if(!nRet)
	{
		printf("load xml failed.\n");
		return -1;
	}
	
    Mat img, imgGray;
    vector<Rect> faces;
	while(1)
	{
		capture >> img;
		if(img.empty())	
		{
			continue;
		}

		cvtColor(img, imgGray, CV_RGB2GRAY);  

		/* detect faces */
		t = (double)getTickCount();
		faceCascade.detectMultiScale( imgGray, faces,
			1.1, 2, 0
			//|CASCADE_FIND_BIGGEST_OBJECT
			//|CASCADE_DO_ROUGH_SEARCH
			|CASCADE_SCALE_IMAGE,
			Size(30, 30) );
		t = (double)getTickCount() - t;
		printf( "detection time = %g ms\n", t*1000/getTickFrequency());
		
		/* Draw a rectangular frame out of the face */
	   for(size_t i =0; i<faces.size(); i++)  
	   {  
		   rectangle(img, Point(faces[i].x, faces[i].y), Point(faces[i].x + faces[i].width, faces[i].y + faces[i].height),	 
						   Scalar(0, 255, 0), 1, 8);	
	   }  
		
		imshow("CameraFace", img);  
		
		if(waitKey(1) > 0) // delay ms to wait for the key to exit
		{
			break;
		}
	}
  
    return 0;  
}  


Output (the face image itself is not posted):

detection time = 278.65 ms
detection time = 280.025 ms
detection time = 280.809 ms
detection time = 289.129 ms
detection time = 279.903 ms
detection time = 281.969 ms


2. Optimization processing

When using a third-party library, the most authoritative tutorial is its official documentation, and OpenCV is no exception.

So, start by studying the official face detection sample:

Official website: https://docs.opencv.org/3.4.0/db/d3a/facedetect_8cpp-example.html

Library source code: opencv-3.4.0/samples/cpp/facedetect.cpp


The official example mainly deals with images as follows:

1. Downscale (shrink) the image, which speeds up detection by reducing the time spent on each frame

void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
                 double inv_scale_x, double inv_scale_y, int interpolation )

2. Histogram equalization to improve image quality

void cv::equalizeHist( InputArray src, OutputArray dst )

The program below applies exactly these two optimizations (my ability is limited; I hope to add more optimizations in the future).

Optimized program:

#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;  
using namespace std;

// control the compiled version macro
//#define VERSION_2_4

/*
 * Detect faces in an image, draw a rectangle around each one on that image
 * and show it in a window.
 * Parameters: input image, cascade classifier, scaling factor
 * (the image is shrunk by 1/scale before detection).
 */
void DetectAndDraw( Mat& img, CascadeClassifier& cascade, double scale);

/*
 * Optimized demo: grab frames from capture device 0 and pass a copy of each
 * one to DetectAndDraw() until a key is pressed.
 *
 * Returns 0 on normal exit (key press, or the capture stops delivering
 * frames) and -1 if the camera cannot be opened or the cascade file cannot
 * be loaded.
 */
int main()
{
    CascadeClassifier faceCascade;
    double scale = 4;   // shrink factor handed to DetectAndDraw()
    int nRet = 0;

    VideoCapture capture;
    capture.open(0);
//  capture.open("video.avi");
    if(!capture.isOpened())
    {
        cout << "open camera failed. " << endl;
        return -1;
    }
    cout << "open camera succeed. " << endl;

    /* load classifier */
#ifdef VERSION_2_4
    nRet = faceCascade.load("/root/library/opencv/opencv-2.4.13.6/data/haarcascades/haarcascade_frontalface_alt.xml");
#else
    nRet = faceCascade.load("/root/library/opencv/opencv-3.4.0/data/haarcascades/haarcascade_frontalface_alt.xml");
#endif

    if(!nRet)
    {
        printf("load xml failed.\n");
        return -1;
    }

    /* A few empty frames in a row are tolerated, but a long run of them
       (end of a video file, unplugged camera) must end the loop: the
       empty-frame path never reaches waitKey(), so spinning on it forever
       would leave the user no way to quit. */
    const int maxEmptyFrames = 100;
    int emptyFrames = 0;

    Mat frame;
    while(1)
    {
        capture >> frame;
        if(frame.empty())
        {
            if(++emptyFrames > maxEmptyFrames)
            {
                printf("no frame received, exiting.\n");
                break;
            }
            continue;
        }
        emptyFrames = 0;

        /* Draw on a copy so the capture's own frame is left untouched */
        Mat frame1 = frame.clone();
        DetectAndDraw( frame1, faceCascade, scale );

        if(waitKey(1) > 0) // wait 1 ms for a key press; any key exits
        {
            break;
        }
    }

    return 0;
}

/*
 * Runs the cascade on a shrunken, histogram-equalized grayscale copy of img,
 * prints how long the detector call took, draws each detection on img
 * (coordinates multiplied back up by scale) and shows img in the
 * "FaceDetect" window.
 *
 * img     - input image; the rectangles are drawn onto it
 * cascade - classifier used for detection
 * scale   - the grayscale copy is resized by a factor of 1/scale
 */
void DetectAndDraw( Mat& img, CascadeClassifier& cascade, double scale )
{
    const double shrink = 1 / scale;

    /* grayscale copy of the source image */
    Mat grayFull;
    cvtColor( img, grayFull, COLOR_BGR2GRAY );

    /* pick the interpolation mode for the build target, then shrink */
#ifdef VERSION_2_4
    const int interpolation = INTER_LINEAR;
#else
    const int interpolation = INTER_LINEAR_EXACT;
#endif
    Mat reduced;
    resize( grayFull, reduced, Size(), shrink, shrink, interpolation );

    /* histogram equalization, done in place on the shrunken image */
    equalizeHist( reduced, reduced );

    /* time only the detector call itself */
    vector<Rect> found;
    const double startTicks = (double)getTickCount();
    cascade.detectMultiScale( reduced, found, 1.1, 2,
                              CASCADE_SCALE_IMAGE, // alternatives: CASCADE_FIND_BIGGEST_OBJECT, CASCADE_DO_ROUGH_SEARCH
                              Size(30, 30) );
    const double elapsedTicks = (double)getTickCount() - startTicks;
    printf( "detection time = %g ms\n", elapsedTicks*1000/getTickFrequency());

    /* detections are in shrunken-image coordinates; multiply the corners by scale before drawing */
    for ( vector<Rect>::const_iterator it = found.begin(); it != found.end(); ++it )
    {
        rectangle( img, it->tl() * scale, it->br() * scale,
                   Scalar(0, 255, 0), 2, 8 );
    }

    imshow( "FaceDetect", img );
}


Running result:

detection time = 23.5718 ms
detection time = 27.5776 ms
detection time = 22.7952 ms
detection time = 31.5242 ms
detection time = 39.568 ms
detection time = 31.5325 ms

It can be seen that detection is now about 10 times faster, and the video stutter is essentially eliminated.


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325522852&siteId=291194637