Outline
The OpenCV 4.0 deep neural network (DNN) module supports importing the OpenFace model, which extracts a 128-dimensional feature vector from a face; comparing the similarity of these feature vectors makes face recognition possible. For more information about the OpenFace model, see here:
http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/1A_089.pdf
The model is based on the FaceNet network from the CVPR 2015 paper. I introduced it in an article written last year; if you want to know the details, click here to view:
OpenCV + Tensorflow real-time face recognition demo
The main idea
First, use the face detection model supported by the OpenCV 4.0 DNN module to detect faces in an image or video. Then use the pre-trained OpenFace model to extract a 128-dimensional feature vector from each detected face region, and compare feature vectors by cosine similarity to recognize the face. The complete process can be illustrated as follows:
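Cosine similarity formula and explanation: for two feature vectors $A$ and $B$, $\cos\theta = \dfrac{A \cdot B}{\lVert A \rVert \, \lVert B \rVert}$. The closer the value is to 1, the smaller the angle $\theta$ between the two vectors and the more similar the two faces.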
Code implementation steps
01
Load Network
You need to load both the face detection network and the OpenFace face recognition network. The code is as follows:
String modelDesc = "D:/projects/opencv_tutorial/data/models/resnet/deploy.prototxt"; String modelBinary = "D:/projects/opencv_tutorial/data/models/resnet/res10_300x300_ssd_iter_140000.caffemodel"; String facemodel = "D:/projects/opencv_tutorial/data/models/face_detector/openface.nn4.small2.v1.t7"; // 初始化网络 Net net = readNetFromCaffe(modelDesc, modelBinary); Net netRecogn = readNetFromTorch(facemodel);
The two models can be downloaded from:
https://github.com/gloomyfish1998/opencv_tutorial/tree/master/data/models/face_detector
02
Set the compute backend
OpenCV supports different compute backends. Here we use OpenVINO as the compute backend, which accelerates inference, as follows:
// 设置计算后台Net netRecogn = readNetFromTorch(facemodel);net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);net.setPreferableTarget(DNN_TARGET_CPU);netRecogn.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);netRecogn.setPreferableTarget(DNN_TARGET_CPU);// load face datavector<vector<float>> face_data;vector<string> labels;vector<string> faces;glob("D:/my_faces/zhigang", faces);for (auto fn : faces) { vector<float> fv; Mat sample = imread(fn); recognize_face(sample, netRecogn, fv); face_data.push_back(fv); printf("file name : %s\n", fn.c_str()); labels.push_back("zhigang");}faces.clear();glob("D:/my_faces/balvin", faces);for (auto fn : faces) { vector<float> fv; Mat sample = imread(fn); recognize_face(sample, netRecogn, fv); face_data.push_back(fv); printf("file name : %s\n", fn.c_str()); labels.push_back("balvin");}if (net.empty() || netRecogn.empty()){ printf("could not load net...\n"); return -1;}
03
Face Detection
Faces are detected with the face detection network. The code is as follows:
// 输入数据调整Mat inputBlob = blobFromImage(frame, inScaleFactor, Size(inWidth, inHeight), meanVal, false, false);net.setInput(inputBlob, "data");// 人脸检测Mat detection = net.forward("detection_out");vector<double> layersTimings;double freq = getTickFrequency() / 1000;double time = net.getPerfProfile(layersTimings) / freq;Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
04
Face comparison
Each face ROI detected in real time is compared against the pre-loaded face samples to find the one with the smallest distance. If that minimum distance is below the threshold T, the corresponding label is output as the recognition result. The code that parses the face detections and performs face recognition is as follows:
// Walk over every detection row: column 2 holds the confidence and columns
// 3..6 hold the box corners as fractions of the frame width / height.
for (int i = 0; i < detectionMat.rows; i++)
{
    // Confidence, between 0 and 1
    float confidence = detectionMat.at<float>(i, 2);
    if (confidence > confidenceThreshold)
    {
        int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
        int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
        int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
        int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

        Rect object(xLeftBottom, yLeftBottom, xRightTop - xLeftBottom, yRightTop - yLeftBottom);
        // Clip the box to the frame: a detection may extend past the image
        // border, and frame(object) throws for a rectangle that is not fully
        // inside the image. An empty intersection becomes a 0x0 rectangle and
        // is rejected by the size check below.
        object &= Rect(0, 0, frame.cols, frame.rows);
        if (object.width < 5 || object.height < 5)
        {
            continue;
        }

        // Crop the face ROI
        Mat roi = frame(object);

        // Face comparison: find the stored sample with the smallest distance
        vector<float> curr_fv;
        recognize_face(roi, netRecogn, curr_fv);
        float minDist = 10;
        int index = -1;
        // A separate index name avoids shadowing the outer detection index `i`.
        for (size_t k = 0; k < face_data.size(); k++)
        {
            float dist = compare(curr_fv, face_data[k]);
            if (minDist > dist)
            {
                minDist = dist;
                index = static_cast<int>(k);
            }
        }

        // Apply the threshold and display the recognition result
        printf("index : %d, dist: %.2f \n", index, minDist);
        if (index >= 0 && minDist < 0.30)
        {
            putText(frame, labels[index].c_str(), Point(xLeftBottom, yLeftBottom-20),
                FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 0, 255));
        }

        // Draw the face box and a "Face: <confidence>" caption on a filled
        // white background.
        rectangle(frame, object, Scalar(0, 255, 0));
        ss.str("");
        ss << confidence;
        String conf(ss.str());
        String label = "Face: " + conf;
        int baseLine = 0;
        Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
            Size(labelSize.width, labelSize.height + baseLine)),
            Scalar(255, 255, 255), FILLED);
        putText(frame, label, Point(xLeftBottom, yLeftBottom),
            FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
    }
}
Cosine similarity comparison
/**
 * Angular distance between two feature vectors, derived from their cosine
 * similarity: dist = acos(cos_theta) / pi.
 *
 * The result lies in [0, 1]; the smaller the distance, the more similar the
 * vectors. 0 means the angle is 0 degrees, 0.5 means 90 degrees and 1 means
 * 180 degrees.
 *
 * @param fv1 first feature vector
 * @param fv2 second feature vector
 * @return the angular distance in [0, 1]. If the vectors differ in length only
 *         the common leading elements are compared. If either vector has zero
 *         magnitude (or no elements) the maximum distance 1 is returned.
 */
float compare(std::vector<float> &fv1, std::vector<float> fv2) {
    // Never index past the end of the shorter vector.
    const size_t n = fv1.size() < fv2.size() ? fv1.size() : fv2.size();

    // Accumulate in double to limit rounding error over the feature elements.
    double dot = 0.0;
    double sum2 = 0.0;
    double sum3 = 0.0;
    for (size_t i = 0; i < n; i++) {
        const double a = fv1[i];
        const double b = fv2[i];
        dot += a * b;
        sum2 += a * a;
        sum3 += b * b;
    }

    const double norm = sqrt(sum2) * sqrt(sum3);
    if (!(norm > 0.0)) {
        // The cosine is undefined for a zero-length vector; report "no match"
        // instead of dividing by zero.
        return 1.0f;
    }

    // Rounding can push the ratio slightly outside [-1, 1], where acos()
    // returns NaN; a NaN distance would make identical faces never match.
    double similarity = dot / norm;
    if (similarity > 1.0) {
        similarity = 1.0;
    } else if (similarity < -1.0) {
        similarity = -1.0;
    }

    return static_cast<float>(acos(similarity) / CV_PI);
}
Running result