0、序

本文就如何使用Dlib中的深度残差网络(ResNet)实现实时人脸识别进行相关记述，基本开发环境如下：

所安装软件	version
CUDA	10.2.89
cuDNN	8.0.0.180
OpenCV	4.4.0
TensorFlow	2.3.1
Jetpack	Jetpack 4.4.1
Platform	Jetson nano

之前尝试了使用opencv实现人脸检测，以及使用基于dlib的face_recognition模块进行人脸识别，但是face_recognition在识别的准确度上不太理想，尤其是对亚洲人的脸型，容易把不同的人识别为同一个人。本文将借助dlib中的深度残差网络(ResNet)实现人脸识别。需要先说明的是，本文不涉及深度残差网络的构建与训练，而是直接使用已经训练完成的预训练模型来实现该功能。

1、Sources准备

相关模型以及参数的下载，dlib官网传送门：http://dlib.net/files/

# Load dlib's CNN face detector from its pretrained weights file.
detector     = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
# Load the landmark (shape) predictor; the file name indicates the 68-point model.
sp           = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
# Load the ResNet-based face recognition model used to compute face descriptors.
facerec       = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')

2、Coding

在这里插入图片描述

2.1、人脸数据分类，保存本地人脸特征向量及标签

在保存人脸特征数据时，这里使用的是预训练的resnet模型，并将人脸特征数据以及对应的name标签保存为本地文件，供实时人脸识别时使用。说到底人脸特征向量是啥？现在我也还不是很清楚，只知道它是一个128维的向量，能够表示一个人的面部特征。

import os
import cv2
import dlib
import numpy as np
import json

# Enrolment script: compute one face descriptor per detected face in every
# image under imagePATH and store the descriptors plus their labels on disk.
detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')

imagePATH = '/home/colin/works/face_recognition_resnet/data/'
lables = []
# Descriptors are collected in a list and stacked once at the end instead of
# re-allocating the whole array with np.concatenate for every face.
descriptor_rows = []

for file in os.listdir(imagePATH):
    # Match on the real extension; a substring test would also accept names
    # such as "photo.jpg.bak".
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    # The label is the part of the file name before the first underscore.
    lableName = file.split('_')[0]
    print('current image:', file)
    print('current lable:', lableName)

    img = cv2.imread(imagePATH + file)
    if img is None:
        # cv2.imread returns None for unreadable/corrupt files.
        print('skip unreadable image:', file)
        continue

    # Halve large images to keep the CNN detector's workload manageable.
    if img.shape[0] * img.shape[1] > 500000:
        img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)

    # NOTE(review): cv2.imread yields BGR while dlib's examples feed RGB.
    # The channel order is left unchanged on purpose because the live
    # recognition code also passes BGR frames and both sides must match.
    dets = detector(img, 1)
    for d in dets:
        rec = dlib.rectangle(d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom())
        shape = sp(img, rec)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptor_rows.append(np.array(face_descriptor).reshape((1, 128)))
        lables.append(lableName)
        # Two corner points: a single 4-tuple would be read as (x, y, w, h).
        cv2.rectangle(img, (rec.left(), rec.top()), (rec.right(), rec.bottom()), (0, 255, 0), 2)

    # Show first, then pump the GUI event loop so the window actually repaints.
    cv2.imshow('img', img)
    cv2.waitKey(2)

# Shape (n_faces, 128); an empty gallery still produces a valid 2-D array.
data = np.concatenate(descriptor_rows) if descriptor_rows else np.zeros((0, 128))
np.savetxt('faceData.txt', data, fmt='%f')

with open("labels.txt", 'w') as lableFile:
    json.dump(lables, lableFile)

cv2.destroyAllWindows()

2.2、人脸检测

# Face detection step: load dlib's CNN face detector from its pretrained weights file.
detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')

2.3、人脸识别

# 640 480 320 240
def gstreamer_pipeline(
    capture_width=320,
    capture_height=240,
    display_width=320,
    display_height=240,
    framerate=30,
    flip_method=0,
):
    """Build the GStreamer pipeline description string for the camera.

    The pipeline starts at ``nvarguscamerasrc``, passes through
    ``nvvidconv`` and ``videoconvert``, and ends in an ``appsink`` that
    receives BGR frames.

    Args:
        capture_width: Width requested from the camera source.
        capture_height: Height requested from the camera source.
        display_width: Width of the frames after ``nvvidconv``.
        display_height: Height of the frames after ``nvvidconv``.
        framerate: Capture frame rate, written as ``<framerate>/1``.
        flip_method: Value of the ``nvvidconv`` ``flip-method`` property.

    Returns:
        The pipeline description with elements separated by `` ! ``.
    """
    camera_caps = (
        "video/x-raw(memory:NVMM), "
        "width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1"
    ) % (capture_width, capture_height, framerate)
    converter = "nvvidconv flip-method=%d" % (flip_method,)
    scaled_caps = (
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx"
    ) % (display_width, display_height)

    stages = [
        "nvarguscamerasrc",
        camera_caps,
        converter,
        scaled_caps,
        "videoconvert",
        "video/x-raw, format=(string)BGR",
        "appsink",
    ]
    return " ! ".join(stages)

def findNearestClassForImage(face_descriptor, faceLabel):
    """Return the label of the stored descriptor closest to ``face_descriptor``.

    The Euclidean distance between ``face_descriptor`` and every row of the
    module-level ``data`` array is computed; the label at the index of the
    smallest distance is returned unless that distance exceeds the
    module-level ``threshold``.

    NOTE(review): ``threshold`` and ``data`` are read from module globals;
    ``threshold`` is not assigned anywhere in the visible source, so it must
    be defined elsewhere before this function is called.

    Args:
        face_descriptor: Descriptor of the face to classify (any sequence
            convertible to a 1-D float array).
        faceLabel: Labels, one per row of ``data``.

    Returns:
        The matching label, or ``'unknow'`` when no stored descriptor is
        within ``threshold`` or when no descriptors are stored at all.
    """
    # np.loadtxt collapses a single stored face to shape (n_features,);
    # force 2-D so the per-row norm below always has an axis 1.
    known = np.atleast_2d(np.array(data, dtype=float))
    if known.size == 0:
        # Nothing enrolled: min()/argmin() would raise on an empty array.
        return 'unknow'

    # Explicit conversion avoids relying on implicit dlib-vector arithmetic.
    query = np.array(face_descriptor, dtype=float).reshape(1, -1)
    distances = np.linalg.norm(known - query, axis=1)

    index = int(np.argmin(distances))
    min_distance = distances[index]
    print('distance: ', min_distance)
    if min_distance > threshold:
        return 'unknow'
    return faceLabel[index]

def recognition(img):
    """Detect, identify and annotate every face found in ``img``.

    For each detection the bounding box is printed, a face descriptor is
    computed from the landmarks and matched against the stored faces via
    ``findNearestClassForImage``; the resulting name is printed and drawn
    onto the image together with a rectangle. The image is then passed
    through ``image_shop.mark_add`` (defined outside this section —
    presumably it overlays a marker; confirm against its implementation).

    Args:
        img: Frame to process; it is drawn on in place.

    Returns:
        The image returned by the last ``image_shop.mark_add`` call, or
        ``img`` itself when no face was detected.
    """
    for idx, detection in enumerate(detector(img, 1)):
        box = detection.rect
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            idx, box.left(), box.top(), box.right(), box.bottom()))

        rec = dlib.rectangle(box.left(), box.top(), box.right(), box.bottom())
        left, top, right, bottom = rec.left(), rec.top(), rec.right(), rec.bottom()
        print(left, top, right, bottom)

        landmarks = sp(img, rec)
        descriptor = facerec.compute_face_descriptor(img, landmarks)

        name = findNearestClassForImage(descriptor, label)
        print(name)

        # The box starts 10 px below the top edge; the name is written at the top edge.
        cv2.rectangle(img, (left, top + 10), (right, bottom), (0, 255, 0), 2)
        cv2.putText(img, name, (left, top), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (0, 255, 0), 2, cv2.LINE_AA)

        img = image_shop.mark_add(left, right, top, bottom, img)

    return img


def data_load():
    """Load the stored labels and face descriptors into module globals.

    Reads ``labels.txt`` (JSON) and ``faceData.txt`` (whitespace-separated
    floats) from the directory prefix held in the module-level ``filePATH``
    and stores them in the globals ``label`` and ``data``.

    ``filePATH`` is concatenated directly with the file names, so it must
    end with a path separator.
    """
    global label, data, filePATH
    # The context manager closes the file even if json.load raises.
    with open(filePATH + 'labels.txt', 'r') as labelFile:
        label = json.load(labelFile)

    # ndmin=2 keeps the (n_faces, n_features) layout even when only one face
    # is stored; without it loadtxt returns a 1-D array for a single row.
    data = np.loadtxt(filePATH + 'faceData.txt', dtype=float, ndmin=2)


def face_recognition_livevideo(window_name, camera_idx):
    """Run face recognition on the live camera stream until 'q' is pressed.

    Frames are read from the GStreamer pipeline, passed through
    ``recognition`` and shown in a window. The loop ends when a frame
    cannot be read or the user presses 'q'.

    Args:
        window_name: Title of the OpenCV display window.
        camera_idx: Forwarded to ``gstreamer_pipeline`` as ``flip_method``.
    """
    cv2.namedWindow(window_name)

    # NOTE(review): camera_idx is passed as the pipeline's flip_method, not as
    # a camera selector; harmless for 0 but confirm the intent for other values.
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=camera_idx), cv2.CAP_GSTREAMER)

    try:
        if not cap.isOpened():
            print('failed to open camera pipeline')

        while cap.isOpened():
            ok, frame = cap.read()  # read 1 frame
            if not ok:
                break

            resImage = recognition(frame)

            # display
            cv2.imshow(window_name, resImage)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and windows, even if recognition raises;
        # otherwise the capture device stays held by this process.
        cap.release()
        cv2.destroyAllWindows()


# Script entry point: load the stored descriptors and labels, then start the
# live recognition loop in a window titled 'Find Face'.
if __name__ == '__main__':
    data_load()
    face_recognition_livevideo('Find Face', 0)