0、序
本文就如何使用Dlib中的深度残差网络(ResNet)实现实时人脸识别进行相关记述,基本开发环境如下:
所安装软件 | version |
---|---|
CUDA | 10.2.89 |
cuDNN | 8.0.0.180 |
OpenCV | 4.4.0 |
TensorFlow | 2.3.1 |
Jetpack | Jetpack 4.4.1 |
Platform | Jetson nano |
之前尝试了使用opencv进行人脸检测的实现,以及使用dlib中的face_recognition模块进行人脸识别,但是face_recognition在识别的准确度上不太理想,尤其是对亚洲人的脸型,容易识别为同一个人。本文将借助dlib中的深度残差网络(ResNet)实现人脸识别。需要先说明的是,本文不涉及有关深度残差网络的构建,而是使用已经训练完成的相关预训练模型进行该功能的实现。
1、Sources准备
相关模型以及参数的下载,dlib官网传送门:http://dlib.net/files/
# Load the three pretrained dlib models (file names as downloaded).
# CNN-based face detector.
detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
# 68-point facial landmark predictor.
sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
# ResNet face recognition model; used below to compute face descriptors.
facerec = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')
2、Coding
2.1、人脸数据分类,保存本地人脸特征向量及标签
在保存人脸的特征数据这里使用的是预训练的resnet模型,并将人脸特征数据以及对应的name标签保存为本地文件,供实时人脸识别的时候使用。说到底人脸特征向量是啥?现在我也还不是很清楚,只是知道它能表示一个人的面部特征。
import os
import cv2
import dlib
import numpy as np
import json
# Enrolment script: compute one descriptor per face found in the images under
# imagePATH and store the descriptors (faceData.txt) and their labels
# (labels.txt) in the current working directory.

# Pretrained dlib models: CNN face detector, 68-point landmark predictor and
# the ResNet model that produces the face descriptors.
detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')

# Directory of enrolment images. The label is the part of the file name
# before the first underscore (e.g. "alice_01.jpg" -> "alice").
imagePATH = '/home/colin/works/face_recognition_resnet/data/'

descriptors = []  # one 128-value row per detected face
lables = []       # lables[i] is the name belonging to descriptors[i]

for file in os.listdir(imagePATH):
    # Test the extension itself so names such as "x.jpg.bak" are skipped.
    if not file.lower().endswith(('.jpg', '.png')):
        continue

    lableName = file.split('_')[0]
    print('current image:', file)
    print('current lable:', lableName)

    img = cv2.imread(imagePATH + file)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print('skip unreadable image:', file)
        continue

    # Halve both dimensions when the image has more than 500000 pixels.
    if img.shape[0] * img.shape[1] > 500000:
        img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)

    # NOTE(review): cv2.imread returns BGR while dlib's documentation
    # describes RGB input. The recognition code feeds BGR frames as well, so
    # the two sides are consistent; convert both together if this is changed.
    dets = detector(img, 1)
    boxes = []
    for d in dets:
        rec = dlib.rectangle(d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom())
        shape = sp(img, rec)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptors.append(np.array(face_descriptor).reshape(128))
        lables.append(lableName)
        boxes.append(rec)

    # Draw only after every descriptor has been computed so the green boxes
    # never end up in the pixels fed to the models. The corners are passed as
    # two points; a single 4-tuple would be read by OpenCV as (x, y, w, h).
    for rec in boxes:
        cv2.rectangle(img, (rec.left(), rec.top()), (rec.right(), rec.bottom()), (0, 255, 0), 2)

    # imshow must come before waitKey for the current image to be rendered.
    cv2.imshow('img', img)
    cv2.waitKey(2)

# Shape (N, 128); stays a valid (0, 128) array when no face was found.
data = np.array(descriptors, dtype=float).reshape(-1, 128)
np.savetxt('faceData.txt', data, fmt='%f')
with open("labels.txt", 'w') as lableFile:
    json.dump(lables, lableFile)
cv2.destroyAllWindows()
2.2、人脸检测
# Face detection uses dlib's CNN detector, loaded from the pretrained model file.
detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
2.3、人脸识别
# Sizes noted by the author: 640 480 320 240 (presumably 640x480 and 320x240).
def gstreamer_pipeline(
    capture_width=320,
    capture_height=240,
    display_width=320,
    display_height=240,
    framerate=30,
    flip_method=0,
):
    """Build the GStreamer pipeline description string for the camera.

    The pipeline starts at ``nvarguscamerasrc`` and ends in an ``appsink``
    delivering BGR frames.

    Args:
        capture_width: Width placed in the NVMM capture caps.
        capture_height: Height placed in the NVMM capture caps.
        display_width: Width placed in the caps after ``nvvidconv``.
        display_height: Height placed in the caps after ``nvvidconv``.
        framerate: Numerator of the ``framerate=(fraction)N/1`` caps field.
        flip_method: Value of the ``flip-method`` property of ``nvvidconv``.

    Returns:
        The pipeline description as a single string.
    """
    segments = (
        "nvarguscamerasrc ! ",
        "video/x-raw(memory:NVMM), ",
        "width=(int)%d, height=(int)%d, ",
        "format=(string)NV12, framerate=(fraction)%d/1 ! ",
        "nvvidconv flip-method=%d ! ",
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! ",
        "videoconvert ! ",
        "video/x-raw, format=(string)BGR ! appsink",
    )
    template = "".join(segments)
    # Order must follow the %d placeholders in the template above.
    values = (
        capture_width,
        capture_height,
        framerate,
        flip_method,
        display_width,
        display_height,
    )
    return template % values
def findNearestClassForImage(face_descriptor, faceLabel):
    """Return the label of the enrolled face closest to ``face_descriptor``.

    The descriptor is compared by Euclidean distance against every row of
    the module-level ``data`` array. If the smallest distance exceeds the
    module-level ``threshold``, or nothing is enrolled, ``'unknow'`` is
    returned.

    Both ``data`` and ``threshold`` are read from module scope. ``data`` is
    assigned by ``data_load``; ``threshold`` must be defined by the
    surrounding script.

    Args:
        face_descriptor: Sequence of floats convertible with ``np.asarray``
            (for example the result of ``compute_face_descriptor``).
        faceLabel: Labels indexed like the rows of ``data``.

    Returns:
        The matching entry of ``faceLabel`` or the string ``'unknow'``.
    """
    # np.loadtxt yields a 1-D array when only one face is enrolled; promote
    # it to a single-row matrix so the per-row norm below is always valid.
    gallery = np.atleast_2d(np.asarray(data, dtype=float))
    if gallery.size == 0:
        # Nothing enrolled: no distance can be computed.
        return 'unknow'

    # Convert explicitly instead of relying on implicit operator dispatch.
    query = np.asarray(face_descriptor, dtype=float).reshape(1, -1)
    distances = np.linalg.norm(gallery - query, axis=1)

    index = int(np.argmin(distances))
    min_distance = distances[index]
    print('distance: ', min_distance)
    if min_distance > threshold:
        return 'unknow'
    return faceLabel[index]
def recognition(img):
    """Detect every face in ``img``, label it and return the annotated image.

    For each detection the landmarks and face descriptor are computed, the
    nearest enrolled label is looked up via ``findNearestClassForImage``
    (using the module-level ``label`` list), and a green box plus the label
    text are drawn on the image. The image is then passed through
    ``image_shop.mark_add``, whose result replaces ``img``.

    NOTE(review): ``image_shop`` is not defined in the visible source;
    presumably it overlays a marker on the face region -- confirm.

    Args:
        img: Image array accepted by the dlib models and the OpenCV drawing
            calls.

    Returns:
        The image after all annotations.
    """
    green = (0, 255, 0)
    for idx, found in enumerate(detector(img, 1)):
        box = found.rect
        left, top, right, bottom = box.left(), box.top(), box.right(), box.bottom()
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            idx, left, top, right, bottom))

        rec = dlib.rectangle(left, top, right, bottom)
        print(rec.left(), rec.top(), rec.right(), rec.bottom())

        landmarks = sp(img, rec)
        descriptor = facerec.compute_face_descriptor(img, landmarks)
        class_pre = findNearestClassForImage(descriptor, label)
        print(class_pre)

        # The box starts 10 px below the detection's top edge; the label text
        # is anchored at the detection's top-left corner.
        cv2.rectangle(img, (left, top + 10), (right, bottom), green, 2)
        cv2.putText(img, class_pre, (left, top), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, green, 2, cv2.LINE_AA)
        # Argument order is left, right, top, bottom.
        img = image_shop.mark_add(left, right, top, bottom, img)
    return img
def data_load():
    """Load the enrolled labels and descriptors into module-level globals.

    Reads ``labels.txt`` (JSON) into ``label`` and ``faceData.txt`` into
    ``data``. Both paths are formed by prefixing the module-level
    ``filePATH``, which must be defined by the surrounding script.
    """
    global label, data
    # The context manager closes the file even if JSON parsing fails.
    with open(filePATH + 'labels.txt', 'r', encoding='utf-8') as labelFile:
        label = json.load(labelFile)
    # ndmin=2 keeps a single enrolled face as a (1, N) matrix; without it
    # np.loadtxt returns a 1-D array and per-row distance code breaks.
    data = np.loadtxt(filePATH + 'faceData.txt', dtype=float, ndmin=2)
def face_recognition_livevideo(window_name, camera_idx):
    """Run face recognition on the live camera stream until 'q' is pressed.

    Frames are read from the GStreamer pipeline built by
    ``gstreamer_pipeline``, passed through ``recognition`` and shown in a
    window. The loop ends when a frame cannot be read or the user presses
    'q'. The capture and the windows are released even if an error occurs
    while processing a frame.

    Args:
        window_name: Title of the OpenCV display window.
        camera_idx: Forwarded to ``gstreamer_pipeline`` as ``flip_method``.
            NOTE(review): despite its name this value selects the flip
            method, not a camera -- confirm the intended meaning.
    """
    cv2.namedWindow(window_name)
    # CSI camera accessed through the GStreamer pipeline.
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=camera_idx), cv2.CAP_GSTREAMER)
    try:
        while cap.isOpened():
            ok, frame = cap.read()  # read one frame
            if not ok:
                break
            resImage = recognition(frame)
            cv2.imshow(window_name, resImage)
            c = cv2.waitKey(1)
            if c & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close the windows, even on an
        # exception.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Load the enrolled labels and descriptors, then start the live loop.
    data_load()
    face_recognition_livevideo(window_name='Find Face', camera_idx=0)
2.4、使用GPU进行加速:
使用启用了CUDA的dlib即可,没启用可能需要重新安装dlib,在编译安装的时候加上“-DDLIB_USE_CUDA=1”即可。可参考本人之前的一篇博文中关于dlib库的安装。
3、Demo效果
实现的效果还是不错的,并且dlib也能很好地调用CUDA参与运算,得益于GPU的调用,CPU不会出现太高的负荷状态。
参考附录
1)使用dlib中的深度残差网络(ResNet)实现实时人脸识别
2)大牛教你使用dlib中的深度残差网络(ResNet)实现实时人脸识别