cv2 实现人脸检测

本文主要介绍如何在 Python 中使用 cv2（OpenCV）和 dlib 模块，通过多种方法实现人脸检测。

其中，Windows 环境下 dlib 模块的安装可参考我的另一篇博客：《Python 3.x 安装 dlib（Windows 环境）》。

（1）dlib-hog

代码：

import cv2
import dlib
import time
import sys

def detectFaceDlib(detector, frame, inHeight=300, inWidth=0):
    """Detect faces on a downscaled copy of ``frame`` and draw the boxes.

    Detection runs on an ``inWidth`` x ``inHeight`` version of the frame and
    the resulting rectangles are scaled back to the original resolution.

    Args:
        detector: Callable invoked as ``detector(image, 0)`` that returns an
            iterable of rectangles exposing ``left()``, ``top()``,
            ``right()`` and ``bottom()``.
        frame: Image array as delivered by OpenCV (BGR channel order is
            assumed for 3-channel input). It is not modified.
        inHeight: Height of the downscaled detection image, in pixels.
        inWidth: Width of the downscaled detection image. A falsy value means
            "derive it from ``inHeight`` keeping the aspect ratio".

    Returns:
        A tuple ``(annotated, bboxes)``: a copy of ``frame`` with a green
        rectangle per detection, and a list of ``[x1, y1, x2, y2]`` integer
        boxes in original-frame pixel coordinates.
    """
    frameDlibHog = frame.copy()
    frameHeight, frameWidth = frameDlibHog.shape[:2]

    if not inWidth:
        # Keep the aspect ratio; never let a very narrow frame round to 0,
        # which would make both the resize and the scale division fail.
        inWidth = max(1, int((frameWidth / frameHeight) * inHeight))

    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    frameDlibHogSmall = cv2.resize(frameDlibHog, (inWidth, inHeight))
    if frameDlibHogSmall.ndim == 3 and frameDlibHogSmall.shape[2] == 3:
        # dlib documents RGB (or grayscale) input while OpenCV delivers BGR;
        # convert so the detector sees the channel order it expects.
        frameDlibHogSmall = cv2.cvtColor(frameDlibHogSmall, cv2.COLOR_BGR2RGB)
    faceRects = detector(frameDlibHogSmall, 0)
    print(frameWidth, frameHeight, inWidth, inHeight)

    # Outline thickness grows with the frame height, but stays >= 1 px.
    thickness = max(1, int(round(frameHeight / 250)))
    bboxes = []
    for faceRect in faceRects:
        # Map the rectangle from detection-image to original-frame pixels.
        cvRect = [
            int(faceRect.left() * scaleWidth),
            int(faceRect.top() * scaleHeight),
            int(faceRect.right() * scaleWidth),
            int(faceRect.bottom() * scaleHeight),
        ]
        bboxes.append(cvRect)
        cv2.rectangle(frameDlibHog, (cvRect[0], cvRect[1]), (cvRect[2], cvRect[3]), (0, 255, 0), thickness, 4)

    return frameDlibHog, bboxes



if __name__ == '__main__':
    # Benchmark script: run the dlib HOG detector over a video, display each
    # annotated frame with a running FPS figure and save the result as MJPG.
    import os  # local import: only this entry point needs path handling

    hogFaceDetector = dlib.get_frontal_face_detector()

    source = './videos/baby.mp4'

    cap = cv2.VideoCapture(source)
    hasFrame, frame = cap.read()
    if not hasFrame:
        # Without this check a missing/unreadable video fails later on
        # `frame.shape` with an unhelpful error.
        cap.release()
        raise SystemExit('Cannot read a frame from: {}'.format(source))

    # Name the output after the video's base name. Splitting the whole path
    # on "." gives an empty string for paths that start with "./".
    video_name = os.path.splitext(os.path.basename(str(source)))[0]
    vid_writer = cv2.VideoWriter(
        'output-hog-{}.avi'.format(video_name),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (frame.shape[1], frame.shape[0]),
    )

    frame_count = 0
    tt_dlibHog = 0

    try:
        # Start with the frame read above so the first frame of the video is
        # processed and written too instead of being silently dropped.
        while hasFrame:
            frame_count += 1

            t = time.perf_counter()
            outDlibHog, bboxes = detectFaceDlib(hogFaceDetector, frame)
            elapsed = time.perf_counter() - t

            if frame_count == 1:
                # The first frame's timing is kept out of the running average
                # (the original script reset the total after frame 1,
                # presumably to ignore warm-up cost).
                fpsDlibHog = 1 / elapsed if elapsed > 0 else 0.0
            else:
                tt_dlibHog += elapsed
                # Divide by the number of frames actually accumulated.
                fpsDlibHog = (frame_count - 1) / tt_dlibHog if tt_dlibHog > 0 else 0.0

            label = "DLIB HoG ; ; FPS : {:.2f}".format(fpsDlibHog)
            cv2.putText(outDlibHog, label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 255), 3, cv2.LINE_AA)

            cv2.imshow("Face Detection Comparison", outDlibHog)
            vid_writer.write(outDlibHog)

            # ESC stops the playback early.
            k = cv2.waitKey(10) & 0xFF
            if k == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        # Release the capture, writer and window even if detection raises.
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()

运行结果：

（2）dlib-mmod

代码：

import cv2
import dlib
import time
import sys

def detectFaceDlibMMOD(detector, frame, inHeight=300, inWidth=0):
    """Run ``detector`` on a downscaled RGB copy of ``frame`` and draw boxes.

    Args:
        detector: Callable invoked as ``detector(image, 0)``; each returned
            item must expose a ``rect`` with ``left()``, ``top()``,
            ``right()`` and ``bottom()``.
        frame: BGR image array (it is converted with ``COLOR_BGR2RGB``
            before detection). It is not modified.
        inHeight: Height of the image handed to the detector.
        inWidth: Width of the image handed to the detector; a falsy value
            derives it from ``inHeight`` using the frame's aspect ratio.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of ``frame``
        with green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in original-frame pixels.
    """
    annotated = frame.copy()
    full_h = annotated.shape[0]
    full_w = annotated.shape[1]

    if not inWidth:
        inWidth = int((full_w / full_h) * inHeight)

    # Factors that map detection-image coordinates back to the full frame.
    ratio_y = full_h / inHeight
    ratio_x = full_w / inWidth

    small_rgb = cv2.cvtColor(
        cv2.resize(annotated, (inWidth, inHeight)),
        cv2.COLOR_BGR2RGB,
    )
    detections = detector(small_rgb, 0)

    print(full_w, full_h, inWidth, inHeight)

    line_width = int(round(full_h / 150))
    bboxes = []
    for detection in detections:
        rect = detection.rect
        x1 = int(rect.left() * ratio_x)
        y1 = int(rect.top() * ratio_y)
        x2 = int(rect.right() * ratio_x)
        y2 = int(rect.bottom() * ratio_y)
        bboxes.append([x1, y1, x2, y2])
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), line_width, 4)
    return annotated, bboxes


if __name__ == '__main__':
    # Benchmark script: run the dlib MMOD (CNN) detector over a video, display
    # each annotated frame with a running FPS figure and save it as MJPG.
    import os  # local import: only this entry point needs path handling

    source = 'videos/baby.mp4'

    cap = cv2.VideoCapture(source)
    hasFrame, frame = cap.read()
    if not hasFrame:
        # Without this check a missing/unreadable video fails later on
        # `frame.shape` with an unhelpful error.
        cap.release()
        raise SystemExit('Cannot read a frame from: {}'.format(source))

    dnnFaceDetector = dlib.cnn_face_detection_model_v1('./models/mmod_human_face_detector.dat')

    # Name the output after the video's base name. Splitting the whole path
    # on "." kept the directory part ("videos/baby"), so the writer was
    # pointed at a file inside a directory the script never creates.
    video_name = os.path.splitext(os.path.basename(str(source)))[0]
    vid_writer = cv2.VideoWriter(
        'output-mmod-{}.avi'.format(video_name),
        cv2.VideoWriter_fourcc("M", "J", "P", "G"),
        15,
        (frame.shape[1], frame.shape[0]),
    )

    frame_count = 0
    tt_dlibMmod = 0

    try:
        # Start with the frame read above so the first frame of the video is
        # processed and written too instead of being silently dropped.
        while hasFrame:
            frame_count += 1

            t = time.perf_counter()
            outDlibMMOD, bboxes = detectFaceDlibMMOD(dnnFaceDetector, frame)
            elapsed = time.perf_counter() - t

            if frame_count == 1:
                # The first frame's timing is kept out of the running average
                # (the original script reset the total after frame 1,
                # presumably to ignore warm-up cost).
                fpsDlibMmod = 1 / elapsed if elapsed > 0 else 0.0
            else:
                tt_dlibMmod += elapsed
                # Divide by the number of frames actually accumulated.
                fpsDlibMmod = (frame_count - 1) / tt_dlibMmod if tt_dlibMmod > 0 else 0.0

            label = "DLIB MMOD ; FPS : {:.2f}".format(fpsDlibMmod)
            cv2.putText(outDlibMMOD, label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 255), 3, cv2.LINE_AA)

            cv2.imshow("Face Detection Comparison", outDlibMMOD)
            vid_writer.write(outDlibMMOD)

            # ESC stops the playback early.
            k = cv2.waitKey(10) & 0xFF
            if k == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        # Release the capture, writer and window even if detection raises.
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()

检测结果：

（3）opencv-dnn

代码：

import cv2
import time
import sys

def detectFaceOpenCVDnn(net, frame, conf_threshold=None):
    """Detect faces with an OpenCV DNN network and draw the boxes.

    Args:
        net: Network object providing ``setInput(blob)`` and ``forward()``.
            The output is indexed as ``detections[0, 0, i, 2]`` for the
            confidence and ``[..., 3:7]`` for x1, y1, x2, y2 given as
            fractions of the frame size.
        frame: Image array passed to ``cv2.dnn.blobFromImage`` (BGR order is
            assumed, since channels are not swapped). It is not modified.
        conf_threshold: Detections with a confidence not greater than this
            value are dropped. When ``None``, a module-level
            ``conf_threshold`` is used if one is defined (this is how the
            function was configured before the parameter existed),
            otherwise 0.7.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of ``frame``
        with green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in frame pixel coordinates.
    """
    if conf_threshold is None:
        # Previously the function could only work when a global named
        # `conf_threshold` existed, so importing it from another module
        # raised NameError. Keep honouring that global, but do not need it.
        conf_threshold = globals().get('conf_threshold', 0.7)

    frameOpenCVDnn = frame.copy()
    frameHeight, frameWidth = frameOpenCVDnn.shape[:2]
    blob = cv2.dnn.blobFromImage(
        frameOpenCVDnn, 1.0, (300, 300), [104, 117, 123], swapRB=False, crop=False
    )

    net.setInput(blob)
    detections = net.forward()

    # Outline thickness grows with the frame height, but stays >= 1 px.
    thickness = max(1, int(frameHeight / 150))
    bboxes = []
    for detection in detections[0, 0]:
        confidence = detection[2]
        if confidence > conf_threshold:
            # Coordinates are relative (0..1); scale them to pixels.
            x1 = int(detection[3] * frameWidth)
            y1 = int(detection[4] * frameHeight)
            x2 = int(detection[5] * frameWidth)
            y2 = int(detection[6] * frameHeight)
            bboxes.append([x1, y1, x2, y2])
            cv2.rectangle(frameOpenCVDnn, (x1, y1), (x2, y2), (0, 255, 0), thickness, 8)
    return frameOpenCVDnn, bboxes

if __name__ == '__main__':
    # Benchmark script: run the OpenCV DNN face detector over a video, display
    # each annotated frame with a running FPS figure and save it as MJPG.
    import os  # local import: only this entry point needs path handling

    # Choose which serialized model to load: 'CAFFE' or anything else for
    # the TensorFlow graph.
    DNN = 'TF'
    if DNN == 'CAFFE':
        modelFile = './models/res10_300x300_ssd_iter_140000_fp16.caffemodel'
        configFile = './models/deploy.prototxt'
        net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    else:
        modelFile = './models/opencv_face_detector_uint8.pb'
        configFile = './models/opencv_face_detector.pbtxt'
        net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

    # Module-level confidence threshold for the detector.
    conf_threshold = 0.7

    source = './videos/baby.mp4'
    cap = cv2.VideoCapture(source)
    hasFrame, frame = cap.read()
    if not hasFrame:
        # Without this check a missing/unreadable video fails later on
        # `frame.shape` with an unhelpful error.
        cap.release()
        raise SystemExit('Cannot read a frame from: {}'.format(source))

    # Name the output after the video's base name. Splitting the whole path
    # on "." gives an empty string for paths that start with "./".
    video_name = os.path.splitext(os.path.basename(str(source)))[0]
    vid_writer = cv2.VideoWriter(
        'output-dnn-{}.avi'.format(video_name),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (frame.shape[1], frame.shape[0]),
    )

    frame_count = 0
    tt_opencvDnn = 0

    try:
        # Start with the frame read above so the first frame of the video is
        # processed and written too instead of being silently dropped.
        while hasFrame:
            frame_count += 1

            t = time.perf_counter()
            outOpencvDnn, bboxes = detectFaceOpenCVDnn(net, frame)
            elapsed = time.perf_counter() - t

            if frame_count == 1:
                # The first frame's timing is kept out of the running average
                # (the original script reset the total after frame 1,
                # presumably to ignore warm-up cost).
                fpsOpencvDnn = 1 / elapsed if elapsed > 0 else 0.0
            else:
                tt_opencvDnn += elapsed
                # Divide by the number of frames actually accumulated.
                fpsOpencvDnn = (frame_count - 1) / tt_opencvDnn if tt_opencvDnn > 0 else 0.0

            label = "OpenCV DNN ; FPS : {:.2f}".format(fpsOpencvDnn)
            cv2.putText(outOpencvDnn, label, (10,50), cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 255), 3, cv2.LINE_AA)

            cv2.imshow("Face Detection Comparison", outOpencvDnn)
            vid_writer.write(outOpencvDnn)

            # ESC stops the playback early.
            k = cv2.waitKey(10) & 0xFF
            if k == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        # Release the capture, writer and window even if detection raises.
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()

caffe测试结果：

tensorflow测试结果：

（4）opencv-haar

import cv2
import time
import sys

def detectionFaceOpenCVHaar(faceCascade, frame, inHeight=300, inWidth=0):
    """Detect faces with a Haar cascade on a downscaled grayscale frame.

    Detection runs on an ``inWidth`` x ``inHeight`` grayscale version of the
    frame and the resulting boxes are scaled back to the original size.

    Args:
        faceCascade: Object providing ``detectMultiScale(gray_image)`` that
            returns an iterable of ``(x, y, w, h)`` boxes.
        frame: BGR image array (it is converted with ``COLOR_BGR2GRAY``).
            It is not modified.
        inHeight: Height of the downscaled detection image, in pixels.
        inWidth: Width of the downscaled detection image. A falsy value means
            "derive it from ``inHeight`` keeping the aspect ratio".

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of ``frame``
        with green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in original-frame pixels.
    """
    frameOpenCVHaar = frame.copy()
    frameHeight, frameWidth = frameOpenCVHaar.shape[:2]

    if not inWidth:
        # Keep the aspect ratio; never let a very narrow frame round to 0.
        inWidth = max(1, int((frameWidth / frameHeight) * inHeight))

    # Keep the scale factors as floats: truncating them to int distorts the
    # mapping back to the original frame (e.g. 2.4 -> 2, or 0.8 -> 0).
    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    # Resize to the requested detection size. The scale factors are ratios,
    # not pixel dimensions, so they must not be used as the target size.
    frameOpenCVHaarSmall = cv2.resize(frameOpenCVHaar, (inWidth, inHeight))
    frameGray = cv2.cvtColor(frameOpenCVHaarSmall, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(frameGray)

    # Outline thickness grows with the frame height, but stays >= 1 px.
    thickness = max(1, int(round(frameHeight / 150)))
    bboxes = []
    for (x, y, w, h) in faces:
        # Convert (x, y, w, h) to corner form in original-frame pixels.
        cvRect = [
            int(x * scaleWidth),
            int(y * scaleHeight),
            int((x + w) * scaleWidth),
            int((y + h) * scaleHeight),
        ]
        bboxes.append(cvRect)
        cv2.rectangle(frameOpenCVHaar, (cvRect[0], cvRect[1]), (cvRect[2], cvRect[3]), (0, 255, 0),
                      thickness, 4)
    return frameOpenCVHaar, bboxes

if __name__ == '__main__':
    # Benchmark script: run the OpenCV Haar cascade detector over a video,
    # display each annotated frame with a running FPS figure and save it as
    # MJPG.
    import os  # local import: only this entry point needs path handling

    source = './videos/baby.mp4'

    faceCascade = cv2.CascadeClassifier('./models/haarcascade_frontalface_default.xml')

    cap = cv2.VideoCapture(source)
    hasFrame, frame = cap.read()
    if not hasFrame:
        # Without this check a missing/unreadable video fails later on
        # `frame.shape` with an unhelpful error.
        cap.release()
        raise SystemExit('Cannot read a frame from: {}'.format(source))

    # Name the output after the video's base name. Splitting the whole path
    # on "." gives an empty string for paths that start with "./".
    video_name = os.path.splitext(os.path.basename(str(source)))[0]
    vid_writer = cv2.VideoWriter(
        'output-haar-{}.avi'.format(video_name),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (frame.shape[1], frame.shape[0]),
    )

    frame_count = 0
    tt_opencvHaar = 0

    try:
        # Start with the frame read above so the first frame of the video is
        # processed and written too instead of being silently dropped.
        while hasFrame:
            frame_count += 1

            t = time.perf_counter()
            outOpencvHaar, bboxes = detectionFaceOpenCVHaar(faceCascade, frame)
            elapsed = time.perf_counter() - t

            if frame_count == 1:
                # The first frame's timing is kept out of the running average
                # (the original script reset the total after frame 1,
                # presumably to ignore warm-up cost).
                fpsOpencvHaar = 1 / elapsed if elapsed > 0 else 0.0
            else:
                tt_opencvHaar += elapsed
                # Divide by the number of frames actually accumulated.
                fpsOpencvHaar = (frame_count - 1) / tt_opencvHaar if tt_opencvHaar > 0 else 0.0

            label = "OpenCV Haar ; FPS : {:.2f}".format(fpsOpencvHaar)
            cv2.putText(outOpencvHaar, label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 255), 3, cv2.LINE_AA)

            cv2.imshow("Face Detection Comparison", outOpencvHaar)
            vid_writer.write(outOpencvHaar)

            # ESC stops the playback early.
            k = cv2.waitKey(10) & 0xFF
            if k == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        # Release the capture, writer and window even if detection raises.
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()

Github链接：https://github.com/cchangcs/FaceDetection

（1）dlib-hog

（2）dlib-mmod

（3）opencv-dnn

（4）opencv-haar

猜你喜欢