前言

本文为个人随笔，为了记录阅读facenet中使用的mtcnn的代码的一些笔记。本文使用的是https://github.com/davidsandberg/facenet 中实现的mtcnn的代码。这个facenet的代码中有使用mtcnn的代码。本文主要记录mtcnn返回的关键点坐标和boxes框坐标的顺序。

正文

写了一个测试的代码如下

import cv2
import numpy as np
import matplotlib.pyplot as plot
import detect
from scipy import misc
import facenet
import time, threading
import copy
import tensorflow as tf
import detect_face
import os
import json
import sys

def angle_between_2_points(p1, p2):
    """Return the inclination, in degrees, of the line through two points.

    Args:
        p1: First point as an ``(x, y)`` pair.
        p2: Second point as an ``(x, y)`` pair.

    Returns:
        The angle whose tangent is ``(y2 - y1) / (x2 - x1)``, in degrees,
        within ``[-90, 90]``. A vertical line gives ``90`` when ``y2 > y1``
        and ``-90`` when ``y2 < y1``; coincident points give ``0``.
    """
    x1, y1 = p1
    x2, y2 = p2
    dx = x2 - x1
    dy = y2 - y1
    # Mirror the direction vector into the right half-plane so that arctan2
    # yields exactly what arctan(dy / dx) would, while also coping with
    # dx == 0 (a plain division would raise or produce inf/NaN there).
    if dx < 0:
        dx, dy = -dx, -dy
    return np.degrees(np.arctan2(dy, dx))

def get_rotation_matrix(p1, p2, bb):
    """Build the affine matrix that rotates an image about a box centre.

    The rotation angle is the inclination of the line from ``p1`` to ``p2``
    (the caller in this file passes the two eye landmarks), and the pivot is
    the centre of the box ``bb``.

    Args:
        p1: First reference point as an ``(x, y)`` pair.
        p2: Second reference point as an ``(x, y)`` pair.
        bb: Box indexed as ``[x1, y1, x2, y2]``; only these four entries
            are read.

    Returns:
        The matrix produced by ``cv2.getRotationMatrix2D`` with scale 1.
    """
    angle = angle_between_2_points(p1, p2)
    # Floor-divide as before, then hand OpenCV plain Python floats: the box
    # usually holds NumPy integers, which some OpenCV builds refuse to parse
    # as point coordinates.
    xc = float((bb[0] + bb[2]) // 2)
    yc = float((bb[1] + bb[3]) // 2)
    return cv2.getRotationMatrix2D((xc, yc), float(angle), 1)

def det_face(image_path):
    """Detect faces in an image file and display landmarks, rotations and crops.

    The function opens several OpenCV windows and blocks on a key press
    after each one: the image with landmarks drawn, one rotated image per
    face, one plain crop per face, and one rotated crop per face.

    Args:
        image_path: Path of the image file to process.

    Returns:
        A 3-tuple ``(img, img_list, bbox)``:

        * ``img``: the loaded image with the landmark circles drawn on it.
        * ``img_list``: list of crops of ``img``, one per returned box.
        * ``bbox``: the boxes returned by ``align_data``.

        ``(None, None, None)`` is returned when ``image_path`` does not
        exist, and ``(img, [], None)`` when no face is reported.
    """
    if not os.path.exists(image_path):
        # Same arity as the success path so callers can always unpack three
        # values.
        return None, None, None

    # NOTE(review): scipy.misc.imread is no longer available in recent SciPy
    # releases; this call needs an older SciPy or a replacement reader.
    img = misc.imread(image_path)
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    # Drop any channel beyond the third. The slice can be a non-contiguous
    # view, which OpenCV drawing functions may reject, so make it contiguous
    # (a no-op when it already is).
    img = np.ascontiguousarray(img[:, :, 0:3])
    height, width = img.shape[:2]

    bbox, landmarks = align_data(img, margin=16, score_threshold=0.9)
    if bbox is None or landmarks is None:
        # No face reported: nothing to draw, rotate or crop.
        return img, [], None

    # Each landmark row stores all x coordinates first, then all y
    # coordinates, so the y of point k sits half a row after its x.
    for landmark in landmarks:
        half = landmark.size // 2  # "/" is true division; "//" is integer division
        for k in range(half):
            center = (int(landmark[k]), int(landmark[k + half]))
            cv2.circle(img, center, 3, (0, 0, 255))
    cv2.imshow("lala", img)
    cv2.waitKey(0)

    # Rotate the whole image once per face, using the first two landmarks
    # and that face's own box, and keep every result so each face is later
    # cropped from its own rotated image.
    rotated_images = []
    for landmark, bb in zip(landmarks, bbox):
        half = landmark.size // 2
        M = get_rotation_matrix(
            (landmark[0], landmark[half]),
            (landmark[1], landmark[half + 1]),
            bb,
        )
        rotated = cv2.warpAffine(img, M, (width, height), flags=cv2.INTER_CUBIC)
        cv2.imshow("rotated", rotated)
        cv2.waitKey(0)
        rotated_images.append(rotated)

    img_list = []
    for bb in bbox:
        cropped_img = img[bb[1]:bb[3], bb[0]:bb[2], :]
        cv2.imshow("cropped_img", cropped_img)
        cv2.waitKey(0)
        img_list.append(cropped_img)

    # The aligned crops are only displayed; the return value has no slot for
    # them.
    for bb, rotated in zip(bbox, rotated_images):
        rotated_cropped_img = rotated[bb[1]:bb[3], bb[0]:bb[2], :]
        cv2.imshow("rotated_cropped_img", rotated_cropped_img)
        cv2.waitKey(0)

    return img, img_list, bbox


def align_data(img, margin=32, score_threshold=0.9):
    """Run MTCNN on an image and return padded face boxes with landmarks.

    Args:
        img: Image array passed to ``detect_face.detect_face``; its first
            two shape entries are used as height and width for clipping.
        margin: Total padding in pixels; ``margin // 2`` is added on every
            side of each box, clipped to the image bounds.
        score_threshold: Detections whose score (last box entry) is not
            strictly greater than this value are discarded.

    Returns:
        ``(bbox, landmarks)`` as ``int32`` arrays with one row per kept
        face, in matching order. Each ``bbox`` row is ``[x1, y1, x2, y2]``;
        each ``landmarks`` row is that face's column of the detector's
        ``points`` output. ``(None, None)`` is returned when no face is
        detected or none passes ``score_threshold``.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor
    img_height, img_width = img.shape[0:2]
    half_margin = margin // 2

    # Build the networks in a graph of their own and close the session when
    # done, so nothing is leaked into the default graph between calls.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            bounding_boxes, points = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)

    if len(bounding_boxes) < 1:
        return None, None

    bbox = []
    landmark_list = []
    for i, det in enumerate(bounding_boxes):
        if det[-1] <= score_threshold:
            # Skip the face entirely so boxes and landmarks stay in step.
            continue
        landmark_list.append(points[:, i])
        bbox.append([
            np.maximum(det[0] - half_margin, 0),
            np.maximum(det[1] - half_margin, 0),
            np.minimum(det[2] + half_margin, img_width),
            np.minimum(det[3] + half_margin, img_height),
        ])

    if not bbox:
        # Every detection was below the threshold: report it the same way
        # as "no face detected".
        return None, None
    return np.array(bbox, dtype=np.int32), np.array(landmark_list, dtype=np.int32)

# Demo invocation: run the detector on a sample image. det_face opens
# OpenCV windows and waits for a key press after each one.
image_path = './demo_images/twofaces.jpg'
# Alternative sample images:
#image_path = './demo_images/Aaron_Patterson_0001.jpg'
#image_path = './demo_images/Aaron_Eckhart_0001.jpg'
det_face(image_path)

align_data()函数调用detect_face.detect_face()函数，返回bounding_boxes和points：bounding_boxes是返回的人脸框，points是返回的关键点。mtcnn返回5个关键点，分别是左眼、右眼、鼻子、左嘴角、右嘴角。bounding_boxes是一个n*5的数组，n表示检测到的人脸数目，每一行的5个元素表示一个人脸框的信息。bb = bounding_boxes[0]表示第一个人脸框的信息，bb[0]表示人脸框左上角的水平方向坐标（列坐标），bb[1]表示人脸框左上角的垂直方向坐标（行坐标），bb[2]表示人脸框右下角的水平方向坐标（列坐标），bb[3]表示人脸框右下角的垂直方向坐标（行坐标），bb[4]表示检测结果是人脸的score。points是一个10*n的数组，n表示检测到的人脸数目，每一列的10个元素表示一个人脸的关键点信息。point = points[:,0]表示第一个人脸的关键点信息，point[0:5]（即point[0]到point[4]）是五个关键点在水平方向上的坐标（列坐标），point[5:10]（即point[5]到point[9]）是五个关键点在垂直方向上的坐标（行坐标）。即，（point[0]，point[5]）表示左眼的坐标，（point[1]，point[6]）表示右眼的坐标，（point[2]，point[7]）表示鼻子的坐标，（point[3]，point[8]）表示左嘴角的坐标，（point[4]，point[9]）表示右嘴角的坐标。
使用以上的代码和以下的图片，画出人脸的关键点，效果如下。
这里写图片描述

mtcnn坐标分析

前言

正文

猜你喜欢