基于python开发pepper机器人的人脸识别--使用facenet

一：关于Facenet

Facenet 是Florian Schroff等人2015年提出的一种人脸识别的模型，论文地址
FaceNet: A Unified Embedding for Face Recognition and Clustering
Facenet 的基本流程是：首先在给出的图片中选出人脸的区域，然后计算人脸的特征（embeddings）。输入一张图片，就可以提取出图片中包含的所有人脸的特征，每个人脸特征可以使用128维的向量表示。要做人脸识别，一个简单的思路就是计算待检测样本与数据库中各个人脸特征之间的距离，距离越小，待检测的样本就越有可能是这个人。为了避免因数据库中没有事先录入该人脸信息而导致的误识别，我们可以设置一个距离阈值：当最小距离仍大于该阈值时，就认为待检测的人不在数据库中。

二：开发pepper机器人

pepper机器人只支持python2.7，而我们平时使用的是python3.x；人脸识别模型涉及到各种依赖，两个版本之间的兼容性难以处理。博主采取的方式是：机器人采集照片并通过网络传给服务器，服务器上部署人脸识别模型，处理完成之后把识别结果返回给机器人，机器人再做相关的处理。

三：代码

1，机器人采集照片，并传送给服务器

import naoqi
from naoqi import ALProxy
import socket
import time
# (host, port) of the recognition server; the same values appear as
# LOCAL_IP / PORT in the server listing.
address = ('192.168.100.22', 2567)
# NAOqi service proxies, both created against 192.168.100.108:9559.
photoCaptureProxy = ALProxy("ALPhotoCapture", "192.168.100.108", 9559)
tts=ALProxy("ALTextToSpeech", "192.168.100.108", 9559)
# NOTE(review): resolution index 2 is presumably 640x480 (VGA) -- confirm
# against the NAOqi ALPhotoCapture documentation.
photoCaptureProxy.setResolution(2)
photoCaptureProxy.setPictureFormat("jpg")
def takephoto():  # capture one photo per iteration and upload it, forever
    """Capture photos in an endless loop and hand each batch to send().

    Each iteration takes one picture through ``photoCaptureProxy``, uploads
    it with ``send()`` and then sleeps for one second.  A socket error while
    uploading is reported and the loop carries on, so a temporarily
    unreachable server does not stop the capture loop.
    """
    while True:
        photos = photoCaptureProxy.takePictures(1, "/home/nao/recordings/cameras/", "image")
        # Announce the upload before it happens; the message says "about to send".
        print('即将发送{}'.format(photos))
        try:
            send(photos)
        except socket.error as err:
            print('send failed: {}'.format(err))
        time.sleep(1)
def getfaceInfor():
    """Placeholder that does nothing and returns None."""
def send(photos):   # upload photos to the server and act on its answer
    """Upload every photo in ``photos[0]`` and greet whoever is recognised.

    For each photo one TCP connection to ``address`` is used:
      1. send ``"<length>|<path>"`` and wait for ``ok``;
      2. send the image bytes and wait for ``copy``;
      3. send ``infor`` and read the recognition result.
    Any result other than ``no`` (or an empty reply) is passed to sayHello().

    Args:
        photos: nested sequence whose first element is the list of photo
            file paths to upload.
    """
    for photo in photos[0]:
        print('sending {}'.format(photo))
        data = file_deal(photo)
        if not data:
            # The file could not be read (or is empty): nothing to upload.
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect(address)
            # The header carries the payload length and the photo path.
            sock.sendall('{}|{}'.format(len(data), photo).encode())
            reply = sock.recv(1024)
            if 'ok' == reply.decode():
                # sendall() keeps writing until the whole payload is out;
                # a plain send() may transmit only part of the buffer.
                sock.sendall(data)
                reply = sock.recv(1024)
                if 'copy' == reply.decode():
                    print('{} send successfully'.format(photo))
                    sock.sendall(b'infor')
                    person_name = sock.recv(1024)
                    # Compare as bytes so the check holds on Python 2 and 3;
                    # an empty reply means the server closed the connection.
                    if person_name and person_name != b'no':
                        sayHello(person_name)
        finally:
            # Release the socket even when the exchange fails half-way.
            sock.close()
def sayHello(person_name):   # speak a greeting for the given name
    """Speak a greeting containing ``person_name`` through the ``tts`` proxy."""
    greeting = '你好{}'.format(person_name)
    tts.say(greeting)
def file_deal(file_path):
    """Return the raw bytes of ``file_path``.

    Args:
        file_path: path of the file to read.

    Returns:
        The file content as bytes, or ``b''`` when the file cannot be
        opened or read (an error line is printed in that case).
    """
    try:
        with open(file_path, 'rb') as photo_file:
            return photo_file.read()
    except (IOError, OSError):
        # Always hand back bytes so callers can safely take len() of the result.
        print('error{}'.format(file_path))
        return b''
# Script entry point: start the endless capture-and-upload loop.
if __name__ == '__main__':
    takephoto()

2，服务器端代码

# Address and port the recognition server binds to in recognition().
LOCAL_IP = '192.168.100.22'
PORT = 2567
def init_sourceData(path):
    """Return the paths of all regular files directly inside ``path``.

    Sub-directories are skipped.  If ``path`` does not exist the error is
    printed and an empty list is returned.
    """
    try:
        entries = os.listdir(path)
    except FileNotFoundError as err:
        print(err)
        return []
    candidates = (os.path.join(path, entry) for entry in entries)
    return [candidate for candidate in candidates if os.path.isfile(candidate)]
def generate_dataBase(image_paths):
    """Build the ``people_infor.h5`` face database from enrolment images.

    For every image the first bounding box returned by MTCNN is enlarged by
    ``margin`` pixels (half on each side, clipped to the image), cropped,
    resized to ``image_size`` x ``image_size``, passed through
    ``facenet.prewhiten`` and fed to the FaceNet model.  Row 0 of the
    resulting embedding is stored in the HDF5 file under the image's file
    name without extension.

    Args:
        image_paths: list of image file paths.  Paths in which no face is
            detected are removed from this list in place.

    Returns:
        None.  If the HDF5 file cannot be created, the error is printed and
        nothing else is done.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160
    try:
        data_h5 = h5py.File('people_infor.h5', 'w')
    except OSError as e:
        # Without an output file there is nothing useful to compute, so stop
        # before the (expensive) models are loaded.
        print(e)
        print("生成h5文件失败")
        return
    try:
        with tf.Graph().as_default():
            print("开始加载图片对齐模型")
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
            try:
                with sess.as_default():
                    pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
                print("开始加载图片识别模型")
                with tf.Session() as sessD:
                    # Load the model
                    facenet.load_model('20170512-110547')
                    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                    # Iterate over a snapshot: removing from the list that is
                    # being iterated would silently skip the following image.
                    for image in list(image_paths):
                        img = misc.imread(os.path.expanduser(image), mode='RGB')
                        img_size = np.asarray(img.shape)[0:2]
                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        if len(bounding_boxes) < 1:
                            image_paths.remove(image)
                            print("没有发现人脸", image)
                            continue
                        # Only the first detected face is enrolled.
                        det = np.squeeze(bounding_boxes[0, 0:4])
                        bb = np.zeros(4, dtype=np.int32)
                        bb[0] = np.maximum(det[0] - margin / 2, 0)
                        bb[1] = np.maximum(det[1] - margin / 2, 0)
                        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                        aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                        prewhitened = facenet.prewhiten(aligned)
                        images = np.stack([prewhitened])
                        # Run forward pass to calculate embeddings
                        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                        emb = sessD.run(embeddings, feed_dict=feed_dict)
                        # The database key is the file name without its extension.
                        people_name = os.path.splitext(os.path.basename(image))[0]
                        data_h5[people_name] = emb[0, :]
            finally:
                # The MTCNN session is not managed by a `with` block.
                sess.close()
    finally:
        data_h5.close()

def recognition():
    """Run the recognition server: receive photos over TCP, reply with a name.

    The FaceNet model and the face database are loaded once, then the
    function accepts connections forever.  Per connection the exchange is:
      1. the client sends ``"<length>|<file name>"``; the server answers ``ok``;
      2. the client sends ``length`` bytes of image data; the server stores
         them as ``image/<random number>.jpg``, runs alignment, embedding and
         database lookup, and answers ``copy``;
      3. the client sends ``infor``; the server answers with the matched
         name, or ``no`` when nobody was recognised.
    A malformed or interrupted exchange is dropped and the server keeps
    serving the next client.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # IPv4, TCP
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((LOCAL_IP, PORT))
    print("开始加载图片识别模型")
    with tf.Graph().as_default():
        with tf.Session() as sess:
            facenet.load_model('20170512-110547')
            print("==========================")
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            hfut_face.load_database()
            print('人脸识别模型加载完成！')
            sock.listen(3)
            print('开始监听端口：')
            while True:
                sc, sc_name = sock.accept()
                print('收到{}请求'.format(sc_name))
                try:
                    infor = sc.recv(1024)
                    # partition() tolerates a '|' inside the file name and a
                    # header without any separator.
                    length, sep, file_name = infor.decode().partition('|')
                    if not (sep and length.isdigit() and file_name):
                        continue
                    print('length {},filename {}'.format(length, file_name))
                    sc.sendall(b'ok')
                    total = int(length)
                    chunks = []
                    get = 0
                    while get < total:
                        data = sc.recv(min(1024, total - get))
                        if not data:
                            # The peer closed the connection early; without
                            # this check the loop would spin forever.
                            break
                        chunks.append(data)
                        get += len(data)
                    payload = b''.join(chunks)
                    print('应该接受{},实际接受{}'.format(length, len(payload)))
                    if not payload or get < total:
                        continue
                    print('acturally length:{}'.format(len(payload)))
                    image_path = 'image/' + str(random.randint(1, 10000)) + '.jpg'
                    with open(image_path, 'wb') as newfile:
                        newfile.write(payload)
                    images = hfut_face_align.align_image(image_path)
                    person_infor = 'no'
                    if len(images) > 0:
                        result_emb = hfut_face.calculat_embing(images, sess, embeddings, images_placeholder,
                                                               phase_train_placeholder)
                        if result_emb['face'] != 0:
                            name = hfut_face.validate(result_emb['emb'])
                            # An empty name means "no match"; sending zero
                            # bytes would leave the client blocked in recv().
                            if name:
                                person_infor = name
                            print('识别结果:%s' % name)
                        else:
                            print('识别失败，请重试')
                    sc.sendall(b'copy')
                    reply = sc.recv(32).decode()
                    if 'infor' == reply:
                        sc.sendall(person_infor.encode())
                except (socket.error, UnicodeDecodeError, ValueError) as e:
                    # One bad client must not take the whole server down.
                    print(e)
                finally:
                    sc.close()
def prepare():
    """Enrol every file found directly under ``imageData`` into the face database."""
    generate_dataBase(init_sourceData('imageData'))
# Script entry point: start the recognition server.
if __name__ == '__main__':
    # prepare() # uncomment to (re)build the face database first
     recognition()

hfut_facenet.py

# Module-level defaults.  generate_dataBase() defines its own local copies of
# the first five values, so these are not read by the functions visible here.
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor
margin = 44
image_size = 160
controller = None  # not referenced by the code visible here
# In-memory face database: name -> stored embedding.  Filled by
# load_database() and read by validate().
people_source = {}
def calculat_embing(images,sess,embeddings,images_placeholder,phase_train_placeholder):
    """Evaluate ``embeddings`` for ``images`` in the given session.

    Args:
        images: value fed to ``images_placeholder``.
        sess: object whose ``run(fetches, feed_dict=...)`` performs the forward pass.
        embeddings: fetch passed to ``sess.run``.
        images_placeholder: feed key for ``images``.
        phase_train_placeholder: feed key that is always fed ``False``.

    Returns:
        ``{'face': 1, 'emb': <result of sess.run>}``.
    """
    print("图片识别模型完成！")
    inputs = {images_placeholder: images, phase_train_placeholder: False}
    vectors = sess.run(embeddings, feed_dict=inputs)
    return {'face': 1, 'emb': vectors}
def validate(emb, max_distance=99):
    """Return the name in ``people_source`` whose embedding is closest to ``emb``.

    The distance is the Euclidean distance between ``emb`` and each stored
    embedding.

    Args:
        emb: embedding to look up.
        max_distance: rejection threshold; only entries strictly closer than
            this value can match.  The default of 99 keeps the previous
            behaviour; pass a smaller value to reject unknown faces.

    Returns:
        The best-matching name, or ``""`` when no entry is closer than
        ``max_distance`` (including when the database is empty).
    """
    person_name = ""
    best = max_distance
    for name, reference in people_source.items():
        dist = np.sqrt(np.sum(np.square(np.subtract(emb, reference))))
        if dist < best:
            best = dist
            person_name = name
    return person_name
def init_sourceData(path):
    """List the regular files located directly in ``path``.

    Returns a list of joined paths; directories are left out.  A missing
    ``path`` is reported with print() and yields an empty list.
    """
    image_paths = []
    try:
        names = os.listdir(path)
    except FileNotFoundError as e:
        print(e)
    else:
        full_paths = [os.path.join(path, name) for name in names]
        image_paths = list(filter(os.path.isfile, full_paths))
    return image_paths
def generate_dataBase(image_paths):
    """Build the ``people_infor.h5`` face database from enrolment images.

    For every image the first bounding box returned by MTCNN is enlarged by
    ``margin`` pixels (half on each side, clipped to the image), cropped,
    resized to ``image_size`` x ``image_size``, passed through
    ``facenet.prewhiten`` and fed to the FaceNet model.  Row 0 of the
    resulting embedding is stored in the HDF5 file under the image's file
    name without extension.

    Args:
        image_paths: list of image file paths.  Paths in which no face is
            detected are removed from this list in place.

    Returns:
        None.  If the HDF5 file cannot be created, the error is printed and
        nothing else is done.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160
    try:
        data_h5 = h5py.File('people_infor.h5', 'w')
    except OSError as e:
        # Without an output file there is nothing useful to compute, so stop
        # before the (expensive) models are loaded.
        print(e)
        print("生成h5文件失败")
        return
    try:
        with tf.Graph().as_default():
            print("开始加载图片对齐模型")
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
            try:
                with sess.as_default():
                    pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
                print("开始加载图片识别模型")
                with tf.Session() as sessD:
                    # Load the model
                    facenet.load_model('20170512-110547')
                    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                    # Iterate over a snapshot: removing from the list that is
                    # being iterated would silently skip the following image.
                    for image in list(image_paths):
                        img = misc.imread(os.path.expanduser(image), mode='RGB')
                        img_size = np.asarray(img.shape)[0:2]
                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        if len(bounding_boxes) < 1:
                            image_paths.remove(image)
                            print("没有发现人脸", image)
                            continue
                        # Only the first detected face is enrolled.
                        det = np.squeeze(bounding_boxes[0, 0:4])
                        bb = np.zeros(4, dtype=np.int32)
                        bb[0] = np.maximum(det[0] - margin / 2, 0)
                        bb[1] = np.maximum(det[1] - margin / 2, 0)
                        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                        aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                        prewhitened = facenet.prewhiten(aligned)
                        images = np.stack([prewhitened])
                        # Run forward pass to calculate embeddings
                        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                        emb = sessD.run(embeddings, feed_dict=feed_dict)
                        # The database key is the file name without its extension.
                        people_name = os.path.splitext(os.path.basename(image))[0]
                        data_h5[people_name] = emb[0, :]
            finally:
                # The MTCNN session is not managed by a `with` block.
                sess.close()
    finally:
        data_h5.close()
def load_database():
    """Load every embedding from ``people_infor.h5`` into ``people_source``.

    Each dataset is read fully into memory and the file is closed
    afterwards, so later lookups do not depend on an open HDF5 handle.

    Raises:
        OSError: if ``people_infor.h5`` cannot be opened.
    """
    with h5py.File('people_infor.h5', 'r') as h5file:
        for name in h5file.keys():
            # Indexing with [()] reads the whole dataset as a numpy array.
            people_source[name] = h5file[name][()]
    print("加载人脸数据完成！")

hfut_face_align.py  #对齐模型
# Parameters passed to align.detect_face.detect_face() by align_image().
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor
margin = 44  # pixels added around the detected box before cropping (half per side)
image_size = 160  # side length of the square crop produced by align_image()
controller = None  # not referenced by the code visible here
sessD = None  # not referenced by the code visible here
people_source = {}  # not referenced by the code visible here
# NOTE(review): as_default() is called without a `with` statement and its
# result is discarded, so this line looks like a no-op -- confirm.
tf.Graph().as_default()
print("开始加载图片对齐模型")
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
# This session is created at import time and is never closed in this module.
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
# NOTE(review): same pattern as above -- the context manager is not entered.
sess.as_default()
# The three MTCNN networks used by align_image().
pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
print("图片对齐模型加载完成！")
def align_image(image):
    """Detect, crop and normalise the first face found in an image file.

    Args:
        image: path of the image file (``~`` is expanded).

    Returns:
        An array stacking one face crop of ``image_size`` x ``image_size``
        that has been passed through ``facenet.prewhiten``, or an empty list
        when no face is detected.
    """
    pixels = misc.imread(os.path.expanduser(image), mode='RGB')
    img_size = np.asarray(pixels.shape)[0:2]
    bounding_boxes, _ = align.detect_face.detect_face(pixels, minsize, pnet, rnet, onet, threshold, factor)
    if len(bounding_boxes) < 1:
        print("没有发现人脸", image)
        return []
    # Only the first detected face is used.
    det = np.squeeze(bounding_boxes[0, 0:4])
    half_margin = margin / 2
    # Enlarge the box by the margin and clip it to the image borders.
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - half_margin, 0)
    bb[1] = np.maximum(det[1] - half_margin, 0)
    bb[2] = np.minimum(det[2] + half_margin, img_size[1])
    bb[3] = np.minimum(det[3] + half_margin, img_size[0])
    x0, y0, x1, y1 = bb
    face = pixels[y0:y1, x0:x1, :]
    resized = misc.imresize(face, (image_size, image_size), interp='bilinear')
    return np.stack([facenet.prewhiten(resized)])

目前人脸识别还不是太完善，希望和你一起交流讨论。