实战演习(五)——人脸识别(CNN)简单演练

本案例主要用了CNN算法进行实现,对七种表情进行判别,具体的七种表情为:

# 0:生气;

# 1:厌恶;

# 2:害怕;

# 3:高兴;

# 4:伤心;

# 5:惊讶;

# 6:正常;

本案例使用的数据集为fer2013.csv,这一数据集共有三列:表情类别(0~6)、图片像素(48*48的灰度值,以空格分隔)、数据用途(Training、PublicTest、PrivateTest,分别对应训练集、验证集和测试集)。

1、face_data.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import csv

import os

# 主要是将原始数据集分成三部分:

# 其一是做训练集;

# 其二是做验证集;

# 其三是做测试集;

# Split the raw FER2013 CSV into three CSV files according to the usage tag
# stored in the last column of every row:
#   'Training'    -> train.csv
#   'PublicTest'  -> val.csv
#   'PrivateTest' -> test.csv
# The usage column itself is dropped from the written files.
databases_path = 'E:/Python_workspace/1cfz/fer2013'
datasets_path = 'E:/Python_workspace/1cfz'

csv_file = os.path.join(databases_path, 'fer2013.csv')
train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')


def _write_subset(rows, header, usage, out_path):
    """Write the rows tagged with *usage* (minus the usage column) to *out_path*.

    Returns the list of rows that were written, without the header.
    """
    subset = [row[:-1] for row in rows if row[-1] == usage]
    # The file is opened in a ``with`` block so it is flushed and closed
    # before the next stage reads it; newline='' is what the csv module
    # expects for files it reads or writes.
    with open(out_path, 'w', newline='') as out:
        csv.writer(out, lineterminator='\n').writerows([header[:-1]] + subset)
    return subset


with open(csv_file, newline='') as f:
    csvr = csv.reader(f)
    # The first line holds the column titles.
    header = next(csvr)
    print(header)
    rows = list(csvr)

trn = _write_subset(rows, header, 'Training', train_csv)
print(len(trn))

val = _write_subset(rows, header, 'PublicTest', val_csv)
print(len(val))

tst = _write_subset(rows, header, 'PrivateTest', test_csv)
print(len(tst))

数据集划分完成后,需要将像素数据恢复成图片,恢复的同时按照表情类别把图片保存到对应的子目录中:

Img_recover.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import csv

import os

from PIL import Image

import numpy as np

import face_data

# 主要是将分开的三个数据集转化成图片,即将三个数据集转化成可以查看的图片

# 0:生气;

# 1:厌恶;

# 2:害怕;

# 3:高兴;

# 4:伤心;

# 5:惊讶;

# 6:正常;

# Convert the three CSV splits back into viewable images.  Every row holds a
# label and a space-separated pixel string; the pixels are reshaped to 48x48,
# converted to an 8-bit greyscale image and saved as
# <split>/<label>/<row index>.jpg.
datasets_path = 'E:/Python_workspace/1cfz'

train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')

train_set = os.path.join(datasets_path, 'train')
val_set = os.path.join(datasets_path, 'val')
test_set = os.path.join(datasets_path, 'test')

for save_path, csv_file in [(train_set, train_csv), (val_set, val_csv), (test_set, test_csv)]:
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)
    # Label folders already created for this split, so the directory is only
    # touched once per label instead of once per row.
    created_folders = set()

    with open(csv_file, newline='') as f:
        csvr = csv.reader(f)
        # Skip the header line.
        next(csvr)
        for i, (label, pixel) in enumerate(csvr):
            # Split the pixel string on whitespace, parse each value as a
            # float and arrange the values as a 48x48 matrix.
            pixel = np.asarray([float(p) for p in pixel.split()]).reshape(48, 48)

            subfolder = os.path.join(save_path, label)
            if subfolder not in created_folders:
                os.makedirs(subfolder, exist_ok=True)
                created_folders.add(subfolder)

            im = Image.fromarray(pixel).convert('L')
            image_name = os.path.join(subfolder, '{:05d}.jpg'.format(i))
            print(image_name)
            im.save(image_name)

数据分类完成后,需要对数据进行分批,直接使用每一批次的数据进行训练:

train_batchs.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import os

import tensorflow as tf

import matplotlib.pyplot as plt

import numpy as np

import face_data

import Img_recover

from tensorflow.python.training.queue_runner_impl import start_queue_runners

# 0:生气;

# 1:厌恶;

# 2:害怕;

# 3:高兴;

# 4:伤心;

# 5:惊讶;

# 6:正常;

# 将数据细分成7类,每类又分成训练集和标签

# One (image paths, labels) pair of module-level lists per expression class.
anger_0, anger_0_labels = [], []
disgust_1, disgust_1_label = [], []
fear_2, fear_2_label = [], []
happy_3, happy_3_label = [], []
sad_4, sad_4_label = [], []
surprised_5, surprised_5_label = [], []
normal_6, normal_6_label = [], []

def get_file(file_dir):
    """Collect and shuffle the image paths found under *file_dir*.

    *file_dir* must end with a path separator and contain the seven class
    sub-folders '0' .. '6' (0: angry, 1: disgust, 2: fear, 3: happy, 4: sad,
    5: surprised, 6: neutral).  Each file found in sub-folder ``k`` is
    labelled ``k``.

    The paths and labels are collected in lists local to this call, so
    calling the function for several directories (e.g. the training set and
    then the validation set) never mixes their contents.

    Returns:
        (all_image_list, all_label_list): two lists of equal length holding
        the image paths and the matching integer labels, shuffled with the
        same random order.

    Raises:
        OSError: from os.listdir if one of the class sub-folders is missing.
    """
    image_paths = []
    labels = []
    for class_id in range(7):
        # Plain concatenation keeps the produced path strings identical to
        # '<file_dir><class>/<file name>'.
        class_dir = file_dir + str(class_id)
        for name in os.listdir(class_dir):
            image_paths.append(class_dir + '/' + name)
            labels.append(class_id)

    # Shuffle an index list once and apply it to both lists so every path
    # stays paired with its label.
    order = list(range(len(image_paths)))
    np.random.shuffle(order)

    all_image_list = [image_paths[i] for i in order]
    all_label_list = [labels[i] for i in order]
    return all_image_list, all_label_list

def get_batch(image, label, image_W, image_H, batch_size, capacity, num_threads=32):
    """Build the TF1 queue-based input pipeline and return one batch of tensors.

    Args:
        image: list of JPEG file paths.
        label: list of integer class labels, parallel to *image*.
        image_W: target image width in pixels.
        image_H: target image height in pixels.
        batch_size: number of examples per batch.
        capacity: maximum number of elements held by the batching queue.
        num_threads: number of threads enqueuing examples (default 32).

    Returns:
        (image_batch, label_batch): a float32 image tensor batch and an int32
        label tensor of shape [batch_size].

    The function only defines the pipeline; data starts flowing once the
    caller runs tf.train.start_queue_runners in a session.
    """
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Queue that yields one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # Decode the JPEG as a single-channel (greyscale) image.
    image = tf.image.decode_jpeg(image_contents, channels=1)

    # resize_image_with_crop_or_pad expects (image, target_height,
    # target_width), so the height must be passed before the width.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)

    # Scale each image to zero mean and unit variance.
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

# [[0对应的图片list],[0]

#  [1对应的图片list],[1]

#  [2对应的图片list],[2]

#  [3对应的图片list],[3]

#  [4对应的图片list],[4]

#  [5对应的图片list],[5]

#  [6对应的图片list],[6]]

# file_path = 'E:/Python_workspace/1cfz/test/'

# all_image_list, all_label_list = get_file(file_path)

# print(all_image_list)

# print(all_label_list)

# image_batch, label_batch = get_batch(all_image_list, all_label_list, 24, 24, 100)

# print(image_batch)

# print(label_batch)

数据分批完成后,开始构建模型cnn:

face_cnn.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import tensorflow as tf

import face_data

import Img_recover

from tensorflow.python.ops.distributions.kullback_leibler import cross_entropy

from keras.models import load_model

from keras.models import Model

from keras import backend as K

from keras.backend.common import image_dim_ordering

# The three modules above (face_data, Img_recover, train_batchs) handle data preparation:

# face_data: splits the data set into three parts;

# Img_recover: converts the pixel data into images, sorted by class;

# train_batchs: groups the sorted data into batches;

# This module is the second stage: it defines the model that will be trained.

# Default edge length (in pixels) used by resize_image and face_predict below.
IMAGE_SIZE = 64

# Default path used by load_models below.
MODEL_PATH = "E:/Python_workspace/1cfz/train_log/model.ckpt"

def inference(images, batch_size, n_classes, regularizer, reuse):
    """Build the network graph and return the unscaled class logits.

    Layers, in order: a 3x3 convolution (1 -> 16 channels) with ReLU, a 2x2
    max pooling with stride 2, two fully connected ReLU layers (2048 and 512
    units) and a linear output layer with *n_classes* units.

    Args:
        images: input image batch tensor.
        batch_size: number of images in *images*; used to flatten the pooled
            feature maps.
        n_classes: number of output classes.
        regularizer: callable applied to the weights of both fully connected
            layers, the result being added to the "losses" collection; pass
            None to skip regularization.
        reuse: passed to the variable scopes so a second call shares the
            variables of the first; dropout (keep_prob 0.5) is only applied
            when this is falsy.

    Returns:
        The logits tensor produced by the output layer.
    """
    with tf.variable_scope('conv1', reuse=reuse) as scope:
        kernel = tf.get_variable(
            "weights", shape=[3, 3, 1, 16], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        kernel_bias = tf.get_variable(
            "biases", shape=[16], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        convolved = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding="SAME")
        # Convolution followed directly by the activation function.
        hidden = tf.nn.relu(tf.nn.bias_add(convolved, kernel_bias), name=scope.name)

    with tf.variable_scope('pool2') as scope:
        pooled = tf.nn.max_pool(
            hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
            padding="SAME", name=scope.name)

    with tf.variable_scope('fc1', reuse=reuse) as scope:
        flat = tf.reshape(pooled, shape=[batch_size, -1])
        flat_dim = flat.get_shape()[1].value
        dense1_w = tf.get_variable(
            "weights", shape=[flat_dim, 2048], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            # Register the penalty term of this layer's weights.
            tf.add_to_collection("losses", regularizer(dense1_w))
        dense1_b = tf.get_variable(
            "biases", shape=[2048], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        hidden = tf.nn.relu(tf.matmul(flat, dense1_w) + dense1_b, name=scope.name)
        if not reuse:
            hidden = tf.nn.dropout(hidden, keep_prob=0.5)

    with tf.variable_scope('fc2', reuse=reuse) as scope:
        dense2_w = tf.get_variable(
            "weights", shape=[2048, 512], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(dense2_w))
        dense2_b = tf.get_variable(
            "biases", shape=[512], dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(tf.matmul(hidden, dense2_w) + dense2_b, name=scope.name)
        if not reuse:
            hidden = tf.nn.dropout(hidden, keep_prob=0.5)

    with tf.variable_scope('softmax', reuse=reuse) as scope:
        out_w = tf.get_variable(
            "weights", shape=[512, n_classes], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        out_b = tf.get_variable(
            "biases", shape=[n_classes], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        logits = tf.add(tf.matmul(hidden, out_w), out_b, name=scope.name)

    return logits

def losses(logits, labels):
    """Return the total loss: mean cross entropy plus collected penalty terms.

    Args:
        logits: unscaled class scores, one row per example.
        labels: integer class index per example.

    Returns:
        A scalar tensor: the batch mean of the sparse softmax cross entropy,
        plus the sum of every tensor stored in the "losses" collection when
        that collection is not empty.

    Only the cross-entropy mean (without the penalty terms) is written to
    the scalar summary.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(per_example, name=scope.name)

        # tf.add_n rejects an empty list, which is what get_collection
        # returns when the network was built without a regularizer.
        penalty_terms = tf.get_collection("losses")
        if penalty_terms:
            loss = tf.add_n(penalty_terms) + cross_entropy_mean
        else:
            loss = cross_entropy_mean

        tf.summary.scalar(scope.name + '/loss', cross_entropy_mean)
    return loss

def training(loss, learning_rate):
    """Create and return the Adam update op that minimizes *loss*.

    A non-trainable 'global_step' variable is created and handed to the
    optimizer's minimize call.
    """
    with tf.variable_scope('optimizer') as scope:
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, trainable=False, name='global_step')
        return adam.minimize(loss, global_step=step_counter, name=scope.name)

def evaluation(logits, labels):
    """Return the top-1 accuracy of *logits* against *labels* as a scalar tensor.

    The accuracy is also registered as a scalar summary.
    """
    with tf.variable_scope('accuracy') as scope:
        # 1.0 where the true label is the highest-scoring class, else 0.0.
        hits = tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float16)
        accuracy = tf.reduce_mean(hits)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy

# Keras model shared by load_models() and face_predict(); None until loaded.
model = None


def load_models(file_path=MODEL_PATH):
    """Load the Keras model at *file_path*, cache it in ``model`` and return it.

    NOTE(review): MODEL_PATH names a TensorFlow checkpoint prefix, whereas
    keras load_model presumably expects a model file saved by Keras --
    confirm which file should really be loaded here.
    """
    global model
    model = load_model(file_path)
    return model


def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    """Pad *image* with black borders to a square, then resize it.

    Args:
        image: array whose first two axes are (rows, columns); an optional
            third channel axis is left untouched.
        height: height of the returned image in pixels.
        width: width of the returned image in pixels.

    Returns:
        The padded image resized to *height* x *width*.
    """
    # Imported here because this module has no file-level cv2 import.
    import cv2

    # Only the first two axes are read so colour images work as well.
    h, w = image.shape[:2]
    longest_edge = max(h, w)

    # Distribute the missing rows/columns over both sides of the short edge;
    # the padding along the longest edge is zero.
    top = (longest_edge - h) // 2
    bottom = longest_edge - h - top
    left = (longest_edge - w) // 2
    right = longest_edge - w - left

    black = [0, 0, 0]
    constant = cv2.copyMakeBorder(image, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=black)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(constant, (width, height))


def face_predict(image):
    """Return the class index predicted by the loaded model for *image*.

    The image is padded/resized to IMAGE_SIZE x IMAGE_SIZE and arranged
    according to the backend's dimension ordering unless it already has the
    expected 4-D shape.  Pixel values are scaled to [0, 1] before
    prediction.  The model is loaded with load_models() on first use.
    """
    if model is None:
        load_models()

    if K.image_dim_ordering() == 'th' and image.shape != (1, 3, IMAGE_SIZE, IMAGE_SIZE):
        image = resize_image(image)
        # resize_image returns rows x columns x channels; move the channel
        # axis to the front before adding the batch axis.
        image = image.transpose((2, 0, 1)).reshape((1, 3, IMAGE_SIZE, IMAGE_SIZE))
    elif K.image_dim_ordering() == 'tf' and image.shape != (1, IMAGE_SIZE, IMAGE_SIZE, 3):
        image = resize_image(image)
        image = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))

    image = image.astype('float32')
    image /= 255

    # predict_classes returns one class index per input image.
    result = model.predict_classes(image)
    return result[0]

模型构建完成后,需要进行模型的训练:

face_train.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import os

import numpy as np

import tensorflow as tf

import train_batchs

import face_cnn

from tensorflow.contrib.layers import l2_regularizer

# 模型构建完成后,需要对模型进行训练,即确定对应的参数

N_CLASSES = 7
IMG_W = 48
IMG_H = 48
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 100
CAPACITY = 256
MAX_STEP = 50000
LEARNING_RATE = 0.0001
REGULARIZATION_RATE = 0.0001

train_dir = "E:/Python_workspace/1cfz/train/"
logs_train_dir = "E:/Python_workspace/1cfz/train_log/"
# NOTE(review): this directory holds the validation images and also receives
# the validation summary files -- confirm that mixing both is intended.
logs_validation_dir = "E:/Python_workspace/1cfz/val/"

# Training set: shuffled image paths and labels.
train, train_label = train_batchs.get_file(file_dir=train_dir)

# Validation set.
validation, validation_label = train_batchs.get_file(file_dir=logs_validation_dir)
print(validation)

# Batched input tensors for the training set.
train_batch, train_label_batch = train_batchs.get_batch(
    train, train_label, IMG_W, IMG_H, TRAIN_BATCH_SIZE, CAPACITY)

# Batched input tensors for the validation set.
validation_batch, validation_label_batch = train_batchs.get_batch(
    validation, validation_label, IMG_W, IMG_H, VALIDATION_BATCH_SIZE, CAPACITY)

# Two measures are used against over-fitting: an L2 weight penalty (built
# here) and dropout (applied inside face_cnn.inference).
regularizer = l2_regularizer(REGULARIZATION_RATE)

train_logits_op = face_cnn.inference(
    images=train_batch, batch_size=TRAIN_BATCH_SIZE, n_classes=N_CLASSES,
    regularizer=regularizer, reuse=False)
# The validation graph shares the variables created for the training graph.
validation_logits_op = face_cnn.inference(
    images=validation_batch, batch_size=VALIDATION_BATCH_SIZE, n_classes=N_CLASSES,
    regularizer=None, reuse=True)

train_losses_op = face_cnn.losses(logits=train_logits_op, labels=train_label_batch)
validation_losses_op = face_cnn.losses(logits=validation_logits_op, labels=validation_label_batch)

# Parameter-update op and the accuracy tensors.
train_op = face_cnn.training(train_losses_op, learning_rate=LEARNING_RATE)
train_accuracy_op = face_cnn.evaluation(logits=train_logits_op, labels=train_label_batch)
validation_accuracy_op = face_cnn.evaluation(logits=validation_logits_op, labels=validation_label_batch)

# Single op that evaluates every registered summary.
summary_op = tf.summary.merge_all()

train_fetches = [train_op, train_losses_op, train_accuracy_op]
validation_fetches = [validation_losses_op, validation_accuracy_op, summary_op]

with tf.Session() as sess:
    # Writers that store the graph and the summaries on disk.
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph, max_queue=3)
    val_writer = tf.summary.FileWriter(logs_validation_dir, sess.graph, max_queue=3)
    Saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    # The coordinator manages the input threads started by
    # start_queue_runners, which fill the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in range(MAX_STEP):
            # Stop as soon as any input thread asked for it.
            if coord.should_stop():
                break

            if step % 100 == 0:
                # Fetch the summary in the same run as the training step so
                # the logged values describe the batch that is printed.
                _, train_loss, train_accuracy, summary_str = sess.run(
                    train_fetches + [summary_op])
                print('step %d, train loss = %.2f, train accuracy = %.2f'
                      % (step, train_loss, train_accuracy * 100.0))
                train_writer.add_summary(summary_str, step)
            else:
                _, train_loss, train_accuracy = sess.run(train_fetches)

            if step % 500 == 0 or (step + 1) == MAX_STEP:
                # Save a checkpoint every 500 steps and at the last step.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                Saver.save(sess, checkpoint_path, global_step=step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                val_loss, val_accuracy, summary_str = sess.run(validation_fetches)
                print('** step %d, val loss = %.2f, val accuracy = %.2f'
                      % (step, val_loss, val_accuracy * 100.0))
                val_writer.add_summary(summary_str, step)
    except tf.errors.OutOfRangeError:
        print("Done training -- epoch limit reached")
    finally:
        # Ask the input threads to stop and wait for them before the session
        # closes, then flush the summary files.
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        val_writer.close()

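# NOTE: train_op is the op that performs one parameter update; it is not the
# trained model and evaluating it does not return a prediction.

# To classify a new image, restore the saved checkpoint with tf.train.Saver
# and run the logits tensor built by face_cnn.inference on that image.

# Original (non-working) sketch kept for reference:
# pred = train_op.eval(feed_dict={x:[result],keep_prob:1.0}, session=sess)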

至此模型已经训练完成,接下来通过摄像头采集人脸图像,并调用模型对采集到的人脸进行预测:

camera_face.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import cv2

def CatchPICFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    """Detect faces in a video stream, classify each one and save the crops.

    Args:
        window_name: title of the preview window.
        camera_idx: value passed to cv2.VideoCapture to select the source.
        catch_pic_num: number of face crops to save before stopping.
        path_name: existing directory that receives the crops as <n>.jpg.

    The loop ends when *catch_pic_num* crops were saved, when the stream
    ends, or when the user presses 'q' in the preview window.

    Raises:
        IOError: if the cascade classifier file could not be loaded.
    """
    # Imported here because this module has no file-level face_cnn import.
    import face_cnn

    cv2.namedWindow(window_name)
    cap = cv2.VideoCapture(camera_idx)
    color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    num = 0

    try:
        # NOTE(review): OpenCV ships this file as
        # 'haarcascade_frontalface_default.xml' -- confirm the name on disk.
        classfier = cv2.CascadeClassifier("E:/Python_workspace/face_Recognition/fer2013/harracascade_frontalface_default.xml")
        if classfier.empty():
            raise IOError("could not load the face cascade classifier file")

        while cap.isOpened() and num < catch_pic_num:
            # ok is the read status, frame the image matrix.
            ok, frame = cap.read()
            if not ok:
                break

            grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # scaleFactor: growth of the search window between two scans;
            # minNeighbors: minimum number of neighbouring detections;
            # minSize: smallest face size that is reported.
            faceRects = classfier.detectMultiScale(
                grey, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))

            for x, y, w, h in faceRects:
                # Clamp the enlarged box at 0: a negative start index would
                # wrap around and yield an empty crop.
                top, left = max(y - 10, 0), max(x - 10, 0)
                # Copy the crop so the overlays drawn on the frame below do
                # not end up in the saved picture.
                image = frame[top:y + h + 10, left:x + w + 10].copy()
                if image.size == 0:
                    continue

                img_name = "%s/%d.jpg" % (path_name, num)

                # Classify the cropped face.
                faceID = face_cnn.face_predict(image)
                caption = 'ME' if faceID == 0 else 'others'

                cv2.imwrite(img_name, image)
                num += 1

                cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, thickness=2)
                cv2.putText(frame, caption, (x + 30, y + 30), font, 1, (255, 0, 255), 2)
                cv2.putText(frame, 'num:%d/%d' % (num, catch_pic_num), (x + 30, y + 30), font, 1, (255, 0, 255), 4)

                if num >= catch_pic_num:
                    break

            cv2.imshow(window_name, frame)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even when an error occurs.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # The previous call targeted IdentifyFace, which is not defined in this
    # module; CatchPICFromVideo is the function this script provides.
    # NOTE(review): camera index 0, 100 pictures and the output directory
    # are assumed values -- adjust them to the local setup.
    CatchPICFromVideo('IdentifyFace', 0, 100, 'E:/Python_workspace/face_Recognition/pic')

另外在导入图片的时候,有可能遇到图片不符合(48*48)规格的情况,需要先将图片缩放为48*48并转换为灰度图:

cut_face.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import tensorflow as tf

def process(src_path="E:/Python_workspace/face_Recognition/pic/0.jpg",
            dst_path="E:/Python_workspace/face_Recognition/pic/60.jpg",
            size=(48, 48)):
    """Resize a JPEG to *size*, convert it to greyscale and save it as JPEG.

    Args:
        src_path: JPEG file to read.
        dst_path: file the converted image is written to.
        size: (height, width) of the output image; defaults to 48x48.
    """
    # JPEG data is binary, so the file must be opened in 'rb' mode.
    with tf.gfile.GFile(src_path, "rb") as src:
        img = src.read()

    with tf.Session() as sess:
        # Always decode to three channels so rgb_to_grayscale accepts the
        # image even when the file itself is already greyscale.
        img_data = tf.image.decode_jpeg(img, channels=3)
        # Nearest-neighbour resizing (method 1).
        resized = tf.image.resize_images(
            img_data, list(size), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        image_data = sess.run(tf.image.rgb_to_grayscale(resized))
        encoded_image = tf.image.encode_jpeg(image_data)
        with tf.gfile.GFile(dst_path, "wb") as f:
            f.write(encoded_image.eval())

截止到此,模型使用完毕。

猜你喜欢

转载自blog.csdn.net/livan1234/article/details/81236488