实战演习（五）——人脸识别（CNN）简单演练

本案例主要用了CNN算法进行实现，对七种表情进行判别，具体的七种表情为：

# 0：生气；

# 1：厌恶；

# 2：害怕；

# 3：高兴；

# 4：伤心；

# 5：惊讶；

# 6：正常；

本案例使用的数据集为fer2013.csv，该数据集包含三列：表情类别、图片像素、数据用途（取值为Training、PublicTest、PrivateTest，分别对应训练集、验证集和测试集）。

1、face_data.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import csv

import os

# 主要是将原始数据集分成三部分：

# 其一是做训练集；

# 其二是做验证集；

# 其三是做测试集；

# Split the raw fer2013.csv into three CSV files according to the value in its
# last column: 'Training' -> train.csv, 'PublicTest' -> val.csv and
# 'PrivateTest' -> test.csv. The last column itself is dropped on output.
databases_path = 'E:/Python_workspace/1cfz/fer2013'
datasets_path = 'E:/Python_workspace/1cfz'

csv_file = os.path.join(databases_path, 'fer2013.csv')
train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')

with open(csv_file) as f:
    csvr = csv.reader(f)
    # The first row is the column header.
    header = next(csvr)
    print(header)
    rows = [row for row in csvr]


def _write_subset(usage, out_path):
    """Write the rows whose last column equals ``usage`` to ``out_path``.

    The last column is stripped from both the header and the data rows.
    Returns the list of rows that were written (without the header).
    """
    # row[:-1] keeps every column except the last; row[-1] is the last column.
    subset = [row[:-1] for row in rows if row[-1] == usage]
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(out_path, 'w+') as out:
        csv.writer(out, lineterminator='\n').writerows([header[:-1]] + subset)
    return subset


trn = _write_subset('Training', train_csv)
print(len(trn))

val = _write_subset('PublicTest', val_csv)
print(len(val))

tst = _write_subset('PrivateTest', test_csv)
print(len(tst))

数据分类后，需要将数据恢复成照片，恢复的时候同时将数据按照表情进行分类：

Img_recover.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import csv

import os

from PIL import Image

import numpy as np

import face_data

# 主要是将分开的三个数据集转化成图片，即将三个数据集转化成可以查看的图片

# 0：生气；

# 1：厌恶；

# 2：害怕；

# 3：高兴；

# 4：伤心；

# 5：惊讶；

# 6：正常；

# Convert the three CSV splits back into viewable images, stored as
# <split>/<label>/<row index>.jpg.
datasets_path = 'E:/Python_workspace/1cfz'

# Input CSV files.
train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')

# Output image folders, one per split.
train_set = os.path.join(datasets_path, 'train')
val_set = os.path.join(datasets_path, 'val')
test_set = os.path.join(datasets_path, 'test')

for save_path, csv_file in [(train_set, train_csv), (val_set, val_csv), (test_set, test_csv)]:
    # exist_ok=True replaces the check-then-create pattern, which is racy and
    # needs an extra filesystem lookup.
    os.makedirs(save_path, exist_ok=True)
    with open(csv_file) as f:
        csvr = csv.reader(f)
        # Skip the header row.
        header = next(csvr)
        for i, (label, pixel) in enumerate(csvr):
            # The pixel column is a space-separated list of values; parse it
            # into floats and arrange them as a 48x48 matrix.
            pixel = np.asarray([float(p) for p in pixel.split()]).reshape(48, 48)
            # One subfolder per label value.
            subfolder = os.path.join(save_path, label)
            os.makedirs(subfolder, exist_ok=True)
            # Convert to 8-bit grayscale ('L') before saving as JPEG.
            im = Image.fromarray(pixel).convert('L')
            image_name = os.path.join(subfolder, '{:05d}.jpg'.format(i))
            print(image_name)
            im.save(image_name)

数据分类完成后，需要对数据进行分批，直接使用每一批次的数据进行训练：

train_batchs.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import os

import tensorflow as tf

import matplotlib.pyplot as plt

import numpy as np

import face_data

import Img_recover

from tensorflow.python.training.queue_runner_impl import start_queue_runners

# 0：生气；

# 1：厌恶；

# 2：害怕；

# 3：高兴；

# 4：伤心；

# 5：惊讶；

# 6：正常；

# 将数据细分成7类，每类又分成训练集和标签

# One (image paths, labels) pair of module-level lists per expression class.
anger_0, anger_0_labels = [], []
disgust_1, disgust_1_label = [], []
fear_2, fear_2_label = [], []
happy_3, happy_3_label = [], []
sad_4, sad_4_label = [], []
surprised_5, surprised_5_label = [], []
normal_6, normal_6_label = [], []

def get_file(file_dir):
    """Collect the image paths and labels under ``file_dir`` in random order.

    ``file_dir`` must end with a path separator and contain the seven
    subfolders '0' .. '6'; every file found in subfolder ``k`` is given the
    integer label ``k``.

    The lists are built locally on every call. Accumulating into shared
    module-level lists would make a second call (e.g. for the validation
    folder) also return every path gathered by the first call.

    Args:
        file_dir: folder prefix, e.g. 'E:/Python_workspace/1cfz/train/'.

    Returns:
        (all_image_list, all_label_list): a list of image paths and a list of
        int labels of the same length, shuffled with the same permutation.

    Raises:
        OSError: if one of the seven class subfolders cannot be listed.
    """
    image_paths = []
    labels = []
    for label in range(7):
        class_dir = file_dir + str(label)
        # os.listdir returns bare file names such as '00001.jpg'.
        for file in os.listdir(class_dir):
            image_paths.append(class_dir + '/' + file)
            labels.append(label)

    # Shuffle paths and labels together with one index permutation; this
    # avoids pushing the int labels through a string array and back.
    order = np.random.permutation(len(image_paths))
    all_image_list = [image_paths[i] for i in order]
    all_label_list = [labels[i] for i in order]
    return all_image_list, all_label_list

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build the queue-based input pipeline that yields image/label batches.

    Args:
        image: list of JPEG file paths.
        label: list of integer labels, parallel to ``image``.
        image_W: target image width in pixels.
        image_H: target image height in pixels.
        batch_size: number of examples per batch.
        capacity: capacity passed to ``tf.train.batch``.

    Returns:
        (image_batch, label_batch): a float32 image batch tensor and an int32
        label tensor reshaped to ``[batch_size]``.
    """
    # Convert the Python lists to tensors of the expected dtypes.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # slice_input_producer only defines the queue that emits one
    # (path, label) pair at a time; nothing is read until the queue runners
    # are started by the caller (tf.train.start_queue_runners).
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # Decode the JPEG as a single-channel (grayscale) image.
    image = tf.image.decode_jpeg(image_contents, channels=1)

    # Crop or zero-pad to the target size. The API signature is
    # (image, target_height, target_width), so the height goes first; passing
    # (image_W, image_H) would swap the dimensions of non-square targets.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)

    # Standardize each image individually so pixel scales are comparable.
    image = tf.image.per_image_standardization(image)

    # num_threads is the number of threads enqueuing examples into the batch queue.
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads = 32, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

# [[0对应的图片list],[0]

# [1对应的图片list],[1]

# [2对应的图片list],[2]

# [3对应的图片list],[3]

# [4对应的图片list],[4]

# [5对应的图片list],[5]

# [6对应的图片list],[6]]

# file_path = 'E:/Python_workspace/1cfz/test/'

# all_image_list, all_label_list = get_file(file_path)

# print(all_image_list)

# print(all_label_list)

# image_batch, label_batch = get_batch(all_image_list, all_label_list, 24, 24, 100)

# print(image_batch)

# print(label_batch)

数据分批完成后，开始构建模型cnn：

face_cnn.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import tensorflow as tf

import face_data

import Img_recover

from tensorflow.python.ops.distributions.kullback_leibler import cross_entropy

from keras.models import load_model

from keras.models import Model

from keras import backend as K

from keras.backend.common import image_dim_ordering

# 前面三个face_data, img_recover, train_batch用来进行数据的处理：

# face_data：将数据集分成三类；

# img_recover：将像素数据转换成图片，并分类；

# train_batch：将分类好的数据再进行分批处理，形成多个批次；

# 然后进行第二段：即编写训练模型；

# Default side length (in pixels) used by resize_image() and face_predict().
# NOTE(review): the training script feeds 48x48 images; confirm 64 is intended.
IMAGE_SIZE = 64

# Default path used by load_models().
MODEL_PATH = "E:/Python_workspace/1cfz/train_log/model.ckpt"

def inference(images, batch_size, n_classes, regularizer, reuse):
    """Build the CNN graph and return the unnormalized class scores (logits).

    The network is: one 3x3 convolution (1 input channel -> 16 filters) with
    ReLU, one 2x2 max-pool, two fully connected ReLU layers (2048 and 512
    units) and a final linear layer with ``n_classes`` outputs.

    Args:
        images: input image batch; the convolution kernel expects a single
            input channel.
        batch_size: number of examples in ``images``; used to flatten the
            pooled feature maps.
        n_classes: number of output classes.
        regularizer: callable applied to the fully connected weight matrices,
            whose result is added to the "losses" collection; ``None``
            disables this.
        reuse: passed to ``tf.variable_scope`` so a second call can share the
            variables of the first; dropout is applied only when it is falsy.

    Returns:
        The logits tensor produced by the final linear layer.
    """
    # Convolution layer followed by a ReLU activation.
    with tf.variable_scope('conv1', reuse = reuse) as scope:
        conv1_weights = tf.get_variable("weights", shape=[3,3,1,16], dtype=tf.float32,
                                        initializer=tf.truncated_normal_initializer(stddev = 0.1, dtype=tf.float32))
        conv1_biases = tf.get_variable("biases", shape=[16], dtype=tf.float32,
                                       initializer=tf.constant_initializer(0.1))
        conv1 = tf.nn.conv2d(images, conv1_weights, strides=[1,1,1,1], padding="SAME")
        pre_activation = tf.nn.bias_add(conv1, conv1_biases)
        activation = tf.nn.relu(pre_activation, name=scope.name)

    # 2x2 max-pooling with stride 2.
    with tf.variable_scope('pool2') as scope:
        pool2 = tf.nn.max_pool(activation, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME", name=scope.name)

    # First fully connected layer; the pooled maps are flattened per example.
    with tf.variable_scope('fc1', reuse=reuse) as scope:
        reshaped = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshaped.get_shape()[1].value
        fc1_weights = tf.get_variable("weights", shape=[dim, 2048], dtype = tf.float32,
                                      initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            # Register the regularization term of these weights in the "losses" collection.
            tf.add_to_collection("losses", regularizer(fc1_weights))
        fc1_biases = tf.get_variable("biases", shape=[2048], dtype=tf.float32,
                                     initializer=tf.constant_initializer(0.1))
        fc1 = tf.matmul(reshaped, fc1_weights)+fc1_biases
        activation = tf.nn.relu(fc1, name=scope.name)
        # Dropout is skipped when the variables are being reused.
        if not reuse:
            activation = tf.nn.dropout(activation, keep_prob=0.5)

    # Second fully connected layer.
    with tf.variable_scope('fc2', reuse=reuse) as scope:
        fc2_weights = tf.get_variable("weights", shape=[2048, 512], dtype=tf.float32,
                                      initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(fc2_weights))
        fc2_biases = tf.get_variable("biases", shape=[512], dtype=tf.float32,
                                     initializer=tf.constant_initializer(0.0))
        fc2 = tf.matmul(activation, fc2_weights)+fc2_biases
        activation = tf.nn.relu(fc2, name=scope.name)
        if not reuse:
            activation = tf.nn.dropout(activation, keep_prob=0.5)

    # Final linear layer; softmax itself is applied by the loss function.
    with tf.variable_scope('softmax', reuse=reuse) as scope:
        softmax_weights = tf.get_variable("weights", shape=[512, n_classes], dtype=tf.float32,
                                          initializer=tf.truncated_normal_initializer(stddev = 0.1, dtype = tf.float32))
        softmax_biases = tf.get_variable("biases", shape=[n_classes], dtype=tf.float32,
                                         initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(activation, softmax_weights), softmax_biases, name=scope.name)
    return softmax_linear

def losses(logits, labels):
    """Return the total loss: mean cross-entropy plus collected regularization.

    Args:
        logits: unnormalized class scores.
        labels: integer class indices (sparse labels).

    Returns:
        A scalar tensor equal to the mean sparse softmax cross-entropy plus
        the sum of every term stored in the "losses" collection. Only the
        cross-entropy part is written to the summary.
    """
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name=scope.name)
        regularization_terms = tf.get_collection("losses")
        # tf.add_n rejects an empty list, which happens when no regularizer
        # was ever supplied; fall back to the plain cross-entropy then.
        if regularization_terms:
            loss = tf.add_n(regularization_terms)+cross_entropy_mean
        else:
            loss = cross_entropy_mean
        tf.summary.scalar(scope.name + '/loss', cross_entropy_mean)
    return loss

def training(loss, learning_rate):
    """Create the Adam update op that minimizes ``loss``.

    A non-trainable ``global_step`` variable is created and handed to
    ``minimize`` together with the loss.

    Returns:
        The training op.
    """
    with tf.variable_scope('optimizer') as opt_scope:
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, trainable=False, name='global_step')
        return adam.minimize(loss, global_step=step_counter, name=opt_scope.name)

def evaluation(logits, labels):
    """Return the top-1 accuracy of ``logits`` against ``labels``.

    The per-example hit flags are cast to float16 and averaged; the result is
    also recorded as a scalar summary.
    """
    with tf.variable_scope('accuracy') as acc_scope:
        hits = tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float16)
        accuracy = tf.reduce_mean(hits)
        tf.summary.scalar(acc_scope.name+'/accuracy', accuracy)
    return accuracy

def load_models(file_path = MODEL_PATH):
    """Load a Keras model from ``file_path`` and keep it as the module-level ``model``.

    face_predict() reads the module-level name ``model``, so the loaded model
    is bound globally as well as returned.

    Args:
        file_path: path handed to ``keras.models.load_model``.

    Returns:
        The loaded model.
    """
    # NOTE(review): the default MODEL_PATH ends in 'model.ckpt'; confirm that a
    # model in a format load_model() can read is actually stored there.
    global model
    # The parameter is named file_path; reading 'filepath' raised NameError.
    model = load_model(file_path)
    return model

def resize_image(image, height = IMAGE_SIZE, width = IMAGE_SIZE):
    """Pad ``image`` with black borders to a square, then resize it.

    Args:
        image: image array whose first two axes are (rows, columns); both
            grayscale (2-D) and multi-channel (3-D) arrays are accepted.
        height: output height in pixels.
        width: output width in pixels.

    Returns:
        The padded image resized to ``height`` x ``width``.
    """
    # cv2 is not imported at the top of this module, so import it here.
    import cv2

    top, bottom, left, right = (0,0,0,0)
    # Only the first two axes are the spatial size; unpacking the full shape
    # would fail for images that carry a channel axis.
    h, w = image.shape[:2]
    longest_edge = max(h, w)

    # Split the missing rows/columns between the two opposite sides.
    if h<longest_edge:
        dh = longest_edge - h
        top = dh // 2
        bottom = dh - top
    elif w<longest_edge:
        dw = longest_edge - w
        left = dw // 2
        right = dw - left

    black = [0,0,0]
    constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value = black)
    # cv2.resize expects dsize as (width, height).
    return cv2.resize(constant, (width, height))

def face_predict(image):
    """Predict the class index of a single image.

    When the image does not already have the batch shape matching the Keras
    dimension ordering ('th': channels first, 'tf': channels last), it is
    resized with resize_image() and reshaped to that shape. Pixel values are
    then converted to float32 and divided by 255 before prediction.

    The module-level name ``model`` is used for the prediction.

    Returns:
        The first element of ``model.predict_classes``.
    """
    ordering = K.image_dim_ordering()
    if ordering == 'th':
        expected_shape = (1, 3, IMAGE_SIZE, IMAGE_SIZE)
    elif ordering == 'tf':
        expected_shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3)
    else:
        expected_shape = None

    if expected_shape is not None and image.shape != expected_shape:
        image = resize_image(image).reshape(expected_shape)

    scaled = image.astype('float32')
    scaled /= 255
    # predict_classes returns the predicted class for every input in the batch.
    result = model.predict_classes(scaled)
    return result[0]

模型构建完成后，需要进行模型的训练：

face_train.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import os

import numpy as np

import tensorflow as tf

import train_batchs

import face_cnn

from tensorflow.contrib.layers import l2_regularizer

# 模型构建完成后,需要对模型进行训练,即确定对应的参数

# Training script: builds the input pipelines and the model graph, then runs
# the optimization loop with periodic logging, checkpointing and validation.
N_CLASSES = 7
IMG_W = 48
IMG_H = 48
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 100
CAPACITY = 256
MAX_STEP = 50000
LEARNING_RATE = 0.0001
REGULARIZATION_RATE = 0.0001

train_dir = "E:/Python_workspace/1cfz/train/"
logs_train_dir = "E:/Python_workspace/1cfz/train_log/"
# NOTE(review): this folder is used both as the validation image source and
# as the destination of the validation summaries; confirm that is intended.
logs_validation_dir = "E:/Python_workspace/1cfz/val/"

# Training set: file paths and labels.
train, train_label = train_batchs.get_file(file_dir=train_dir)
# Validation set: file paths and labels.
validation, validation_label = train_batchs.get_file(file_dir=logs_validation_dir)
print(validation)

# Batched input tensors for training and validation.
train_batch, train_label_batch = train_batchs.get_batch(train, train_label, IMG_W, IMG_H, TRAIN_BATCH_SIZE, CAPACITY)
validation_batch, validation_label_batch = train_batchs.get_batch(validation, validation_label, IMG_W, IMG_H,
                                                                  VALIDATION_BATCH_SIZE, CAPACITY)

# L2 regularization (together with dropout inside the model) limits overfitting.
regularizer = l2_regularizer(REGULARIZATION_RATE)

# The validation graph reuses the training variables and adds no regularizer.
train_logits_op = face_cnn.inference(images=train_batch, batch_size=TRAIN_BATCH_SIZE, n_classes=N_CLASSES,
                                     regularizer=regularizer, reuse = False)
validation_logits_op = face_cnn.inference(images=validation_batch, batch_size=VALIDATION_BATCH_SIZE, n_classes=N_CLASSES,
                                          regularizer=None, reuse = True)

train_losses_op = face_cnn.losses(logits=train_logits_op, labels = train_label_batch)
validation_losses_op = face_cnn.losses(logits=validation_logits_op, labels=validation_label_batch)

# Optimizer step and accuracy metrics.
train_op = face_cnn.training(train_losses_op, learning_rate = LEARNING_RATE)
train_accuracy_op = face_cnn.evaluation(logits = train_logits_op, labels = train_label_batch)
validation_accuracy_op = face_cnn.evaluation(logits = validation_logits_op, labels = validation_label_batch)

# Merge every summary defined in the graph into one op.
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    # Writers that store the graph and the summaries on disk.
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph, max_queue=3)
    val_writer = tf.summary.FileWriter(logs_validation_dir, sess.graph, max_queue=3)
    Saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    # The coordinator manages the queue-runner threads that feed the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            # Stop as soon as the coordinator has been asked to stop.
            if coord.should_stop():
                break

            # One optimization step; the value returned for train_op is discarded.
            _, train_loss, train_accuracy = sess.run([train_op, train_losses_op, train_accuracy_op])

            # Every 100 steps: print metrics and write the merged summaries.
            if step%100 == 0:
                print('step %d, train loss = %.2f, train accuracy = %.2f' % (step, train_loss, train_accuracy*100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            # Every 500 steps and at the last step: save a checkpoint.
            if step % 500 == 0 or (step+1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                Saver.save(sess, checkpoint_path, global_step=step)

            # Every 2000 steps and at the last step: evaluate on validation data.
            if step % 2000 ==0 or (step+1) == MAX_STEP:
                val_loss, val_accuracy = sess.run([validation_losses_op, validation_accuracy_op])
                print('** step %d, val loss = %.2f, val accuracy = %.2f' % (step, val_loss, val_accuracy*100.0))
                summary_str = sess.run(summary_op)
                val_writer.add_summary(summary_str, step)
    except tf.errors.OutOfRangeError:
        print("Done training -- epoch limit reached")
    finally:
        coord.request_stop()
        # Wait for the queue-runner threads to finish before the session is
        # closed, and flush the pending summaries to disk.
        coord.join(threads)
        train_writer.close()
        val_writer.close()

# train_op：是训练之后的模型；

# 导入要识别的图片，然后用上面的模型进行分类

# pred = train_op.eval(feed_dict={x:[result],keep_prob:1.0}, session=sess)

至此模型已经训练完成，接下来通过摄像头采集人脸图片，并调用模型对采集到的人脸进行预测：

camera_face.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import cv2

def CatchPICFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    """Detect faces in a video stream, classify them and save the crops.

    Frames are read from the capture device until it closes, a read fails,
    more than ``catch_pic_num`` crops have been saved, or 'q' is pressed.
    Each detected face is cropped with a 10-pixel margin, passed to
    ``face_cnn.face_predict`` and written to ``<path_name>/<n>.jpg``.

    Args:
        window_name: title of the preview window.
        camera_idx: value handed to ``cv2.VideoCapture``.
        catch_pic_num: stop once more than this many crops have been saved.
        path_name: existing folder in which the crops are stored.
    """
    # face_cnn is used below but never imported at the top of this script.
    # NOTE(review): face_predict reads a module-level ``model`` in face_cnn;
    # confirm the model has been loaded before this function is called.
    import face_cnn

    cv2.namedWindow(window_name)
    # Open the video source.
    cap = cv2.VideoCapture(camera_idx)
    # NOTE(review): the cascade shipped with OpenCV is usually named
    # 'haarcascade_frontalface_default.xml'; confirm this file name.
    classfier = cv2.CascadeClassifier("E:/Python_workspace/face_Recognition/fer2013/harracascade_frontalface_default.xml")
    color = (0, 255, 0)
    num = 0

    try:
        while cap.isOpened():
            # ok is the read status, frame holds the image matrix.
            ok, frame = cap.read()
            if not ok:
                break

            # The detector works on a grayscale image.
            grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # scaleFactor: scale step between successive search windows;
            # minNeighbors: minimum number of neighboring rectangles needed
            # to keep a detection; minSize: smallest accepted face size.
            faceRects = classfier.detectMultiScale(grey, scaleFactor = 1.2, minNeighbors = 3, minSize = (32, 32))

            for faceRect in faceRects:
                # Each rectangle is (x, y, width, height).
                x, y, w, h = faceRect
                img_name = "%s/%d.jpg" %(path_name, num)
                # Clamp the margin at the frame border: a negative start
                # index would wrap around and yield a wrong or empty crop.
                image = frame[max(y-10, 0):y+h+10, max(x-10, 0):x+w+10]

                # Classify the cropped face.
                faceID = face_cnn.face_predict(image)
                label = 'ME' if faceID == 0 else 'others'
                cv2.rectangle(frame, (x-10, y-10), (x+w+10, y+h+10), color, thickness = 2)
                cv2.putText(frame, label, (x+30, y+30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,255), 2)

                # Save the crop.
                cv2.imwrite(img_name, image, [int(cv2.IMWRITE_PNG_COMPRESSION), 9])
                num += 1
                if num>(catch_pic_num):
                    break

                cv2.rectangle(frame, (x-10, y-10), (x+w+10, y+h+10), color, 2)
                font = cv2.FONT_HERSHEY_SIMPLEX
                # Show progress against the requested total instead of a fixed 100.
                cv2.putText(frame, 'num:%d/%d' %(num, catch_pic_num), (x+30, y+30), font, 1, (255, 0, 255), 4)

            if num>(catch_pic_num):
                break

            cv2.imshow(window_name, frame)
            c = cv2.waitKey(10)
            if c&0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close the windows, even on errors.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # IdentifyFace is not defined anywhere in this script; the only entry
    # point it defines is CatchPICFromVideo.
    # NOTE(review): camera index 0, the limit of 100 pictures and the output
    # folder are assumptions taken from the surrounding scripts; confirm.
    CatchPICFromVideo('IdentifyFace', 0, 100, 'E:/Python_workspace/face_Recognition/pic')

另外在导入图片的时候，有可能遇到图片不符合（48*48）规格的情况，需要对图片进行修剪：

cut_face.py:

#!/usr/bin/env python

# _*_ UTF-8 _*_

import tensorflow as tf

def process():
    """Resize one JPEG to 48x48, convert it to grayscale and save the result.

    Reads 'pic/0.jpg', resizes it with nearest-neighbor interpolation
    (method=1), converts it to a single channel and writes the re-encoded
    JPEG to 'pic/60.jpg'.
    """
    # JPEG data is binary, so the file must be opened in 'rb' mode.
    img = tf.gfile.GFile("E:/Python_workspace/face_Recognition/pic/0.jpg", "rb").read()
    with tf.Session() as sess:
        # Force three channels so rgb_to_grayscale always gets RGB input.
        img_data = tf.image.decode_jpeg(img, channels=3)
        resized = tf.image.resize_images(img_data, [48, 48], method=1)
        # rgb_to_grayscale is a function of tf.image; tf.image itself is a
        # module and cannot be called.
        image_data = sess.run(tf.image.rgb_to_grayscale(resized))
        encoded_image = tf.image.encode_jpeg(image_data)
        with tf.gfile.GFile("E:/Python_workspace/face_Recognition/pic/60.jpg", "wb") as f:
            # eval() uses the session opened above as the default session.
            f.write(encoded_image.eval())

至此，模型的训练与使用流程介绍完毕。

实战演习（五）——人脸识别（CNN）简单演练

猜你喜欢