版权声明:欢迎转载与留言提问 https://blog.csdn.net/qq_25439417/article/details/82694125
1.开发算法自动化创造数据集【训练】【验证】【测试】,人工过滤一波数据集
2.分类状态有三种,睁眼,闭眼,非眼【一开始用了两种,人眼和非人眼,对闭眼不能很好识别】
3.搭建cnn网络【采用类似VGG16的网络,但是网络一定要小,这样才能做到实时,方便后期ncnn移动端开发】
4.开发金字塔模型
5.对窗口用cnn predict
6.输入绘图,做可视化调试
1.自动化采集数据集算法【需要有摄像头,可用android手机 或者 usb 网络摄像头 或者视频,调节opencv参数打开】
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 13 09:31:01 2018
@author: Lenovo
"""
import cv2
import numpy as np
import pylab as pl
from PIL import Image
import matplotlib.pyplot as plt
import time
# Dataset-collection script: grab frames from a camera, crop the region with
# the strongest intensity gradients (a cheap eye/face-region proxy) and save
# each crop to ./val for later manual filtering.
pnum = list()

if __name__ == '__main__':
    cap = cv2.VideoCapture(0)
    iss = True
    i = 2000  # filename index offset so new files don't collide with old runs
    while iss:
        iss, img = cap.read()
        i += 1
        if iss:
            edgeimg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Neighbouring-pixel differences. BUG FIX: cast to a signed type
            # first — uint8 subtraction wraps mod 256, so the squared-gradient
            # energy was corrupted in the original code. (Also removes the
            # duplicated limg/rimg assignments.)
            diff = edgeimg[:, :-1].astype(np.int32) - edgeimg[:, 1:].astype(np.int32)
            sq = diff ** 2
            # Row/column gradient energy; [20:-20] trims the frame border.
            himg = np.sum(sq, axis=1)[20:-20]
            limg = np.sum(sq, axis=0)[20:-20]
            # NOTE(review): argmax indexes the *trimmed* arrays, so the true
            # coordinate is offset by 20 px — kept as in the original.
            minposy = np.argmax(himg)
            minposx = np.argmax(limg)
            raw = 300   # half-width of the crop
            rahs = 100  # margin above the gradient peak
            rahx = 250  # margin below the gradient peak
            # Clamp the crop window to the image bounds.
            ymax = edgeimg.shape[0]
            ymin = 0
            xmax = edgeimg.shape[1]
            xmin = 0
            if minposy + rahx < ymax:
                ymax = minposy + rahx
            if minposy - rahs > ymin:
                ymin = minposy - rahs
            if minposx + raw < xmax:
                xmax = minposx + raw
            if minposx - raw > xmin:
                xmin = minposx - raw
            a = edgeimg[ymin:ymax, xmin:xmax]
            cv2.imshow('img', a)
            cv2.imwrite('./val/' + str(i) + '.jpg', a,
                        [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            # BUG FIX: waitKey returns an int; comparing against the *string*
            # 'q' was always False, so pressing 'q' could never quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    # Release the camera and close windows instead of leaking the handle.
    cap.release()
    cv2.destroyAllWindows()
2.搭建CNN
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 13 12:29:51 2018
@author: Lenovo
"""
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras import optimizers
# SGD with momentum and Nesterov acceleration; tiny LR for from-scratch training.
sgd = optimizers.SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)

# Small VGG-like classifier: two conv blocks (32 then 64 filters) followed by
# a 3-way softmax (open eye / closed eye / non-eye). Deliberately tiny so it
# can run in real time and later be ported to ncnn on mobile.
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', activation='relu',
                 input_shape=(224, 224, 3)))
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])

# Directory iterators; pixel values rescaled into [0, 1].
gen = ImageDataGenerator(rescale=1. / 255,)
train_gen_data = gen.flow_from_directory('./train', batch_size=40, shuffle=True,
                                         target_size=(224, 224),
                                         class_mode='categorical')
test_gen_data = gen.flow_from_directory('./test', batch_size=40, shuffle=True,
                                        target_size=(224, 224),
                                        class_mode='categorical')

# Checkpoint the best weights seen so far.
# NOTE(review): monitor='acc' tracks *training* accuracy; 'val_acc' is usually
# intended when validation data exists — confirm before relying on it.
save_best = ModelCheckpoint('best_cnn_cla.h5', monitor='acc', verbose=1,
                            save_best_only=True)
callbacks = [save_best]

model.fit_generator(train_gen_data,
                    steps_per_epoch=72,
                    epochs=45,
                    verbose=1,
                    callbacks=callbacks,
                    validation_data=test_gen_data,
                    validation_steps=45,
                    shuffle=True,
                    initial_epoch=0)
为了节约时间,调通算法,只训练了3个epoch,acc达到78%,后面继续训练的话,应该可以达到90+
3.金字塔+Predict
import numpy as np
import cv2
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras import optimizers
# Rebuild exactly the architecture used for training so the checkpointed
# weights in best_cnn_cla.h5 can be loaded for inference.
sgd = optimizers.SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
model = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu',
           input_shape=(224, 224, 3)),
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])
model.load_weights('best_cnn_cla.h5')
def py_nms(dets, thresh, mode="Union"):
    """Greedy non-maximum suppression.

    Keeps the highest-scoring boxes and discards any box whose overlap with
    an already-kept box exceeds ``thresh``.

    :param dets: ndarray of shape (N, 5); rows are [x1, y1, x2, y2, score].
    :param thresh: overlap threshold; boxes with overlap <= thresh survive.
    :param mode: "Union" -> IoU (inter / union);
                 "Minimum" -> inter / min(area_a, area_b).
    :return: the kept rows of ``dets`` (the boxes themselves, not indexes),
        or ``[]`` when ``dets`` is empty.
    :raises ValueError: if ``mode`` is unrecognised (the original code hit an
        undefined-variable NameError in that case).
    """
    if len(dets) == 0:
        return []
    if mode not in ("Union", "Minimum"):
        raise ValueError("mode must be 'Union' or 'Minimum', got %r" % (mode,))
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]
    # +1 because boxes use inclusive pixel coordinates.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current best box with every remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if mode == "Union":
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        else:  # "Minimum"
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        # Survivors overlap little enough; +1 shifts indices past the kept box.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return dets[keep]
def image_preprocess(img):
    """Scale raw pixel values into roughly [-1, 1].

    Maps 0..255 to about -0.996..0.996 via (x - 127.5) / 128 — the same
    normalisation applied to the training patches.
    """
    return (img - 127.5) * 0.0078125
def slide_window(img, window_size, stride):
    """Enumerate square sliding-window boxes over a pyramid level.

    :param img: H x W x C image (must be 3-dimensional).
    :param window_size: side length of the square window, in pixels.
    :param stride: step between consecutive windows, in pixels.
    :return: ``(img, boxes)`` where boxes is an ndarray of
        [row1, col1, row2, col2] entries, or ``None`` when the image is too
        small for a window or is not 3-dimensional.
    """
    h, w = img.shape[0], img.shape[1]
    if w <= window_size + stride or h <= window_size + stride:
        return None
    if img.ndim != 3:
        return None
    boxes = [
        [r * stride, c * stride, r * stride + window_size, c * stride + window_size]
        for c in range(int((w - window_size) / stride))
        for r in range(int((h - window_size) / stride))
    ]
    return img, np.asarray(boxes)
def pyramid(image, f, window_size):
    """Build an image pyramid for multi-scale sliding-window detection.

    Collects the current image, then shrinks it by factor ``f`` per level,
    stopping once either side would no longer fit a detection window.

    :param image: source image (H x W x C ndarray).
    :param f: per-level scale factor (0 < f < 1).
    :param window_size: stop once width or height <= this value.
    :return: list of images, largest (the original) first.
    """
    levels = []
    h, w = image.shape[0], image.shape[1]
    while w > window_size and h > window_size:
        levels.append(image)
        w = int(w * f)
        h = int(h * f)
        # cv2.resize takes (width, height), not (rows, cols).
        image = cv2.resize(image, (w, h))
    return levels
def min_face(img, F, window_size, stride):
    """Rescale ``img`` so a face of ``F`` pixels maps to ``window_size``.

    :param img: H x W x C image.
    :param F: smallest face size (in pixels) the detector should find.
    :param window_size: detector input size.
    :param stride: sliding-window step (used only in the size sanity check).
    :return: the resized image.
    """
    h, w, _ = img.shape
    w_re = int(float(w) * window_size / F)
    h_re = int(float(h) * window_size / F)
    # NOTE(review): when the rescaled image is too small for even one window
    # this only *prints* None and resizes anyway — almost certainly meant
    # ``return None``; kept as-is because the caller does not check for None.
    if w_re <= window_size + stride or h_re <= window_size + stride:
        print(None)
    # cv2.resize takes (width, height) — easy to get backwards.
    img = cv2.resize(img, (w_re, h_re))
    return img
# --- Stage 1 driver: multi-scale sliding-window detection with the CNN -----
image = cv2.imread('testbi.png')
h, w, _ = image.shape
print(h, w)

# Tunable parameters.
IMAGE_SIZE = 224        # CNN input size
stride = 30             # sliding-window step
F = 250                 # smallest face size to detect
ff = 0.8                # pyramid down-scale factor per level
p_12 = 0.8              # probability threshold, stage 1
p_24 = 0.7              # probability threshold, stage 2 (net-24, disabled)
overlapThresh_12 = 0.7  # NMS threshold, stage 1
overlapThresh_24 = 0.3  # NMS threshold, stage 2 (net-24, disabled)

import time

# Rescale so the smallest face we care about fills the detector window,
# then build the image pyramid for multi-scale detection.
image_ = min_face(image, F, IMAGE_SIZE, stride)
pyd = pyramid(np.array(image_), ff, IMAGE_SIZE)

# Run the classifier over every window of every pyramid level.
window_after_12 = []
for i, img in enumerate(pyd):
    slide_return = slide_window(img, IMAGE_SIZE, stride)
    if slide_return is None:
        # Remaining levels are even smaller, so stop entirely.
        break
    img_12, window_net_12 = slide_return
    w_12 = img_12.shape[1]
    h_12 = img_12.shape[0]

    # Crop and normalise every window.
    patch_net_12 = []
    for box in window_net_12:
        patch = img_12[box[0]:box[2], box[1]:box[3], :]
        patch_net_12.append(image_preprocess(patch))
    patch_net_12 = np.array(patch_net_12)

    # Classify all patches of this level in one batch.
    pred_cal_12 = model.predict(patch_net_12)
    window_net = window_net_12

    # Keep windows where any class probability beats the threshold; the
    # window's score is pred[1] (kept identical to the original logic).
    windows = []
    for k, pred in enumerate(pred_cal_12):
        s = np.where(pred > p_12)[0]
        if len(s) == 0:
            continue
        windows.append([window_net[k][0], window_net[k][1],
                        window_net[k][2], window_net[k][3], pred[1]])

    # Per-level NMS, then map surviving boxes back to original coordinates.
    windows = np.asarray(windows)
    windows = py_nms(windows, overlapThresh_12, 'Union')
    window_net = windows
    for box in window_net:
        lt_x = int(float(box[0]) * w / w_12)
        lt_y = int(float(box[1]) * h / h_12)
        rb_x = int(float(box[2]) * w / w_12)
        rb_y = int(float(box[3]) * h / h_12)
        window_after_12.append([lt_x, lt_y, rb_x, rb_y, box[4]])

window_net = window_after_12
print(window_net)
#
## net-24
#windows_24 = []
#if window_net == []:
# print("windows is None!")
#
#if window_net != []:
# print('hhhhhhhhhhhh')
# patch_net_24 = []
# img_24 = image
# for box in window_net:
# patch = img_24[box[0]:box[2], box[1]:box[3], :]
# patch = cv2.resize(patch, (24, 24))
# # 做归一化处理
# patch = image_preprocess(patch)
# patch_net_24.append(patch)
# # 预测人脸
# pred_net_24 = model.predict(patch_net_24)
# print (pred_net_24)
# window_net = window_net
# # print (pred_net_24)
# for i, pred in enumerate(pred_net_24):
# s = np.where(pred[1]>p_24)[0]
# if len(s)==0:
# continue
# windows_24.append([window_net[i][0],window_net[i][1],window_net[i][2],window_net[i][3],pred[1]])
# # 按照概率值 由大到小排序
# windows_24 = np.asarray(windows_24)
# #window_net = nms_max(windows_24, overlapThresh=0.7)
# window_net = py_nms(windows_24, overlapThresh_24, 'Union')
#
#
# Draw the surviving detections, then save and display the annotated image.
if window_net == []:
    print("windows is None!")
if window_net != []:
    print(len(window_net))
for box in window_net:
    # Boxes are stored [row1, col1, row2, col2]; cv2 wants (x, y) points.
    cv2.rectangle(image, (int(box[1]), int(box[0])),
                  (int(box[3]), int(box[2])), (0, 255, 0), 2)
cv2.imwrite("images/face_img.jpg", image)
cv2.imshow("face detection", image)
#cv2.waitKey(10000)
#cv2.destroyAllWindows()
闭眼检测
睁眼检测
可以看到框很多,后面需要进一步调节NMS参数来达到去掉一些框的效果
输出得分最高的bbox
反思:
我们以大小为480*640的图像为例,缩放尺度为1.1,首先构建图片中最左侧的金字塔(构建金字塔的结束条件是最顶层图像长小于128或者宽小于64),然后以64*128的滑动窗口,滑动步长为8像素,对每一层都会进行滑动,获得检测窗口。这个时候,我们会获得13473张待检测图像,首先这个数量是很庞大的,其次就是图像冗余很大。
所以针对滑动窗口法的这一弱势,我们有必要对该方法进行改进~
后续修改:
调节网络形式与参数,训练效果有大幅度提升
结果:
在速度上也有了大幅度提升,从昨天的40s -> 0.3s