人脸检测：MTCNN训练数据正负样本生成

本代码基于作者提供的python版本代码修改，参考：

https://github.com/DuinoDu/mtcnn/blob/master/demo.py （作者提供）

https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py

注解：对 PNet、RNet 级联之后输出的 bbox 矩形框，若 5 个关键点全部落在框内，则把该框当成正样本来训练 landmark；

1，生成positive,negative,part三种样本，用作者的net1->net2生成bbox，根据预测的bbox和ground truth计算IOU：

positive: IOU >= 0.65;

negative: IOU < 0.3;

part: 0.4 <= IOU < 0.65

代码如下：

[python]view plain copy
#!/usr/bin/env python  
# -*- coding: utf-8 -*-  
import _init_paths  
import caffe  
import cv2  
import numpy as np  
#from python_wrapper import *  
import os  
  
def bbreg(boundingbox, reg):
    """Calibrate bounding boxes in place with regression offsets.

    Parameters
    ----------
    boundingbox : numpy array, columns 0-3 read as x1, y1, x2, y2
        Boxes to refine.  Columns 0-3 are overwritten in place; any further
        columns are left untouched.
    reg : numpy array
        Regression offsets.  It is transposed first; afterwards columns 0-3
        are used as the offsets for x1, y1, x2, y2, scaled by the box width
        (x) or height (y).

    Returns
    -------
    The same ``boundingbox`` object that was passed in.
    """
    reg = reg.T

    # calibrate bounding boxes
    if reg.shape[1] == 1:
        # Diagnostic only: no reshape is actually performed here.
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print("reshape of reg")

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    # All four corners are computed from the original coordinates before
    # anything is written back.
    bb0 = boundingbox[:, 0] + reg[:, 0] * w
    bb1 = boundingbox[:, 1] + reg[:, 1] * h
    bb2 = boundingbox[:, 2] + reg[:, 2] * w
    bb3 = boundingbox[:, 3] + reg[:, 3] * h

    boundingbox[:, 0:4] = np.array([bb0, bb1, bb2, bb3]).T
    return boundingbox
  
def pad(boxesA, w, h):
    """Compute copy ranges for cropping boxes that may cross the image border.

    Parameters
    ----------
    boxesA : numpy array, columns 0-3 read as x1, y1, x2, y2
        Candidate boxes.  The array is copied; the caller's data is not
        modified.
    w, h : number
        Image width and height used as the clipping limits.

    Returns
    -------
    list
        ``[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]``.  ``tmpw``/``tmph``
        are the full box width/height.  ``y..ey`` / ``x..ex`` are the clipped
        source ranges and ``dy..edy`` / ``dx..edx`` the matching destination
        ranges inside a ``tmph`` x ``tmpw`` patch.  All ranges except
        ``tmpw``/``tmph`` are shifted by -1 (clamped at 0) at the end to turn
        the 1-based convention into 0-based indices.
    """
    boxes = boxesA.copy()  # work on a copy so the caller's boxes stay intact

    tmph = boxes[:, 3] - boxes[:, 1] + 1
    tmpw = boxes[:, 2] - boxes[:, 0] + 1
    numbox = boxes.shape[0]

    dx = np.ones(numbox)
    dy = np.ones(numbox)
    # Copies, not aliases: edx/edy are edited below for clipped boxes, and
    # tmpw/tmph must keep reporting the full (unclipped) box size.
    edx = tmpw.copy()
    edy = tmph.copy()

    # Column views into the local copy; edits below stay local.
    x = boxes[:, 0]
    y = boxes[:, 1]
    ex = boxes[:, 2]
    ey = boxes[:, 3]

    # Right / bottom overflow: shorten the destination range and clip source.
    over_right = ex > w
    edx[over_right] = -ex[over_right] + w - 1 + tmpw[over_right]
    ex[over_right] = w - 1

    over_bottom = ey > h
    edy[over_bottom] = -ey[over_bottom] + h - 1 + tmph[over_bottom]
    ey[over_bottom] = h - 1

    # Left / top overflow: start later in the destination and clip source.
    under_left = x < 1
    dx[under_left] = 2 - x[under_left]
    x[under_left] = 1

    under_top = y < 1
    dy[under_top] = 2 - y[under_top]
    y[under_top] = 1

    # for python index from 0, while matlab from 1
    dy = np.maximum(0, dy - 1)
    dx = np.maximum(0, dx - 1)
    y = np.maximum(0, y - 1)
    x = np.maximum(0, x - 1)
    edy = np.maximum(0, edy - 1)
    edx = np.maximum(0, edx - 1)
    ey = np.maximum(0, ey - 1)
    ex = np.maximum(0, ex - 1)

    return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
  
def IoU(box, boxes):
    """Intersection-over-union of one detection against ground-truth boxes.

    Parameters
    ----------
    box : numpy array; elements 0-3 are read as x1, y1, x2, y2
        The detected box (any further elements, e.g. a score, are ignored).
    boxes : numpy array, indexed as [:, 0:4] = x1, y1, x2, y2
        Ground-truth boxes.

    Returns
    -------
    numpy array with one IoU value per row of ``boxes``.  Widths and heights
    use the inclusive-pixel convention (``x2 - x1 + 1``).
    """
    gt_x1, gt_y1 = boxes[:, 0], boxes[:, 1]
    gt_x2, gt_y2 = boxes[:, 2], boxes[:, 3]

    det_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_areas = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)

    # Overlap rectangle, clamped to zero size when the boxes are disjoint.
    overlap_w = np.maximum(0, np.minimum(box[2], gt_x2) - np.maximum(box[0], gt_x1) + 1)
    overlap_h = np.maximum(0, np.minimum(box[3], gt_y2) - np.maximum(box[1], gt_y1) + 1)

    intersection = overlap_w * overlap_h
    union = det_area + gt_areas - intersection
    return intersection / union
  
  
def rerec(bboxA):
    """Turn each box into a square, in place, keeping its centre.

    Columns 0-3 of ``bboxA`` are read as x1, y1, x2, y2.  The square side is
    the larger of ``x2 - x1`` and ``y2 - y1``.  The same array object is
    modified and returned.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    # Move the top-left corner so the square shares the original centre.
    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # Bottom-right corner = new top-left corner + side on both axes.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
  
  
def nms(boxes, threshold, type):
    """Greedy non-maximum suppression.

    :boxes: array whose columns 0-4 are read as x1, y1, x2, y2, score
    :threshold: overlap above which a lower-scored box is dropped
    :type: 'Min' divides the intersection by the smaller area; any other
        value divides by the union
    :returns: list of kept row indices, highest score first; an empty numpy
        array when ``boxes`` has no rows
    """
    if boxes.shape[0] == 0:
        return np.array([])

    left, top, right, bottom, scores = (boxes[:, col] for col in range(5))
    areas = np.multiply(right - left + 1, bottom - top + 1)
    order = np.array(scores.argsort())  # ascending, so the best box is last

    keep = []
    while len(order) > 0:
        best = order[-1]
        rest = order[0:-1]

        overlap_w = np.maximum(
            0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        overlap_h = np.maximum(
            0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter = overlap_w * overlap_h

        if type == 'Min':
            ratio = inter / np.minimum(areas[best], areas[rest])
        else:
            ratio = inter / (areas[best] + areas[rest] - inter)

        keep.append(best)
        # Only candidates that do not overlap the kept box too much survive.
        order = rest[ratio <= threshold]
    return keep
  
  
def generateBoundingBox(map, reg, scale, t):
    """Turn a score map into candidate boxes in original-image coordinates.

    Parameters
    ----------
    map : 2-D numpy array of scores (transposed before use)
    reg : numpy array indexed as reg[0..3, :, :], the four regression maps
    scale : number the cell coordinates are divided by
    t : score threshold; cells with ``score >= t`` produce a box

    Returns
    -------
    numpy array with one row per selected cell and 9 columns:
    two top-left coordinates, two bottom-right coordinates, the score and
    the four regression values.  Zero rows when no cell passes ``t``.
    """
    stride = 2
    cellsize = 12

    heat = map.T
    rows, cols = np.where(heat >= t)

    # Regression values at the selected cells, one row per regression map.
    offsets = np.array([reg[k, :, :].T[rows, cols] for k in range(4)])

    cells = np.array([cols, rows]).T
    # matlab index from 1, python from 0: no "-1" is applied to the cell index
    top_left = np.fix((stride * cells + 1) / scale).T
    bottom_right = np.fix((stride * cells + cellsize - 1 + 1) / scale).T
    scores = heat[rows, cols][np.newaxis, :]

    return np.concatenate((top_left, bottom_right, scores, offsets), axis=0).T
  
def drawBoxes(im, boxes):
    """Draw every row of ``boxes`` on ``im`` with cv2.rectangle.

    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and truncated to int.
    Rectangles use colour (0, 255, 0) and thickness 1.  Returns ``im``.
    """
    corners = zip(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    for left, top, right, bottom in corners:
        cv2.rectangle(im, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), 1)
    return im
  
def drawlandmark(im, points):
    """Draw five landmark circles for every row of ``points``.

    Each row is read as x0..x4 followed by y0..y4 (element ``j`` pairs with
    element ``j + 5``).  Circles use radius 2 and colour (255, 0, 0).
    Returns ``im``.
    """
    for row in points[:points.shape[0]]:
        for j in range(5):
            centre = (int(row[j]), int(row[j + 5]))
            cv2.circle(im, centre, 2, (255, 0, 0))
    return im
  
  
from time import time

# Stack of start timestamps; tic() pushes one, toc() pops the most recent.
_tstart_stack = []


def tic():
    """Start a timer by pushing the current time onto the stack."""
    _tstart_stack.append(time())


def toc(fmt="Elapsed: %s s"):
    """Print the seconds elapsed since the matching tic().

    ``fmt`` must contain one ``%s`` placeholder for the elapsed time.
    Raises IndexError when called without a pending tic().
    """
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(fmt % (time() - _tstart_stack.pop()))
  
  
def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
    """Run the first two MTCNN stages (PNet, then RNet) on one image.

    Parameters
    ----------
    img : numpy array indexed as (row, col, channel)
        Input image; converted to float internally.
        NOTE(review): callers in this file pass a channel-swapped copy of the
        cv2 image - confirm the channel order the models expect.
    minsize : number
        Sets the first pyramid scale to ``12.0 / minsize``.
    PNet, RNet : caffe nets
        Must expose ``blobs['data']`` and ``forward()``; PNet outputs
        'prob1' and 'conv4-2', RNet outputs 'prob1' and 'conv5-2'.
    threshold : sequence
        ``threshold[0]`` is the PNet score threshold, ``threshold[1]`` the
        RNet score threshold.
    fastresize : bool
        True normalises the image before resizing, False after resizing.
    factor : number
        Multiplicative step between pyramid scales.

    Returns
    -------
    numpy array of boxes.  Shape (0, 9) when PNet yields no candidates;
    otherwise one row per box with 5 columns x1, y1, x2, y2, score
    (possibly zero rows when nothing passes RNet).
    """
    factor_count = 0
    # builtin float instead of the np.float alias, which newer NumPy removed
    total_boxes = np.zeros((0, 9), float)
    h = img.shape[0]
    w = img.shape[1]
    minl = min(h, w)
    img = img.astype(float)
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid: keep shrinking while the short side is >= 12
    scales = []
    while minl >= 12:
        scales.append(m * pow(factor, factor_count))
        minl *= factor
        factor_count += 1

    # first stage
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))

        if fastresize:
            im_data = (img - 127.5) * 0.0078125  # [0,255] -> [-1,1]
            im_data = cv2.resize(im_data, (ws, hs))  # default is bilinear
        else:
            im_data = cv2.resize(img, (ws, hs))  # default is bilinear
            im_data = (im_data - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # (hs, ws, 3) -> (3, ws, hs), matching the blob shape set below
        im_data = np.swapaxes(im_data, 0, 2)
        im_data = np.array([im_data], dtype=float)
        PNet.blobs['data'].reshape(1, 3, ws, hs)
        PNet.blobs['data'].data[...] = im_data
        out = PNet.forward()

        boxes = generateBoundingBox(out['prob1'][0, 1, :, :], out['conv4-2'][0], scale, threshold[0])
        if boxes.shape[0] != 0:
            pick = nms(boxes, 0.5, 'Union')
            if len(pick) > 0:
                boxes = boxes[pick, :]

        if boxes.shape[0] != 0:
            total_boxes = np.concatenate((total_boxes, boxes), axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # nms across all scales
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick, :]

        # apply the PNet regression (columns 5-8) and convert to square
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        t1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        t2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        t3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        t4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        t5 = total_boxes[:, 4]
        total_boxes = np.array([t1, t2, t3, t4, t5]).T
        total_boxes = rerec(total_boxes)  # convert box to square

        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage

        # construct input for RNet: one 24x24 patch per candidate box
        tempimg = np.zeros((numbox, 24, 24, 3))
        for k in range(numbox):
            # zero patch of the box size; only the in-image part is filled
            tmp = np.zeros((int(tmph[k]) + 1, int(tmpw[k]) + 1, 3))
            tmp[int(dy[k]):int(edy[k]) + 1, int(dx[k]):int(edx[k]) + 1] = \
                img[int(y[k]):int(ey[k]) + 1, int(x[k]):int(ex[k]) + 1]

            tempimg[k, :, :, :] = cv2.resize(tmp, (24, 24))

        tempimg = (tempimg - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # RNet
        tempimg = np.swapaxes(tempimg, 1, 3)

        RNet.blobs['data'].reshape(numbox, 3, 24, 24)
        RNet.blobs['data'].data[...] = tempimg
        out = RNet.forward()

        score = out['prob1'][:, 1]
        pass_t = np.where(score > threshold[1])[0]

        score = np.array([score[pass_t]]).T
        total_boxes = np.concatenate((total_boxes[pass_t, 0:4], score), axis=1)

        # regression offsets of the surviving boxes, one column per box
        mv = out['conv5-2'][pass_t, :].T
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            if len(pick) > 0:
                total_boxes = total_boxes[pick, :]
                total_boxes = bbreg(total_boxes, mv[:, pick])
                total_boxes = rerec(total_boxes)

    return total_boxes
  
  
  
  
def main():
    """Generate positive, negative and part-face samples for the 48x48 net.

    Reads ``wider_face_train.txt``: each line is an image path (relative to
    ``img_dir``, without the ``.jpg`` extension) followed by ground-truth
    boxes as ``x1 y1 x2 y2`` quadruples.  PNet -> RNet detections on each
    image are labelled by their best IoU against the ground truth:

    * IoU < 0.3 with every ground-truth box -> negative
    * max IoU >= 0.65 -> positive
    * 0.4 <= max IoU < 0.65 -> part
    * anything else is discarded

    Each kept crop is resized to 48x48 and written to disk, and one label
    line per crop is written to the matching list file.
    """
    img_dir = "/home/xiao/code/mtcnn-caffe/prepare_data/WIDER_train/images/"
    imglistfile = "wider_face_train.txt"
    with open(imglistfile, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    neg_save_dir = "/media/xiao/软件/mtcnn/train/48/negative/"
    pos_save_dir = "/media/xiao/软件/mtcnn/train/48/positive/"
    part_save_dir = "/media/xiao/软件/mtcnn/train/48/part/"
    image_size = 48

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part ("dont care")
    image_idx = 0

    minsize = 20
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709

    # The label files are closed even if processing fails half-way.
    with open('/media/xiao/软件/mtcnn/train/48/pos_48.txt', 'w') as f1, \
            open('/media/xiao/软件/mtcnn/train/48/neg_48.txt', 'w') as f2, \
            open('/media/xiao/软件/mtcnn/train/48/part_48.txt', 'w') as f3:

        caffe.set_mode_gpu()
        PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
        RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)

        for annotation in annotations:
            fields = annotation.strip().split(' ')
            # list(...) so the values are materialised on Python 3 as well
            bbox = list(map(float, fields[1:]))
            gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img_path = img_dir + fields[0] + '.jpg'

            print(image_idx)
            image_idx += 1
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for a missing/unreadable file
                print("skip unreadable image: %s" % img_path)
                continue
            # channel-swapped copy (channels 0 and 2 exchanged) for detection
            img_matlab = img[:, :, ::-1].copy()

            boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)
            img_h, img_w = img.shape[0], img.shape[1]

            # generate positive, negative, part samples
            for box in boundingboxes:
                x_left, y_top, x_right, y_bottom, _ = box
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # ignore box that is too small or beyond image border
                if crop_w < image_size / 2 or crop_h < image_size / 2:
                    continue
                if x_left < 0 or y_top < 0:
                    continue
                # A box crossing the right/bottom border would be silently
                # truncated by slicing, so its crop would no longer match
                # the regression labels computed from the full box.
                if x_right > img_w - 1 or y_bottom > img_h - 1:
                    continue

                # IoU between the current box and all ground-truth boxes;
                # with no ground truth the detection counts as a negative
                Iou = IoU(box, gts)
                max_iou = np.max(Iou) if Iou.size else 0.0
                cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)]
                resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

                # negative sample: IoU with all gts must be below 0.3
                if max_iou < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s.jpg" % (image_size, n_idx) + ' 0')
                    f2.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # ground-truth box with the highest IoU
                x1, y1, x2, y2 = gts[np.argmax(Iou)]

                # bbox regression labels: (x1, y1) offsets are relative to
                # the crop's top-left corner, (x2, y2) offsets to its
                # bottom-right corner, all normalised by the crop size
                offset_x1 = (x1 - x_left) / float(crop_w)
                offset_y1 = (y1 - y_top) / float(crop_h)
                offset_x2 = (x2 - x_right) / float(crop_w)
                offset_y2 = (y2 - y_bottom) / float(crop_h)

                if max_iou >= 0.65:
                    # positive sample
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("%s/positive/%s.jpg" % (image_size, p_idx) + ' 1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    f1.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    # part sample
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("%s/part/%s.jpg" % (image_size, d_idx) + ' -1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    f3.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
  
# Run the sample generation only when this file is executed as a script.
if __name__ == "__main__":  
    main()  
  

2，生成landmark样本，用作者的net1->net2生成bbox，根据5个landmark是否都在bbox中作为判别条件：

扫描二维码关注公众号，回复： 1013491 查看本文章

代码如下：

[python]view plain copy
#!/usr/bin/env python  
# -*- coding: utf-8 -*-  
  
import _init_paths  
import caffe  
import cv2  
import numpy as np  
#from python_wrapper import *  
import os  
  
def bbreg(boundingbox, reg):
    """Calibrate bounding boxes in place with regression offsets.

    Parameters
    ----------
    boundingbox : numpy array, columns 0-3 read as x1, y1, x2, y2
        Boxes to refine.  Columns 0-3 are overwritten in place; any further
        columns are left untouched.
    reg : numpy array
        Regression offsets.  It is transposed first; afterwards columns 0-3
        are used as the offsets for x1, y1, x2, y2, scaled by the box width
        (x) or height (y).

    Returns
    -------
    The same ``boundingbox`` object that was passed in.
    """
    reg = reg.T

    # calibrate bounding boxes
    if reg.shape[1] == 1:
        # Diagnostic only: no reshape is actually performed here.
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print("reshape of reg")

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    # All four corners are computed from the original coordinates before
    # anything is written back.
    bb0 = boundingbox[:, 0] + reg[:, 0] * w
    bb1 = boundingbox[:, 1] + reg[:, 1] * h
    bb2 = boundingbox[:, 2] + reg[:, 2] * w
    bb3 = boundingbox[:, 3] + reg[:, 3] * h

    boundingbox[:, 0:4] = np.array([bb0, bb1, bb2, bb3]).T
    return boundingbox
  
  
def pad(boxesA, w, h):
    """Compute copy ranges for cropping boxes that may cross the image border.

    Parameters
    ----------
    boxesA : numpy array, columns 0-3 read as x1, y1, x2, y2
        Candidate boxes.  The array is copied; the caller's data is not
        modified.
    w, h : number
        Image width and height used as the clipping limits.

    Returns
    -------
    list
        ``[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]``.  ``tmpw``/``tmph``
        are the full box width/height.  ``y..ey`` / ``x..ex`` are the clipped
        source ranges and ``dy..edy`` / ``dx..edx`` the matching destination
        ranges inside a ``tmph`` x ``tmpw`` patch.  All ranges except
        ``tmpw``/``tmph`` are shifted by -1 (clamped at 0) at the end to turn
        the 1-based convention into 0-based indices.
    """
    boxes = boxesA.copy()  # work on a copy so the caller's boxes stay intact

    tmph = boxes[:, 3] - boxes[:, 1] + 1
    tmpw = boxes[:, 2] - boxes[:, 0] + 1
    numbox = boxes.shape[0]

    dx = np.ones(numbox)
    dy = np.ones(numbox)
    # Copies, not aliases: edx/edy are edited below for clipped boxes, and
    # tmpw/tmph must keep reporting the full (unclipped) box size.
    edx = tmpw.copy()
    edy = tmph.copy()

    # Column views into the local copy; edits below stay local.
    x = boxes[:, 0]
    y = boxes[:, 1]
    ex = boxes[:, 2]
    ey = boxes[:, 3]

    # Right / bottom overflow: shorten the destination range and clip source.
    over_right = ex > w
    edx[over_right] = -ex[over_right] + w - 1 + tmpw[over_right]
    ex[over_right] = w - 1

    over_bottom = ey > h
    edy[over_bottom] = -ey[over_bottom] + h - 1 + tmph[over_bottom]
    ey[over_bottom] = h - 1

    # Left / top overflow: start later in the destination and clip source.
    under_left = x < 1
    dx[under_left] = 2 - x[under_left]
    x[under_left] = 1

    under_top = y < 1
    dy[under_top] = 2 - y[under_top]
    y[under_top] = 1

    # for python index from 0, while matlab from 1
    dy = np.maximum(0, dy - 1)
    dx = np.maximum(0, dx - 1)
    y = np.maximum(0, y - 1)
    x = np.maximum(0, x - 1)
    edy = np.maximum(0, edy - 1)
    edx = np.maximum(0, edx - 1)
    ey = np.maximum(0, ey - 1)
    ex = np.maximum(0, ex - 1)

    return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
  
def IoU(box, boxes):
    """Intersection-over-union of one detection against ground-truth boxes.

    Parameters
    ----------
    box : numpy array; elements 0-3 are read as x1, y1, x2, y2
        The detected box (any further elements, e.g. a score, are ignored).
    boxes : numpy array, indexed as [:, 0:4] = x1, y1, x2, y2
        Ground-truth boxes.

    Returns
    -------
    numpy array with one IoU value per row of ``boxes``.  Widths and heights
    use the inclusive-pixel convention (``x2 - x1 + 1``).
    """
    gt_x1, gt_y1 = boxes[:, 0], boxes[:, 1]
    gt_x2, gt_y2 = boxes[:, 2], boxes[:, 3]

    det_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_areas = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)

    # Overlap rectangle, clamped to zero size when the boxes are disjoint.
    overlap_w = np.maximum(0, np.minimum(box[2], gt_x2) - np.maximum(box[0], gt_x1) + 1)
    overlap_h = np.maximum(0, np.minimum(box[3], gt_y2) - np.maximum(box[1], gt_y1) + 1)

    intersection = overlap_w * overlap_h
    union = det_area + gt_areas - intersection
    return intersection / union
  
  
def rerec(bboxA):
    """Turn each box into a square, in place, keeping its centre.

    Columns 0-3 of ``bboxA`` are read as x1, y1, x2, y2.  The square side is
    the larger of ``x2 - x1`` and ``y2 - y1``.  The same array object is
    modified and returned.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    # Move the top-left corner so the square shares the original centre.
    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # Bottom-right corner = new top-left corner + side on both axes.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
  
  
def nms(boxes, threshold, type):
    """Greedy non-maximum suppression.

    :boxes: array whose columns 0-4 are read as x1, y1, x2, y2, score
    :threshold: overlap above which a lower-scored box is dropped
    :type: 'Min' divides the intersection by the smaller area; any other
        value divides by the union
    :returns: list of kept row indices, highest score first; an empty numpy
        array when ``boxes`` has no rows
    """
    if boxes.shape[0] == 0:
        return np.array([])

    left, top, right, bottom, scores = (boxes[:, col] for col in range(5))
    areas = np.multiply(right - left + 1, bottom - top + 1)
    order = np.array(scores.argsort())  # ascending, so the best box is last

    keep = []
    while len(order) > 0:
        best = order[-1]
        rest = order[0:-1]

        overlap_w = np.maximum(
            0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        overlap_h = np.maximum(
            0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter = overlap_w * overlap_h

        if type == 'Min':
            ratio = inter / np.minimum(areas[best], areas[rest])
        else:
            ratio = inter / (areas[best] + areas[rest] - inter)

        keep.append(best)
        # Only candidates that do not overlap the kept box too much survive.
        order = rest[ratio <= threshold]
    return keep
  
  
def generateBoundingBox(map, reg, scale, t):
    """Turn a score map into candidate boxes in original-image coordinates.

    Parameters
    ----------
    map : 2-D numpy array of scores (transposed before use)
    reg : numpy array indexed as reg[0..3, :, :], the four regression maps
    scale : number the cell coordinates are divided by
    t : score threshold; cells with ``score >= t`` produce a box

    Returns
    -------
    numpy array with one row per selected cell and 9 columns:
    two top-left coordinates, two bottom-right coordinates, the score and
    the four regression values.  Zero rows when no cell passes ``t``.
    """
    stride = 2
    cellsize = 12

    heat = map.T
    rows, cols = np.where(heat >= t)

    # Regression values at the selected cells, one row per regression map.
    offsets = np.array([reg[k, :, :].T[rows, cols] for k in range(4)])

    cells = np.array([cols, rows]).T
    # matlab index from 1, python from 0: no "-1" is applied to the cell index
    top_left = np.fix((stride * cells + 1) / scale).T
    bottom_right = np.fix((stride * cells + cellsize - 1 + 1) / scale).T
    scores = heat[rows, cols][np.newaxis, :]

    return np.concatenate((top_left, bottom_right, scores, offsets), axis=0).T
  
  
  
def drawBoxes(im, boxes):
    """Draw every row of ``boxes`` on ``im`` with cv2.rectangle.

    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and truncated to int.
    Rectangles use colour (0, 255, 0) and thickness 1.  Returns ``im``.
    """
    corners = zip(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    for left, top, right, bottom in corners:
        cv2.rectangle(im, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), 1)
    return im
  
def drawBoxes_align(im, boxe):
    """Draw a single box on ``im`` with cv2.rectangle and return ``im``.

    Elements 0-3 of ``boxe`` are read as x1, y1, x2, y2 and truncated to int.
    The rectangle uses colour (0, 255, 0) and thickness 1.
    """
    top_left = (int(boxe[0]), int(boxe[1]))
    bottom_right = (int(boxe[2]), int(boxe[3]))
    cv2.rectangle(im, top_left, bottom_right, (0, 255, 0), 1)
    return im
  
def drawlandmark(im, points):
    """Draw five landmark circles for every row of ``points``.

    Each row is read as x0..x4 followed by y0..y4 (element ``j`` pairs with
    element ``j + 5``).  Circles use radius 2 and colour (255, 0, 0).
    Returns ``im``.
    """
    for row in points[:points.shape[0]]:
        for j in range(5):
            centre = (int(row[j]), int(row[j + 5]))
            cv2.circle(im, centre, 2, (255, 0, 0))
    return im
  
def drawlandmark_align(im, point):
    """Draw the five landmarks of one face on ``im`` and return ``im``.

    ``point`` is read as interleaved pairs x0, y0, x1, y1, ..., x4, y4.
    Circles use radius 2 and colour (255, 0, 0).
    """
    for j in range(5):
        px, py = point[j * 2], point[j * 2 + 1]
        cv2.circle(im, (int(px), int(py)), 2, (255, 0, 0))
    return im
  
  
from time import time

# Stack of start timestamps; tic() pushes one, toc() pops the most recent.
_tstart_stack = []


def tic():
    """Start a timer by pushing the current time onto the stack."""
    _tstart_stack.append(time())


def toc(fmt="Elapsed: %s s"):
    """Print the seconds elapsed since the matching tic().

    ``fmt`` must contain one ``%s`` placeholder for the elapsed time.
    Raises IndexError when called without a pending tic().
    """
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(fmt % (time() - _tstart_stack.pop()))
  
  
def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
    """Run the first two MTCNN stages (PNet, then RNet) on one image.

    Parameters
    ----------
    img : numpy array indexed as (row, col, channel)
        Input image; converted to float internally.
        NOTE(review): callers in this file pass a channel-swapped copy of the
        cv2 image - confirm the channel order the models expect.
    minsize : number
        Sets the first pyramid scale to ``12.0 / minsize``.
    PNet, RNet : caffe nets
        Must expose ``blobs['data']`` and ``forward()``; PNet outputs
        'prob1' and 'conv4-2', RNet outputs 'prob1' and 'conv5-2'.
    threshold : sequence
        ``threshold[0]`` is the PNet score threshold, ``threshold[1]`` the
        RNet score threshold.
    fastresize : bool
        True normalises the image before resizing, False after resizing.
    factor : number
        Multiplicative step between pyramid scales.

    Returns
    -------
    numpy array of boxes.  Shape (0, 9) when PNet yields no candidates;
    otherwise one row per box with 5 columns x1, y1, x2, y2, score
    (possibly zero rows when nothing passes RNet).
    """
    factor_count = 0
    # builtin float instead of the np.float alias, which newer NumPy removed
    total_boxes = np.zeros((0, 9), float)
    h = img.shape[0]
    w = img.shape[1]
    minl = min(h, w)
    img = img.astype(float)
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid: keep shrinking while the short side is >= 12
    scales = []
    while minl >= 12:
        scales.append(m * pow(factor, factor_count))
        minl *= factor
        factor_count += 1

    # first stage
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))

        if fastresize:
            im_data = (img - 127.5) * 0.0078125  # [0,255] -> [-1,1]
            im_data = cv2.resize(im_data, (ws, hs))  # default is bilinear
        else:
            im_data = cv2.resize(img, (ws, hs))  # default is bilinear
            im_data = (im_data - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # (hs, ws, 3) -> (3, ws, hs), matching the blob shape set below
        im_data = np.swapaxes(im_data, 0, 2)
        im_data = np.array([im_data], dtype=float)
        PNet.blobs['data'].reshape(1, 3, ws, hs)
        PNet.blobs['data'].data[...] = im_data
        out = PNet.forward()

        boxes = generateBoundingBox(out['prob1'][0, 1, :, :], out['conv4-2'][0], scale, threshold[0])
        if boxes.shape[0] != 0:
            pick = nms(boxes, 0.5, 'Union')
            if len(pick) > 0:
                boxes = boxes[pick, :]

        if boxes.shape[0] != 0:
            total_boxes = np.concatenate((total_boxes, boxes), axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # nms across all scales
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick, :]

        # apply the PNet regression (columns 5-8) and convert to square
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        t1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        t2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        t3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        t4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        t5 = total_boxes[:, 4]
        total_boxes = np.array([t1, t2, t3, t4, t5]).T
        total_boxes = rerec(total_boxes)  # convert box to square

        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage

        # construct input for RNet: one 24x24 patch per candidate box
        tempimg = np.zeros((numbox, 24, 24, 3))
        for k in range(numbox):
            # zero patch of the box size; only the in-image part is filled
            tmp = np.zeros((int(tmph[k]) + 1, int(tmpw[k]) + 1, 3))
            tmp[int(dy[k]):int(edy[k]) + 1, int(dx[k]):int(edx[k]) + 1] = \
                img[int(y[k]):int(ey[k]) + 1, int(x[k]):int(ex[k]) + 1]

            tempimg[k, :, :, :] = cv2.resize(tmp, (24, 24))

        tempimg = (tempimg - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # RNet
        tempimg = np.swapaxes(tempimg, 1, 3)

        RNet.blobs['data'].reshape(numbox, 3, 24, 24)
        RNet.blobs['data'].data[...] = tempimg
        out = RNet.forward()

        score = out['prob1'][:, 1]
        pass_t = np.where(score > threshold[1])[0]

        score = np.array([score[pass_t]]).T
        total_boxes = np.concatenate((total_boxes[pass_t, 0:4], score), axis=1)

        # regression offsets of the surviving boxes, one column per box
        mv = out['conv5-2'][pass_t, :].T
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            if len(pick) > 0:
                total_boxes = total_boxes[pick, :]
                total_boxes = bbreg(total_boxes, mv[:, pick])
                total_boxes = rerec(total_boxes)

    return total_boxes
  
  
  
  
def main():
    """Generate 48x48 landmark training samples from CelebA annotations.

    Reads ``celebA_bbox_landmark.txt``: the first field of each line is the
    image path relative to ``img_dir`` and the last ten fields are the five
    landmarks as interleaved pixel coordinates x0 y0 x1 y1 ... x4 y4.
    PNet -> RNet detections are run on each image; a detection becomes a
    landmark sample only when all five landmarks lie inside it.  The crop is
    resized to 48x48 and saved, and a label line is written with the
    landmarks normalised to the crop (ordered x0..x4 then y0..y4).
    """
    img_dir = "/media/xiao/学习/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
    anno_file = "celebA_bbox_landmark.txt"
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    # crops are resized to 48x48
    image_size = 48
    landmark_save_dir = "/media/xiao/软件/mtcnn/train/48/landmark/"

    l_idx = 0  # landmark
    image_idx = 0

    minsize = 40
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709

    # The label file is closed even if processing fails half-way.
    with open('/media/xiao/软件/mtcnn/train/48/landmark_48.txt', 'w') as f1:
        caffe.set_mode_gpu()
        PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
        RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)

        for annotation in annotations:
            fields = annotation.strip().split(' ')

            im_path = img_dir + fields[0]
            # list(...) so the values can be sliced on Python 3 as well
            pts = list(map(float, fields[-10:]))
            # Pixel-space landmarks; never modified, so every detection of
            # this image is tested against the original coordinates.
            xs = pts[0::2]
            ys = pts[1::2]

            print(image_idx)
            image_idx += 1
            img = cv2.imread(im_path)
            if img is None:
                # cv2.imread returns None for a missing/unreadable file
                print("skip unreadable image: %s" % im_path)
                continue
            # channel-swapped copy (channels 0 and 2 exchanged) for detection
            img_matlab = img[:, :, ::-1].copy()

            # face boxes from the author-provided net1 -> net2
            boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)
            img_h, img_w = img.shape[0], img.shape[1]

            # generate landmark samples
            for box in boundingboxes:
                x_left, y_top, x_right, y_bottom, _ = box  # face bbox corners
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # ignore box that is too small or beyond image border
                if crop_w < image_size / 2 or crop_h < image_size / 2:
                    continue
                if x_left < 0 or y_top < 0:
                    continue
                # A box crossing the right/bottom border would be silently
                # truncated by slicing, so the crop would no longer match
                # landmarks normalised by the full box size.
                if x_right > img_w - 1 or y_bottom > img_h - 1:
                    continue

                # keep the box only when all five landmarks lie inside it
                if any(px < x_left or px > x_right for px in xs):
                    continue
                if any(py < y_top or py > y_bottom for py in ys):
                    continue

                # author's label layout x0,x1,x2,x3,x4,y0,y1,y2,y3,y4:
                # each landmark relative to the crop's top-left corner,
                # normalised by the crop size
                normalized = [(px - x_left) / float(crop_w) for px in xs]
                normalized += [(py - y_top) / float(crop_h) for py in ys]

                # crop the box and resize it
                cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)]
                resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

                # image name, then class and bbox fields (all -1 here)
                save_file = os.path.join(landmark_save_dir, "%s.jpg" % l_idx)
                f1.write(str(image_size) + "/landmark/%s.jpg" % l_idx + ' -1 -1 -1 -1 -1')

                # landmark coordinates, then save the image
                for value in normalized:
                    f1.write(" %f" % value)
                f1.write("\n")
                cv2.imwrite(save_file, resized_im)
                l_idx += 1
  
# Run the landmark sample generation only when executed as a script.
if __name__ == "__main__":  
    main() 

人脸检测：MTCNN训练数据正负样本生成

猜你喜欢