最近跑通了 MTCNN 的训练代码,本文对其中用于生成 positive、negative、part 三类样本的脚本 gen_48net_data2.py 进行解读。
项目地址:https://github.com/dlunion/mtcnn
对应代码地址:https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py
# Generate positive / negative / part training crops (stdsize x stdsize) from
# an annotation file whose lines have the form:
#   <image name> <x1 y1 x2 y2 per box ...> <10 landmark coordinates>
# For every crop an image file is written, plus one line in the matching
# pos_/neg_/part_ text file (relative path, class label, and for pos/part the
# bbox regression offsets followed by the normalized landmarks).
import sys
sys.path.append('D:\\Anaconda2\\libs')  # On Windows: add the Python library directory to the import path
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU

# stdsize: side length every saved crop (positive, negative, part) is resized to
stdsize = 48
anno_file = "E:/face_alignment/data/CelebA/Anno/mtcnn_train_label_2.txt"
im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
pos_save_dir = str(stdsize) + "/positive"
part_save_dir = str(stdsize) + "/part"
neg_save_dir = str(stdsize) + '/negative'
save_dir = "./" + str(stdsize)

NEG_PER_IMAGE = 100        # negatives wanted from every source image
MAX_NEG_ATTEMPTS = 10000   # upper bound on random tries so the sampler cannot spin forever
MIN_NEG_SIZE = 40          # smallest side length of a negative crop (before resizing)
CANDIDATES_PER_BOX = 50    # random candidate crops drawn around every annotated box


def mkr(dr):
    """Create directory *dr* if it does not exist yet."""
    if not os.path.exists(dr):
        os.mkdir(dr)


def _rand_offset(extent):
    """Return a random integer shift within +/-20% of *extent*.

    Falls back to 0 when the truncated range is empty (extent below 5 px),
    where calling ``randint`` would raise ``ValueError``.
    """
    low, high = int(-extent * 0.2), int(extent * 0.2)
    if low >= high:
        return 0
    return npr.randint(low, high)


mkr(save_dir)
mkr(pos_save_dir)
mkr(part_save_dir)
mkr(neg_save_dir)

p_idx = 0  # positive samples written so far
n_idx = 0  # negative samples written so far
d_idx = 0  # part samples written so far
idx = 0  # source images visited so far
box_idx = 0  # annotated boxes processed so far

# Output list files (sample path + labels); the context manager closes them
# even if processing aborts with an exception.
with open(os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt'), 'w') as f1, \
        open(os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt'), 'w') as f2, \
        open(os.path.join(save_dir, 'part_' + str(stdsize) + '.txt'), 'w') as f3:
    # Read the annotation file: one source image per line.
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_path = annotation[0]  # first field: image file name
        # Fields 2 .. -11 are bbox coordinates, the last 10 are landmarks.
        bbox = [float(v) for v in annotation[1:-10]]
        backupPts = [float(v) for v in annotation[-10:]]
        # One row (x1, y1, x2, y2) per annotated box.
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        im_path = im_dir + im_path
        img = cv2.imread(im_path)
        idx += 1
        if idx % 100 == 0:
            print("%s images done" % idx)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("skip unreadable image: %s" % im_path)
            continue

        height, width = img.shape[:2]

        # ---- negative samples -------------------------------------------
        # Crop side is drawn from [MIN_NEG_SIZE, min(width, height) // 2);
        # images too small for that range yield no negatives.
        max_neg_size = min(width, height) // 2
        if max_neg_size > MIN_NEG_SIZE:
            neg_num = 0
            attempts = 0
            while neg_num < NEG_PER_IMAGE and attempts < MAX_NEG_ATTEMPTS:
                attempts += 1
                size = npr.randint(MIN_NEG_SIZE, max_neg_size)
                # Top-left corner, chosen so the crop stays inside the image.
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])

                # A negative must overlap every annotated box by IoU < 0.3.
                if np.max(IoU(crop_box, boxes)) >= 0.3:
                    continue

                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                        interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                # List entry: relative path and class label 0.
                f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        # ---- positive and part samples ------------------------------------
        for box in boxes:
            # box is (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            # Ignore small faces (their ground-truth boxes may be inaccurate)
            # and boxes that start outside the image.
            if max(w, h) < 12 or x1 < 0 or y1 < 0:
                continue

            box_ = box.reshape(1, -1)  # single-row form expected by IoU

            # IoU >= 0.65 -> positive, 0.4 <= IoU < 0.65 -> part, else dropped.
            for _ in range(CANDIDATES_PER_BOX):
                # Candidate side length in [0.8 * min(w, h), ceil(1.25 * max(w, h))).
                size = npr.randint(int(min(w, h) * 0.8),
                                   int(np.ceil(1.25 * max(w, h))))
                if size <= 0:
                    # Degenerate boxes can produce an empty crop; it would
                    # also make the normalization below divide by zero.
                    continue

                # Random shift of the crop center relative to the box center.
                delta_x = _rand_offset(w)
                delta_y = _rand_offset(h)

                # Shifted crop, clamped at the top/left image border.
                nx1 = max(x1 + w / 2 + delta_x - size // 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size // 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size

                # Discard crops that stick out of the image.
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # Evaluate the overlap once and classify the candidate.
                iou = IoU(crop_box, box_)
                if iou >= 0.65:
                    is_positive = True
                elif iou >= 0.4:
                    is_positive = False
                else:
                    continue

                # Bbox regression targets, from x1 = nx1 + size * offset_x1 etc.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx1) / float(size)
                offset_y2 = (y2 - ny1) / float(size)

                # Landmarks expressed relative to the crop, normalized by its size.
                pts = backupPts[:]
                for k in range(len(pts) // 2):
                    pts[k * 2] = (pts[k * 2] - nx1) / float(size)
                    pts[k * 2 + 1] = (pts[k * 2 + 1] - ny1) / float(size)

                targets = ' %f %f %f %f' % (offset_x1, offset_y1, offset_x2, offset_y2)
                targets += "".join(" %f" % p for p in pts)

                cropped_im = img[int(ny1): int(ny2), int(nx1): int(nx2), :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                        interpolation=cv2.INTER_LINEAR)

                if is_positive:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    # List entry: path, label 1, bbox offsets, landmarks.
                    f1.write(str(stdsize) + "/positive/%s" % p_idx + ' 1' + targets + "\n")
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                else:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    # List entry: path, label -1, bbox offsets, landmarks.
                    f3.write(str(stdsize) + "/part/%s" % d_idx + ' -1' + targets + "\n")
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))