
MTCNN PyTorch Implementation

Implementing the MTCNN network architecture:

P-Net

A fully convolutional network.

Intermediate layers:

Convolution layers: 2D convolution, activation: PReLU

Pooling layer: max pooling

Confidence output: Sigmoid activation

Bounding-box regression and facial-landmark regression outputs: linear (no activation)

| P-Net   | in_shape         | in_channels | out_channels | kernel_size | stride | padding | out_shape        |
|---------|------------------|-------------|--------------|-------------|--------|---------|------------------|
| conv1   | [batch,3,12,12]  | 3           | 10           | 3           | 1      | 0       | [batch,10,10,10] |
| pool    | [batch,10,10,10] | 10          | 10           | 2           | 2      | 0       | [batch,10,5,5]   |
| conv2   | [batch,10,5,5]   | 10          | 16           | 3           | 1      | 0       | [batch,16,3,3]   |
| conv3   | [batch,16,3,3]   | 16          | 32           | 3           | 1      | 0       | [batch,32,1,1]   |
| conv4_1 | [batch,32,1,1]   | 32          | 2            | 1           | 1      | 0       | [batch,2,1,1]    |
| conv4_2 | [batch,32,1,1]   | 32          | 4            | 1           | 1      | 0       | [batch,4,1,1]    |
| conv4_3 | [batch,32,1,1]   | 32          | 10           | 1           | 1      | 0       | [batch,10,1,1]   |
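
The out_shape column can be checked against the usual convolution/pooling size formula, out = floor((in + 2*padding - kernel) / stride) + 1. A minimal sketch (the conv_out helper below is just for illustration, not part of the original code) that reproduces the 12 → 10 → 5 → 3 → 1 chain above:

import math

def conv_out(size, kernel, stride, padding=0):
    # out = floor((in + 2*padding - kernel) / stride) + 1
    return math.floor((size + 2 * padding - kernel) / stride) + 1

size = 12                    # P-Net input resolution
size = conv_out(size, 3, 1)  # conv1 -> 10
size = conv_out(size, 2, 2)  # pool  -> 5
size = conv_out(size, 3, 1)  # conv2 -> 3
size = conv_out(size, 3, 1)  # conv3 -> 1
print(size)                  # 1: the conv4_x heads then act on 1x1 feature maps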

R-Net

R-Net = convolution layers + fully connected layers

Intermediate layers:

Convolution layers: 2D convolution, activation: PReLU

Pooling layers: max pooling

Fully connected layer: PReLU activation

Confidence output: Sigmoid activation

Bounding-box regression and facial-landmark regression outputs: linear (no activation)

| conv  | in_shape         | in_channels | out_channels | kernel_size | stride | padding | out_shape        |
|-------|------------------|-------------|--------------|-------------|--------|---------|------------------|
| conv1 | [batch,3,24,24]  | 3           | 28           | 3           | 1      | 0       | [batch,28,22,22] |
| pool1 | [batch,28,22,22] | 28          | 28           | 3           | 2      | 1       | [batch,28,11,11] |
| conv2 | [batch,28,11,11] | 28          | 48           | 3           | 1      | 0       | [batch,48,9,9]   |
| pool2 | [batch,48,9,9]   | 48          | 48           | 3           | 2      | 0       | [batch,48,4,4]   |
| conv3 | [batch,48,4,4]   | 48          | 64           | 2           | 1      | 0       | [batch,64,3,3]   |

| line    | in_unit | out_unit |
|---------|---------|----------|
| line1   | 64*3*3  | 128      |
| line2_1 | 128     | 2        |
| line2_2 | 128     | 4        |
| line2_3 | 128     | 10       |

O-Net

O-Net = convolution layers + fully connected layers

Intermediate layers:

Convolution layers: 2D convolution, activation: PReLU

Pooling layers: max pooling

Fully connected layer: PReLU activation

Confidence output: Sigmoid activation

Bounding-box regression and facial-landmark regression outputs: linear (no activation)

| conv  | in_shape         | in_channels | out_channels | kernel_size | stride | padding | out_shape        |
|-------|------------------|-------------|--------------|-------------|--------|---------|------------------|
| conv1 | [batch,3,48,48]  | 3           | 32           | 3           | 1      | 0       | [batch,32,46,46] |
| pool1 | [batch,32,46,46] | 32          | 32           | 2           | 2      | 1       | [batch,32,24,24] |
| conv2 | [batch,32,24,24] | 32          | 64           | 3           | 1      | 0       | [batch,64,22,22] |
| pool2 | [batch,64,22,22] | 64          | 64           | 3           | 2      | 0       | [batch,64,10,10] |
| conv3 | [batch,64,10,10] | 64          | 64           | 3           | 1      | 0       | [batch,64,8,8]   |
| pool3 | [batch,64,8,8]   | 64          | 64           | 2           | 2      | 0       | [batch,64,4,4]   |
| conv4 | [batch,64,4,4]   | 64          | 128          | 2           | 1      | 0       | [batch,128,3,3]  |

| line    | in_unit | out_unit |
|---------|---------|----------|
| line1   | 128*3*3 | 256      |
| line2_1 | 256     | 2        |
| line2_2 | 256     | 4        |
| line2_3 | 256     | 10       |

Code implementation:

import torch
import torch.nn as nn
import torch.nn.functional as F


class PNet(nn.Module):

    def __init__(self):
        super(PNet, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 10, kernel_size=3, stride=1),  # conv1
            nn.PReLU(),  
            nn.MaxPool2d(kernel_size=2, stride=2),  # pool1
            nn.Conv2d(10, 16, kernel_size=3, stride=1),  # conv2
            nn.PReLU(),  
            nn.Conv2d(16, 32, kernel_size=3, stride=1),  # conv3
            nn.PReLU()  
        )

        # 1x1 convolution heads: face confidence (2 classes), box offsets, landmark offsets
        self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1, stride=1)
        self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)
        self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1)

    def forward(self, x):

        x = self.conv_layer(x)

        cond = torch.sigmoid(self.conv4_1(x))  # face confidence
        box_offset = self.conv4_2(x)           # bounding-box regression (linear output)
        land_offset = self.conv4_3(x)          # landmark regression (linear output)

        return cond, box_offset, land_offset


class RNet(nn.Module):
    def __init__(self):
        super(RNet, self).__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 28, kernel_size=3, stride=1),  # conv1
            nn.PReLU(), 
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # pool1
            nn.Conv2d(28, 48, kernel_size=3, stride=1),  # conv2
            nn.PReLU(),  
            nn.MaxPool2d(kernel_size=3, stride=2),  # pool2
            nn.Conv2d(48, 64, kernel_size=2, stride=1),  # conv3
            nn.PReLU()  

        )
        self.line1 = nn.Sequential(
            nn.Linear(64 * 3 * 3, 128),
            nn.PReLU()
        )

        self.line2_1 = nn.Linear(128, 2)   # face confidence (2 classes)
        self.line2_2 = nn.Linear(128, 4)   # bounding-box offsets
        self.line2_3 = nn.Linear(128, 10)  # landmark offsets

    def forward(self, x):

        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # flatten to [batch, 64*3*3]
        x = self.line1(x)

        label = torch.sigmoid(self.line2_1(x))  # face confidence
        box_offset = self.line2_2(x)            # bounding-box regression (linear output)
        land_offset = self.line2_3(x)           # landmark regression (linear output)

        return label, box_offset, land_offset


class ONet(nn.Module):
    def __init__(self):

        super(ONet, self).__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1),  # conv1
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),  # pool1
            nn.Conv2d(32, 64, kernel_size=3, stride=1),  # conv2
            nn.PReLU(), 
            nn.MaxPool2d(kernel_size=3, stride=2),  # pool2
            nn.Conv2d(64, 64, kernel_size=3, stride=1),  # conv3
            nn.PReLU(),  
            nn.MaxPool2d(kernel_size=2, stride=2),  # pool3
            nn.Conv2d(64, 128, kernel_size=2, stride=1),  # conv4
            nn.PReLU()  
        )
        self.line1 = nn.Sequential(
            nn.Linear(128 * 3 * 3, 256),
            nn.PReLU() 
        )

        self.line2_1 = nn.Linear(256, 2)   # face confidence (2 classes)
        self.line2_2 = nn.Linear(256, 4)   # bounding-box offsets
        self.line2_3 = nn.Linear(256, 10)  # landmark offsets

    def forward(self, x):

        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # flatten to [batch, 128*3*3]
        x = self.line1(x)

        label = torch.sigmoid(self.line2_1(x))  # face confidence
        box_offset = self.line2_2(x)            # bounding-box regression (linear output)
        land_offset = self.line2_3(x)           # landmark regression (linear output)

        return label, box_offset, land_offset
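
A quick smoke test of the three classes above (appended to the same module, so torch is already imported) feeds dummy batches at the canonical 12/24/48 input sizes and prints the output shapes; passing P-Net a larger image also shows that, being fully convolutional, it returns dense output maps instead of a single prediction. This is only a usage sketch of the classes defined above:

if __name__ == '__main__':
    p_net, r_net, o_net = PNet(), RNet(), ONet()

    cond, box, land = p_net(torch.randn(2, 3, 12, 12))
    print(cond.shape, box.shape, land.shape)   # [2, 2, 1, 1] [2, 4, 1, 1] [2, 10, 1, 1]

    label, box, land = r_net(torch.randn(2, 3, 24, 24))
    print(label.shape, box.shape, land.shape)  # [2, 2] [2, 4] [2, 10]

    label, box, land = o_net(torch.randn(2, 3, 48, 48))
    print(label.shape, box.shape, land.shape)  # [2, 2] [2, 4] [2, 10]

    # P-Net is fully convolutional: a larger input yields one output per 12x12 window
    cond, box, land = p_net(torch.randn(1, 3, 100, 100))
    print(cond.shape)                          # [1, 2, 45, 45]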

Training the MTCNN networks

import torch
from torch.utils import data 
from MTCNN_Pytorch import simpling  # import the dataset class
import numpy as np
import os

class Trainer:
    """
    训练网络
    """
    def __init__(self, train_net, batch_size, data_path, save_model_path, lr=0.001, isCuda=True):
        """
        初始化类
        :param train_net: net
        :param batch_size: 批次大小
        :param data_path: 训练集地址
        :param isCuda: 是否使用CUDA,默认:True
        :param lr: 学习率 默认:0.001
        :param save_model_path: 保存模型地址
        """

        self.model = train_net
        self.data_path = data_path
        self.batch_size = batch_size
        self.lr = lr
        self.isCuda = isCuda
        self.save_path = save_model_path

        if os.path.exists(self.save_path):  # resume from a previously saved model if one exists
            self.model = torch.load(self.save_path)

        if self.isCuda:
            self.model.cuda()

        self.face_loss = torch.nn.BCELoss()
        self.offset_loss = torch.nn.MSELoss()

        self.opt = torch.optim.Adam(params=self.model.parameters(), lr=self.lr)

        self.train_net()  # start training

    def train_net(self):
        epoch = 1  # iteration counter
        IMG_DATA = simpling.FaceDataset(self.data_path)  # load the dataset
        for _ in range(10000):  # iterate over the whole dataset 10000 times
            train_data = data.DataLoader(IMG_DATA, batch_size=self.batch_size, shuffle=True, num_workers=4)
            for train in train_data:
                # unpack one batch
                # img_data : [512, 3, 24, 24]
                # label : [512, 1]
                # offset : [512, 4]
                img_data, label, box_offset, land_offset = train

                if self.isCuda:
                    img_data = img_data.cuda()
                    box_offset = box_offset.cuda()
                    land_offset = land_offset.cuda()

                # network outputs
                # P-Net:
                # face_out : [512, 2, 1, 1]
                # box_offset_out : [512, 4, 1, 1]
                # land_offset_out : [512, 10, 1, 1]
                # R-Net / O-Net:
                # face_out : [512, 2]
                # box_offset_out : [512, 4]
                # land_offset_out : [512, 10]
                face_out, box_offset_out, land_offset_out = self.model(img_data)

                # squeeze away the 1x1 spatial dims (P-Net): [512, 2, 1, 1] => [512, 2]
                face_out = face_out.squeeze()
                box_offset_out = box_offset_out.squeeze()
                land_offset_out = land_offset_out.squeeze()

                # labels 0 and 1 contribute to the face classification loss
                one = torch.ne(label, 2)  # one : torch.Size([512, 1])
                one = one.squeeze()  # one : torch.Size([512]), boolean mask

                # labels 1 and 2 contribute to the box and landmark regression losses
                two = torch.ne(label, 0)  # two : [512, 1]
                two = two.squeeze()  # two : [512]

                # convert the labels to one-hot encoding
                label_10 = label[one]  # [batch, 1]
                label_10 = torch.Tensor([self.one_hot(int(i)) for i in label_10.squeeze().numpy()])  # [batch, 2]

                # face classification loss and offset regression losses
                face_loss = self.face_loss(face_out[one], label_10.cuda())
                box_offset_loss = self.offset_loss(box_offset_out[two], box_offset[two])
                land_offset_loss = self.offset_loss(land_offset_out[two], land_offset[two])
                # total loss
                self.loss = face_loss + box_offset_loss + land_offset_loss
                # optimize
                self.opt.zero_grad()
                self.loss.backward()
                self.opt.step()
                # every 100 iterations, print the loss and save the model
                epoch += 1
                if epoch % 100 == 0:
                    print('Epoch:', epoch, ' Loss:', self.loss.cpu().item())
                    torch.save(self.model, self.save_path)

    def one_hot(self, data):
        """
        one_hot编码
        :param data:一个值,
        :return: one_hot编码后的值
        """
        hot = np.zeros([2])
        hot[data] = 1
        return hot


if __name__ == '__main__':
    pass
    # p_train = Trainer(nets.PNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\12", r'.\log_P_train.pt')
    # r_train = Trainer(nets.RNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\24", r'.\log_R_train.pt')
    # o_train = Trainer(nets.ONet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\48", r'.\log_O_train.pt')
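
The masks built with torch.ne above rely on the sample labels produced by the (not shown) simpling.FaceDataset, which appear to follow the usual MTCNN convention: 0 for negative, 1 for positive and 2 for part samples. Labels 0/1 feed the face classification loss, labels 1/2 feed the box and landmark regression losses. A toy example of the masking, assuming that convention:

import torch

label = torch.tensor([[0], [1], [2], [1]])  # negative, positive, part, positive

cls_mask = torch.ne(label, 2).squeeze()     # tensor([ True,  True, False,  True])
reg_mask = torch.ne(label, 0).squeeze()     # tensor([False,  True,  True,  True])

face_out = torch.rand(4, 2)                 # fake network outputs
box_out = torch.rand(4, 4)

print(face_out[cls_mask].shape)             # torch.Size([3, 2]) -> classification loss
print(box_out[reg_mask].shape)              # torch.Size([3, 4]) -> regression loss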

Training P-Net

from MTCNN_Pytorch import train, nets

if __name__ == '__main__':
    train.Trainer(nets.PNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\12", r'.\log_P_trian')

Training R-Net

from MTCNN_Pytorch import train, nets

if __name__ == '__main__':
    train.Trainer(nets.RNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\24", r'.\log_R_trian')

Training O-Net

from MTCNN_Pytorch import train, nets

if __name__ == '__main__':
    train.Trainer(nets.ONet(), 512, r"C:\Users\Administrator\Desktop\train_data\48/", r'.\log_O_trian')

Testing the MTCNN network

import torch
from MTCNN_Pytorch import nets, util
from PIL import Image, ImageDraw
import numpy as np
import os


# Only a single image can be scanned at a time
class Test():
    def __init__(self, photo, net):
        self.photo = photo
        self.img, self.img_data, self.img_x, self.img_y = self.get_img_data(photo)

        self.model = net
        print(self.model)
        self.model = torch.load(r'C:\Users\Administrator\Desktop\myproject\MTCNN\log_P_trian')
        # self.model.eval()

        self.get_net_out()

    def get_img_data(self, image):  # 1. load the image data
        if isinstance(image, str) and os.path.exists(image):
            img = Image.open(image)
        else:
            img = image

        img_x, img_y = img.size
        # convert the image to a tensor
        img_data = torch.Tensor(np.array(img))
        # 2. normalize the pixel values
        img_data = img_data / 255 - 0.5
        img_data = img_data.unsqueeze(0)  # add a batch dimension
        # [1, H, W, 3] -> [1, 3, H, W]
        return img, img_data.permute(0, 3, 1, 2), img_x, img_y

    # 3. get the P-Net output
    def get_net_out(self):

        have_face = []  # collected class predictions
        box_face = []   # collected offsets (with grid indices appended)
        count = 1
        while True:  # image pyramid
            if self.img_size(self.img_x, self.img_y):

                face_out, offset, _ = self.model(self.img_data)  # P-Net output (landmarks unused here)
                # face_out.size() [1, 2, 145, 295]
                # offset.size() [1, 4, 145, 295]

                box_offset = self.add_index(offset)
                # reshape: [1, 2, 145, 295] ==> [42775, 2]
                face_out = face_out.view(-1, face_out.size(1))  # torch.Size([42775, 2])
                # reshape: [145, 295, 6] ==> [42775, 6]
                offset = box_offset.view(-1, box_offset.size(2))  # torch.Size([42775, 6])
                # index of the highest-scoring class at each position
                face_out = torch.argmax(face_out, 1)
                # store the outputs
                have_face.extend(face_out.detach().numpy())
                box_face.extend(offset.detach().numpy())
                # print(count, face_out.detach().numpy().shape, offset.detach().numpy().shape)
                count += 1  # one more pyramid level processed
                # shrink the image by a factor of 0.7
                self.img_x = int(self.img_x * 0.7)
                self.img_y = int(self.img_y * 0.7)
                img2 = self.img.resize((self.img_x, self.img_y))
                # reload the image data at the new scale
                self.img_data = self.get_img_data(img2)
                self.img_data = self.img_data[1]
                print(np.array(have_face).shape)

                one = torch.ne(torch.Tensor(have_face), 0)  # positions predicted as face (class != 0)
                print(one)
                have_face_box = torch.Tensor(box_face)[one]  # offsets at the face positions
                self.filter(have_face_box)
            else:
                break
        self.img.show()

    # 4. filter the overlapping boxes
    def filter(self, offset):
        # offset: [batch, 6] = four offsets + the grid index appended by add_index
        box = offset[:, :4] * 12  # scale the four offsets by the 12x12 detection window
        x = offset[:, 4] * 2      # grid index times P-Net's total stride of 2
        y = offset[:, 5] * 2
        box_x_l = box[:, 0] + x   # map the offsets back onto image coordinates
        box_y_l = box[:, 1] + y
        box_x_r = box[:, 2] + x
        box_y_r = box[:, 3] + y
        ########################### resulting coordinates
        draw = ImageDraw.Draw(self.img)
        draw.rectangle((x[1], y[1], x[1] + 12, y[1] + 12), width=3)

        # assemble [N, 4] boxes with columns (x1, y1, x2, y2)
        box = np.stack([box_x_l, box_y_l, box_x_r, box_y_r], axis=1)
        # print(box.max(1))
        area = (box_x_r - box_x_l) * (box_y_r - box_y_l)
        area_max_index = np.argmax(area)
        box_max = box[area_max_index]
        area_iou = util.iou(box_max, box)
        area_iou = area_iou.reshape((area.shape[0], 1))
        box_iou = np.hstack((box, area_iou))
        n = util.nms(box_iou)
        # print(n.shape)

        # draw the largest box that survived NMS
        area_max_index = np.argmax((n[:, 2] - n[:, 0]) * (n[:, 3] - n[:, 1]))
        box = n[area_max_index]
        draw.rectangle((box[0], box[1], box[2], box[3]), width=3)
        # print(np.array(area).max())
        # print(np.array(area).max())

    # check whether the scaled image is still large enough for a 12x12 window
    def img_size(self, img_x, img_y):
        if img_x < 12 or img_y < 12:  # stop the pyramid once either side is smaller than 12
            return False
        return True

    def add_index(self, offset):
        ######################### append each cell's grid index to its offsets
        # offset: [1, 4, 145, 295]
        # 1. drop the batch dimension, 2. move the channel axis last: [145, 295, 4]
        offset = offset.squeeze(0).permute(1, 2, 0)
        # print('shape after reordering:', offset.size())
        box_offset = []  # horizontally stack the grid index onto each cell's offsets
        for i, row in zip(np.arange(offset.size(0)), offset.detach().numpy()):  # iterate over rows
            idx = np.array([[i, j] for j in range(offset.size(1))])  # grid index of every column in this row
            box_offset.append(np.hstack([row, idx]))  # [295, 4] + [295, 2] => [295, 6]
        # box_offset: [145, 295, 6]
        box_offset = torch.Tensor(box_offset)
        # print('shape after stacking:', box_offset.size())
        return box_offset
        #########################

if __name__ == '__main__':
    Test(r'test_photo.jpg', nets.PNet())
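
The util module used by filter is not shown in this post. A minimal sketch of what its iou and nms functions might look like, assuming the common formulation (iou compares one [x1, y1, x2, y2] box against an [N, 4] array, and nms greedily suppresses [N, 5] rows whose last column is treated as a score):

import numpy as np

def iou(box, boxes):
    # IoU of one box [x1, y1, x2, y2] against an array of boxes [N, 4]
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    return inter / (box_area + areas - inter)

def nms(boxes, threshold=0.3):
    # boxes: [N, 5] rows of x1, y1, x2, y2, score; keep boxes greedily by descending score
    order = boxes[np.argsort(-boxes[:, 4])]
    keep = []
    while order.shape[0] > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        order = rest[iou(best[:4], rest[:, :4]) < threshold]
    return np.array(keep)

Note that filter above fills the fifth column with the IoU against the largest candidate box rather than a network confidence, so this sketch only mirrors how that column is consumed; it is not a claim about the original util implementation.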


Reprinted from blog.csdn.net/qq_34714751/article/details/85536917