PyTorch学习系列之四:卷积神经网络实现MNIST数据集手写数字识别(导出为ONNX模型)

=== 全部的逻辑及解释都在代码及其注释之中,请仔细看注释!!!===



import torch as t
from torch.utils.data import DataLoader
import torchvision as tv
import cv2 as cv
import numpy as np
import os

# NOTE: torchvision datasets yield PIL images.
#   * transforms.ToTensor() turns them into tensors and rescales the gray
#     levels from [0, 255] to [0, 1].
#   * transforms.Normalize((0.5,), (0.5,)) then maps [0, 1] onto [-1, 1]
#     (a single mean/std value is given, matching the one-channel input the
#     network below expects).
transform = tv.transforms.Compose(
    [
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Both splits are downloaded into ./data when they are not already present.
# The training split holds 60000 images.
train_ts = tv.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_ts = tv.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 32 samples for training, batches of 64 samples for testing.
train_dl = DataLoader(
    train_ts,
    batch_size=32,
    shuffle=True,
    drop_last=False,
)
test_dl = DataLoader(
    test_ts,
    batch_size=64,
    shuffle=True,
    drop_last=False,
)

class CNN_Mnist(t.nn.Module):
    """Small convolutional classifier for single-channel 28x28 digit images.

    Two (convolution -> max-pooling -> ReLU) stages are followed by three
    fully connected layers. The last layer is a LogSoftmax, so ``forward``
    returns log-probabilities over the 10 classes.

    Parameter shapes as listed by ``state_dict()``::

        cnn_layers.0.weight  torch.Size([8, 1, 3, 3])    # n c h w
        cnn_layers.0.bias    torch.Size([8])
        cnn_layers.3.weight  torch.Size([32, 8, 3, 3])
        cnn_layers.3.bias    torch.Size([32])
        fc_layers.0.weight   torch.Size([200, 1568])     # 7 * 7 * 32
        fc_layers.0.bias     torch.Size([200])
        fc_layers.2.weight   torch.Size([100, 200])
        fc_layers.2.bias     torch.Size([100])
        fc_layers.4.weight   torch.Size([10, 100])
        fc_layers.4.bias     torch.Size([10])
    """

    def __init__(self):
        super().__init__()
        nn = t.nn

        # Convolutional part: each stage is convolution + pooling + activation.
        # The 3x3 convolutions use padding=1 / stride=1; each 2x2 pooling
        # halves the spatial size (28 -> 14 -> 7).
        conv_stages = [
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
        ]
        self.cnn_layers = nn.Sequential(*conv_stages)

        # Fully connected part: 7*7*32 -> 200 -> 100 -> 10. The input width is
        # 7x7 spatial positions times the 32 channels of the last convolution.
        dense_stages = [
            nn.Linear(7 * 7 * 32, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
            nn.LogSoftmax(dim=1),  # normalise across the class dimension
        ]
        self.fc_layers = nn.Sequential(*dense_stages)

    def forward(self, x):
        """Run the forward pass and return per-class log-probabilities."""
        feature_maps = self.cnn_layers(x)
        # Collapse channels and spatial positions into one row per sample.
        flattened = feature_maps.view(-1, 7 * 7 * 32)
        return self.fc_layers(flattened)

# Network instance used for training and testing. It is placed on the GPU
# when CUDA is available and otherwise stays on the CPU, so that merely
# constructing the model does not raise on machines without CUDA.
model = CNN_Mnist().to(t.device("cuda" if t.cuda.is_available() else "cpu"))

def train_and_test():
    """Train the global ``model`` on the first run, evaluate it afterwards.

    When ``./cnn_mnist_model.pt`` does not exist, the model is trained for
    5 epochs on ``train_dl``, its ``state_dict`` is saved to that path and
    the network is exported to ``cnn_mnist.onnx``. When the file exists, the
    saved weights are loaded and the accuracy on ``test_dl`` is printed.
    """
    model_filepath = "./cnn_mnist_model.pt"
    # Follow whatever device the model currently lives on instead of
    # hard-coding CUDA, so batches always end up next to the weights.
    device = next(model.parameters()).device

    if not os.path.exists(model_filepath):
        print("找不到到模型文件:%s,需要训练..." % model_filepath)
        print("Model's state_dict:")
        for param_name, param_value in model.state_dict().items():
            print(param_name, "\t", param_value.size())

        # The network already ends in LogSoftmax, so NLLLoss is the matching
        # criterion. CrossEntropyLoss would apply log-softmax a second time
        # (mathematically a no-op on log-probabilities, but redundant).
        criterion = t.nn.NLLLoss()
        optimizer = t.optim.Adam(model.parameters(), lr=1e-3)

        model.train()
        for s in range(5):
            print("run in epoch : %d" % (s + 1))
            # 60000 training images / batch size 32 = 1875 batches,
            # so i runs from 0 to 1874.
            for i, (x_train, y_train) in enumerate(train_dl):
                x_train = x_train.to(device)
                y_train = y_train.to(device)
                # Call the module (not .forward) so registered hooks run.
                y_pred = model(x_train)
                train_loss = criterion(y_pred, y_train)
                if (i + 1) % 100 == 0:
                    print(i + 1, train_loss.item())
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()
        t.save(model.state_dict(), model_filepath)

        # Export to ONNX. The model is moved to the CPU first so that it sits
        # on the same device as the CPU dummy input used for tracing.
        dummy_input1 = t.randn(1, 1, 28, 28)
        model.cpu()
        model.eval()
        t.onnx.export(model, (dummy_input1), "cnn_mnist.onnx", verbose=True)
        # Put the model back where it was so a later call in the same process
        # does not hit a device mismatch.
        model.to(device)
    else:
        # The model file already exists: just evaluate on the test set.
        print("找到模型文件:%s,直接在测试集上测试..." % model_filepath)
        # map_location lets weights saved on one device load onto another.
        model.load_state_dict(t.load(model_filepath, map_location=device))
        model.eval()
        total = 0
        correct_count = 0
        # No gradients are needed for evaluation; skipping them saves memory.
        with t.no_grad():
            for test_images, test_labels in test_dl:
                test_images = test_images.to(device)
                test_labels = test_labels.to(device)
                predicted = model(test_images).argmax(dim=1)
                # .item() keeps the running count a plain Python int, so the
                # division below is ordinary float division.
                correct_count += (predicted == test_labels).sum().item()
                total += test_labels.size(0)
        accuracy = correct_count / total if total else 0.0
        print("total acc : %.2f\n" % accuracy)


# Script entry point: run the train-or-evaluate routine only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    train_and_test()

Guess you like

Origin blog.csdn.net/thequitesunshine007/article/details/118463341