# === All of the logic and its explanation live in the code and its comments; please read the comments carefully! ===
import torch as t
from torch.utils.data import DataLoader
import torchvision as tv
import cv2 as cv
import numpy as np
import os
# NOTE on preprocessing:
# - transforms.ToTensor() rescales gray levels from [0, 255] to [0, 1].
# - transforms.Normalize((0.5,), (0.5,)) then computes (x - 0.5) / 0.5, which
#   maps [0, 1] onto [-1, 1]. One-element mean/std tuples are used, i.e. a
#   single channel.
transform = tv.transforms.Compose(
    [
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# The datasets are downloaded into ./data when they are not already present.
# Training split (60,000 images).
train_ts = tv.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
# Test split.
test_ts = tv.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Training batches hold 32 samples, test batches hold 64; both loaders
# shuffle and keep the final (possibly smaller) batch.
train_dl = DataLoader(dataset=train_ts, batch_size=32, shuffle=True, drop_last=False)
test_dl = DataLoader(dataset=test_ts, batch_size=64, shuffle=True, drop_last=False)
'''Parameter shapes of each layer of the model:
Model's state_dict:
cnn_layers.0.weight torch.Size([8, 1, 3, 3]) # (out_channels, in_channels, kernel_h, kernel_w)
cnn_layers.0.bias torch.Size([8])
cnn_layers.3.weight torch.Size([32, 8, 3, 3])
cnn_layers.3.bias torch.Size([32])
fc_layers.0.weight torch.Size([200, 1568]) # 1568 = 7 * 7 * 32
fc_layers.0.bias torch.Size([200])
fc_layers.2.weight torch.Size([100, 200])
fc_layers.2.bias torch.Size([100])
fc_layers.4.weight torch.Size([10, 100])
fc_layers.4.bias torch.Size([10])
'''
class CNN_Mnist(t.nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two (convolution -> max-pool -> ReLU) stages are followed by three fully
    connected layers. The last layer is ``LogSoftmax``, so ``forward`` returns
    log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stage (convolution + pooling + activation).
        # The 3x3 convolutions use padding=1 and stride=1, so they keep the
        # spatial size; each 2x2 / stride-2 pooling halves it:
        # 28x28 -> 14x14 -> 7x7.
        self.cnn_layers = t.nn.Sequential(
            # First convolution: 1 -> 8 channels.
            t.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1, stride=1),
            # First pooling.
            t.nn.MaxPool2d(kernel_size=2, stride=2),
            # Activation.
            t.nn.ReLU(),
            # Second convolution: 8 -> 32 channels.
            t.nn.Conv2d(in_channels=8, out_channels=32, kernel_size=3, padding=1, stride=1),
            # Second pooling.
            t.nn.MaxPool2d(kernel_size=2, stride=2),
            # Activation.
            t.nn.ReLU(),
        )
        # Fully connected stage.
        self.fc_layers = t.nn.Sequential(
            # First FC layer: 7 * 7 * 32 inputs (7x7 feature map with 32
            # channels after the two poolings), 200 outputs.
            t.nn.Linear(7 * 7 * 32, 200),
            t.nn.ReLU(),
            # Second FC layer: 200 inputs, 100 outputs.
            t.nn.Linear(200, 100),
            t.nn.ReLU(),
            # Third FC layer: 100 inputs, 10 outputs (one per class).
            t.nn.Linear(100, 10),
            # Log-softmax over dim 1, i.e. across the 10 class scores.
            t.nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: input batch; the layer sizes above expect shape
                ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding per-class
            log-probabilities.

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first fully connected layer (e.g. the input is not 28x28).
        """
        out = self.cnn_layers(x)
        # Flatten each sample into one row vector while keeping the batch
        # dimension fixed. ``view(-1, 7 * 7 * 32)`` would instead silently
        # spread a wrongly sized input over extra batch rows.
        out = t.flatten(out, start_dim=1)
        return self.fc_layers(out)
# Training and testing: the network is built once at import time and is meant
# to run on the GPU. Fall back to the CPU when CUDA is unavailable so that
# merely importing this module does not raise on a machine without a GPU.
model = CNN_Mnist().to("cuda" if t.cuda.is_available() else "cpu")
def train_and_test():
    """Train the global ``model`` when no checkpoint exists, else evaluate it.

    If ``./cnn_mnist_model.pt`` is missing, the model is trained for 5 epochs
    on ``train_dl`` with Adam (lr=1e-3), its ``state_dict`` is saved to that
    path, and the model is moved to the CPU and exported to
    ``cnn_mnist.onnx``. If the checkpoint exists, it is loaded and the
    accuracy on ``test_dl`` is printed.
    """
    model_filepath = "./cnn_mnist_model.pt"
    # Use whichever device the global model currently lives on instead of
    # hard-coding CUDA, so batches always end up next to the weights.
    device = next(model.parameters()).device

    if not os.path.exists(model_filepath):
        print("找不到到模型文件:%s,需要训练..." % model_filepath)
        print("Model's state_dict:")
        for param_name, param_tensor in model.state_dict().items():
            print(param_name, "\t", param_tensor.size())

        # CrossEntropyLoss applies log-softmax itself; the model already ends
        # in LogSoftmax, and log-softmax of log-probabilities leaves them
        # unchanged, so the loss value is unaffected.
        loss = t.nn.CrossEntropyLoss()
        optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
        for s in range(5):
            print("run in epoch : %d" % (s + 1))
            # 60,000 training images / batch size 32 = 1875 batches per
            # epoch, so i runs from 0 to 1874.
            for i, (x_train, y_train) in enumerate(train_dl):
                x_train = x_train.to(device)
                y_train = y_train.to(device)
                # Call the module rather than .forward() so hooks run.
                y_pred = model(x_train)
                train_loss = loss(y_pred, y_train)
                if (i + 1) % 100 == 0:
                    print(i + 1, train_loss.item())
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()
        t.save(model.state_dict(), model_filepath)

        # Export to ONNX; the model is moved to the CPU first so that it
        # matches the CPU dummy input used for tracing.
        dummy_input1 = t.randn(1, 1, 28, 28)
        model.cpu()
        t.onnx.export(model, (dummy_input1), "cnn_mnist.onnx", verbose=True)
    else:
        # The checkpoint already exists: evaluate on the test set directly.
        # NOTE(review): evaluation is kept in this branch only, because the
        # training branch leaves the model on the CPU - confirm this matches
        # the intended flow.
        print("找到模型文件:%s,直接在测试集上测试..." % model_filepath)
        model.load_state_dict(t.load(model_filepath, map_location=device))
        model.eval()  # switches dropout / batch-norm layers to inference mode
        total = 0
        correct_count = 0
        # No gradients are needed for evaluation.
        with t.no_grad():
            for test_images, test_labels in test_dl:
                pred_labels = model(test_images.to(device))
                predicted = pred_labels.argmax(dim=1)
                # .item() keeps the counter a plain Python int, so the final
                # ratio is ordinary float division instead of integer-tensor
                # division.
                correct_count += (predicted == test_labels.to(device)).sum().item()
                total += len(test_labels)
        print("total acc : %.2f\n" % (float(correct_count) / total))
if __name__ == "__main__":
    # Run the train-or-evaluate workflow only when this file is executed as a
    # script, not when it is imported as a module.
    train_and_test()