Recognition of MNIST handwritten digits

I recently wanted to implement some networks by hand, so I picked the MNIST dataset, found a few classic architectures, and implemented them myself.



Foreword

For this dataset, the input images are 28*28. The input size expected by the chosen network may differ from the MNIST image size, in which case some simple operations are needed to adjust the input.
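
For example, here is a minimal sketch (my own addition, not part of the training script below) of adjusting MNIST images to a 32*32 input with torchvision transforms, either by padding or by resizing:

from torchvision import transforms

# Pad the 28*28 images with 2 pixels on each side (28*28 -> 32*32) ...
pad_to_32 = transforms.Compose([
    transforms.Pad(2),
    transforms.ToTensor()
])
# ... or resize them directly.
resize_to_32 = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])
# Pass one of these as transform= when building the MNIST dataset.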


1. Dataset operation

The code is as follows (example):

import torch
from torch import nn
import matplotlib.pyplot as plt
import torchvision
from torchvision.transforms import ToTensor
import torch.nn.functional as F
from torch.utils.data import DataLoader


# Dataset-related operations
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=ToTensor(),
                                           download=True
                                           )
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=ToTensor(),
                                          download=True
                                          )

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)
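
As a quick sanity check (my own sketch, assuming the DataLoaders defined above), you can pull one batch and display a sample with matplotlib:

images, labels = next(iter(train_dataloader))
print(images.shape, labels.shape)              # torch.Size([32, 1, 28, 28]) torch.Size([32])
plt.imshow(images[0].squeeze(), cmap='gray')   # show the first digit in the batch
plt.title(str(labels[0].item()))
plt.show()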

2. Construct the network

1. VGG16_net network

The code is as follows (example):

class VGG16_net(nn.Module):
    def __init__(self):
        super(VGG16_net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=3),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 16*16*64
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 8*8*128
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 1*1*512
        )
        self.conv = nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 512)
        x = self.fc(x)
        return x
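
Because the first convolution uses padding=3, a 28*28 MNIST image is effectively enlarged to 32*32, so after the five 2*2 poolings the feature map is 1*1*512. A quick shape check with a dummy input (my own sketch, for illustration only):

net = VGG16_net()
dummy = torch.randn(1, 1, 28, 28)   # one fake MNIST image
print(net(dummy).shape)             # expected: torch.Size([1, 10])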

2. LeNet5 network

The code is as follows (example):

class LeNets(nn.Module):
    def __init__(self):
        super(LeNets,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(16*5*5, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 16*5*5)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x
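
The same kind of check works for this network, and it is also instructive to compare parameter counts (again, my own sketch, for illustration only):

lenet = LeNets()
print(lenet(torch.randn(1, 1, 28, 28)).shape)            # expected: torch.Size([1, 10])
print(sum(p.numel() for p in LeNets().parameters()))     # on the order of tens of thousands of parameters
print(sum(p.numel() for p in VGG16_net().parameters()))  # millions of parameters, much slower to train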

3. Training and testing function

def fit(epoch, train_dataloader, test_dataloader, model, loss_fn, optimizer):
    correct, total = 0.0, 0.0
    running_loss = 0.0
    model.train()
    for x, y in train_dataloader:
        if torch.cuda.is_available():       # move the batch to the GPU if one is available
            x, y = x.to('cuda'), y.to('cuda')
        y_pred = model(x)                   # forward/backward runs the same on CPU or GPU
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim=1)
            correct += (y_pred == y).sum().item()
            total += y.size(0)
            running_loss += loss.item()

    epoch_train_loss = running_loss / len(train_dataloader)
    epoch_train_acc = correct / total

    test_correct, test_total = 0.0, 0.0
    test_running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in test_dataloader:
            if torch.cuda.is_available():
                x, y = x.to('cuda'), y.to('cuda')
            y_preds = model(x)
            loss = loss_fn(y_preds, y)
            y_preds = torch.argmax(y_preds, dim=1)
            test_correct += (y_preds == y).sum().item()
            test_total += y.size(0)
            test_running_loss += loss.item()

    epoch_test_loss = test_running_loss / len(test_dataloader)
    epoch_test_acc = test_correct / test_total

    print('epoch: ', epoch,
          'train_loss: ', round(epoch_train_loss, 3),
          'train_acc: ', round(epoch_train_acc, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_acc: ', round(epoch_test_acc, 3)
          )
    return epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc
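
A minimal usage sketch (assuming the DataLoaders and networks above; the complete training loop is given in the next section):

model = LeNets()
if torch.cuda.is_available():
    model = model.to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
fit(1, train_dataloader, test_dataloader, model, loss_fn, optimizer)   # run a single epoch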


4. Complete code

# Provides several networks: LeNet, LeNets, VGG16_net, LeNet5
# Note the differences in how these networks handle internal details
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# from shenjing import VGG16_net                            # import the VGG16_net class from shenjing.py
from torch.optim import lr_scheduler                        # learning rate decay strategy
import datetime

# Dataset-related operations
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=ToTensor(),
                                           download=True
                                           )

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=ToTensor(),
                                          download=True
                                          )


train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)


class VGG16_net(nn.Module):
    def __init__(self):
        super(VGG16_net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=3),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 16*16*64
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 8*8*128
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 1*1*512
        )
        self.conv = nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 512)
        x = self.fc(x)
        return x


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear_1 = nn.Linear(28*28, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = x.view(-1, 1*28*28)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x

# Define the LeNet network; an activation follows each convolutional and fully connected layer, except the final fully connected layer
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)  # convolutional layer
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)                        # average pooling layer, downsampling
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c5 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)   # convolutional layer
        self.flatten = nn.Flatten()                                            # flatten
        self.linear_1 = nn.Linear(120, 84)                                     # fully connected layer
        self.linear_2 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = F.relu(self.c5(x))
        x = self.flatten(x)
        x = F.relu(self.linear_1(x))
        x = self.linear_2(x)
        return x


# Same effect as the LeNet above; only the flatten operation differs
class LeNets(nn.Module):
    def __init__(self):
        super(LeNets,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(16*5*5, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 16*5*5)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x


class Net_5(nn.Module):
    def __init__(self):
        super(Net_5,self).__init__()
        self.conv_1 = nn.Conv2d(1, 6, 5)             # convolutional layer  24*24*6
        self.pool_2 = nn.MaxPool2d((2, 2))           # pooling layer  12*12*6
        self.conv_3 = nn.Conv2d(6, 16, 5)            # convolutional layer  8*8*16
        self.linear_4 = nn.Linear(16*8*8, 256)
        self.linear_5 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv_1(x))
        x = self.pool_2(x)
        x = F.relu(self.conv_3(x))
        # print(x.size())                              # torch.Size([32, 16, 8, 8]) batch: 32, channels: 16, size: 8*8
        x = x.view(-1, 1024)
        x = F.relu(self.linear_4(x))
        x = self.linear_5(x)
        return x


class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(400, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 400)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x


# Device setup and model initialization
model = VGG16_net()
if torch.cuda.is_available():
    model.to('cuda')

def printlog(info):
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "=========="*8 + "%s"%nowtime)
    print(str(info)+"\n")

# Initialize the optimizer and loss function
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()


# Define the training/testing function
def fit(epoch, train_dataloader, test_dataloader, model, loss_fn, optimizer):
    correct, total = 0.0, 0.0
    running_loss = 0.0
    model.train()
    for x, y in train_dataloader:
        if torch.cuda.is_available():       # move the batch to the GPU if one is available
            x, y = x.to('cuda'), y.to('cuda')
        y_pred = model(x)                   # forward/backward runs the same on CPU or GPU
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim=1)
            correct += (y_pred == y).sum().item()
            total += y.size(0)
            running_loss += loss.item()

    epoch_train_loss = running_loss / len(train_dataloader)
    epoch_train_acc = correct / total

    test_correct, test_total = 0.0, 0.0
    test_running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in test_dataloader:
            if torch.cuda.is_available():
                x, y = x.to('cuda'), y.to('cuda')
            y_preds = model(x)
            loss = loss_fn(y_preds, y)
            y_preds = torch.argmax(y_preds, dim=1)
            test_correct += (y_preds == y).sum().item()
            test_total += y.size(0)
            test_running_loss += loss.item()

    epoch_test_loss = test_running_loss / len(test_dataloader)
    epoch_test_acc = test_correct / test_total

    print('epoch: ', epoch,
          'train_loss: ', round(epoch_train_loss, 3),
          'train_acc: ', round(epoch_train_acc, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_acc: ', round(epoch_test_acc, 3)
          )
    return epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc


# Training loop: epochs = 30; each epoch runs over the entire dataset once
epochs = 30
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(1, epochs+1):
    printlog("Epoch: {0} / {1}".format(epoch, epochs))
    epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc = fit(epoch,
                                                                             train_dataloader,
                                                                             test_dataloader,
                                                                             model,
                                                                             loss_fn,
                                                                             optimizer
                                                                             )
    train_loss.append(epoch_train_loss)
    train_acc.append(epoch_train_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)

print('done')
plt.plot(range(1, epochs+1), train_loss, label='train_loss')
plt.plot(range(1, epochs+1), test_loss, label='test_loss')
plt.plot(range(1, epochs+1), train_acc, label='train_acc')
plt.plot(range(1, epochs+1), test_acc, label='test_acc')
plt.legend()
plt.show()
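
Note that lr_scheduler is imported above but never actually used. If you want learning rate decay, one possible way to wire it in (a sketch with illustrative values, not part of the original code) is:

scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)   # decay lr by 10x every 10 epochs (illustrative)
for epoch in range(1, epochs + 1):
    fit(epoch, train_dataloader, test_dataloader, model, loss_fn, optimizer)
    scheduler.step()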

5. Visualization

(Figure: training/test loss and accuracy curves over the 30 epochs, as plotted by the code above.)

Summary

The above gives two different neural network models. To switch between them, only the model instantiation statement needs to change, i.e. swap the network in the model = VGG16_net() line. The training/testing function still needs further optimization.
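
For example, to switch from VGG16_net to LeNet5, only the instantiation line changes (a sketch based on the code above):

model = LeNet5()                 # instead of model = VGG16_net()
if torch.cuda.is_available():
    model.to('cuda')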

Origin blog.csdn.net/m0_60890175/article/details/127485770