Using LeNet-5 to recognize MNIST handwritten digits

LeNet5

LeNet-5 convolutional neural network model
LeNet-5 is a convolutional neural network designed by Yann LeCun in 1998 for handwritten digit recognition. At that time, most banks in the United States used it to recognize handwritten digits on checks. It is one of the most representative experimental systems among early convolutional neural networks.

LeNet-5 has a total of 7 layers (not including the input layer), and each layer contains a different number of trainable parameters, as shown in the figure below.
Insert image description here
LeNet-5 mainly has 2 convolutional layers, 2 downsampling layers (pooling layers), and 3 fully connected layers.

Use LeNet5 to identify MNIST

Preliminary version:

import torch
import torchvision

import torch.nn as nn
from matplotlib import pyplot as plt

from torch.utils.data import DataLoader

# Plotting helper used for the loss and accuracy curves.
def plot_curve(data):
    """Plot ``data`` against its index as a blue line and display the figure.

    Args:
        data: Sequence of values to plot; the x axis is the position of each
            value in the sequence (0 .. len(data) - 1).
    """
    steps = range(len(data))
    # Open a fresh figure so successive calls do not draw on top of each other.
    plt.figure()
    plt.plot(steps, data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class LeNet(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages and three linear layers.

    The first convolution pads by 2, so a 1x28x28 input keeps its 28x28 size
    before pooling; after both stages the feature map is 16x5x5, which is the
    input width ``fc1`` is built for. The network outputs 10 raw scores per
    sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 6 channels, 5x5 kernel, stride 1, padding 2, 2x2 pool.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: 6 -> 16 channels, 5x5 kernel without padding, 2x2 pool.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=16 * 5 * 5, out_features=120),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
        )
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        features = self.conv2(self.conv1(x))
        # nn.Linear works on flat feature vectors, so collapse every
        # non-batch dimension into one before the fully connected layers.
        flat = features.view(features.size(0), -1)
        hidden = self.fc2(self.fc1(flat))
        return self.fc3(hidden)

# Hyperparameters.
epoch = 8
batch_size = 64
lr = 0.001
# Number of optimisation steps averaged into each logged loss value.
log_interval = 100

# MNIST train/test splits converted to tensors (downloaded into ./dataset
# when not already present).
traindata = torchvision.datasets.MNIST(
    root='./dataset', train=True,
    transform=torchvision.transforms.ToTensor(), download=True)
testdata = torchvision.datasets.MNIST(
    root='./dataset', train=False,
    transform=torchvision.transforms.ToTensor(), download=True)

trainloader = DataLoader(traindata, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testdata, batch_size=batch_size, shuffle=False)

net = LeNet().to(device)

loss_fn = nn.CrossEntropyLoss().to(device)

optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)

train_loss = []  # mean training loss of each logging window
accuracy = []    # test-set accuracy (fraction of correct predictions) per epoch
train_step = 0
# Accumulated across epoch boundaries and cleared only when a window is
# logged, so every logged value is the mean of exactly `log_interval` steps.
sum_loss = 0.0

# A separate loop variable keeps the `epoch` hyperparameter from being
# overwritten by the loop.
for epoch_idx in range(epoch):
    net.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_step += 1
        sum_loss += loss.item()
        if train_step % log_interval == 0:
            mean_loss = sum_loss / log_interval
            print("[epoch:{},轮次:{},sum_loss:{}]".format(epoch_idx + 1, train_step, mean_loss))
            train_loss.append(mean_loss)
            sum_loss = 0.0

    # Evaluate on the whole test set after every epoch.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            # .item() yields a plain Python int, so the stored accuracy is a
            # float that matplotlib can plot regardless of the device in use.
            correct += (predicted == labels).sum().item()
    epoch_accuracy = correct / total
    accuracy.append(epoch_accuracy)
    print("第{}个epoch的识别准确率为:{}".format(epoch_idx + 1, epoch_accuracy))

plot_curve(train_loss)
plot_curve(accuracy)

Running results: the recognition accuracy is quite good.
Insert image description here

The change of training loss value at each step:
Insert image description here
The recognition accuracy on the test set after each epoch:

Insert image description here

Optimize the code:

import torch
import torchvision

import torch.nn as nn
from matplotlib import pyplot as plt

from torch.utils.data import DataLoader

# Plotting helper used for the loss and accuracy curves.
def plot_curve(data):
    """Draw ``data`` as a blue line over its indices and show the figure.

    Args:
        data: Sequence of values to plot; each value is placed at its
            position in the sequence on the x axis.
    """
    x_positions = range(len(data))
    # Start a new figure so each call produces its own plot.
    plt.figure()
    plt.plot(x_positions, data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()

# Use the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# LeNet network definition.
class LeNet(nn.Module):
    """LeNet-style CNN packed into a single ``nn.Sequential`` (``self.model``).

    Two conv/ReLU/max-pool stages are followed by a flatten and three linear
    layers (400 -> 120 -> 84 -> 10). The output is 10 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            # MNIST images are 28x28; padding=2 makes the first convolution
            # see the 32x32 input size LeNet expects.
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.model = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        return self.model(x)

# Hyperparameters.
epoch = 8
batch_size = 64
lr = 0.001
# Number of optimisation steps averaged into each logged loss value.
log_interval = 100

# Import the datasets: MNIST train/test splits converted to tensors
# (downloaded into ./dataset when not already present).
traindata = torchvision.datasets.MNIST(
    root='./dataset', train=True,
    transform=torchvision.transforms.ToTensor(), download=True)
testdata = torchvision.datasets.MNIST(
    root='./dataset', train=False,
    transform=torchvision.transforms.ToTensor(), download=True)

test_size = len(testdata)

# Wrap the datasets in batch loaders.
trainloader = DataLoader(traindata, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testdata, batch_size=batch_size, shuffle=False)

net = LeNet().to(device)

loss_fn = nn.CrossEntropyLoss().to(device)

optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)

train_loss = []  # mean training loss of each logging window
# NOTE: despite its name this list holds test-set accuracy (fraction of
# correct predictions) per epoch; the name is kept for compatibility.
precision = []
train_step = 0
# Accumulated across epoch boundaries and cleared only when a window is
# logged, so every logged value is the mean of exactly `log_interval` steps.
sum_loss = 0.0

# A separate loop variable keeps the `epoch` hyperparameter from being
# overwritten by the loop.
for epoch_idx in range(epoch):
    net.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_step += 1
        sum_loss += loss.item()
        if train_step % log_interval == 0:
            mean_loss = sum_loss / log_interval
            print("[epoch:{},轮次:{},sum_loss:{}]".format(epoch_idx + 1, train_step, mean_loss))
            train_loss.append(mean_loss)
            sum_loss = 0.0

    # Evaluate on the whole test set after every epoch.
    net.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # .item() yields a plain Python int, so the accuracy below is a
            # float that can be plotted without moving tensors off the device.
            correct += (outputs.argmax(1) == labels).sum().item()
    accuracy = correct / test_size
    print("第{}个epoch的识别准确率为:{}".format(epoch_idx + 1, accuracy))
    precision.append(accuracy)

plot_curve(train_loss)
plot_curve(precision)

Running results:
Insert image description here

The change of training loss at each step:
Insert image description here
The accuracy on the test set after each epoch:
Insert image description here

Guess you like

Origin blog.csdn.net/qq_44243059/article/details/130109100