LeNet5
LeNet-5 convolutional neural network model
LeNet-5 is a convolutional neural network designed by Yann LeCun in 1998 for handwritten digit recognition. At that time, most banks in the United States used it to recognize handwritten digits on checks. It is one of the most representative experimental systems among early convolutional neural networks.
LeNet-5 has a total of 7 layers (not including the input layer), and each layer contains a different number of trainable parameters, as shown in the figure below.
LeNet-5 mainly has 2 convolutional layers, 2 downsampling layers (pooling layers), and 3 fully connected layers.
Use LeNet5 to identify MNIST
Preliminary version:
import torch
import torchvision
import torch.nn as nn
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
# Small plotting helper, defined up front so the training script can use it.
def plot_curve(data):
    """Draw ``data`` against its index as a blue line and display the figure.

    The x coordinate of each point is its position in ``data`` (0, 1, 2, ...)
    and is labelled 'step'; the y axis is labelled 'value'.
    """
    plt.figure()  # open a new figure for this curve
    steps = range(len(data))
    plt.plot(steps, data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu")
class LeNet(nn.Module):
    """LeNet-5 style CNN: two conv/pool stages followed by three linear layers.

    The first linear layer expects 16*5*5 features per sample, which is what
    the two conv/pool stages produce for a single-channel 28x28 input.
    The network outputs 10 raw class scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 5x5 convolution (padding 2 keeps the spatial size), ReLU,
        # then 2x2 max pooling that halves height and width.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: unpadded 5x5 convolution, ReLU, 2x2 max pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=16 * 5 * 5, out_features=120),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
        )
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        features = self.conv2(self.conv1(x))
        # nn.Linear works on flat feature vectors, so collapse every dimension
        # except the batch dimension before the fully connected layers.
        batch = features.size()[0]
        flat = features.view(batch, -1)
        hidden = self.fc2(self.fc1(flat))
        return self.fc3(hidden)
# Hyperparameters. `epoch` is the number of full passes over the training set.
epoch = 8
batch_size = 64
lr = 0.001
# The mean training loss is reported once per this many optimizer steps.
log_interval = 100

# MNIST train / test splits, downloaded to ./dataset if not already present.
traindata = torchvision.datasets.MNIST(root='./dataset', train=True, transform=torchvision.transforms.ToTensor(), download=True)
testdata = torchvision.datasets.MNIST(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
trainloader = DataLoader(traindata, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testdata, batch_size=batch_size, shuffle=False)

net = LeNet().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)

train_loss = []  # mean loss of each completed `log_interval`-step window
accuracy = []    # test-set accuracy (fraction of correct predictions) per epoch
train_step = 0   # optimizer steps taken so far, counted across all epochs
# The running loss is deliberately NOT reset at epoch boundaries: `train_step`
# is global, so every reported window must cover exactly `log_interval` steps.
sum_loss = 0.0

# A separate loop variable keeps the `epoch` hyperparameter from being overwritten.
for epoch_idx in range(epoch):
    net.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_step += 1
        sum_loss += loss.item()
        # Report after every full window (steps 100, 200, ...), so the sum
        # divided by `log_interval` is a true mean.
        if train_step % log_interval == 0:
            mean_loss = sum_loss / log_interval
            print("[epoch:{},轮次:{},sum_loss:{}".format(epoch_idx + 1, train_step, mean_loss))
            train_loss.append(mean_loss)
            sum_loss = 0.0

    # Evaluate on the whole test set after each epoch, without gradients.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            # .item() turns the count into a Python int, so nothing stored for
            # plotting is a (possibly CUDA) tensor.
            correct += (predicted == labels).sum().item()
    epoch_accuracy = correct / total
    # Store the accuracy itself, not the raw number of correct predictions.
    accuracy.append(epoch_accuracy)
    print("第{}个epoch的识别准确率为:{}".format(epoch_idx + 1, epoch_accuracy))

plot_curve(train_loss)
plot_curve(accuracy)
Running results: the recognition accuracy is quite good.
The change of the training loss over the training steps:
The recognition accuracy on the test set after each epoch:
Optimize the code:
import torch
import torchvision
import torch.nn as nn
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
# Plotting utility used at the end of the script to show the recorded curves.
def plot_curve(data):
    """Show a line chart of ``data``, using each value's index as its x position.

    The curve is blue, the legend entry and y label read 'value', and the
    x label reads 'step'.
    """
    plt.figure()  # each call draws into its own new figure
    x_positions = range(len(data))
    plt.plot(x_positions, data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()
# Use CUDA when it is available; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# LeNet network definition.
class LeNet(nn.Module):
    """LeNet-5 style CNN expressed as one ``nn.Sequential`` pipeline.

    Maps a batch of single-channel images to 10 raw class scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # MNIST images are 28x28; padding=2 on the first convolution
            # stands in for the 32x32 input that LeNet-5 was designed for.
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Collapse the feature maps into one vector per sample for the
            # fully connected layers.
            nn.Flatten(),
            nn.Linear(in_features=16 * 5 * 5, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=10),
        )

    def forward(self, x):
        """Run ``x`` through the whole pipeline and return the class scores."""
        return self.model(x)
# Hyperparameters. `epoch` is the number of full passes over the training set.
epoch = 8
batch_size = 64
lr = 0.001
# The mean training loss is reported once per this many optimizer steps.
log_interval = 100

# Load the MNIST train / test splits (downloaded to ./dataset if missing).
traindata = torchvision.datasets.MNIST(root='./dataset', train=True, transform=torchvision.transforms.ToTensor(), download=True)
testdata = torchvision.datasets.MNIST(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
test_size = len(testdata)

# Wrap the datasets in batch loaders; only the training data is shuffled.
trainloader = DataLoader(traindata, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testdata, batch_size=batch_size, shuffle=False)

net = LeNet().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)

train_loss = []  # mean loss of each completed `log_interval`-step window
precision = []   # test-set accuracy (fraction of correct predictions) per epoch
train_step = 0   # optimizer steps taken so far, counted across all epochs
# The running loss is deliberately NOT reset at epoch boundaries: `train_step`
# is global, so every reported window must cover exactly `log_interval` steps.
sum_loss = 0.0

# A separate loop variable keeps the `epoch` hyperparameter from being overwritten.
for epoch_idx in range(epoch):
    net.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_step += 1
        sum_loss += loss.item()
        # Report after every full window (steps 100, 200, ...), so the sum
        # divided by `log_interval` is a true mean.
        if train_step % log_interval == 0:
            mean_loss = sum_loss / log_interval
            print("[epoch:{},轮次:{},sum_loss:{}]".format(epoch_idx + 1, train_step, mean_loss))
            train_loss.append(mean_loss)
            sum_loss = 0.0

    # Evaluate on the whole test set after each epoch, without gradients.
    net.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # .item() keeps the running count as a Python int instead of a
            # tensor living on `device`.
            correct += (outputs.argmax(1) == labels).sum().item()
    accuracy = correct / test_size
    print("第{}个epoch的识别准确率为:{}".format(epoch_idx + 1, accuracy))
    precision.append(accuracy)

plot_curve(train_loss)
plot_curve(precision)
Running results:
The change of the training loss over the training steps, and
the accuracy on the test set after each epoch