PyTorch实现:分别使用AlexNet、VGG、NiN对LeNet网络改进
1.基于AlexNet结构改进LeNet
AlexNet结构:
①与相对较小的LeNet相比,AlexNet包含8层变换,其中有5层卷积和2层全连接隐藏层,以及1个全连接输出层。
②AlexNet将sigmoid激活函数改成了更加简单的ReLU激活函数。
③AlexNet通过丢弃法来控制全连接层的模型复杂度。
④AlexNet引入了大量的图像增广,如翻转、裁剪和颜色变化,从而进一步扩大数据集来缓解过拟合。
在LeNet基础上改进:首先将fashionMNIST数据集中用于训练和测试的图片尺寸扩张成32×32。
# Preprocessing pipeline: Resize(32) followed by tensor conversion, so the
# images reach the network at the 32x32 size described in the article.
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
# The training and test datasets differ only in the `train` flag, so both are
# built from the same constructor call (training split first, then test split).
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root="./DataSets/FashionMNIST",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)
完整代码:
import os
import torch
import torch.nn as nn
import sys
import time
import d2lzh_pytorch as d2l
# Restrict CUDA to the first GPU, then pick the GPU when one is available and
# fall back to the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(torch.__version__)
print(device)
# Wall-clock timestamp taken once the environment is set up.
start = time.time()
class LeNet(nn.Module):
    """LeNet variant using ReLU activations, max pooling and dropout.

    The convolutional stack applies three unpadded 5x5 convolutions with a
    2x2 max-pooling layer after each of the first two. A 32x32 input is
    therefore reduced 32 -> 28 -> 14 -> 10 -> 5 -> 1, giving the 120-channel
    1x1 feature map that ``nn.Linear(120, 84)`` expects.

    Args:
        in_channels: Number of channels of the input images (default 1).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 6, 5),  # in_channels, out_channels, kernel_size
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 120, 5),
            nn.ReLU(),
        )
        self.fc = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(84, num_classes),
        )

    def forward(self, img):
        """Return logits of shape (batch, num_classes) for a batch of images."""
        feature = self.conv(img)
        # Flatten everything except the batch dimension for the linear layers.
        output = self.fc(feature.view(img.shape[0], -1))
        return output
net = LeNet()
print(net)
batch_size = 128
# The first linear layer expects a 1x1 feature map, which the convolutional
# stack only produces for 32x32 inputs. Request the resize explicitly so the
# script does not depend on a locally patched data loader.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=32)
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable of ``(X, y)`` batches, where ``y`` holds the class labels.
        net: Either a ``torch.nn.Module`` or a plain callable returning per-class
            scores. A function that declares an ``is_training`` argument is
            called with ``is_training=False``.

    Returns:
        Number of correct argmax predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    accepts_flag = False
    if is_module:
        # Evaluate on the device the model lives on rather than depending on a
        # module-level variable; a parameter-free module is evaluated on the CPU.
        first_param = next(net.parameters(), None)
        run_device = first_param.device if first_param is not None else torch.device('cpu')
        # Remember the current mode so a model that was already in eval mode is
        # not switched to training mode as a side effect.
        was_training = net.training
        net.eval()  # evaluation mode: disables dropout
    else:
        # Hand-written model function; the GPU is not considered here.
        code = getattr(net, '__code__', None)
        accepts_flag = 'is_training' in getattr(code, 'co_varnames', ())

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    hits = net(X.to(run_device)).argmax(dim=1) == y.to(run_device)
                    acc_sum += hits.float().sum().cpu().item()
                elif accepts_flag:
                    # Tell the hand-written model that this is not a training pass.
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)  # restore the mode the caller had set
    return acc_sum / n
lr, num_epochs = 0.001, 10
# Move the model to its device before building the optimizer, as the
# torch.optim documentation recommends.
net = net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
#train
print("training on ", device)
loss = torch.nn.CrossEntropyLoss()
total_time = 0
for epoch in range(num_epochs):
    # Per-epoch accumulators. batch_count is reset every epoch so the printed
    # loss is the mean over this epoch's batches, not over every batch seen
    # since training began.
    train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
    start = time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    # Measure the epoch once so the per-epoch and total times agree.
    epoch_time = time.time() - start
    total_time += epoch_time
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, epoch_time))
print('total time:%.2f sec'%(total_time))
2.基于VGG结构改进LeNet
VGG块的组成规律是:连续使用数个相同的填充为1、窗口形状为 3×3 的卷积层后接上一个步幅为2、窗口形状为 2×2 的最大池化层。卷积层保持输入的高和宽不变,而池化层则对其减半。使用 vgg_block 函数来实现这个基础的VGG块,它可以指定卷积层的数量和输入输出通道数。注意:下面的改进代码并未定义 vgg_block,卷积层也未使用填充(padding=0),这样32×32的输入经过各层后恰好缩小为1×1。
与基于AlexNet结构改进LeNet时相同,首先将输入图片的尺寸调整为32×32。
# Preprocessing pipeline: Resize(32) followed by tensor conversion, so the
# images reach the network at the 32x32 size described in the article.
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
# The training and test datasets differ only in the `train` flag, so both are
# built from the same constructor call (training split first, then test split).
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root="./DataSets/FashionMNIST",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)
完整代码:
import os
import torch
import torch.nn as nn
import sys
import time
import d2lzh_pytorch as d2l
# Restrict CUDA to the first GPU, then pick the GPU when one is available and
# fall back to the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(torch.__version__)
print(device)
# Wall-clock timestamp taken once the environment is set up.
start = time.time()
class LeNet(nn.Module):
    """LeNet variant in which each 5x5 convolution is replaced by two 3x3 convolutions.

    All convolutions are unpadded, with a 2x2 max-pooling layer after the first
    and second pair. A 32x32 input is therefore reduced
    32 -> 30 -> 28 -> 14 -> 12 -> 10 -> 5 -> 3 -> 1, giving the 120-channel 1x1
    feature map that ``nn.Linear(120, 84)`` expects.

    Args:
        in_channels: Number of channels of the input images (default 1).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 6, 3),  # in_channels, out_channels, kernel_size
            nn.ReLU(),
            nn.Conv2d(6, 6, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # kernel_size, stride
            nn.Conv2d(6, 16, 3),
            nn.ReLU(),
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 120, 3),
            nn.ReLU(),
            nn.Conv2d(120, 120, 3),
            nn.ReLU(),
        )
        self.fc = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(84, num_classes),
        )

    def forward(self, img):
        """Return logits of shape (batch, num_classes) for a batch of images."""
        feature = self.conv(img)
        # Flatten everything except the batch dimension for the linear layers.
        output = self.fc(feature.view(img.shape[0], -1))
        return output
net = LeNet()
print(net)
batch_size = 128
# The first linear layer expects a 1x1 feature map, which the convolutional
# stack only produces for 32x32 inputs. Request the resize explicitly so the
# script does not depend on a locally patched data loader.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=32)
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable of ``(X, y)`` batches, where ``y`` holds the class labels.
        net: Either a ``torch.nn.Module`` or a plain callable returning per-class
            scores. A function that declares an ``is_training`` argument is
            called with ``is_training=False``.

    Returns:
        Number of correct argmax predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    accepts_flag = False
    if is_module:
        # Evaluate on the device the model lives on rather than depending on a
        # module-level variable; a parameter-free module is evaluated on the CPU.
        first_param = next(net.parameters(), None)
        run_device = first_param.device if first_param is not None else torch.device('cpu')
        # Remember the current mode so a model that was already in eval mode is
        # not switched to training mode as a side effect.
        was_training = net.training
        net.eval()  # evaluation mode: disables dropout
    else:
        # Hand-written model function; the GPU is not considered here.
        code = getattr(net, '__code__', None)
        accepts_flag = 'is_training' in getattr(code, 'co_varnames', ())

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    hits = net(X.to(run_device)).argmax(dim=1) == y.to(run_device)
                    acc_sum += hits.float().sum().cpu().item()
                elif accepts_flag:
                    # Tell the hand-written model that this is not a training pass.
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)  # restore the mode the caller had set
    return acc_sum / n
lr, num_epochs = 0.001, 10
# Move the model to its device before building the optimizer, as the
# torch.optim documentation recommends.
net = net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
#train
print("training on ", device)
loss = torch.nn.CrossEntropyLoss()
total_time = 0
for epoch in range(num_epochs):
    # Per-epoch accumulators. batch_count is reset every epoch so the printed
    # loss is the mean over this epoch's batches, not over every batch seen
    # since training began.
    train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
    start = time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    # Measure the epoch once so the per-epoch and total times agree.
    epoch_time = time.time() - start
    total_time += epoch_time
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, epoch_time))
print('total time:%.2f sec'%(total_time))
对于给定的感受野(与输出有关的输入图片的局部大小),采用堆积的小卷积核优于采用大的卷积核,因为可以增加网络深度来保证学习更复杂的模式,而且代价还比较小(参数更少)。使用2个3×3卷积核来代替1个5×5卷积核,这样做的主要目的是在保证具有相同感受野的条件下,提升了网络的深度,在一定程度上提升了神经网络的效果。
3.基于NiN结构改进LeNet
卷积层的输入和输出通常是四维数组(样本,通道,高,宽),而全连接层的输入和输出则通常是二维数组(样本,特征)。如果想在全连接层后再接上卷积层,则需要将全连接层的输出变换为四维。1×1卷积层可以看成全连接层,其中空间维度(高和宽)上的每个元素相当于样本,通道相当于特征。NiN使用1×1卷积层来替代全连接层,从而使空间信息能够自然传递到后面的层中去。
与基于AlexNet结构改进LeNet时相同,首先将输入图片的尺寸调整为32×32。
# Preprocessing pipeline: Resize(32) followed by tensor conversion, so the
# images reach the network at the 32x32 size described in the article.
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
])
# The training and test datasets differ only in the `train` flag, so both are
# built from the same constructor call (training split first, then test split).
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root="./DataSets/FashionMNIST",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)
完整代码:
import os
import torch
import torch.nn as nn
import sys
import time
import d2lzh_pytorch as d2l
# Restrict CUDA to the first GPU, then pick the GPU when one is available and
# fall back to the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(torch.__version__)
print(device)
# Wall-clock timestamp taken once the environment is set up.
start = time.time()
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    """Build a NiN block: one spatial convolution followed by two 1x1 convolutions.

    Args:
        in_channels: Number of input channels of the first convolution.
        out_channels: Number of output channels of every convolution in the block.
        kernel_size: Kernel size of the first convolution.
        strides: Stride of the first convolution.
        padding: Padding of the first convolution.

    Returns:
        An ``nn.Sequential`` of three ``nn.Conv2d`` layers, each followed by ``nn.ReLU``.
    """
    blk = nn.Sequential(
        # Pass stride and padding by keyword so they cannot be mixed up positionally.
        nn.Conv2d(in_channels, out_channels, kernel_size, stride=strides, padding=padding),
        nn.ReLU(),
        # The two 1x1 convolutions mix channels at each spatial position.
        nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(),
    )
    return blk
class LeNet(nn.Module):
    """LeNet variant whose convolutions are replaced by NiN blocks.

    Each block is built by ``nin_block`` with an unpadded 5x5 first
    convolution, and a 2x2 max-pooling layer follows the first two blocks.
    A 32x32 input is therefore reduced 32 -> 28 -> 14 -> 10 -> 5 -> 1, giving
    the 120-channel 1x1 feature map that the single linear classifier expects.

    Args:
        in_channels: Number of channels of the input images (default 1).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            nin_block(in_channels, 6, 5, 1, 0),
            nn.MaxPool2d(2, 2),
            nin_block(6, 16, 5, 1, 0),
            nn.MaxPool2d(2, 2),
            nin_block(16, 120, 5, 1, 0),
        )
        self.fc = nn.Sequential(
            nn.Linear(120, num_classes),
        )

    def forward(self, img):
        """Return logits of shape (batch, num_classes) for a batch of images."""
        feature = self.conv(img)
        # Flatten everything except the batch dimension for the linear layer.
        output = self.fc(feature.view(img.shape[0], -1))
        return output
net = LeNet()
print(net)
batch_size = 128
# The linear classifier expects a 1x1 feature map, which the convolutional
# stack only produces for 32x32 inputs. Request the resize explicitly so the
# script does not depend on a locally patched data loader.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=32)
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable of ``(X, y)`` batches, where ``y`` holds the class labels.
        net: Either a ``torch.nn.Module`` or a plain callable returning per-class
            scores. A function that declares an ``is_training`` argument is
            called with ``is_training=False``.

    Returns:
        Number of correct argmax predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    accepts_flag = False
    if is_module:
        # Evaluate on the device the model lives on rather than depending on a
        # module-level variable; a parameter-free module is evaluated on the CPU.
        first_param = next(net.parameters(), None)
        run_device = first_param.device if first_param is not None else torch.device('cpu')
        # Remember the current mode so a model that was already in eval mode is
        # not switched to training mode as a side effect.
        was_training = net.training
        net.eval()  # evaluation mode: disables dropout
    else:
        # Hand-written model function; the GPU is not considered here.
        code = getattr(net, '__code__', None)
        accepts_flag = 'is_training' in getattr(code, 'co_varnames', ())

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    hits = net(X.to(run_device)).argmax(dim=1) == y.to(run_device)
                    acc_sum += hits.float().sum().cpu().item()
                elif accepts_flag:
                    # Tell the hand-written model that this is not a training pass.
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)  # restore the mode the caller had set
    return acc_sum / n
lr, num_epochs = 0.001, 10
# Move the model to its device before building the optimizer, as the
# torch.optim documentation recommends.
net = net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
#train
print("training on ", device)
loss = torch.nn.CrossEntropyLoss()
total_time = 0
for epoch in range(num_epochs):
    # Per-epoch accumulators. batch_count is reset every epoch so the printed
    # loss is the mean over this epoch's batches, not over every batch seen
    # since training began.
    train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
    start = time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    # Measure the epoch once so the per-epoch and total times agree.
    epoch_time = time.time() - start
    total_time += epoch_time
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, epoch_time))
print('total time:%.2f sec'%(total_time))