本次实现目标
学习 PyTorch 官网教程中的网络。注意:本文沿用了 AlexNet 的叫法,但下文实现的结构(两层 5×5 卷积 + 三层全连接,输入为 1×32×32)实际上是 LeNet 风格的网络,而不是 AlexNet。
易混的概念
1、torch.nn 与 torch.nn.functional 的区别:
单纯的看,torch.nn 与 torch.nn.functional均可以实现相同的功能,如Conv2d,在两者当中都有,分别为torch.nn.functional.conv2d 、torch.nn.Conv2d 。那为什么还要分开定义呢?
进入 torch.nn.Conv2d 中,就会发现,torch.nn.Conv2d 其实也是由 torch.nn.functional.conv2d 实现的,源码如下:
# Excerpt of the library's Conv2d module, quoted to show that the class
# ultimately delegates the actual convolution to F.conv2d.
class Conv2d(_ConvNd):
    """(Docstring omitted in this excerpt.)"""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_2_t,
        stride: _size_2_t = 1,
        padding: Union[str, _size_2_t] = 0,
        dilation: _size_2_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = 'zeros',  # TODO: refine this type
        device=None,
        dtype=None
    ) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        # Each size-like argument is passed through _pair before being handed
        # to the base class; a string padding is forwarded unchanged.
        kernel_size_ = _pair(kernel_size)
        stride_ = _pair(stride)
        padding_ = padding if isinstance(padding, str) else _pair(padding)
        dilation_ = _pair(dilation)
        # NOTE(review): the positional `False, _pair(0)` are presumably the
        # base class's `transposed` and `output_padding` arguments -- confirm
        # against the _ConvNd definition, which is not shown here.
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size_, stride_, padding_, dilation_,
            False, _pair(0), groups, bias, padding_mode, **factory_kwargs)

    def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
        # For any padding mode other than 'zeros' the input is padded
        # explicitly with F.pad, and F.conv2d is then called with a padding
        # of _pair(0).
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        # With 'zeros' padding, self.padding is passed straight to F.conv2d.
        return F.conv2d(input, weight, bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input: Tensor) -> Tensor:
        # The module supplies its own stored weight and bias to the
        # functional convolution.
        return self._conv_forward(input, self.weight, self.bias)
那都使用 torch.nn.functional.conv2d 不就好了嘛,干嘛这么麻烦。原因如下:
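1、torch.nn.Conv2d 是一个类(nn.Module 的子类),它把权重 weight 和偏置 bias 作为可学习参数保存在实例中;而 torch.nn.functional.conv2d 只是一个函数,本身不保存任何状态,权重和偏置必须在每次调用时由调用者传入。
2、网络中有很多参数需要在训练过程中不断更新,并在下一次前向计算中继续使用,用类来保存和管理这些参数更方便(例如可以通过 net.parameters() 统一交给优化器),单靠函数则需要自己维护这些参数。但同时,也有很多不含可学习参数的网络层(如 ReLU、最大池化),我们不需要为它们创建类的实例,直接使用函数更加简便。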
一般的使用选择为:没有需要更新的参数的地方,用nn.functional.xx,否则使用nn.xx 的方式
基础概念
1、transforms.ToTensor() ,将一个PIL图像转换为tensor。即,将形状为 $H \times W \times C$、取值范围在 [0, 255] 的PIL图像转换为形状为 $C \times H \times W$、取值范围在 [0, 1] 的 torch.Tensor。
2、transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ,用均值(0.5, 0.5, 0.5)和标准差(0.5, 0.5, 0.5)对图像逐通道做标准化处理,即 output = (input - mean) / std。例如,经过 ToTensor() 后取值在 [0, 1] 的图像会被映射到 [-1, 1]。
3、transforms.Compose(),其参数是一个由若干变换操作组成的列表,Compose 会把这些操作按顺序串联起来,如 trans = [transforms.ToTensor(), transforms.Resize(resize)];trans = transforms.Compose(trans),这里就串联了两个操作。更多 transforms 函数可参考 torchvision 官方文档。
定义网络
# 导入必要的文件包
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchsummary import summary
from torchvision import transforms
from IPython import display
import matplotlib
import matplotlib.pyplot as plt
# Hyperparameters used by the script at the bottom of the file
batch_size = 32  # mini-batch size passed to the data loaders
img_shape = 32  # images are resized to this size when the data is loaded
num_epochs = 60  # number of training epochs passed to train()
# Build the Fashion-MNIST data loaders
def load_data_fashion_mnist(batch_size, resize=None, root="./data",
                            num_workers=8):
    """Download Fashion-MNIST if necessary and wrap it in data loaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size handed to ``transforms.Resize``; when falsy
            the images are not resized.
        root: Directory the dataset is stored in / downloaded to.
        num_workers: Number of worker processes used by each ``DataLoader``.
            Lower it (or pass 0) on machines with few cores.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    # Resize (when requested) is placed ahead of ToTensor in the pipeline.
    pipeline = []
    if resize:
        pipeline.append(transforms.Resize(resize))
    pipeline.append(transforms.ToTensor())
    trans = transforms.Compose(pipeline)

    # transform=trans applies the composed pipeline to every sample.
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=trans, download=True)

    train_loader = data.DataLoader(mnist_train, batch_size, shuffle=True,
                                   num_workers=num_workers)
    test_loader = data.DataLoader(mnist_test, batch_size, shuffle=False,
                                  num_workers=num_workers)
    return train_loader, test_loader
# Network defined as an nn.Module subclass
class Net(nn.Module):
    """Two 5x5 convolutions followed by three fully connected layers.

    For a 1x32x32 input the feature map is 16x5x5 after the second pooling
    step, which is the flattened size ``fc1`` expects. The final layer
    returns 10 raw scores per sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Fully connected classifier head
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        # conv -> ReLU -> 2x2 max pooling, twice
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, 2)
        activated = F.relu(self.conv2(pooled))
        pooled = F.max_pool2d(activated, 2)
        # Flatten everything except the batch dimension
        flat = torch.flatten(pooled, start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Training routine for a single epoch
def train_epoch(net, train_iter, loss, updater):
    """Run one pass over ``train_iter`` and update the model parameters.

    ``updater`` is either a ``torch.optim.Optimizer`` or a custom callable
    that is invoked with the batch size after the backward pass.

    Returns:
        ``(summed loss / sample count, correct predictions / sample count)``.
    """
    # Switch to training mode when the model is an nn.Module
    if isinstance(net, torch.nn.Module):
        net.train()
    # Running totals: summed loss, correct predictions, samples seen
    totals = Accumulator(3)
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    for X, y in train_iter:
        y_hat = net(X)
        batch_loss = loss(y_hat, y)
        if uses_torch_optimizer:
            # Built-in optimizer: back-propagate the mean loss
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater
            batch_loss.sum().backward()
            updater(X.shape[0])
        totals.add(float(batch_loss.sum()), accuracy(y_hat, y), y.numel())
    loss_sum, n_correct, n_samples = totals[0], totals[1], totals[2]
    return loss_sum / n_samples, n_correct / n_samples
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to predicted labels with ``argmax`` along axis 1; otherwise
    it is compared to ``y`` as given.
    """
    is_score_matrix = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predictions = y_hat.argmax(axis=1) if is_score_matrix else y_hat
    # Cast to the label dtype before comparing element-wise
    matches = predictions.type(y.dtype) == y
    n_correct = matches.type(y.dtype).sum()
    return float(n_correct)
def evaluate_accuracy(net, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    The model is switched to evaluation mode (when it is an ``nn.Module``)
    and the forward passes run with gradient tracking disabled.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Running totals: correct predictions, samples seen
    tally = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            predictions = net(X)
            tally.add(accuracy(predictions, y), y.numel())
    n_correct, n_total = tally[0], tally[1]
    return n_correct / n_total
# Helpers for live plotting
def use_svg_display():
    """Ask IPython to render matplotlib figures in SVG format."""
    # NOTE(review): IPython documents display.set_matplotlib_formats as
    # deprecated in favour of matplotlib_inline.backend_inline -- verify
    # against the installed IPython version.
    figure_format = 'svg'
    display.set_matplotlib_formats(figure_format)
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Apply labels, scales, limits, an optional legend and a grid to ``axes``."""
    # Order matters: labels first, then scales, then limits.
    settings = (
        ("xlabel", xlabel),
        ("ylabel", ylabel),
        ("xscale", xscale),
        ("yscale", yscale),
        ("xlim", xlim),
        ("ylim", ylim),
    )
    for prop, value in settings:
        getattr(axes, "set_" + prop)(value)
    # The legend is only drawn when a non-empty one was supplied
    if legend:
        axes.legend(legend)
    axes.grid()
class Accumulator:
    """Keep running float sums for ``n`` quantities."""

    def __init__(self, n):
        # One zero-initialised slot per tracked quantity
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument to the slot at the same position."""
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to zero."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]
class Animator:
    """Redraw a set of curves each time new data points are added."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        use_svg_display()
        figure, axes = plt.subplots(nrows, ncols, figsize=figsize)
        self.fig = figure
        # A single subplot is wrapped in a list so axes[0] always works
        self.axes = [axes] if nrows * ncols == 1 else axes

        # Closure that remembers the axis settings given to the constructor
        def configure():
            return set_axes(self.axes[0], xlabel, ylabel, xlim, ylim,
                            xscale, yscale, legend)

        self.config_axes = configure
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one point per curve and redraw the figure."""
        # Scalars are promoted to lists; a scalar x is shared by all curves
        if not hasattr(y, "__len__"):
            y = [y]
        n_series = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n_series
        # Storage is created lazily on the first call
        if not self.X:
            self.X = [[] for _ in range(n_series)]
        if not self.Y:
            self.Y = [[] for _ in range(n_series)]
        for idx, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[idx].append(x_val)
            self.Y[idx].append(y_val)
        # Clear the first axes and replot every curve from scratch
        canvas = self.axes[0]
        canvas.cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            canvas.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
def train(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs, plotting progress live.

    Args:
        net: Model to train; previously saved weights are loaded from
            ``'net.params'`` when that is possible.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` evaluation batches.
        loss: Loss callable forwarded to ``train_epoch``.
        num_epochs: Number of epochs to run.
        updater: Optimizer or custom update callable forwarded to
            ``train_epoch``.

    Raises:
        AssertionError: If the final training loss, training accuracy or
            test accuracy falls outside the sanity thresholds below.
    """
    # Best-effort warm start from an earlier run.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # that come from a trusted source.
    try:
        net.load_state_dict(torch.load('net.params'))
    except Exception:
        # A missing or incompatible checkpoint is not fatal; training simply
        # starts from the current weights. Unlike a bare ``except``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        print("net load weight fail")
    else:
        print("net load weight success")
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        # One point per curve: train loss, train accuracy, test accuracy
        animator.add(epoch + 1, train_metrics + (test_acc,))
    # Sanity checks on the metrics of the last epoch
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
# Build the network
net = Net()
# Load the images, resized to img_shape x img_shape
# NOTE(review): the loaders use worker processes; on platforms that spawn
# workers this top-level code may need an `if __name__ == "__main__":`
# guard -- confirm for the target platform.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=img_shape)
# Optimizer
updater = optim.SGD(net.parameters(), lr=0.01)
# Loss function; reduction='none' keeps per-sample losses, which
# train_epoch reduces itself with .mean() / .sum()
criterion = nn.CrossEntropyLoss(reduction='none')
# Print the module structure
print(net)
# Layer-by-layer summary, similar to what TensorFlow prints.
# torchsummary builds its dummy input on CUDA by default whenever CUDA is
# available; the model is never moved off the CPU here, so the device is
# pinned to "cpu" to avoid a device-mismatch error. The input size follows
# img_shape instead of repeating the literal 32.
summary(net, (1, img_shape, img_shape), device="cpu")
# Start training
train(net, train_iter, test_iter, criterion, num_epochs, updater)
# Save the trained weights
torch.save(net.state_dict(), 'net.params')