PyTorch Learning Series: Part 1

Goal of this post

   Study the AlexNet network from the official PyTorch website.

Easily confused concepts

   1. The difference between torch.nn and torch.nn.functional:
   At first glance, torch.nn and torch.nn.functional can do the same things. Conv2d, for example, exists in both, as torch.nn.Conv2d and torch.nn.functional.conv2d. So why define it in two places?
   If you step into torch.nn.Conv2d, you will find that it is itself implemented on top of torch.nn.functional.conv2d. The source looks like this:

class Conv2d(_ConvNd):
    """Docstring omitted."""
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_2_t,
        stride: _size_2_t = 1,
        padding: Union[str, _size_2_t] = 0,
        dilation: _size_2_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = 'zeros',  # TODO: refine this type
        device=None,
        dtype=None
    ) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size_ = _pair(kernel_size)
        stride_ = _pair(stride)
        padding_ = padding if isinstance(padding, str) else _pair(padding)
        dilation_ = _pair(dilation)
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size_, stride_, padding_, dilation_,
            False, _pair(0), groups, bias, padding_mode, **factory_kwargs)

    def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input: Tensor) -> Tensor:
        return self._conv_forward(input, self.weight, self.bias)

  Then why not just use torch.nn.functional.conv2d everywhere? The reasons are:
1. torch.nn.Conv2d is a class (an nn.Module), while torch.nn.functional.conv2d is a plain function. The module creates and owns its learnable parameters, so they can be tracked and updated during training; the functional call has no state of its own and must be given the weights explicitly.
2. Layers with learnable parameters need those parameters to be updated after every step and reused on the next input, which the module form handles for us. Layers with nothing to learn (activations, pooling, and so on) do not need a class at all, and the functional form is simpler there.

The usual rule of thumb: where there is nothing to learn, use nn.functional.xx; where there are learnable parameters, use nn.xx. A small comparison follows below.
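
As a minimal sketch of my own (not from the original post), the snippet below contrasts the two forms: nn.Conv2d creates and owns its weight and bias, while F.conv2d needs them passed in by hand.

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 1, 32, 32)                       # dummy input, N*C*H*W

# Module form: the layer creates and tracks its own weight and bias
conv = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
y1 = conv(x)

# Functional form: we have to create and pass the parameters ourselves
weight = torch.randn(6, 1, 5, 5, requires_grad=True)
bias = torch.zeros(6, requires_grad=True)
y2 = F.conv2d(x, weight, bias, stride=1, padding=0)

print(y1.shape, y2.shape)                            # both torch.Size([1, 6, 28, 28])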

Basic concepts

1. transforms.ToTensor() converts a PIL image to a tensor: an H * W * C PIL image with values in [0, 255] becomes a C * H * W torch tensor with values in [0, 1].
2. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) normalizes the image channel-wise with mean (0.5, 0.5, 0.5) and standard deviation (0.5, 0.5, 0.5).
3. transforms.Compose() takes a list of transforms and chains them together, e.g. trans = [transforms.ToTensor(), transforms.Resize(resize)]; trans = transforms.Compose(trans) chains these two operations (a small example follows below). See the torchvision documentation for more transforms.
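
A quick sketch of my own (not from the original post) showing the effect of ToTensor, Normalize, and Compose on an image's shape and value range:

from PIL import Image
from torchvision import transforms

# A dummy 32*32 RGB PIL image (H * W * C, values in [0, 255])
img = Image.new('RGB', (32, 32), color=(128, 64, 255))

trans = transforms.Compose([
    transforms.ToTensor(),                                    # -> C * H * W, values in [0, 1]
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # -> roughly [-1, 1]
])

x = trans(img)
print(x.shape)                           # torch.Size([3, 32, 32])
print(x.min().item(), x.max().item())    # values now lie in [-1, 1]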

Defining the network

# Import the necessary packages
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchsummary import summary
from torchvision import transforms
from IPython import display
import matplotlib
import matplotlib.pyplot as plt

# Define some hyperparameters
batch_size = 32
img_shape = 32
num_epochs = 60 

# Define the data-loading function
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # transform=trans means the dataset will be preprocessed with trans
    mnist_train = torchvision.datasets.FashionMNIST(
        root="./data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="./data", train=False, transform=trans, download=True)
    
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=8),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=8))
                            

# Define the network as a class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        
        # Three fully connected layers
        # For a 32*32 input: conv1 -> 28, pool -> 14, conv2 -> 10, pool -> 5,
        # so the flattened feature map has 16 * 5 * 5 elements
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        
        # Flatten all dimensions except the batch dimension
        x = torch.flatten(x, 1) 
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
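
As a quick check of my own (not part of the original script), a random 1*1*32*32 input passes through the network like this:

tmp_net = Net()
out = tmp_net(torch.randn(1, 1, 32, 32))
print(out.shape)   # torch.Size([1, 10]), one score per Fashion-MNIST class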
        
# Define the training function
def train_epoch(net, train_iter, loss, updater):
    """Train the model for one epoch."""
    # Put the model in training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute the predictions, then the gradients, and update the parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use PyTorch's built-in optimizer and loss function
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use a custom optimizer and loss function
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

def accuracy(y_hat, y):
    """Count the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the given dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # Put the model in evaluation mode
    metric = Accumulator(2)  # Number of correct predictions, total number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

# Define some helper functions for live plotting
def use_svg_display():
    """Use the svg format to display plots in Jupyter."""
    display.set_matplotlib_formats('svg')

def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure the axes of a matplotlib plot."""
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()
    
class Accumulator:
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
    
class Animator:
    """Plot data incrementally in an animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Plot multiple lines incrementally
        if legend is None:
            legend = []
        use_svg_display()
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points to the chart
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
         
def train(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model."""
    # Try to load previously trained parameters
    try:
        net.load_state_dict(torch.load('net.params'))
        print("net load weight success")
    except Exception:
        print("net load weight fail")
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
    
# Initialize the network
net = Net()

# Load the images and resize them to 32*32
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=img_shape)
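
If you want to sanity-check the loader at this point (my own addition, not in the original post):

X, y = next(iter(train_iter))
print(X.shape, y.shape)   # torch.Size([32, 1, 32, 32]) torch.Size([32])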

# Define the optimizer
updater = optim.SGD(net.parameters(), lr=0.01)

# Define the loss function
criterion = nn.CrossEntropyLoss(reduction='none')

# Print the network structure
print(net)
# summary prints the model in a style similar to TensorFlow/Keras
summary(net, (1, 32, 32))

# Start training
train(net, train_iter, test_iter, criterion, num_epochs, updater)

# Save the trained model parameters
torch.save(net.state_dict(), 'net.params')
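
To use the saved parameters later, a minimal reload-and-predict sketch (my own addition, relying only on the net.params file produced above) could look like this:

# Rebuild the network and load the saved parameters
net2 = Net()
net2.load_state_dict(torch.load('net.params'))
net2.eval()

# Predict the classes of one batch from the test set
with torch.no_grad():
    X, y = next(iter(test_iter))
    preds = net2(X).argmax(dim=1)
    print('predicted:', preds[:10])
    print('actual:   ', y[:10])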


Reposted from blog.csdn.net/To_be_little/article/details/124389918