【PyTorch】Implementing the ResNet34 Convolutional Neural Network on the CIFAR10 Dataset

Introduction to ResNet34

  1. Definition

    • The Residual Network (ResNet) is a convolutional neural network proposed by four researchers at Microsoft Research; it won the image classification and object detection tasks of the 2015 ImageNet Large Scale Visual Recognition Challenge (ILSVRC). ResNet is easy to optimize and gains accuracy from considerably increased depth. Its internal residual blocks use skip connections (short cuts), which alleviate the vanishing-gradient problem that comes with adding depth to a deep neural network.

    • Apart from the initial convolution and pooling and the final pooling and fully connected layer, a deep residual network is built from many structurally similar units. What these repeated units share is a cross-layer direct connection, the short cut; each such unit is called a Residual Block. Its structure is shown below (the curve labeled "x identity" in the figure is the short cut), followed by a short code sketch of the computation:

      [Figure: structure of a Residual Block]
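
      The core computation can be sketched in a few lines of PyTorch. Below is a minimal illustration of the residual mapping y = relu(F(x) + x), where F is a small stack of convolutions (the name f is illustrative, not from the original post):

      import torch
      from torch import nn

      # F(x): two 3x3 convolutions; the shortcut adds x back unchanged, so
      # the gradient always has an identity path back to earlier layers
      f = nn.Sequential(
          nn.Conv2d(64, 64, kernel_size=3, padding=1),
          nn.ReLU(inplace=True),
          nn.Conv2d(64, 64, kernel_size=3, padding=1),
      )
      x = torch.randn(1, 64, 8, 8)
      y = torch.relu(f(x) + x)   # the short cut
      print(y.shape)             # torch.Size([1, 64, 8, 8])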

  2. Network architecture diagram

    [Figure: ResNet34 network architecture]
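
    The "34" in the name counts the weighted layers: one initial 7x7 convolution, four stages of residual blocks with two 3x3 convolutions per block, and one final fully connected layer. A quick check of that arithmetic, using the standard [3, 4, 6, 3] stage layout:

    blocks_per_stage = [3, 4, 6, 3]          # ResNet34 stage layout
    conv_layers = 2 * sum(blocks_per_stage)  # two 3x3 convs per basic block = 32
    print(1 + conv_layers + 1)               # stem conv + blocks + fc -> 34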

The CIFAR10 Dataset

  • This dataset was already introduced in another article, so it is not described again here; the focus below is on the code. (A quick check of the dataset's basic statistics is sketched below.)
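
  For reference, CIFAR10 contains 60,000 32x32 RGB images in 10 classes, split into 50,000 training and 10,000 test images. A minimal sketch to confirm this with torchvision (using the same 'cifar' directory as CNN.py below):

    from torchvision import datasets

    train = datasets.CIFAR10('cifar', train=True, download=True)
    test = datasets.CIFAR10('cifar', train=False, download=True)
    print(len(train), len(test))   # 50000 10000
    print(train.classes)           # ['airplane', 'automobile', ..., 'truck']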

Code Implementation

  1. resnet.py

    import torch
    from torch import nn
    from torch.nn import functional as F
    
    class ResBlk(nn.Module):
        # basic residual block: two 3x3 convs plus a short cut connection
        def __init__(self, ch_in, ch_out, stride=1):
            super(ResBlk, self).__init__()

            self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
            self.bn1 = nn.BatchNorm2d(ch_out)
            self.relu = nn.ReLU(inplace=True)
            self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
            self.bn2 = nn.BatchNorm2d(ch_out)

            # identity shortcut by default; a 1x1 conv matches the channel
            # count and spatial size whenever either of them changes
            self.extra = nn.Sequential()
            if ch_out != ch_in or stride != 1:
                # [b, ch_in, h, w] -> [b, ch_out, h/stride, w/stride]
                self.extra = nn.Sequential(
                    nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                    nn.BatchNorm2d(ch_out),
                )

        def forward(self, x):
            # x: [b, ch, h, w]
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.bn2(self.conv2(out))
            # short cut: add the (possibly projected) input, then apply ReLU
            out = self.extra(x) + out
            out = F.relu(out)
            return out
    
    # ResNet34: stage layout [3, 4, 6, 3]
    class ResNet34(nn.Module):
        def __init__(self):
            super(ResNet34, self).__init__()

            # stem: 7x7 conv; the max pool keeps stride 1 (instead of the
            # paper's stride 2) so 32x32 CIFAR inputs are not shrunk too fast
            self.conv1 = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
            )
            # followed by 4 stages of residual blocks
            # stage 1: [b, 64, h, w] -> [b, 64, h, w]
            self.blk1 = nn.Sequential(
                ResBlk(64, 64, 1),
                ResBlk(64, 64, 1),
                ResBlk(64, 64, 1)
            )
            # stage 2: [b, 64, h, w] -> [b, 128, h/2, w/2]
            self.blk2 = nn.Sequential(
                ResBlk(64, 128, 2),
                ResBlk(128, 128, 1),
                ResBlk(128, 128, 1),
                ResBlk(128, 128, 1),
            )
            # stage 3: [b, 128, h, w] -> [b, 256, h/2, w/2]
            self.blk3 = nn.Sequential(
                ResBlk(128, 256, 2),
                ResBlk(256, 256, 1),
                ResBlk(256, 256, 1),
                ResBlk(256, 256, 1),
                ResBlk(256, 256, 1),
                ResBlk(256, 256, 1),
            )
            # stage 4: [b, 256, h, w] -> [b, 512, h/2, w/2]
            self.blk4 = nn.Sequential(
                ResBlk(256, 512, 2),
                ResBlk(512, 512, 1),
                ResBlk(512, 512, 1),
            )
    
            # for 32x32 inputs the feature map is [b, 512, 2, 2] here,
            # so a 2x2 average pool reduces it to [b, 512, 1, 1]
            self.avg_pool = nn.AvgPool2d(kernel_size=2)
            self.outlayer = nn.Linear(512, 10)
    
        def forward(self, x):
            # conv1 already ends with ReLU and max pooling
            x = self.conv1(x)

            # [b, 64, 16, 16] -> [b, 512, 2, 2] for 32x32 inputs
            x = self.blk1(x)
            x = self.blk2(x)
            x = self.blk3(x)
            x = self.blk4(x)
            x = self.avg_pool(x)

            # flatten [b, 512, 1, 1] -> [b, 512]
            x = x.view(x.size(0), -1)
            x = self.outlayer(x)
            return x
    
    def main():
        # blk = ResBlk(64, 128, 1)
        # tmp = torch.randn(2, 64, 32, 32)
        # out = blk(tmp)
        # # torch.Size([2, 128, 32, 32])
        # print('blk:', out.shape)
    
        model = ResNet34()
        tmp = torch.randn(2, 3, 32, 32)
        out = model(tmp)
        print('resnet:', out.shape)
    
    
    if __name__ == '__main__':
        main()
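
    As a sanity check on the architecture, you can count the trainable parameters; this variant should come out on the order of 21 million, consistent with a standard ResNet34 backbone (a minimal sketch, assuming resnet.py is importable):

    from resnet import ResNet34

    model = ResNet34()
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'{n_params:,} trainable parameters')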
    
  2. CNN.py

    import torch
    from torch.utils.data import DataLoader
    from torchvision import datasets
    from torch import nn, optim
    from torchvision import transforms
    # from lenet5 import Lenet5
    from resnet import ResNet34
    import matplotlib.pyplot as plt
    
    
    
    def main():
        batchsz = 32
        # load the CIFAR10 dataset
        cifar_train = datasets.CIFAR10('cifar', True, transform = transforms.Compose([
            transforms.Resize([32, 32]),
            transforms.ToTensor()
        ]), download = True)
    
        cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)
    
        cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
            transforms.Resize([32, 32]),
            transforms.ToTensor()
        ]), download=True)
    
        cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)
    
        # sanity check: inspect one batch
        x, label = next(iter(cifar_train))
        print('x:', x.shape, 'label:', label.shape)
    
    
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # model = Lenet5().to(device)
        model = ResNet34().to(device)
        criteon = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        print(model)
    
        train_result = []
        test_result = []
    
        for epoch in range(100):
            model.train()
            for batchidx, (x, label) in enumerate(cifar_train):
                # print(batchidx)
                x, label = x.to(device), label.to(device)
                # logits:[b, 10]
                # label:[b]
                # loss:tensor scalar
                logits = model(x)
                loss = criteon(logits, label)
    
                # backprop
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    
            # end of one epoch; report the loss of its last batch
            print(epoch, "loss = ", loss.item())
            train_result.append(loss.item())
    
            model.eval()
            with torch.no_grad():
                # test
                total_correct = 0
                total_num = 0
                for x, label in cifar_test:
                    x, label = x.to(device), label.to(device)
    
                    logits = model(x)
                    # index of the largest logit, i.e. the predicted class
                    pred = logits.argmax(dim=1)
                    # [b] vs [b] -> scalar tensor
                    total_correct += torch.eq(pred, label).float().sum().item()
                    total_num += x.size(0)
                acc = total_correct / total_num
                test_result.append(acc)
                print(epoch, acc)
    
        # print(train_result)
        # print(test_result)
        plt.plot(train_result, label="train_loss")
        plt.plot(test_result, label="test_acc")
        plt.legend()
    
        plt.savefig("picture.png", dpi=300)
    
        # plt.show()
        # file = open("result.txt", 'a')
        # for line1, line2 in zip(train_result, test_result):
        #     file.write(str(line1) + '\n')
        #     file.write(str(line2) + '\n')
        # file.close()
    
    if __name__ == '__main__':
        main()
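
    The script above saves only the loss/accuracy plot; to reuse the trained network you also need its weights. Below is a minimal sketch of the save/reload round trip (the filename resnet34_cifar10.pth is illustrative, not from the original post):

    import torch
    from resnet import ResNet34

    # save the trained weights, then rebuild the model and load them back
    model = ResNet34()
    torch.save(model.state_dict(), 'resnet34_cifar10.pth')

    model2 = ResNet34()
    model2.load_state_dict(torch.load('resnet34_cifar10.pth'))
    model2.eval()   # put BatchNorm layers in inference mode before testing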
    

Results

The experiment ran for 100 epochs in total. Compared with the LeNet-5 network, ResNet improves test accuracy substantially, reaching roughly 80%.

[Figure: training loss and test accuracy over 100 epochs]

Reposted from blog.csdn.net/CBB_FT/article/details/125343958