要約する

データセット

使用するデータセットは CIFAR-10 です。CIFAR-10 には 50,000 枚のトレーニング画像と 10,000 枚のテスト画像が含まれます。いずれも PNG 形式のカラー画像で、サイズは 32 x 32 x 3 です。分類クラスは全部で 10 種類あり、飛行機、車、鳥、猫、鹿、犬、カエル、馬、船、トラックです。このデータセットは、ネットワークの性能を評価するうえで非常に重要なベンチマークです。あるネットワークがこのデータセットで別のネットワークより高い性能を示せば、一般にそのネットワークのほうが優れていると考えられます。現在、このデータセットでの最良の結果は、テストセットの精度で約 95% です。

from torchvision.datasets import CIFAR10
import torch
import cv2
import numpy as np

def data_tf(x):
    """Turn one image into a normalised, 224x224, channel-first float tensor.

    Args:
        x: Image-like input accepted by ``np.array`` (presumably the
            HxWx3 image handed over by the dataset -- confirm against
            the caller).

    Returns:
        A ``torch.Tensor`` created from the resized array after its axes
        have been reordered to ``(2, 0, 1)``.
    """
    pixels = np.array(x, dtype='float32')
    pixels = pixels / 255            # scale the raw values by 1/255
    pixels = (pixels - 0.5) / 0.5    # normalisation: shift by 0.5, divide by 0.5
    resized = cv2.resize(pixels, (224, 224))
    # Put the last axis first; per the original note this is the
    # channel-first layout PyTorch expects as input.
    channel_first = resized.transpose(2, 0, 1)
    return torch.from_numpy(channel_first)

# Load the CIFAR-10 train and test splits from ./data.
# download=False, so the dataset files must already be present there.
train_set = CIFAR10(
    './data',
    train=True,
    transform=data_tf,
    download=False,
)
test_set = CIFAR10(
    './data',
    train=False,
    transform=data_tf,
    download=False,
)

ネット

これがGoogLeNetのネットワーク構造です

class BasicConv2d(nn.Module):
    """Bias-free 2-D convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels produced by the convolution.
        kernel: Kernel size forwarded to ``nn.Conv2d``.
        stride: Convolution stride (default 1).
        padding: Convolution padding (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel, stride=1, padding=0):
        super().__init__()
        conv_options = dict(
            kernel_size=kernel,
            stride=stride,
            padding=padding,
            bias=False,  # no conv bias; a BatchNorm2d layer follows directly
        )
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Apply conv -> batch norm -> in-place ReLU and return the result."""
        normalised = self.bn(self.conv(x))
        return F.relu(normalised, inplace=True)

class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along dim 1.

    Args:
        in_channels: Channels of the input data.
        out_channels_1x1: Depth of the stand-alone 1x1 convolution.
        out_channels_1x1_3: Depth of the 1x1 convolution placed before the
            3x3 convolution.
        out_channels_3x3: Depth of the 3x3 convolution.
        out_channels_1x1_5: Depth of the 1x1 convolution placed before the
            5x5 convolution.
        out_channels_5x5: Depth of the 5x5 convolution.
        out_channels_pool: Depth of the 1x1 convolution placed after the
            pooling layer.
    """

    def __init__(self, in_channels, out_channels_1x1,
                 out_channels_1x1_3,  out_channels_3x3,
                 out_channels_1x1_5, out_channels_5x5,
                 out_channels_pool ):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = BasicConv2d(in_channels, out_channels_1x1, kernel=1)

        # Branch 2: 1x1 convolution, then 3x3 convolution (stride 1, padding 1).
        reduce_3x3 = BasicConv2d(in_channels, out_channels_1x1_3, kernel=1)
        conv_3x3 = BasicConv2d(out_channels_1x1_3, out_channels_3x3,
                               kernel=3, stride=1, padding=1)
        self.branch3x3 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 convolution, then 5x5 convolution (stride 1, padding 2).
        reduce_5x5 = BasicConv2d(in_channels, out_channels_1x1_5, kernel=1)
        conv_5x5 = BasicConv2d(out_channels_1x1_5, out_channels_5x5,
                               kernel=5, stride=1, padding=2)
        self.branch5x5 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: 3x3 max pooling (stride 1, padding 1), then 1x1 convolution.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        pool_proj = BasicConv2d(in_channels, out_channels_pool, kernel=1)
        self.branch_pool = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the outputs on dim 1."""
        branches = (
            self.branch1x1,
            self.branch3x3,
            self.branch5x5,
            self.branch_pool,
        )
        return torch.cat([branch(x) for branch in branches], 1)
    
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier assembled from stacked Inception blocks.

    Args:
        in_channels: Number of channels of the input images.
        num_class: Number of output classes (width of the final linear layer).
    """

    def __init__(self, in_channels, num_class):
        super(GoogLeNet, self).__init__()
        # Block 1: 7x7 stride-2 convolution, then 3x3 stride-2 max pooling.
        # NOTE(review): the convolutions in blocks 1 and 2 are plain nn.Conv2d
        # layers with no activation between them -- confirm this is intended.
        # They are left untouched here because inserting layers would shift
        # the indices used in saved checkpoints.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, 7, 2, 3),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 2: two 3x3 convolutions, then 3x3 stride-2 max pooling.
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 192, 3, 1, 1),
            nn.Conv2d(192, 192, 3, 1, 1),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 3: two Inception modules, then max pooling.
        self.block3 = nn.Sequential(
            Inception(192, 64, 96, 128, 16, 32, 32),
            Inception(256, 128, 128, 192, 32, 96, 64),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 4: five Inception modules, then max pooling.
        self.block4 = nn.Sequential(
            Inception(480, 192, 96, 208, 16, 48, 64),
            Inception(512, 160, 112, 224, 24, 64, 64),  # original note: the full version produces an output here
            Inception(512, 128, 128, 256, 24, 64, 64),
            Inception(512, 112, 144, 288, 32, 64, 64),
            Inception(528, 256, 160, 320, 32, 128, 128),  # original note: the full version produces an output here
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 5: two Inception modules, then global average pooling.
        self.block5 = nn.Sequential(
            Inception(832, 256, 160, 320, 32, 128, 128),
            Inception(832, 384, 192, 384, 48, 128, 128),
            # Adaptive pooling always yields a 1x1 map, so the classifier's
            # 1024 input features hold for any input resolution. For the 7x7
            # map obtained from 224x224 inputs it averages the same window as
            # the previous AvgPool2d(7, 1).
            nn.AdaptiveAvgPool2d(1)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(1024, num_class),
        )

    def forward(self, x):
        """Return the class scores of shape ``(batch, num_class)`` for ``x``."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)

        # Flatten everything but the batch dimension before the linear layer.
        x = torch.reshape(x, (x.shape[0], -1))
        x = self.classifier(x)

        return x

詳細については私のブログを参照してください

いくつかの古典的なネットワーク構造を簡単に記録します_Zi Gen のブログ - CSDN ブログ

次のステップとして、トレーニングコードを uilt モジュールにまとめます。

uilt

#  Developer:    骆根强
#  Created:      2022/7/30 10:50
#  Purpose:      reusable training loop (train)

import torch
import time
import tqdm

from torch.autograd import Variable

def train(epoches, train_data, model, device, criterion, optimizer, pth_name):
    """Train ``model`` for ``epoches`` epochs, saving a checkpoint after each.

    Args:
        epoches: Number of passes over ``train_data``.
        train_data: Iterable of ``(image_data, image_label)`` batches that
            also supports ``len()``.
        model: Model to train.
        device: Device every batch is moved to.
        criterion: Loss function, called as ``criterion(out, image_label)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        pth_name: Prefix of the checkpoint files; the weights are written to
            ``./params/{pth_name}_{epoch}.pth`` with a 1-based epoch number.

    Returns:
        A ``(losses, accuracies)`` tuple of lists holding, per epoch, the
        mean batch loss and the mean batch accuracy.
    """
    import os  # local import so the function does not rely on a new file-level import

    losses_men = []
    acces_men = []

    # Create the checkpoint directory up front; otherwise a missing folder
    # would only surface after a whole epoch has already been trained.
    os.makedirs('./params', exist_ok=True)

    # Make sure training-only layers behave in training mode.
    model.train()

    start = time.time()
    for epoche in range(epoches):
        train_loss = 0
        train_acc = 0
        time1 = time.time()
        print()
        print(f'开始，第 {epoche + 1} / {epoches} 个Epoche中：')
        for image_data, image_label in tqdm.tqdm(train_data):
            # Tensors are used directly; the legacy Variable wrapper is not needed.
            image_data = image_data.to(device)
            image_label = image_label.to(device)

            # Forward pass.
            out = model(image_data)
            loss = criterion(out, image_label)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate the batch loss.
            train_loss += loss.item()

            # Accumulate the batch accuracy (correct predictions / batch size).
            _, pred_label = out.max(1)
            num_correct = (pred_label == image_label).sum().item()
            train_acc += num_correct / out.shape[0]

        losses_men.append(train_loss / len(train_data))
        acces_men.append(train_acc / len(train_data))
        time2 = time.time()

        # Name the checkpoint after the current epoch so earlier checkpoints
        # are not overwritten; the last file still ends in ``_{epoches}.pth``.
        torch.save(model.state_dict(), f'./params/{pth_name}_{epoche + 1}.pth')

        print('Epoch_time : ', time2 - time1)
        print()
        print('train_loss : ', losses_men)
        print()
        print('train_acc : ', acces_men)

    # Split the elapsed time into hours / minutes / seconds; the minutes
    # field must be the remainder within the hour, not the total minutes.
    elapsed = int(time.time() - start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'All time : {hours} H {minutes} m {seconds} s  ')

    return losses_men, acces_men

その後、完全なトレーニングプロセスに進みます

訓練

import torch
import cv2
import numpy as np
import torch.nn as nn

import four_net
import uilt

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch import optim

# Training hyper-parameters.
epoches = 1
batch_size = 3

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def data_tf(x):
    """Convert an image into a normalised 224x224 channel-first tensor.

    Args:
        x: Image-like input accepted by ``np.array`` (presumably the
            HxWx3 image supplied by the dataset -- confirm against the
            caller).

    Returns:
        A ``torch.Tensor`` built from the resized array with its axes
        permuted to ``(2, 0, 1)``.
    """
    arr = np.array(x, dtype='float32')
    arr = arr / 255              # scale the raw values by 1/255
    arr = (arr - 0.5) / 0.5      # normalisation: shift by 0.5, divide by 0.5
    arr = cv2.resize(arr, (224, 224))
    # Channel axis goes first; the original note says PyTorch requires
    # this input layout.
    return torch.from_numpy(arr.transpose(2, 0, 1))

# Load the CIFAR-10 train and test splits from ./data.
# download=False, so the dataset files must already be present there.
train_set = CIFAR10('./data', train=True, transform=data_tf, download=False)
test_set = CIFAR10('./data', train=False,  transform=data_tf, download=False)

train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# GoogLeNet with 3 input channels and 10 output classes.
model = four_net.GoogLeNet(3,10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-1)

print()
print('目前使用的是： ', device)
# The closing parenthesis was missing here, which made the script a SyntaxError.
uilt.train(epoches, train_data, model, device, criterion, optimizer, pth_name='googlenet')

要約する

************************************************************************************************************************ **********************************************************************************************

train_acc : [0.4877117966751918, 0.7165121483375959, 0.8027293797953964, 0.8466272378516624, 0.8776974104859335, 0.9011149296675192, 0.9178388746803069, 0.9341432225063938, 0.9479699488491049, 0.9559223145780051, 0.9642543158567775, 0.9674312659846548, 0.9730458759590793, 0.9789402173913043, 0.9809782608695652, 0.984934462915601, 0.9881713554987213, 0.9888507033248082, 0.9883112212276215, 0.9879515664961637, 0.9934063299232737, 0.9944453324808185, 0.9930466751918159, 0.9949048913043478, 0.9937859654731458, 0.9963834718670077, 0.9977221867007673, 0.998321611253197, 0.9973025895140665, 0.9927070012787724]

精度曲線をプロットします。

import matplotlib.pyplot as plt
# Per-epoch training accuracy recorded during the run.
acces_men = [0.4877117966751918, 0.7165121483375959, 0.8027293797953964,
             0.8466272378516624, 0.8776974104859335, 0.9011149296675192,
             0.9178388746803069, 0.9341432225063938, 0.9479699488491049,
             0.9559223145780051, 0.9642543158567775, 0.9674312659846548,
             0.9730458759590793, 0.9789402173913043, 0.9809782608695652,
             0.984934462915601, 0.9881713554987213, 0.9888507033248082,
             0.9883112212276215, 0.9879515664961637, 0.9934063299232737,
             0.9944453324808185, 0.9930466751918159, 0.9949048913043478,
             0.9937859654731458, 0.9963834718670077, 0.9977221867007673,
             0.998321611253197, 0.9973025895140665, 0.9927070012787724]

# Plot the training-accuracy curve.
# range() supplies the epoch indices, so this snippet does not need numpy,
# which it never imports.
plt.plot(range(len(acces_men)), acces_men, label='train acc')
plt.legend()  # without a legend the label above is never displayed
plt.show()

GoogLeNet は VGG よりも速く収束します。最初のエポックだけで精度は 0.3 に達しました。一方、BatchNorm1d を追加した VGG が 0.3 を超えたのは 3 エポック目以降です。BatchNorm1d のない VGG はさらに性能が低く、20 エポック学習しても精度は 0.1 にすら届きません。

GoogLeNet は 20 エポック後のトレーニング精度が約 99% に達します。

最後に、

気に入っていただけましたら、高評価をお願いします☺

単純な GoogLeNet ネットワークの記録、pytorch+GoogLeNet+CIFAR10

データセット

ネット

uilt

訓練

要約する

おすすめ