Complete model training exercise (1)

Complete network training steps on CIFAR-10 (the relevant data can be visualized with TensorBoard):

  • Prepare data sets (training, testing)

  • Use DataLoader to load data sets

  • Build the network model and instantiate a network object

  • Instantiate the loss function object

  • Instantiate the optimizer object

  • Set some parameters for training

  • Start training: mymodule.train()

    • Get data from the training dataloader
    • Pass the images into the model to get the outputs.
    • Pass the outputs and targets into the loss function to compute the loss.
    • Optimize the network parameters through the optimizer object (zero the previous gradients, back-propagate the loss, then update the parameters with step()).
  • Start testing: mymodule.eval()

    • Get data from the test dataloader
    • Pass the images into the model to get the outputs.
    • Pass the outputs and targets into the loss function to compute the loss.
    • Use the accuracy on the test dataset to evaluate whether the model is trained well (see the short illustration after this list).
  • Save model
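
As a short illustration of the accuracy computation used in the test step, the per-batch count of correct predictions can be obtained with argmax (the tensors below are made-up values, for illustration only):

import torch

outputs = torch.tensor([[0.1, 0.9],
                        [0.8, 0.2]])            # logits for a batch of 2 samples, 2 classes
targets = torch.tensor([1, 0])                  # ground-truth labels
correct = (outputs.argmax(1) == targets).sum()  # tensor(2): both predictions are right
print(correct.item() / len(targets))            # 1.0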

1. Complete steps of simple model training

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

# Download the training dataset
from torch.utils.tensorboard import SummaryWriter

train_data = torchvision.datasets.CIFAR10(root="./dataset",train=True,transform=torchvision.transforms.ToTensor(),download=True)
# Download the test dataset
test_data = torchvision.datasets.CIFAR10(root="./dataset",train=False,transform=torchvision.transforms.ToTensor(),download=True)

# Print the dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练集的长度:{}".format(train_data_size))
print("测试集的长度:{}".format(test_data_size))


# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data,batch_size=64)
test_dataloader =  DataLoader(test_data,batch_size=64)

# Build the neural network
class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3,32,5,1,2),  # convolution
            nn.MaxPool2d(2),        # max pooling
            nn.Conv2d(32,32,5,1,2),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,5,1,2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64*4*4,64),
            nn.Linear(64,10)
        )

    def forward(self,input):
        input = self.model(input)
        return input

# Create the network model
mymodule = Module()

# Loss function (cross-entropy)
loss_fn = nn.CrossEntropyLoss()

# Optimizer
learning_rate = 0.01  # learning rate
optimizer = torch.optim.SGD(mymodule.parameters(),lr = learning_rate) # optimize the model parameters; lr is the learning rate

# Set some parameters for training the network
# Number of training steps so far
total_train_step = 0
# Number of test rounds so far
total_test_step = 0
# Number of training epochs
epoch = 10

# Add TensorBoard
writer = SummaryWriter("p10")

if __name__ == '__main__':
    # input = torch.ones((64,3,32,32))
    # output = mymodule(input)
    # print(output.shape)   # printing the output shape is a quick check that the network is built correctly

    for i in range(epoch):
        print("-------第 {} 轮训练开始------".format(i+1))

        # Training step starts
        mymodule.train()
        for data in train_dataloader:
            imgs,targets = data
            output = mymodule(imgs)
            loss = loss_fn(output,targets)  # compute the loss

            # The optimizer updates the model
            optimizer.zero_grad()  # zero the gradients before optimizing
            loss.backward()        # back-propagate to compute the gradient of every node
            optimizer.step()       # update every model parameter

            total_train_step += 1
            if total_train_step % 100 == 0:
                print("训练次数: {},loss: {}".format(total_train_step,loss.item()))
                writer.add_scalar("train_loss",loss.item(),total_train_step)

        # During training, how do we know whether the model meets our requirements?
        # After each training epoch, run the model once over the test dataset and use the accuracy on it to evaluate whether the model is trained well.
        # Testing is done on the existing model as it is; no tuning is needed.
        # Test step starts
        mymodule.eval()
        total_test_loss = 0
        total_accuracy = 0  # total number of correct predictions
        with torch.no_grad():  # no gradients in this block, so the model is not tuned here
            for data in test_dataloader:
                imgs,targets = data
                outputs = mymodule(imgs)
                loss = loss_fn(outputs,targets)
                total_test_loss = total_test_loss + loss.item()
                accuracy = (outputs.argmax(1) == targets).sum()
                total_accuracy = total_accuracy + accuracy

        print("整体数据集上测试的loss: {}".format(total_test_loss))
        print("整体测试集上的正确率: {}".format(total_accuracy/test_data_size))
        writer.add_scalar("test_loss",total_test_loss,total_test_step)
        writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
        total_test_step = total_test_step + 1

        # Save the model after each epoch
        torch.save(mymodule,"./models/mymodule_{}.pth".format(i))
        # torch.save(mymodule.state_dict(),"./models/mymodule_{}.pth".format(i))
        print("模型已保存")

    writer.close()
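
After training, the logged curves can be viewed by launching TensorBoard on the log directory used above, e.g. tensorboard --logdir=p10. The saved checkpoints can later be reloaded; a minimal sketch, assuming the paths from the save calls above (the second variant corresponds to the commented-out state_dict save):

import torch

# Variant 1: the whole model object was saved with torch.save(mymodule, ...)
# (the Module class definition must be available when loading)
loaded_model = torch.load("./models/mymodule_9.pth")

# Variant 2: only the parameters were saved with torch.save(mymodule.state_dict(), ...)
loaded_model = Module()
loaded_model.load_state_dict(torch.load("./models/mymodule_9.pth"))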

2. GPU acceleration

1. Method 1

Find the network model, the data (inputs and labels), and the loss function, and call .cuda() on each of them:
# Network model
mymodule = Module()
if torch.cuda.is_available():
    mymodule = mymodule.cuda()

# Loss function
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Training data
mymodule.train()
for data in train_dataloader:
    imgs, targets = data
    # Move the data to the GPU
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()
    output = mymodule(imgs)
    loss = loss_fn(output, targets)  # compute the loss
            
# Test data
mymodule.eval()
total_test_loss = 0
total_accuracy = 0  # total number of correct predictions
with torch.no_grad():  # no gradients in this block, so the model is not tuned here
    for data in test_dataloader:
        imgs, targets = data
        # Move the data to the GPU
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()

2. Method 2

1. Define the training device

device = torch.device("cpu")     # train on the CPU

device = torch.device("cuda")    # train on the GPU
device = torch.device("cuda:0")  # select the first GPU in the machine
device = torch.device("cuda:1")  # select the second GPU in the machine


Find the network model, the data (inputs and labels), and the loss function, and call .to(device) on each of them:
# Define the training device
device = torch.device("cpu")
# device = torch.device("cuda")

# Create the network model
mymodule = Module()
mymodule = mymodule.to(device)

# Loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Training data
mymodule.train()
for data in train_dataloader:
    imgs, targets = data
    # Move the data to the chosen device
    imgs = imgs.to(device)
    targets = targets.to(device)
            
# Test data
mymodule.eval()
total_test_loss = 0
total_accuracy = 0  # total number of correct predictions
with torch.no_grad():  # no gradients in this block, so the model is not tuned here
    for data in test_dataloader:
        imgs, targets = data
        # Move the data to the chosen device
        imgs = imgs.to(device)
        targets = targets.to(device)
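
A quick sanity check (an addition, not shown in the original code) that the model really lives on the intended device is to inspect one of its parameters:

print(next(mymodule.parameters()).device)  # e.g. cuda:0, or cpu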

3. Test

Provide an input image to the trained model and test it.

The full training code for a system with a GPU is as follows:

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

# Define the training device
# device = torch.device("cpu")
device = torch.device("cuda")

# Download the training dataset
# from torch.utils.tensorboard import SummaryWriter

train_data = torchvision.datasets.CIFAR10(root="./dataset",train=True,transform=torchvision.transforms.ToTensor(),download=True)
# Download the test dataset
test_data = torchvision.datasets.CIFAR10(root="./dataset",train=False,transform=torchvision.transforms.ToTensor(),download=True)

# Print the dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练集的长度:{}".format(train_data_size))
print("测试集的长度:{}".format(test_data_size))


# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data,batch_size=64)
test_dataloader =  DataLoader(test_data,batch_size=64)

# Build the neural network
class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3,32,5,1,2),  # convolution
            nn.MaxPool2d(2),        # max pooling
            nn.Conv2d(32,32,5,1,2),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,5,1,2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64*4*4,64),
            nn.Linear(64,10)
        )

    def forward(self,input):
        input = self.model(input)
        return input

# Create the network model
mymodule = Module()
mymodule = mymodule.to(device)

# Loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Optimizer
learning_rate = 0.01  # learning rate
optimizer = torch.optim.SGD(mymodule.parameters(),lr = learning_rate) # optimize the model parameters; lr is the learning rate

# Set some parameters for training the network
# Number of training steps so far
total_train_step = 0
# Number of test rounds so far
total_test_step = 0
# Number of training epochs
epoch = 30

# Add TensorBoard
# writer = SummaryWriter("p10")

if __name__ == '__main__':
    # input = torch.ones((64,3,32,32))
    # output = mymodule(input)
    # print(output.shape)   # printing the output shape is a quick check that the network is built correctly

    for i in range(epoch):
        print("-------第 {} 轮训练开始------".format(i+1))

        # Training step starts
        mymodule.train()
        for data in train_dataloader:
            imgs,targets = data
            # Move the data to the chosen device
            imgs = imgs.to(device)
            targets = targets.to(device)
            output = mymodule(imgs)
            loss = loss_fn(output,targets)  # compute the loss

            # The optimizer updates the model
            optimizer.zero_grad()  # zero the gradients before optimizing
            loss.backward()
            optimizer.step()

            total_train_step += 1
            if total_train_step % 100 == 0:
                print("训练次数: {},loss: {}".format(total_train_step,loss.item()))
                # writer.add_scalar("train_loss",loss.item(),total_train_step)

        # During training, how do we know whether the model meets our requirements?
        # After each training epoch, run the model once over the test dataset and use the accuracy on it to evaluate whether the model is trained well.
        # Testing is done on the existing model as it is; no tuning is needed.
        # Test step starts
        mymodule.eval()
        total_test_loss = 0
        total_accuracy = 0  # total number of correct predictions
        with torch.no_grad():  # no gradients in this block, so the model is not tuned here
            for data in test_dataloader:
                imgs,targets = data
                # Move the data to the chosen device
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = mymodule(imgs)
                loss = loss_fn(outputs,targets)
                total_test_loss = total_test_loss + loss.item()
                accuracy = (outputs.argmax(1) == targets).sum()
                total_accuracy = total_accuracy + accuracy

        print("整体数据集上测试的loss: {}".format(total_test_loss))
        print("整体测试集上的正确率: {}".format(total_accuracy/test_data_size))
        # writer.add_scalar("test_loss",total_test_loss,total_test_step)
        # writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
        total_test_step = total_test_step + 1

        # Save the model after each epoch
        torch.save(mymodule,r".//models//mymodule_{}.pth".format(i))
        # torch.save(mymodule.state_dict(),"./models/mymodule_{}.pth".format(i))
        print("模型已保存")

    # writer.close()

Find a test picture to classify (the example below uses a dog image saved as imgs/1_dog.png):

Test code:

import torchvision
from PIL import Image
import torch
from torch import nn

"""
    (改代码中用到的模型是通过GPU进行训练的)网络参数是GPU类型,那解决方法就是将输入类型转变为GPU类型
"""

# Define the training device
device = torch.device('cuda:0')

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32,32)),
                                            torchvision.transforms.ToTensor()])

# Load the test image
image_path = ".//imgs//1_dog.png"
img = Image.open(image_path)
print(img)   # <PIL.PngImagePlugin.PngImageFile image mode=RGB size=465x567 at 0x17EF217AAC0>



img = transform(img)
print(img.shape)  # the network expects 32x32 inputs, so the image is resized accordingly  torch.Size([3, 32, 32])

# Test set
test_set = torchvision.datasets.CIFAR10(root="./dataset",train=False,download=True)

# Build the neural network
class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3,32,5,1,2),  # convolution
            nn.MaxPool2d(2),        # max pooling
            nn.Conv2d(32,32,5,1,2),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,5,1,2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64*4*4,64),
            nn.Linear(64,10)
        )

    def forward(self,input):
        input = self.model(input)
        return input

# Load the trained model (before loading, the definition of the network class must be present in this source file)
mymodule_29 = torch.load(".//models//mymodule_29.pth")
print(mymodule_29)

# Print the classes of the test set (a target value is the index of its class in this list)
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


# Start testing
img = torch.reshape(img,(1,3,32,32))
# Convert the input to a GPU tensor
img = img.to(device)
mymodule_29.eval()
with torch.no_grad():
    output = mymodule_29(img)
print(output)

"""
输出:
    tensor([[-3.2843, -9.2020,  2.2531,  2.7957,  4.7990,  7.2086, -1.6534,  0.9546, 0.8856, -6.4821]], device='cuda:0')
    可知 target = 5  概率最大 7.2086
"""

target = output.argmax(1).item()  # 5
print(test_set.classes[target])   # prints "dog"

Run result:

<PIL.PngImagePlugin.PngImageFile image mode=RGB size=465x567 at 0x27B3547AAC0>
torch.Size([3, 32, 32])
Files already downloaded and verified
Module(
  (model): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=1024, out_features=64, bias=True)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
tensor([[-3.2843, -9.2020,  2.2531,  2.7957,  4.7990,  7.2086, -1.6534,  0.9546,
          0.8856, -6.4821]], device='cuda:0')
dog
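
If the GPU-trained checkpoint has to be used on a machine without a GPU, an alternative to moving the input onto the GPU is to remap the checkpoint onto the CPU at load time with torch.load's map_location argument; a minimal sketch, assuming the same checkpoint path and Module definition as above:

mymodule_29 = torch.load(".//models//mymodule_29.pth", map_location=torch.device("cpu"))
mymodule_29.eval()
with torch.no_grad():
    output = mymodule_29(img.to(torch.device("cpu")))  # the input must be on the same device as the model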

Origin blog.csdn.net/A2000613/article/details/128516253