Complete model training exercise(1)
Complete network training steps: (Relevant data can be visualized through tensorboard)—CIFAR-10
-
Prepare data sets (training, testing)
-
Use DataLoader to load data sets
-
Build a network model and instantiate network objects
-
Instantiate the loss function object
-
Instantiate the optimizer object
-
Set some parameters for training
-
Start training: mymodule.train()
- Get data from the trained dataloader
- Run the batch of images through the model to get the outputs (the targets come from the same batch).
- Pass the output and targets into the loss function loss to calculate the loss.
- Optimize the parameters of the network model through the optimizer object (zero the gradients left over from the previous step with zero_grad(), back-propagate the loss with backward(), then update the parameters with step())
-
Start testing: mymodule.eval()
- Get data from the test dataloader
- Run the batch of images through the model (evaluation only, no parameter updates) to get the outputs; the targets come from the same batch.
- Pass the output and targets into the loss function loss to calculate the loss.
- Use the accuracy rate on the test data set to evaluate whether the model is well trained.
-
Save model
1. Complete steps of simple model training
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
# 下载训练 的数据集
from torch.utils.tensorboard import SummaryWriter
# Download the CIFAR-10 training split; samples are converted to tensors.
train_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Download the CIFAR-10 test split with the same transform.
test_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Report how many samples each split contains.
train_data_size = len(train_data)
test_data_size = len(test_data)
print(f"训练集的长度:{train_data_size}")
print(f"测试集的长度:{test_data_size}")

# Wrap both splits in DataLoaders that yield batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
class Module(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Three (5x5 convolution, 2x2 max-pool) stages are followed by a flatten
    and two linear layers. The first linear layer expects 64*4*4 features,
    which is what a 3x32x32 input produces after the three pooling stages.
    """

    def __init__(self):
        super().__init__()
        layers = [
            # Convolution: 3 -> 32 channels; padding 2 keeps the spatial size
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            # Max pooling halves height and width
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        # Attribute name and layer order are kept so state_dict keys
        # ("model.0.weight", ...) stay the same.
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Return the 10 class scores for a batch of images."""
        return self.model(input)
# Create the network model
mymodule = Module()
# Loss function (cross entropy)
loss_fn = nn.CrossEntropyLoss()
# Optimizer
learning_rate = 0.01 # learning rate handed to the optimizer
optimizer = torch.optim.SGD(mymodule.parameters(),lr = learning_rate) # SGD over all model parameters; lr is the learning rate
# Bookkeeping values for the training run
# Number of training steps (batches) performed so far
total_train_step = 0
# Number of test passes performed so far
total_test_step = 0
# Number of training epochs
epoch = 10
# TensorBoard writer; constructed with "p10" as its log directory
writer = SummaryWriter("p10")
if __name__ == '__main__':
    # Local import keeps this entry point self-contained.
    import os

    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first epoch finishes.
    os.makedirs("./models", exist_ok=True)

    # Optional shape check to verify the network is wired correctly:
    # input = torch.ones((64,3,32,32))
    # output = mymodule(input)
    # print(output.shape)
    for i in range(epoch):
        print("-------第 {} 轮训练开始------".format(i+1))

        # ---- Training phase ----
        mymodule.train()
        for data in train_dataloader:
            imgs, targets = data
            output = mymodule(imgs)
            loss = loss_fn(output, targets)

            # Clear old gradients, back-propagate, then update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            # Log only every 100th step to keep the output readable.
            if total_train_step % 100 == 0:
                print("训练次数: {},loss: {}".format(total_train_step, loss.item()))
                writer.add_scalar("train_loss", loss.item(), total_train_step)

        # ---- Evaluation phase ----
        # After every epoch the current model is run once over the test set;
        # test accuracy is the measure of how well training is going.
        mymodule.eval()
        total_test_loss = 0
        total_accuracy = 0  # number of correctly classified test samples
        with torch.no_grad():  # no gradients needed: nothing is optimized here
            for data in test_dataloader:
                imgs, targets = data
                outputs = mymodule(imgs)
                loss = loss_fn(outputs, targets)
                total_test_loss = total_test_loss + loss.item()
                # .item() turns the 0-dim tensor into a Python int so the
                # running total (and the printed ratio) is a plain number
                # rather than a tensor.
                accuracy = (outputs.argmax(1) == targets).sum().item()
                total_accuracy = total_accuracy + accuracy

        test_accuracy = total_accuracy / test_data_size
        print("整体数据集上测试的loss: {}".format(total_test_loss))
        print("整体测试集上的正确率: {}".format(test_accuracy))
        writer.add_scalar("test_loss", total_test_loss, total_test_step)
        writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
        total_test_step = total_test_step + 1

        # Save the model produced by this epoch.
        torch.save(mymodule, "./models/mymodule_{}.pth".format(i))
        # torch.save(mymodule.state_dict(),"./models/mymodule_{}.pth".format(i))
        print("模型已保存")

    writer.close()
2. GPU acceleration
1.Method 1
Find the following three things and call .cuda() on each of them:
# 网络模型
# 数据(输入、标注)
# 损失函数
.cuda()
# Network model
mymodule = Module()
if torch.cuda.is_available():
    # torch.cuda is a module and cannot be called; an nn.Module is moved
    # to the GPU with its own .cuda() method.
    mymodule = mymodule.cuda()

# Loss function
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Training data (excerpt: only the device handling is shown)
mymodule.train()
for data in train_dataloader:
    imgs, targets = data
    # Move the batch to the GPU when one is available
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()
    output = mymodule(imgs)
    loss = loss_fn(output, targets)  # compute the loss

# Test data (excerpt: only the device handling is shown)
mymodule.eval()
total_test_loss = 0
total_accuracy = 0  # number of correct predictions overall
with torch.no_grad():  # no gradients are tracked, so nothing is tuned here
    for data in test_dataloader:
        imgs, targets = data
        # Move the batch to the GPU when one is available
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()
2. Method 2
1. Define the training device
# Alternative device choices: each assignment overrides the previous one,
# so keep only the line that matches the hardware you want to use.
device = torch.device("cpu") # train on the CPU
device = torch.device("cuda") # train on the GPU
device = torch.device("cuda:0") # select the first GPU in the machine
device = torch.device("cuda:1") # select the second GPU in the machine
Find the following three things and call .to(device) on each of them:
# 网络模型
# 数据(输入、标注)
# 损失函数
.to(device)
# Choose the device everything will run on
device = torch.device("cpu")
# device = torch.device("cuda")

# Build the network model and move it to the chosen device
mymodule = Module().to(device)

# Build the loss function and move it to the chosen device
loss_fn = nn.CrossEntropyLoss().to(device)

# Training data
mymodule.train()
for data in train_dataloader:
    # Unpack the batch and move both tensors to the chosen device
    imgs, targets = (tensor.to(device) for tensor in data)

# Test data
mymodule.eval()
total_test_loss = 0
total_accuracy = 0  # number of correct predictions overall
with torch.no_grad():  # no gradients are tracked, so nothing is tuned here
    for data in test_dataloader:
        # Unpack the batch and move both tensors to the chosen device
        imgs, targets = (tensor.to(device) for tensor in data)
3. Test
Use the trained model to provide input and test it
To train on a system with GPU, the code is as follows:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
# Pick the training device: use the GPU when one is available, otherwise
# fall back to the CPU so the script still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Download the CIFAR-10 training split; samples are converted to tensors.
# from torch.utils.tensorboard import SummaryWriter
train_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Download the CIFAR-10 test split with the same transform.
test_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Report how many samples each split contains.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练集的长度:{}".format(train_data_size))
print("测试集的长度:{}".format(test_data_size))

# Wrap both splits in DataLoaders that yield batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
class Module(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Three (5x5 convolution, 2x2 max-pool) stages are followed by a flatten
    and two linear layers. The first linear layer expects 64*4*4 features,
    which is what a 3x32x32 input produces after the three pooling stages.
    """

    def __init__(self):
        super().__init__()
        layers = [
            # Convolution: 3 -> 32 channels; padding 2 keeps the spatial size
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            # Max pooling halves height and width
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        # Attribute name and layer order are kept so state_dict keys
        # ("model.0.weight", ...) stay the same.
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Return the 10 class scores for a batch of images."""
        return self.model(input)
# Create the network model and move it to the selected device
mymodule = Module()
mymodule = mymodule.to(device)
# Loss function, also moved to the selected device
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
# Optimizer
learning_rate = 0.01 # learning rate handed to the optimizer
optimizer = torch.optim.SGD(mymodule.parameters(),lr = learning_rate) # SGD over all model parameters; lr is the learning rate
# Bookkeeping values for the training run
# Number of training steps (batches) performed so far
total_train_step = 0
# Number of test passes performed so far
total_test_step = 0
# Number of training epochs
epoch = 30
# TensorBoard logging is disabled in this script
# writer = SummaryWriter("p10")
if __name__ == '__main__':
    # Local import keeps this entry point self-contained.
    import os

    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first epoch finishes.
    os.makedirs("./models", exist_ok=True)

    # Optional shape check to verify the network is wired correctly:
    # input = torch.ones((64,3,32,32))
    # output = mymodule(input)
    # print(output.shape)
    for i in range(epoch):
        print("-------第 {} 轮训练开始------".format(i+1))

        # ---- Training phase ----
        mymodule.train()
        for data in train_dataloader:
            imgs, targets = data
            # Move the batch to the same device as the model
            imgs = imgs.to(device)
            targets = targets.to(device)
            output = mymodule(imgs)
            loss = loss_fn(output, targets)

            # Clear old gradients, back-propagate, then update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            # Log only every 100th step to keep the output readable.
            if total_train_step % 100 == 0:
                print("训练次数: {},loss: {}".format(total_train_step, loss.item()))
                # writer.add_scalar("train_loss",loss.item(),total_train_step)

        # ---- Evaluation phase ----
        # After every epoch the current model is run once over the test set;
        # test accuracy is the measure of how well training is going.
        mymodule.eval()
        total_test_loss = 0
        total_accuracy = 0  # number of correctly classified test samples
        with torch.no_grad():  # no gradients needed: nothing is optimized here
            for data in test_dataloader:
                imgs, targets = data
                # Move the batch to the same device as the model
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = mymodule(imgs)
                loss = loss_fn(outputs, targets)
                total_test_loss = total_test_loss + loss.item()
                # .item() copies the count back to the host as a Python int,
                # so the running total (and the printed ratio) is a plain
                # number rather than a device tensor.
                accuracy = (outputs.argmax(1) == targets).sum().item()
                total_accuracy = total_accuracy + accuracy

        print("整体数据集上测试的loss: {}".format(total_test_loss))
        print("整体测试集上的正确率: {}".format(total_accuracy / test_data_size))
        # writer.add_scalar("test_loss",total_test_loss,total_test_step)
        # writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
        total_test_step = total_test_step + 1

        # Save the model produced by this epoch.
        torch.save(mymodule, r".//models//mymodule_{}.pth".format(i))
        # torch.save(mymodule.state_dict(),"./models/mymodule_{}.pth".format(i))
        print("模型已保存")

    # writer.close()
Find pictures:
Test code:
from os import O_TEMPORARY
import torchvision
from PIL import Image
import torch
from torch import nn
"""
(改代码中用到的模型是通过GPU进行训练的)网络参数是GPU类型,那解决方法就是将输入类型转变为GPU类型
"""
# Pick the inference device: use the first GPU when CUDA is available,
# otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Resize to the 32x32 input the network expects, then convert to a tensor.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor(),
])

# Load the test picture
image_path = ".//imgs//1_dog.png"
img = Image.open(image_path)
print(img)  # e.g. <PIL.PngImagePlugin.PngImageFile image mode=RGB size=465x567 at 0x17EF217AAC0>
# PNG files can carry an alpha channel or a palette; force three RGB
# channels so the tensor can later be reshaped to (1, 3, 32, 32).
img = img.convert("RGB")
img = transform(img)
print(img.shape)  # torch.Size([3, 32, 32]) after the resize

# Test set; used further down only for its class-name list
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, download=True)
class Module(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Three (5x5 convolution, 2x2 max-pool) stages are followed by a flatten
    and two linear layers. The first linear layer expects 64*4*4 features,
    which is what a 3x32x32 input produces after the three pooling stages.
    """

    def __init__(self):
        super().__init__()
        layers = [
            # Convolution: 3 -> 32 channels; padding 2 keeps the spatial size
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            # Max pooling halves height and width
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        # Attribute name and layer order are kept so state_dict keys
        # ("model.0.weight", ...) stay the same.
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Return the 10 class scores for a batch of images."""
        return self.model(input)
# Load the trained model. The whole model object was saved, so the network
# class has to be defined in this file before loading.
# map_location places the stored tensors on `device`, so a checkpoint saved
# on one device can be loaded when the current device is a different one.
# NOTE(security): torch.load unpickles the file; only load checkpoints you
# trust. On PyTorch >= 2.6 the default is weights_only=True, so loading a
# fully pickled model like this one additionally needs weights_only=False.
mymodule_29 = torch.load(".//models//mymodule_29.pth", map_location=device)
print(mymodule_29)

# Class names of the test set; a predicted target is an index into this list
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Run the prediction
# Add the batch dimension the network expects: (3,32,32) -> (1,3,32,32)
img = torch.reshape(img, (1, 3, 32, 32))
# Move the input to the same device as the model parameters
img = img.to(device)
mymodule_29.eval()
with torch.no_grad():
    output = mymodule_29(img)
print(output)
# Example output:
# tensor([[-3.2843, -9.2020, 2.2531, 2.7957, 4.7990, 7.2086, -1.6534, 0.9546, 0.8856, -6.4821]], device='cuda:0')
# Index 5 has the largest score (7.2086), so the predicted target is 5.
target = output.argmax(1).item()  # 5 for the example output above
print(test_set.classes[target])  # dog for the example output above
operation result:
<PIL.PngImagePlugin.PngImageFile image mode=RGB size=465x567 at 0x27B3547AAC0>
torch.Size([3, 32, 32])
Files already downloaded and verified
Module(
(model): Sequential(
(0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Flatten(start_dim=1, end_dim=-1)
(7): Linear(in_features=1024, out_features=64, bias=True)
(8): Linear(in_features=64, out_features=10, bias=True)
)
)
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
tensor([[-3.2843, -9.2020, 2.2531, 2.7957, 4.7990, 7.2086, -1.6534, 0.9546,
0.8856, -6.4821]], device='cuda:0')
dog