13.1 Using GPU training (method 1)
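① Training on the GPU involves three things: the network model, the data (inputs and labels), and the loss function. Each of them is moved to the GPU by calling `.cuda()` on it, guarded by `torch.cuda.is_available()` so the script still runs on a machine without a GPU.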
import torchvision
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# `from model import *` would pull everything defined in model.py into this file; here the model is simply defined inline instead.
class Tudui(nn.Module):
    """Small convolutional classifier mapping a 3-channel image to 10 class scores.

    The network is three (5x5 convolution -> 2x2 max-pool) stages followed by
    a flatten and two linear layers. No activation functions are placed
    between the layers. The first linear layer expects 64*4*4 features, which
    corresponds to a 32x32 input image (three 2x poolings: 32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            # in_channels=3, out_channels=32, kernel 5x5, stride 1, padding 2
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),  # (N, 64, 4, 4) -> (N, 64*4*4) for a 32x32 input
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw (unnormalised) class scores for the batch ``x``."""
        return self.model1(x)
import os

# Prepare the CIFAR-10 train/test splits as tensors (downloaded into ./dataset if absent)
train_data = torchvision.datasets.CIFAR10(
    "./dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True
)
test_data = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)

# Number of samples in each split
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度:{}".format(train_data_size))
print("测试数据集的长度:{}".format(test_data_size))

# Batch both splits with DataLoader (64 samples per batch)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network and move it to the GPU when CUDA is available
tudui = Tudui()
if torch.cuda.is_available():
    tudui = tudui.cuda()

# Loss function (cross entropy), also moved to the GPU when available
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Optimizer: plain SGD with learning rate 0.01 (i.e. 1e-2)
learning = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), learning)

# Bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer (event files go to ./logs)
writer = SummaryWriter("logs")

# torch.save does not create missing directories, so make sure ./model exists
os.makedirs("./model", exist_ok=True)

for i in range(epoch):
    print("-----第 {} 轮训练开始-----".format(i + 1))

    # ---- training ----
    # Training mode: only matters for layers such as Dropout/BatchNorm,
    # which behave differently in training and evaluation.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        if torch.cuda.is_available():
            # Move the batch to the GPU
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)  # gap between predictions and targets

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()  # back-propagate to compute gradients
        optimizer.step()  # update the parameters from the gradients

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            # loss.item() extracts the Python float from the 0-dim loss tensor
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation on the test set after every epoch ----
    # Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics.
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)  # loss of this batch only
            total_test_loss = total_test_loss + loss.item()  # sum of per-batch losses
            # Count correct predictions as a Python int rather than a device tensor
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy = total_accuracy + accuracy

    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1

    # Save the whole model object after every epoch.
    # Alternative: save only the weights with torch.save(tudui.state_dict(), path).
    torch.save(tudui, "./model/tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
result:
Files already downloaded and verified Files already downloaded and verified The length of the training data set: 50000 The length of the test data set: 10000 -----The first round of training starts----- Number of trainings: 100, Loss: 2.289992094039917Number of trainings: 200, Loss: 2.2927844524383545 Training times: 300, Loss: 2.2730984687805176 Training times: 400, Loss: 2.2006278038024902 Training times: 500, Loss: 2.1675028800964355 Training times: 600, Lo ss: 2.116072416305542 Training times: 700, Loss: 2.04477596282959 on the overall test set Loss: 317.0560564994812 Accuracy rate on the overall test set: 0.28700000047683716 Model saved -----The second round of training starts----- Number of trainings: 800, Loss: 1.893830418586731Numbers of training : 900, Loss: 1.8772207498550415Numbers of training: 10 00 , Loss: 1.9800275564193726 Training times: 1200, Loss: 1.7352533340454102 Training times: 1100, Loss: 2.007078170776367 Number of training: 1300, Loss: 1.6947956085205078 Number of training: 1400, Loss: 1.756855845451355 Number of training: 1500, Loss: 1.8372352123260498 Loss on the overall test set: 299.94190883636475 On the overall test set Correct rate: 0.31619998812675476 Model has been saved -----No. 
3 rounds of training start----- Number of training: 1600, Loss: 1.7673416137695312Number of training: 1700, Loss: 1.6654351949691772Number of training: 1800, Loss: 1.9246405363082886Number of training : 1900, Loss: 1.7132933139 801025 Number of trainings: 2000, Loss: 1.93990159034729 training Number of times: 2100, Loss: 1.4903961420059204 Number of training times: 2200, Loss: 1.4754142761230469 Number of training times: 2300, Loss: 1.7652970552444458 Loss on the overall test set: 272.9526561498642 Correct on the overall test set Rate: 0.37139999866485596 Model saved Training times: 2500, Loss: 1.3386430740356445 -----The 4th round of training begins----- Training times: 2400, Loss: 1.7254819869995117 Training times: 2600, Loss: 1.5852587223052979 Training times: 2700, Loss: 1.648303508758545 Training times: 2800, Loss: 1.4971883296966553 Training times :2900, Loss: 1.5891362428665161 Training times: 3000, Loss: 1.3380193710327148 Training times :3100, Loss: 1.542701005935669 Loss on the overall test set: 278.19843327999115 Accuracy rate on the overall test set: 0.36139997839927673 Model saved -----The 5th round of training starts----- Number of trainings: 3200, Loss: 1.3419318199157 715 training Number of training times: 3300, Loss: 1.468044400215149 Number of training times: 3400, Loss: 1.484485149383545 Number of training times: 3500, Loss: 1.54210364818573 Number of training times: 3600, Loss: 1.5797978639602661 Number of training times: 370 0, Loss: 1.3390973806381226 Training times: 3800, Loss: 1.3077597618103027 Training times: 3900, Loss: 1.4766919612884521 Loss on the overall test set: 269.36583971977234 training times: 5200, Loss: 1.3147145509719849 set Correct rate on: 0.3871999979019165 model saved -----The 6th round of training begins----- Number of training: 4000, Loss: 1.439847469329834Number of training: 4100, Loss: 1.436941146850586Number of training: 4200, Loss: 1.5766061544418335Number of training: 4300 , Loss: 1.2490197420 12024 training times: 
4400, Loss: 1.164270281791687 Training times: 4500, Loss: 1.4175126552581787 Training times: 4600, Loss: 1.4056789875030518 Loss on the overall test set: 252.13275730609894 Correct rate on the overall test set: 0.4244000017642975 model has been saved -----Seventh round of training Start----- Number of trainings: 4700, Loss: 1.3679763078689575Numbers of trainings: 4800, Loss: 1.526027798652649Numbers of trainings: 4900, Loss: 1.3590809106826782Numbers of trainings: 5000, Loss: 1.429600358009338 4 Training times: 5100, Loss: 0.9916519522666931 Training times: 5300 , Loss: 1.2122020721435547 Training times: 5400, Loss: 1.3860883712768555 Loss on the overall test set: 235.14292180538177 Accuracy rate on the overall test set: 0.46209999918937683 Model saved -----The 8th round of training starts----- Number of trainings: 5500, Loss: 1.2311736345291138 Number of trainings: 5600, Loss: 1.2175472974777222 Training times: 5700, Loss: 1.2189043760299683 Training times: 5800, Loss: 1.2750414609909058 Training times: 5900, Loss: 1.3556095361709595 Training times: 6000, Loss: 1 .5370352268218994 Training times: 6100, Loss: 1.025504231452942 Training times: 6200, Loss: 1.0661875009536743 Overall Loss on the test set: 222.47956597805023 Accuracy rate on the overall test set: 0.4927999973297119 Model saved -----The 9th round of training begins----- Number of trainings: 6300, Loss: 1.4051152467727661Number of trainings: 6400, Loss: 1.139202237129 2114 Training times: 6600, Loss: 1.0815491676330566 Training times: 6700, Loss: 1.048026442527771 Training times: 6500, Loss: 1.6226587295532227 Number of training: 6800, Loss: 1.1510660648345947 Number of training: 6900, Loss: 1.1476961374282837 Number of training: 7000, Loss: 0.9481611847877502 Loss on the overall test set: 212.00453734397888 On the overall test set Accuracy rate: 0.5181999802589417 Model saved -----No. 
10 rounds of training start----- Number of training: 7100, Loss: 1.2802095413208008Number of training: 7200, Loss: 0.9643581509590149Number of training: 7300, Loss: 1.098695993423462Number of training : 7400, Loss: 0.8831453323 364258 Training times: 7500, Loss: 1.19520902633667 training Number of times: 7600, Loss: 1.2724679708480835 Number of training times: 7700, Loss: 0.8894400000572205 Number of training times: 7800, Loss: 1.205102801322937 Loss on the overall test set: 202.72463756799698 Correct on the overall test set Rate: 0.54339998960495 Model saved
13.2 GPU training time
import torchvision
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time
# `from model import *` would pull everything defined in model.py into this file; here the model is simply defined inline instead.
class Tudui(nn.Module):
    """Small convolutional classifier mapping a 3-channel image to 10 class scores.

    The network is three (5x5 convolution -> 2x2 max-pool) stages followed by
    a flatten and two linear layers. No activation functions are placed
    between the layers. The first linear layer expects 64*4*4 features, which
    corresponds to a 32x32 input image (three 2x poolings: 32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            # in_channels=3, out_channels=32, kernel 5x5, stride 1, padding 2
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),  # (N, 64, 4, 4) -> (N, 64*4*4) for a 32x32 input
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw (unnormalised) class scores for the batch ``x``."""
        return self.model1(x)
import os

# Prepare the CIFAR-10 train/test splits as tensors (downloaded into ./dataset if absent)
train_data = torchvision.datasets.CIFAR10(
    "./dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True
)
test_data = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)

# Number of samples in each split
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度:{}".format(train_data_size))
print("测试数据集的长度:{}".format(test_data_size))

# Batch both splits with DataLoader (64 samples per batch)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network and move it to the GPU when CUDA is available
tudui = Tudui()
if torch.cuda.is_available():
    tudui = tudui.cuda()

# Loss function (cross entropy), also moved to the GPU when available
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Optimizer: plain SGD with learning rate 0.01 (i.e. 1e-2)
learning = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), learning)

# Bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer (event files go to ./logs)
writer = SummaryWriter("logs")

# torch.save does not create missing directories, so make sure ./model exists
os.makedirs("./model", exist_ok=True)

start_time = time.time()  # reference point for the elapsed-time printouts
for i in range(epoch):
    print("-----第 {} 轮训练开始-----".format(i + 1))

    # ---- training ----
    # Training mode: only matters for layers such as Dropout/BatchNorm,
    # which behave differently in training and evaluation.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        if torch.cuda.is_available():
            # Move the batch to the GPU
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)  # gap between predictions and targets

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()  # back-propagate to compute gradients
        optimizer.step()  # update the parameters from the gradients

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            if torch.cuda.is_available():
                # CUDA kernels are launched asynchronously; wait for the queued
                # work so the timestamp covers the steps actually executed.
                torch.cuda.synchronize()
            end_time = time.time()
            print(end_time - start_time)  # seconds elapsed since training started
            # loss.item() extracts the Python float from the 0-dim loss tensor
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation on the test set after every epoch ----
    # Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics.
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)  # loss of this batch only
            total_test_loss = total_test_loss + loss.item()  # sum of per-batch losses
            # Count correct predictions as a Python int rather than a device tensor
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy = total_accuracy + accuracy

    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1

    # Save the whole model object after every epoch.
    # Alternative: save only the weights with torch.save(tudui.state_dict(), path).
    torch.save(tudui, "./model/tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
result:
Files already downloaded and verified Files already downloaded and verified The length of the training data set: 50000 The length of the test data set: 10000 -----The first round of training starts----- 1.0935008525848389 Number of trainings: 100, Loss: 2.2871038913726807 2.1766483783721924 training次数:200,Loss:2.2836720943450928 3.27374267578125 训练次数:300,Loss:2.259164333343506 4.42803692817688 训练次数:400,Loss:2.170818328857422 5.506956577301025 训练次数:500,Loss:2.1002814769744873 6.58754301071167 训练次数:600,Loss:2.0413668155670166 7.650376319885254 训练次数:700,Loss : 2.0200154781341553 Loss on the overall test set: 316.68364894390106 Correct rate on the overall test set: 0.2789999842643738 Model has been saved -----The second round of training begins----- 20.33113145828247 10.175889730453491 训练次数:800,Loss:1.8918509483337402 11.24414849281311 训练次数:900,Loss:1.8798954486846924 12.356922149658203 训练次数:1000,Loss:1.970682978630066 13.43547511100769 训练次数:1100,Loss:2.0064470767974854 14.509244680404663 训练次数:1200,Loss:1.7197221517562866 15.598143815994263 训练次数:1300 , Loss: 1.6999645233154297 16.67508888244629 Training times: 1400, Loss: 1.7595139741897583 17.747746229171753 Training times: 1500, Loss: 1.849331259727478 Overall test Loss on the set: 304.3353645801544 Accuracy rate on the overall test set: 0.31610000133514404 Model has been saved ----- Round 3 Training starts----- Number of trainings: 1600, Loss: 1.7673357725143433 21.411443948745728 训练次数:1700,Loss:1.6436196565628052 22.475884914398193 训练次数:1800,Loss:1.9101005792617798 23.543425798416138 训练次数:1900,Loss:1.7177188396453857 24.60761523246765 训练次数:2000,Loss:1.9782830476760864 25.691354751586914 训练次数:2100,Loss : 1.523171067237854 26.782272815704346 Number of training: 2200, Loss: 1.4762014150619507 27.82503628730774 Number of training: 2300, Loss: 1.7781658172607422 Lo on the overall test set ss: 272.44360399246216 Accuracy rate on the overall test set: 0.37199997901916504 Model has been saved -----The 4th round of training 
begins ----- 30.293652772903442 Training times: 2400, Loss: 1.7340704202651978 31.373929500579834 Number of trainings: 2500, Loss: 1.3520257472991943 32.44764447212219 训练次数:2600,Loss:1.574364423751831 33.513572454452515 训练次数:2700,Loss:1.6468950510025024 34.61698246002197 训练次数:2800,Loss:1.4663115739822388 35.69143986701965 训练次数:2900,Loss:1.6123905181884766 36.75266122817993 训练次数:3000,Loss:1.3316911458969116 37.8302538394928 训练次数:3100, Loss: 1.5095850229263306 Loss on the overall test set: 264.94398534297943 Accuracy rate on the overall test set: 0.3986999988555908 Model saved -----The 5th round of training begins----- 40.43262219429016 Number of trainings: 3200, Loss: 1.372 7346658706665 41.48542404174805 times of training :3300, Loss: 1.443982481956482 42.52226686477661 Number of trainings: 3500, Loss: 1.5449475049972534 Training times: 3400, Loss: 1.519631981849670 4 43.57080316543579 44.60450720787048 训练次数:3600,Loss:1.568708062171936 45.64966917037964 训练次数:3700,Loss:1.3194901943206787 46.709717750549316 训练次数:3800,Loss:1.2732317447662354 47.74911880493164 训练次数:3900,Loss:1.415683388710022 整体测试集上的Loss:253.18030643463135 整体测试集上的Correct rate: 0.42249998450279236 Model saved -----The 6th round of training begins----- 50.21744728088379 Number of trainings: 4000, Loss: 1.3912277221679688 51.265125036239624 Number of trainings: 4100, Loss: 1.41090 1665687561 52.28390049934387 Training times : 4200, Loss: 1.521787405014038 53.33956241607666 Number of trainings: 4300, Loss: 1.2260788679122925 54.391708850860596 Number of training times: 4400, Loss: 1.1339644193649292 55.45666837692261 Number of training: 4500, Loss: 1.3752398490905762 56.52565860748291 Number of training: 4600, Loss: 1.4126766920089722 Loss on the overall test set: 236.17250859737396 Correct rate on the overall test set: 0.457199 99074935913 Model has been saved -----The 7th round of training begins--- -- 58.975016832351685 训练次数:4700,Loss:1.327752947807312 60.01860165596008 训练次数:4800,Loss:1.5265493392944336 
61.06228733062744 训练次数:4900,Loss:1.382441520690918 62.13616943359375 训练次数:5000,Loss:1.4380030632019043 63.18708825111389 训练次数:5100,Loss:1.0084904432296753 64.28091526031494 训练次数:5200, Loss: 1.312524437904358 65.38735771179199 Training times: 5300, Loss: 1.193513751029968 3 66.48723554611206 Training times: 5400, Loss: 1.3607358932495117 Loss on the overall test set: 223.93975222110748 Accuracy rate on the overall test set: 0.4819999933242798 Model saved -----The 8th round of training starts----- 69.10827493667603 Number of trainings: 5500, Loss: 1.1847436428070068 70.249 65786933899Number of trainings: 5600, Loss:1.2199389934539795 71.35597825050354 训练次数:5700,Loss:1.2233123779296875 72.44557046890259 训练次数:5800,Loss:1.2635695934295654 73.50763511657715 训练次数:5900,Loss:1.3924380540847778 74.50808930397034 训练次数:6000,Loss:1.5825486183166504 75.47654867172241 训练次数:6100,Loss:1.035813570022583 76.46864604949951 训练Times: 6200, Loss: 1.1380523443222046 Model has been saved -----The 9th round of training begins----- Loss on the overall test set: 212.88353633880615 整体测试集上的正确率:0.513700008392334 78.92930197715759 训练次数:6300,Loss:1.4175732135772705 80.02452445030212 训练次数:6400,Loss:1.1150474548339844 81.0898060798645 训练次数:6500,Loss:1.5558857917785645 82.11342310905457 训练次数:6600,Loss:1.095849633216858 83.19743394851685 训练次数:6700 , Loss: 1.061813235282898 84.28776097297668 Training times: 6800, Loss: 1.160451054573059 85.28279232978821 Training times: 6900, Loss: 1.1402560472488403 86. 
31971287727356 Training times: 7000, Loss: 0.9515166282653809 Loss on the overall test set: 203.51595824956894 Accuracy rate on the overall test set: 0.5372999906539917 The model has been Save -----The 10th round of training starts ----- 88.85867476463318 Number of trainings: 7100, Loss: 1.2563235759735107 89.92428064346313 训练次数:7200,Loss:1.028809905052185 90.95707082748413 训练次数:7300,Loss:1.08479642868042 91.98656606674194 训练次数:7400,Loss:0.8235641717910767 92.97793579101562 训练次数:7500,Loss:1.2311100959777832 93.9680666923523 训练次数:7600,Loss : 1.2486273050308228 94.95079374313354 Training times: 7700, Loss: 0.9207454919815063 95.94353938102722 Training times: 7800, Loss: 1.2435222864151 Loss on the overall test set: 194.90294301509857 Accuracy rate on the overall test set: 0.557200014591217 Model saved
13.3 CPU training time
import torchvision
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time
# `from model import *` would pull everything defined in model.py into this file; here the model is simply defined inline instead.
class Tudui(nn.Module):
    """Small convolutional classifier mapping a 3-channel image to 10 class scores.

    The network is three (5x5 convolution -> 2x2 max-pool) stages followed by
    a flatten and two linear layers. No activation functions are placed
    between the layers. The first linear layer expects 64*4*4 features, which
    corresponds to a 32x32 input image (three 2x poolings: 32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            # in_channels=3, out_channels=32, kernel 5x5, stride 1, padding 2
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),  # (N, 64, 4, 4) -> (N, 64*4*4) for a 32x32 input
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw (unnormalised) class scores for the batch ``x``."""
        return self.model1(x)
import os

# Prepare the CIFAR-10 train/test splits as tensors (downloaded into ./dataset if absent)
train_data = torchvision.datasets.CIFAR10(
    "./dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True
)
test_data = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)

# Number of samples in each split
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度:{}".format(train_data_size))
print("测试数据集的长度:{}".format(test_data_size))

# Batch both splits with DataLoader (64 samples per batch)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network; it stays on the CPU in this variant
tudui = Tudui()

# Loss function (cross entropy)
loss_fn = nn.CrossEntropyLoss()

# Optimizer: plain SGD with learning rate 0.01 (i.e. 1e-2)
learning = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), learning)

# Bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer (event files go to ./logs)
writer = SummaryWriter("logs")

# torch.save does not create missing directories, so make sure ./model exists
os.makedirs("./model", exist_ok=True)

start_time = time.time()  # reference point for the elapsed-time printouts
for i in range(epoch):
    print("-----第 {} 轮训练开始-----".format(i + 1))

    # ---- training ----
    # Training mode: only matters for layers such as Dropout/BatchNorm,
    # which behave differently in training and evaluation.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)  # gap between predictions and targets

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()  # back-propagate to compute gradients
        optimizer.step()  # update the parameters from the gradients

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            end_time = time.time()
            print(end_time - start_time)  # seconds elapsed since training started
            # loss.item() extracts the Python float from the 0-dim loss tensor
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation on the test set after every epoch ----
    # Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics.
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)  # loss of this batch only
            total_test_loss = total_test_loss + loss.item()  # sum of per-batch losses
            # Count correct predictions as a Python int rather than a tensor
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy = total_accuracy + accuracy

    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1

    # Save the whole model object after every epoch.
    # Alternative: save only the weights with torch.save(tudui.state_dict(), path).
    torch.save(tudui, "./model/tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
result:
Files already downloaded and verified Files already downloaded and verified The length of the training data set: 50000 The length of the test data set: 10000 -----The first round of training starts----- 3.761235237121582 Number of trainings: 100, Loss: 2.291699171066284 7.478676080703735 training次数:200,Loss:2.2810616493225098 11.149278163909912 训练次数:300,Loss:2.2673659324645996 14.876582384109497 训练次数:400,Loss:2.210559606552124 18.794732332229614 训练次数:500,Loss:2.074248790740967 22.666887521743774 训练次数:600,Loss:2.029463052749634 26.518835306167603 训练次数:700,Loss : 2.025493860244751 Loss on the overall test set: 315.7099049091339 Accuracy rate on the overall test set: 0.2777999937534332 Model has been saved -----The second round of training begins----- 33.49093294143677Number of trainings: 800, Loss: 1.8920475244522095 37.37390112876892Numbers of training: 900, Loss: 1.8434715270996094 41.43157577514648 4 Training times: 1000, Loss: 1.9236050844192505 45.389270067214966 Training times: 1100, Loss : 2.011040687561035 49.43605923652649 训练次数:1200,Loss:1.6993070840835571 53.62735366821289 训练次数:1300,Loss:1.6654363870620728 58.2660493850708 训练次数:1400,Loss:1.753265142440796 62.52872014045715 训练次数:1500,Loss:1.813820481300354 整体测试集上的Loss:304.07691729068756 整体测试集上的Correct rate: 0.3098999857902527 Model saved -----The third round of training begins----- 70.04687976837158 Number of trainings: 1600, Loss: 1.7496393918991089 74.19148874282837 训练次数:1700,Loss:1.6370826959609985 78.51184940338135 训练次数:1800,Loss:1.8948217630386353 83.03685450553894 训练次数:1900,Loss:1.7091740369796753 87.36472058296204 训练次数:2000,Loss:1.9168915748596191 91.5152907371521 训练次数:2100,Loss:1.5194813013076782 95.88392543792725 训练次数:2200, Loss: 1.4738638401031494 100.08612132072449 Training times: 2300, Loss: 1.7649239301681519 Loss on the overall test set: 266.925869345665 Correct rate on the overall test set: 0.38499999046325684 The model has been saved -----The 4th round of training starts----- 
107.81971716880798 times of training :2400, Loss: 1.7411062717437744 111.95616102218628 Training times: 2500, Loss: 1.3490957021713257 116.07963228225708 Number of trainings: 2600, Loss: 1.577816367149353 120.41316413879395 训练次数:2700,Loss:1.6967650651931763 124.64287948608398 训练次数:2800,Loss:1.4929475784301758 128.7123486995697 训练次数:2900,Loss:1.6131006479263306 132.94610214233398 训练次数:3000,Loss:1.347227931022644 137.22871589660645 训练次数:3100,Loss:1.4926567077636719 整体测试集上的Loss: 260.8921568393707 Accuracy rate on the overall test set: 0.40639999508857727 Model saved -----The 5th round of training starts----- 145.22107672691345 Number of trainings: 3200, Loss: 1.3609188795089722 149.55124926 567078 Number of trainings: 3300, Loss : 1.459675669670105 153.86187386512756 training Number of times: 3400, Loss: 1.4940723180770874 162.51304960250854 158.21399784088135 Number of trainings: 3500, Loss: 1.5735642910003662 Numbers of trainings: 3600, Loss: 1.6013926267623901 166.73556113243103 Numbers of trainings: 3700, Loss: 1.3678141832351685 170.6803758 1443787 Training times: 3800, Loss: 1.2831741571426392 174.55300641059875 Training times: 3900, Loss: 1.4196735620498657 Loss on the overall test set: 258.5555330514908 Accuracy rate on the overall test set: 0.4147000014781952 Model saved -----The 6th round of training begins----- 181.89517664909363 Number of trainings: 4000, Loss: 1.394544243812561 185.815289735794 07 Training times: 4100, Loss: 1.4785242080688477 189.75436854362488 Training times: 4200, Loss: 1.504089593887329 193.7331829071045 Training times: 4300, Loss: 1.1989901065826416 197.86846470832825 Training times: 4400, Loss: 1.16918706893920 9 202.10944604873657 241.23810291290283 Number of trainings: 4500, Loss: 1.3368093967437744 206.46737694740295 Number of trainings: 4600, Loss: 1.4030650854110718 Loss on the overall test set: 248.35702466964722 Accuracy rate on the overall test set: 0.43479999899864197 Model saved -----The 7th round of training starts----- 
213.7 9702472686768 training times: 4700, Loss:1.2863177061080933 217.70893836021423 训练次数:4800,Loss:1.5342319011688232 221.56816983222961 训练次数:4900,Loss:1.412546157836914 225.4557182788849 训练次数:5000,Loss:1.435633897781372 229.29314064979553 训练次数:5100,Loss:1.050623893737793 233.22323894500732 训练次数:5200,Loss:1.327545166015625 237.1871302127838 训练Number of times: 5300, Loss: 1.2706438302993774 Number of training times: 5400, Loss: 1.3970144987106323 Loss on the overall test set: 238.9216102361679 Accuracy rate on the overall test set: 0.4553000032901764 Model saved -----The 8th round of training starts----- 248.59216332435608 Number of trainings: 5500, Loss: 1.1989145278930664 252.57087922096252 Number of trainings: 5600, Loss: 1. 2739124298095703 256.6464595794678 Training times: 5700 ,Loss:1.2550328969955444 260.85662841796875 训练次数:5800,Loss:1.2594654560089111 264.96409726142883 训练次数:5900,Loss:1.352506399154663 269.10122084617615 训练次数:6000,Loss:1.5692474842071533 273.26241970062256 训练次数:6100,Loss:1.051681399345398 277.37177181243896 训练次数:6200,Loss:1.1093714237213135 整体Loss on the test set: 229.03875291347504 Correct rate on the overall test set: 0.48089998960494995 model has been saved ----- the ninth round of training started ----- 285.03535556793213 训练次数:6300,Loss:1.438887119293213 289.1406488418579 训练次数:6400,Loss:1.1292884349822998 293.3350794315338 训练次数:6500,Loss:1.5554381608963013 297.4605076313019 训练次数:6600,Loss:1.12319815158844 301.41761565208435 训练次数:6700,Loss:1.0609500408172607 305.4384708404541 训练次数: 6800, Loss: 1.1414461135864258 309.32322096824646 Training times: 6900, Loss: 1.0653573274612427 313.22136521339417 Training times: 7000, Loss: 0.96454161405 56335Loss on the overall test set: 217.3968950510025 Accuracy rate on the overall test set: 0.508400022983551 Model has been saved -----No. 
10 rounds of training start ----- 320.61516642570496 Number of trainings: 7100, Loss: 1.252223253250122 324.5729761123657 训练次数:7200,Loss:1.0116769075393677 328.631311416626 训练次数:7300,Loss:1.1434015035629272 332.65182423591614 训练次数:7400,Loss:0.8558588624000549 336.61728739738464 训练次数:7500,Loss:1.2400795221328735 340.65006160736084 训练次数:7600,Loss:1.3492536544799805 344.64593052864075 训练次数:7700, Loss: 0.9260987043380737 348.731153011322 Training times: 7800, Loss: 1.3142049312591553 Loss on the overall test set: 208.29399240016937 Correct rate on the overall test set: 0.5317999720573425 model saved
13.4 Using GPU training (method 2)
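① Method 2 creates a single `torch.device` object and moves the model, the loss function and each batch to it with `.to(device)`. When the computer has more than one graphics card, a specific card can be selected with "cuda:0" or "cuda:1".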
import torchvision
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time
# Select the device used for training.
# Alternatives: torch.device("cpu") forces the CPU, torch.device("cuda") uses the
# default GPU, and torch.device("cuda:0") picks a specific GPU by index.
# Here the GPU is used when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# `from model import *` would pull everything defined in model.py into this file; here the model is simply defined inline instead.
class Tudui(nn.Module):
    """Small convolutional classifier mapping a 3-channel image to 10 class scores.

    The network is three (5x5 convolution -> 2x2 max-pool) stages followed by
    a flatten and two linear layers. No activation functions are placed
    between the layers. The first linear layer expects 64*4*4 features, which
    corresponds to a 32x32 input image (three 2x poolings: 32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            # in_channels=3, out_channels=32, kernel 5x5, stride 1, padding 2
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),  # (N, 64, 4, 4) -> (N, 64*4*4) for a 32x32 input
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw (unnormalised) class scores for the batch ``x``."""
        return self.model1(x)
import os

# Prepare the CIFAR-10 train/test splits as tensors (downloaded into ./dataset if absent)
train_data = torchvision.datasets.CIFAR10(
    "./dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True
)
test_data = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)

# Number of samples in each split
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度:{}".format(train_data_size))
print("测试数据集的长度:{}".format(test_data_size))

# Batch both splits with DataLoader (64 samples per batch)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network and move it to the selected device.
# nn.Module.to() moves the module in place, so the assignment is optional here.
tudui = Tudui()
tudui = tudui.to(device)

# Loss function (cross entropy); it is an nn.Module too, so assignment is optional
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Optimizer: plain SGD with learning rate 0.01 (i.e. 1e-2)
learning = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), learning)

# Bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer (event files go to ./logs)
writer = SummaryWriter("logs")

# torch.save does not create missing directories, so make sure ./model exists
os.makedirs("./model", exist_ok=True)

start_time = time.time()  # reference point for the elapsed-time printouts
for i in range(epoch):
    print("-----第 {} 轮训练开始-----".format(i + 1))

    # ---- training ----
    # Training mode: only matters for layers such as Dropout/BatchNorm,
    # which behave differently in training and evaluation.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        # Tensor.to() returns a new tensor instead of moving the tensor in place,
        # so the result MUST be assigned (unlike nn.Module.to()).
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)  # gap between predictions and targets

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()  # back-propagate to compute gradients
        optimizer.step()  # update the parameters from the gradients

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            if device.type == "cuda":
                # CUDA kernels are launched asynchronously; wait for the queued
                # work so the timestamp covers the steps actually executed.
                torch.cuda.synchronize(device)
            end_time = time.time()
            print(end_time - start_time)  # seconds elapsed since training started
            # loss.item() extracts the Python float from the 0-dim loss tensor
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation on the test set after every epoch ----
    # Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics.
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            # As above: tensors must be re-assigned after .to(device)
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)  # loss of this batch only
            total_test_loss = total_test_loss + loss.item()  # sum of per-batch losses
            # Count correct predictions as a Python int rather than a device tensor
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy = total_accuracy + accuracy

    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1

    # Save the whole model object after every epoch.
    # Alternative: save only the weights with torch.save(tudui.state_dict(), path).
    torch.save(tudui, "./model/tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
result:
Files already downloaded and verified Files already downloaded and verified The length of the training data set: 50000 The length of the test data set: 10000 -----The first round of training starts----- 1.1190404891967773 Training times: 100, Loss: 2.2926671504974365 2.2812979221343994 training次数:200,Loss:2.291703701019287 3.386057138442993 训练次数:300,Loss:2.2745745182037354 4.541907548904419 训练次数:400,Loss:2.221169948577881 5.640037298202515 训练次数:500,Loss:2.143411159515381 6.726482629776001 训练次数:600,Loss:2.0441091060638428 7.838879585266113 训练次数:700,Loss : 2.0090014934539795 Loss on the overall test set: 312.4657955169678 Correct rate on the overall test set: 0.28279998898506165 Model saved -----The second round of training begins----- 10.41140604019165 训练次数:800,Loss:1.8645917177200317 11.455690383911133 训练次数:900,Loss:1.827837347984314 12.512084007263184 训练次数:1000,Loss:1.9033353328704834 13.599088907241821 训练次数:1100,Loss:2.0170090198516846 14.64348030090332 训练次数:1200,Loss:1.7100862264633179 15.72208046913147 训练次数:1300 , Loss: 1.6826354265213013 16.752166986465454 Training times: 1400, Loss: 1.7191925048828125 17.81931185722351 Training times: 1500, Loss: 1.8116774559020996 Overall Loss on the test set: 306.7045053243637 Accuracy rate on the overall test set: 0.3068999946117401 Model has been saved ----- Round 3 Training starts----- 20.318028688430786 Number of trainings: 1600, Loss: 1.7589811086654663 21.38711452484131 训练次数:1700,Loss:1.6722180843353271 22.505618572235107 训练次数:1800,Loss:1.9415262937545776 23.604503393173218 训练次数:1900,Loss:1.7454909086227417 24.74000310897827 训练次数:2000,Loss:1.9074403047561646 25.785309076309204 训练次数:2100,Loss : 1.5321683883666992 26.833311796188354 Training times: 2200, Loss: 1.4686038494110107 27.883039236068726 Training times: 2300, Loss: 1.8088748455047607 on the overall test set Loss: 264.2274956703186 The correct rate on the overall test set: 0.3926999866962433 The model has been saved ----- the fourth round of training begins 
----- 30.434141159057617 Training times: 2400, Loss: 1.7530766725540161 31.50102210044861 Training times: 2500, Loss: 1.3466917276382446 32.588942766189575 训练次数:2600,Loss:1.5937833786010742 33.64913892745972 训练次数:2700,Loss:1.6885923147201538 34.69320559501648 训练次数:2800,Loss:1.5292593240737915 35.72002124786377 训练次数:2900,Loss:1.6046268939971924 36.74435377120972 训练次数:3000,Loss : 1.3702434301376343 37.789002656936646 Training times: 3100, Loss: 1.5583586692810059 Loss on the overall test set: 247.68864715099335 Correct rate on the overall test set: 0.42879999868392944 3 Model has been saved -----The fifth round of training starts----- 40.23552346229553 times of training: 3200, Loss: 1.3889607191085815 41.28690481185913 Training times: 3300, Loss: 1.4547197818756104 42.32324028015137 Training times: 3400, Loss: 1.48745143413543 7 43.36912536621094 Training times: 3500, Loss: 1.6039626598358154 44.43635702133179 Training times: 3600, Loss: 1.5406546592712402 45.52009439468384 Training times: 3700, Loss: 1.355963110923767 46.61804127693176 Training times: 3800, Lo ss: 1.293853521347046 47.66825032234192 Training times: 3900, Loss : 1.4567005634307861 Loss on the overall test set: 239.61021220684052 Correct on the overall test set Rate: 0.44669997692108154 Model saved -----The 6th round of training starts----- 50.18902587890625 Number of trainings: 4000, Loss: 1.4021949768066406 51.221325397491455 Numbers of trainings: 4100, Loss: 1.468636 9895935059 52.25768494606018 Training times: 4200, Loss : 1.5711930990219116 53.29710626602173 training Number of times: 4300, Loss: 1.2274739742279053 55.45258617401123 54.35805821418762 Number of training: 4400, Loss: 1.1256041526794434 Number of training: 4500, Loss: 1.346487045288086 56.498899936676025 Number of training: 4600, Loss: 1.4574103355407715 Loss on the overall test set: 229. 
56566536426544 Accuracy rate on the overall test set: 0.4640999734401703 Model has been saved ----- The seventh round of training begins ----- 58.9901008605957 Training times: 4700, Loss: 1.3305902481079102 60.09166860580444 Training times: 4800, Loss: 1.5128451585769653 61.15304517745972 Training Number of times: 4900, Loss: 1.4225473403930664 62.24405121803284 Number of training times: 5000, Loss: 1.4352083206176758 63.328041315078735 Number of training times: 5100, Loss: 1.0108458995819092 64.43191266059875 Training times: 5200, Loss: 1.2999461889266968 65.55889964103699 Training times: 5300, Loss: 1.24830412864685 06 66.67005276679993 Training times: 5400, Loss: 1.40975821018219 Loss on the overall test set: 221.8911657333374 Accuracy rate on the overall test set: 0.4901999831199646 Model saved -----The 8th round of training begins----- 69.31889057159424 Number of trainings: 5500, Loss: 1.2309132 814407349 70.37002444267273 Number of trainings: 5600 ,Loss:1.2406929731369019 71.45024251937866 训练次数:5700,Loss:1.206421136856079 72.53801417350769 训练次数:5800,Loss:1.2449841499328613 73.61350750923157 训练次数:5900,Loss:1.382934331893921 74.64801716804504 训练次数:6000,Loss:1.5476189851760864 75.68919968605042 训练次数:6100,Loss:1.0594358444213867 76.78617668151855 Training times: 6200, Loss: 1.1037648916244507 Loss on the overall test set: 214.6394373178482 The correct rate on the overall test set: 0.5138999819755554 The model has been saved ----- the ninth round of training begins ----- 79.35270118713379 训练次数:6300,Loss:1.4193459749221802 80.38360047340393 训练次数:6400,Loss:1.1300890445709229 81.4340546131134 训练次数:6500,Loss:1.5622072219848633 82.51292634010315 训练次数:6600,Loss:1.119008183479309 83.57669281959534 训练次数:6700 , Loss: 1.0774811506271362 84.61026763916016 Training times: 6800, Loss: 1.1881333589553833 85.65419411659241 Training times: 6900, Loss: 1.116170048713684 86 .69365286827087 Training times: 7000, Loss: 0.9820349812507629 Loss on the overall test set: 
204.89984810352325 Accuracy rate on the overall test set: 0.5370000004768372 The model has Save -----The 10th round of training begins----- 89.25331830978394 训练次数:7100,Loss:1.339141607284546 90.34024834632874 训练次数:7200,Loss:0.8925604224205017 91.38928580284119 训练次数:7300,Loss:1.134442925453186 92.44890975952148 训练次数:7400,Loss:0.8384325504302979 93.53598165512085 训练次数:7500,Loss:1.2126699686050415 94.57306551933289 训练次数:7600,Loss:1.2007839679718018 95.60608768463135 训练次数:7700, Loss: 0.8869692087173462 96.65610480308533 Training times: 7800, Loss: 1.3008511066436768 Loss on the overall test set: 195.62357383966446 Correct rate on the overall test set: 0.5604999661445618 model saved
13.5 Running terminal commands
① To run a terminal command from a code cell, type the command in the cell and prefix it with an exclamation mark (!).
② Enter !nvidia-smi to view the graphics card configuration.
!nvidia-smi
Thu Mar 31 17:24:49 2022 +-----------------------------------------------------------------------------+ | NVIDIA-SMI 471.35 Driver Version: 471.35 CUDA Version: 11.4 | |-------------------------------+----------------------+----------------------+ | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |===============================+======================+======================| | 0 NVIDIA GeForce ... WDDM | 00000000:01:00.0 On | N/A | | N/A 61C P0 47W / N/A | 2913MiB / 16384MiB | 10% Default | | | | N/A | +-------------------------------+----------------------+----------------------+ +-----------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=============================================================================| | 0 N/A N/A 1868 C+G Insufficient Permissions N/A | | 0 N/A N/A 14152 C+G ...4__htrsf667h5kn2\AWCC.exe N/A | | 0 N/A N/A 14904 C+G ...2\extracted\WeChatApp.exe N/A | | 0 N/A N/A 19304 C+G ...y\AccountsControlHost.exe N/A | | 0 N/A N/A 21816 C+G ...5n1h2txyewy\SearchApp.exe N/A | | 0 N/A N/A 23044 C+G Insufficient Permissions N/A | | 0 N/A N/A 23480 C+G ...2txyewy\TextInputHost.exe N/A | | 0 N/A N/A 24180 C+G ...tracted\WechatBrowser.exe N/A | | 0 N/A N/A 24376 C+G ...erver\YourPhoneServer.exe N/A | | 0 N/A N/A 24912 C+G ...kzcwy\mcafee-security.exe N/A | | 0 N/A N/A 25524 C+G ...me\Application\chrome.exe N/A | | 0 N/A N/A 27768 C+G ...cw5n1h2txyewy\LockApp.exe N/A | | 0 N/A N/A 27788 C ...a\envs\py3.6.3\python.exe N/A | | 0 N/A N/A 27960 C+G ...y\ShellExperienceHost.exe N/A | | 0 N/A N/A 31320 C+G C:\Windows\explorer.exe N/A | | 0 N/A N/A 32796 C+G ...e\StoreExperienceHost.exe N/A | | 0 N/A N/A 35728 C+G ...artMenuExperienceHost.exe N/A | +-----------------------------------------------------------------------------+