# The following code comes from the bilibili series "PyTorch programming tutorial for beginners".
# The complete code is below; copy it and it should run as-is.
import torch
import time
import json
import torchvision
import torchvision.transforms as transforms # 将图像数据转化为torch.tensor张量
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F # 用到了F.relu 和 F.cross_entropy
import torch.nn as nn
import torch.optim as optim # optim.Adam 优化器
from IPython.display import clear_output #
from torch.utils.data import DataLoader
from itertools import product
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import confusion_matrix # 绘制混淆矩阵
from collections import OrderedDict
from collections import namedtuple
torch.set_printoptions(linewidth=120)  # widen console output when printing tensors
torch.set_grad_enabled(True) # enable gradient (autograd) tracking
# (notebook output) <torch.autograd.grad_mode.set_grad_enabled at 0x19e1fbc2f48>
# Neural-network model: a function that maps an input image tensor to class scores.
class Network(nn.Module):
    """Small CNN for 28x28 single-channel images: two conv+pool stages
    followed by two hidden linear layers and a 10-way output layer."""

    def __init__(self):
        super(Network, self).__init__()
        # Feature extractor: 1 -> 6 -> 12 channels, 5x5 kernels.
        # in_channels=1 because the input images are single-channel (grayscale);
        # stride is how far the filter moves per step.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        # Classifier head; 12*4*4 is the flattened conv output size for 28x28 input.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120, bias=True)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        # 10 output features: one score per target class.
        self.out = nn.Linear(in_features=60, out_features=10)

    def __repr__(self):
        return "this is the string of the lizard"

    def forward(self, t):
        # Conv stage 1: convolve, ReLU, then pick the max of each 2x2 window.
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # Conv stage 2: same pattern with the second convolution.
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # Flatten, then run the fully connected head.
        t = t.reshape(-1, 12 * 4 * 4)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        return self.out(t)
# Load the FashionMNIST training set, downloading it on first use.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',  # where the dataset files are stored
    train=True,                  # use the training split
    download=True,               # fetch the data if it is not already there
    transform=transforms.Compose([
        transforms.ToTensor(),   # convert each PIL image to a torch tensor
    ]),
)
class RunBuilder():
    """Expands an ordered dict of hyper-parameter lists into a list of
    named runs, one per element of the Cartesian product."""

    @staticmethod
    def get_runs(params):
        # A namedtuple whose fields mirror the parameter names keeps each
        # run readable (run.lr, run.batch_size, ...).
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]
class RunManager():
    """Tracks per-run / per-epoch statistics (loss, accuracy, timing),
    logs them to TensorBoard, and collects them for export."""

    def __init__(self):
        # Per-epoch state
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = 0
        # Per-run state
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        self.network = None
        self.loader = None
        self.tb = None  # SummaryWriter, created in begin_run

    def begin_run(self, run, network, loader):
        """Start a new run: reset timing, open a TensorBoard writer and
        log a sample image grid plus the network graph."""
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        self.tb.add_image('images', grid)
        self.tb.add_graph(network, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Reset the per-epoch accumulators and start the epoch timer."""
        self.epoch_start_time = time.time()
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Compute epoch metrics, push them to TensorBoard and append a
        result row to self.run_data."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time
        # Mean loss per batch; accuracy over the whole dataset.
        loss = self.epoch_loss / len(self.loader)
        accuracy = self.epoch_num_correct / len(self.loader.dataset) * 100
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
        results = OrderedDict()
        results['run'] = self.run_count
        results['epoch'] = self.epoch_count
        results['loss'] = loss
        results['accuracy'] = f'{accuracy:.2f}%'
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        for k, v in self.run_params._asdict().items():
            results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')
        clear_output(wait=True)
        try:
            display(df)  # 'display' is only defined inside IPython/Jupyter
        except NameError:
            print(df)    # plain-script fallback: 'display' was never imported

    def track_loss(self, loss):
        # Accumulate the scalar batch loss.
        self.epoch_loss += loss.item()

    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        # argmax of the logits equals argmax of the softmax (softmax is
        # monotone per row), so the deprecated dim-less softmax call is
        # unnecessary. Returning a plain int keeps the accumulator an int,
        # so end_epoch works even if no batches were tracked.
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, filename):
        """Write the collected run data to '<filename>.csv' and
        '<filename>.json'."""
        # Bug fix: the filename parameter was previously ignored.
        pd.DataFrame.from_dict(
            self.run_data, orient='columns'
        ).to_csv(f'{filename}.csv')
        with open(f'{filename}.json', 'w', encoding='utf-8') as f:
            # Dump the whole list once so the file is valid JSON;
            # dumping each dict separately concatenates bare objects,
            # which json.load cannot parse back.
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)
# Hyper-parameter grid for RunBuilder: one run per combination.
params = OrderedDict([
    ('lr', [.01]),
    ('batch_size', [1000]),
    # ('shuffle', [True, False]),
    ('num_workers', [1]),
])
# Grid-search training driver: one full training run per hyper-parameter combination.
m = RunManager()
for run in RunBuilder.get_runs(params):
    # Fresh network, data loader and optimizer for every run.
    network = Network()
    loader = DataLoader(train_set,batch_size=run.batch_size,num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    m.begin_run(run, network, loader)
    for epoch in range(10):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch
            preds = network(images)  # forward pass
            loss = F.cross_entropy(preds,labels)  # classification loss
            optimizer.zero_grad()  # clear gradients from the previous batch
            loss.backward()  # back-propagate
            optimizer.step()  # update weights
            m.track_loss(loss)
            m.track_num_correct(preds,labels)
        m.end_epoch()
    m.end_run()
m.save('results')
# Sample results (notebook output):
#      | run | epoch | loss     | accuracy | epoch duration | run duration | lr   | batch_size | num_workers |
# -----|-----|-------|----------|----------|----------------|--------------|------|------------|-------------|
#  0   | 1   | 1     | 0.943128 | 63.89%   | 5.532876       | 10.976489    | 0.01 | 1000       | 1           |
#  1   | 1   | 2     | 0.537875 | 78.96%   | 5.653928       | 16.732941    | 0.01 | 1000       | 1           |
#  2   | 1   | 3     | 0.442508 | 83.57%   | 5.632912       | 22.457873    | 0.01 | 1000       | 1           |
#  3   | 1   | 4     | 0.387121 | 85.72%   | 5.735993       | 28.281893    | 0.01 | 1000       | 1           |
#  4   | 1   | 5     | 0.355424 | 86.92%   | 5.648000       | 34.019917    | 0.01 | 1000       | 1           |
#  5   | 1   | 6     | 0.330770 | 87.83%   | 5.721980       | 39.835942    | 0.01 | 1000       | 1           |
#  6   | 1   | 7     | 0.315759 | 88.22%   | 5.666653       | 45.596620    | 0.01 | 1000       | 1           |
#  7   | 1   | 8     | 0.296206 | 88.97%   | 5.591570       | 51.281192    | 0.01 | 1000       | 1           |
#  8   | 1   | 9     | 0.289356 | 89.29%   | 5.714649       | 57.087968    | 0.01 | 1000       | 1           |
#  9   | 1   | 10    | 0.278595 | 89.71%   | 5.812043       | 62.992033    | 0.01 | 1000       | 1           |
# Saving the model, method 1 (the file that loads this model must have the
# same imports available and the Network class defined).
# Bug fix: sklearn.externals.joblib was removed in scikit-learn 0.23+;
# import the standalone joblib package and keep the old path as a fallback.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
import os

dirs = 'testModels'
if not os.path.exists(dirs):
    os.makedirs(dirs)
joblib.dump(network, dirs + '/network.pkl')
# Load the model back.
read_network = joblib.load(dirs + '/network.pkl')
# (notebook output) ['testModels/network.pkl']
# Saving/loading the model, method 2: serialize the whole module with torch.
# Save
torch.save(network,dirs+'/network.pt')
# Load
# NOTE(review): torch.load of a full module unpickles arbitrary code — only
# load files you trust; the state_dict pattern is the safer convention.
read_model = torch.load(dirs+'/network.pt')