部分内容摘自知乎专栏:
[深度学习框架]PyTorch常用代码段
PyTorch Cookbook(常用代码段整理合集)
可能会用到的一些库/小工具
Pytorch代码调试:TorchSnooper
模型可解释性:captum
Keras风格的Summary:pytorch-summary
模型可视化:pytorchviz
预训练模型:pretrained-models.pytorch
卷积层配置辅助:convolution-visualizer
导入包和版本查询
import torch
import torch.nn as nn
import torchvision
print(torch.__version__)
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.get_device_name(0))
可复现性(同一设备上)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
显卡设置相关
# 程序内指定显卡
import os
os.environ['CUDA_VISIBLE_DEVICES'] = 0,1
# 程序外指定显卡
CUDA_VISIBLE_DEVICES=0,1 python train.py
# 使用显卡
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 清除现存
torch.cuda.empty_cache()
# 命令行重置GPU的指令
nvidia-smi --gpu-reset -i [gpu_id]
Tensor相关API
# 基本API
tensor = torch.randn(3,4,5)
print(tensor.type()) # 数据类型
print(tensor.size()) # 张量的shape,是个元组
print(tensor.dim()) # 维度的数量
# 设置默认类型,pytorch中的FloatTensor远远快于DoubleTensor
torch.set_default_tensor_type(torch.FloatTensor)
# 类型转换
tensor = tensor.cuda()
tensor = tensor.cpu()
tensor = tensor.float()
tensor = tensor.long()
# reshape
tensor = torch.reshape(tensor, shape)
# 打乱第一个维度
tensor = tensor[torch.randperm(tensor.size(0))]
# 水平翻转
# pytorch不支持tensor[::-1]这样的负步长操作,水平翻转可以通过张量索引实现
# 假设张量的维度为[N, D, H, W].
tensor = tensor[:,:,:,torch.arange(tensor.size(3) - 1, -1, -1).long()]
# 复制张量
# Operation | New/Shared memory | Still in computation graph |
tensor.clone() # | New | Yes |
tensor.detach() # | Shared | No |
tensor.detach.clone()() # | New | No |
# 拼接张量
'''
注意torch.cat和torch.stack的区别在于torch.cat沿着给定的维度拼接,
而torch.stack会新增一维。例如当参数是3个10x5的张量,torch.cat的结果是30x5的张量,
而torch.stack的结果是3x10x5的张量。
'''
tensor = torch.cat(list_of_tensors, dim=0)
tensor = torch.stack(list_of_tensors, dim=0)
# 张量扩展
# Expand tensor of shape 64*512 to shape 64*512*7*7.
tensor = torch.rand(64,512)
torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
# 矩阵乘法
# Matrix multiplcation: (m*n) * (n*p) * -> (m*p).
result = torch.mm(tensor1, tensor2)
# Batch matrix multiplication: (b*m*n) * (b*n*p) -> (b*m*p)
result = torch.bmm(tensor1, tensor2)
# Element-wise multiplication.
result = tensor1 * tensor2
将整数标签转为one-hot编码
# pytorch的标记默认从0开始
tensor = torch.tensor([0, 2, 1, 3])
N = tensor.size(0)
num_classes = 4
one_hot = torch.zeros(N, num_classes).long()
one_hot.scatter_(dim=1, index=torch.unsqueeze(tensor, dim=1), src=torch.ones(N, num_classes).long())
计算两组数据之间的两两欧式距离
# 利用broadcast机制
dist = torch.sqrt(torch.sum((X1[:,None,:] - X2) ** 2, dim=2))
各种类型转换
# np.ndarray与PIL.Image的转换
image = PIL.Image.fromarray(ndarray.astype(np.uint8))
ndarray = np.asarray(PIL.Image.open(path))
# torch.Tensor与np.ndarray转换
ndarray = tensor.cpu().numpy()
tensor = torch.from_numpy(ndarray).float()
tensor = torch.from_numpy(ndarray.copy()).float() # If ndarray has negative stride
# Torch.tensor与PIL.Image转换
# pytorch中的张量默认采用[N, C, H, W]的顺序,并且数据范围在[0,1],需要进行转置和规范化
# torch.Tensor -> PIL.Image
image = PIL.Image.fromarray(torch.clamp(tensor*255, min=0, max=255).byte().permute(1,2,0).cpu().numpy())
image = torchvision.transforms.functional.to_pil_image(tensor) # Equivalently way
# PIL.Image -> torch.Tensor
path = r'./figure.jpg'
tensor = torch.from_numpy(np.asarray(PIL.Image.open(path))).permute(2,0,1).float() / 255
tensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path)) # Equivalently way
计算模型整体参数量
num_parameters = sum(torch.numel(parameter) for parameter in model.parameters())
学习率衰减
# Reduce learning rate when validation accuarcy plateau.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)
for t in range(0, 80):
train(...)
val(...)
scheduler.step(val_acc)
# Cosine annealing learning rate.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=80)
# Reduce learning rate by 10 at given epochs.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 70], gamma=0.1)
for t in range(0, 80):
scheduler.step()
train(...)
val(...)
# Learning rate warmup by 10 epochs.
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda t: t / 10)
for t in range(0, 10):
scheduler.step()
train(...)
val(...)
TensorBoard
pip install tensorboard
tensorboard --logdir=runs
from torch.utils.tensorboard import SummaryWriter
import numpy as np
writer = SummaryWriter()
for n_iter in range(100):
writer.add_scalar('Loss/train', np.random.random(), n_iter)
writer.add_scalar('Loss/test', np.random.random(), n_iter)
writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
保存和恢复模型和优化器
start_epoch = 0
# Load checkpoint.
if resume: # resume为参数,第一次训练时设为0,中断再训练时设为1
model_path = os.path.join('model', 'best_checkpoint.pth.tar')
assert os.path.isfile(model_path)
checkpoint = torch.load(model_path)
best_acc = checkpoint['best_acc']
start_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
print('Load checkpoint at epoch {}.'.format(start_epoch))
print('Best accuracy so far {}.'.format(best_acc))
# Train the model
for epoch in range(start_epoch, num_epochs):
# Test the model
# save checkpoint
is_best = current_acc > best_acc
best_acc = max(current_acc, best_acc)
checkpoint = {
'best_acc': best_acc,
'epoch': epoch + 1,
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
}
model_path = os.path.join('model', 'checkpoint.pth.tar')
best_model_path = os.path.join('model', 'best_checkpoint.pth.tar')
torch.save(checkpoint, model_path)
if is_best:
shutil.copy(model_path, best_model_path)
恢复部分权重
model.load_state_dict(torch.load('model.pth'), strict=False)
以较大学习率微调全连接层,较小学习率微调卷积层
model = torchvision.models.resnet18(pretrained=True)
finetuned_parameters = list(map(id, model.fc.parameters()))
conv_parameters = (p for p in model.parameters() if id(p) not in finetuned_parameters)
parameters = [{'params': conv_parameters, 'lr': 1e-3},
{'params': model.fc.parameters()}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)
训练部分代码框架
for t in epoch(80):
for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
images, labels = images.cuda(), labels.cuda()
scores = model(images)
loss = loss_function(scores, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
计算每个类别的查准率(precision)、查全率(recall)、F1和总体指标
import sklearn.metrics
all_label = []
all_prediction = []
for images, labels in tqdm.tqdm(data_loader):
# Data.
images, labels = images.cuda(), labels.cuda()
# Forward pass.
score = model(images)
# Save label and predictions.
prediction = torch.argmax(score, dim=1)
all_label.append(labels.cpu().numpy())
all_prediction.append(prediction.cpu().numpy())
# Compute RP and confusion matrix.
all_label = np.concatenate(all_label)
assert len(all_label.shape) == 1
all_prediction = np.concatenate(all_prediction)
assert all_label.shape == all_prediction.shape
micro_p, micro_r, micro_f1, _ = sklearn.metrics.precision_recall_fscore_support(
all_label, all_prediction, average='micro', labels=range(num_classes))
class_p, class_r, class_f1, class_occurence = sklearn.metrics.precision_recall_fscore_support(
all_label, all_prediction, average=None, labels=range(num_classes))
# Ci,j = #{y=i and hat_y=j}
confusion_mat = sklearn.metrics.confusion_matrix(
all_label, all_prediction, labels=range(num_classes))
assert confusion_mat.shape == (num_classes, num_classes)
ImageFolder 打印类别信息
import torchvision.datasets as dset
dataset = dset.ImageFolder('./data/dogcat_2') #没有transform,先看看取得的原始图像数据
print(dataset.classes) #根据分的文件夹的名字来确定的类别
print(dataset.class_to_idx) #按顺序为这些类别定义索引为0,1...
print(dataset.imgs) #返回从所有文件夹中得到的图片的路径以及其类别