TensorBoard: main usage

TensorBoard is mainly used to record various quantities during training; it exposes essentially all the training details, which is a great help when debugging and improving a model.

from datetime import datetime
from tensorboardX import SummaryWriter

# TIMESTAMP creates a new sub-folder per run, so results from different training sessions are kept separate
TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
tb_writer = SummaryWriter(logdir='./runs/'+TIMESTAMP)
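
With the writer created, everything logged below ends up under ./runs/<TIMESTAMP>, and the dashboard is served with the standard CLI, run from the project root:

tensorboard --logdir ./runs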

The most commonly used methods are the following:

1、add_text

add_text(
            tag: str,
            text_string: str,
            global_step: Optional[int] = None,
            walltime: Optional[float] = None):

Although intended for logging plain text, it is also a convenient way to record the hyperparameters used for a training run.
tb_writer.add_text(tag="hyperparameters", text_string=str(args), global_step=0)

2、add_scalar

add_scalar(
            self,
            tag: str,
            scalar_value: Union[float, numpy_compatible],
            global_step: Optional[int] = None,
            walltime: Optional[float] = None,
            display_name: Optional[str] = "",
            summary_description: Optional[str] = ""):

This is the most frequently used method: it records how scalar values such as the loss and the learning rate change over training.
tb_writer.add_scalar(tag="loss", scalar_value=total_loss/(i+1), global_step=epoch)
tb_writer.add_scalar(tag="lr", scalar_value=current_lr, global_step=epoch)

3、add_image

add_image(
            self,
            tag: str,
            img_tensor: numpy_compatible,
            global_step: Optional[int] = None,
            walltime: Optional[float] = None,
            dataformats: Optional[str] = 'CHW'):

img_tensor: a tensor shaped [channel, height, width]; elements should be floats in [0, 1] or uint8 values in [0, 255].
What this function can display is very flexible: convolution kernels, intermediate feature maps, or each epoch's predictions on the test images. It can be combined with torchvision.utils.make_grid() or with plt.figure(), and it can even be paired with the Grad-CAM method to save a heat map for every prediction, letting you watch the model's regions of interest evolve over iterations. Of the two, plt.figure() is the more flexible.

grid = make_grid(img, normalize=True, scale_each=True, nrow=4)
tb_writer.add_image(tag="val original images", img_tensor=grid)
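
For the plt.figure() route, the companion method add_figure renders a matplotlib figure into an image event. A minimal sketch, assuming img is an unnormalized CHW batch such as the one in the evaluation loop below:

import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 8))
for idx in range(4):
    ax = fig.add_subplot(2, 2, idx + 1)
    ax.imshow(img[idx].permute(1, 2, 0).numpy())  # CHW -> HWC for matplotlib
    ax.axis('off')
tb_writer.add_figure(tag="val figure", figure=fig, global_step=epoch)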

4、add_histogram

add_histogram(
            self,
            tag: str,
            values: numpy_compatible,
            global_step: Optional[int] = None,
            bins: Optional[str] = 'tensorflow',
            walltime: Optional[float] = None,
            max_bins=None):

It is generally used to watch the histograms of weights over training and to spot potential problems, such as values collapsing toward zero or blowing up.

tb_writer.add_histogram(tag="conv1",
                            values=model.conv1.weight,
                            global_step=epoch)
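
Rather than tracking a single layer, a short loop over the model's named parameters logs one histogram per layer; a sketch:

# one histogram per parameter tensor, tagged with its layer name
for name, param in model.named_parameters():
    tb_writer.add_histogram(tag=name, values=param, global_step=epoch)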

5、add_graph

add_graph(
            self,
            model,
            input_to_model=None,
            verbose=False):

To use it, you need to construct an input to trace the model with, and the tensorboard package must be installed; the other methods do not depend on tensorboard.
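
A minimal sketch, assuming the resnet18 model and the 28x28 input size used in the full script below:

dummy_input = torch.rand(1, 3, 28, 28).cuda()  # shape and device must match what the model expects
tb_writer.add_graph(model, input_to_model=dummy_input)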

The complete code is as follows:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import torch.optim as optim
import torch.nn as nn
import math
import torch
import argparse
import torchvision.transforms.functional as F
from tensorboardX import SummaryWriter
from torchvision import datasets,transforms,models
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
from datetime import datetime
from tqdm import tqdm

parser = argparse.ArgumentParser(description='Tensorboard Tutorial Demo Code')
parser.add_argument('-batch_size', '-b', type=int, help='batch size', default=32)
parser.add_argument('-cuda', '-g', type=int, help='cuda id', default=0)
parser.add_argument('-Epoch', '-e', type=int, default=5)

# learning rate
parser.add_argument('-lambda_lr', '-llr',type=str, default='cos_lr')
parser.add_argument('-learning_rate', '-lr', type=float, help='learning rate', default=1e-4)
parser.add_argument('-warm_up_epochs', '-w', type=int, help='warm up epoch for Cosine Schedule', default=1)
parser.add_argument('-weight_decay', '-wd', type=float, default=4e-5,
                    help='weight decay for Adam')
# dataset
parser.add_argument('-dataset_name', '-data', type=str, default='cifar10')
parser.add_argument('-img_size', '-is', type=int, default=32)
parser.add_argument('-crop_size', '-cs', type=int, default=28)

args = parser.parse_args()


transform = transforms.Compose(
        [transforms.Resize([32,32]),
         transforms.RandomResizedCrop([28,28]),
         transforms.RandomHorizontalFlip(),
         transforms.ToTensor(),
         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
         ])
CIFAR10 = datasets.CIFAR10(root='./cifar10', download=True, transform=transform)
data_loader = DataLoader(CIFAR10, batch_size=args.batch_size, shuffle=True, num_workers=4)

model = models.resnet18(num_classes=10).cuda()

optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                      momentum=0.9, weight_decay=args.weight_decay)

# linear warm-up for the first warm_up_epochs, then cosine decay towards zero
warm_up_epochs = args.warm_up_epochs
warm_up_with_cosine_lr = lambda epoch: (epoch + 1) / warm_up_epochs if epoch < warm_up_epochs \
    else 0.5 * (math.cos((epoch - warm_up_epochs) / (args.Epoch - warm_up_epochs) * math.pi) + 1)

lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, warm_up_with_cosine_lr)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1).cuda()

TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
tb_writer = SummaryWriter(logdir='./runs/'+TIMESTAMP)
tb_writer.add_text(tag="super paramters", text_string=str(args),global_step=0)

# train
for epoch in range(args.Epoch):
    total_loss = 0
    length = len(data_loader)
    with tqdm(total=length, mininterval=0.3) as pbar:
        for i,(img,label) in enumerate(data_loader):
            
            img,label = img.cuda(),label.cuda()
            output = model(img)
            
            loss = criterion(output,label)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            current_lr = optimizer.param_groups[0]['lr']
            pbar.set_description(f'epoch:{epoch+1}/{args.Epoch}, iter:{i + 1}/{length}')
            pbar.set_postfix(**{'avg_loss': total_loss/(i+1),
                                'lr'        : current_lr})
            pbar.update(1)
        
        # tensorboard log
        tb_writer.add_scalar(tag="loss", scalar_value=total_loss/(i+1),global_step=epoch)
        tb_writer.add_scalar(tag="lr", scalar_value=current_lr,global_step=epoch)
        tb_writer.add_histogram(tag="conv1",
                            values=model.conv1.weight,
                            global_step=epoch)
        lr_scheduler.step()

# evaluate
transform_val = transforms.Compose(
        [transforms.Resize([32,32]),
         transforms.CenterCrop([28,28]),
         transforms.ToTensor(),
         # Normalize is deliberately omitted here so that raw images can be
         # logged to tensorboard; it is applied manually in the loop below
         ])

CIFAR10_val = datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=transform_val)
data_loader_val = DataLoader(CIFAR10_val, batch_size=8, shuffle=False, num_workers=4)

with torch.no_grad():
    model.eval()
    for img,label in data_loader_val:
        # log the raw (unnormalized) validation images
        grid = make_grid(img, normalize=True, scale_each=True, nrow=4)
        tb_writer.add_image(tag="val original images", img_tensor=grid)

        # normalize manually, then run the model on this batch
        img = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        img, label = img.cuda(), label.cuda()
        output = model(img)

        pred = torch.max(output, 1)[1]  # predicted class indices

        break  # one batch is enough for this demo
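
Finally the writer should be closed so that pending events are flushed to disk. If the loop above ran over every batch instead of stopping after the first, the predictions could be accumulated into a validation accuracy and logged as one more scalar; a sketch of that extension (not part of the original script):

correct = total = 0
with torch.no_grad():
    for img, label in data_loader_val:
        img = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        img, label = img.cuda(), label.cuda()
        pred = torch.max(model(img), 1)[1]
        correct += (pred == label).sum().item()  # count correct predictions
        total += label.size(0)

tb_writer.add_scalar(tag="val_accuracy", scalar_value=correct / total, global_step=args.Epoch)
tb_writer.close()  # flush everything to disk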

Origin: blog.csdn.net/Huang_Fj/article/details/123766034