版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/ZWX2445205419/article/details/86627561
训练模型一般步骤
目录结构:
-- project
    -- logs
        -- train.log
        -- test.log
    -- models
        -- model.ckpt
    -- networks
        -- vgg.py
        -- alexnet.py
    -- src
        -- main.py
        -- manager.py
        -- run.sh
main.py
#! -*- coding: utf-8 -*-
import argparse
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sys
sys.path.append('../')
from networks.simple_net import SimpleNet
from src.manager import Manager
# Command-line options for the training script.
#
# Every option carries a default so that `python main.py` works without any
# flags. argparse stores None for an omitted option that has no default, and
# None is not usable as a file path or as the modulus of the evaluation
# schedule.
FLAGS = argparse.ArgumentParser(description='Train and evaluate a model on MNIST.')
FLAGS.add_argument('--epochs', type=int, default=10, help='train epochs')
FLAGS.add_argument('--cuda', action='store_true', help='use GPU')
FLAGS.add_argument('--train_log', type=str, default='train.log', help='training log file')
FLAGS.add_argument('--test_log', type=str, default='test.log', help='test log file')
FLAGS.add_argument('--save_model_name', type=str, default='model.ckpt', help='save model name')
FLAGS.add_argument('--load_model_name', type=str, default='model.ckpt', help='load model name')
# Evaluate on the test set every N training epochs.
FLAGS.add_argument('--eval_frequency', type=int, default=5, help='eval frequency')
def dataloader(batch_size, data_root):
    """Build the MNIST training and test data loaders.

    Args:
        batch_size: Number of samples per batch for both loaders.
        data_root: Directory where MNIST is stored; the dataset is downloaded
            there if it is not already present.

    Returns:
        dict: ``{'train': DataLoader, 'test': DataLoader}``.
    """
    # Training augmentation: upscale to 32x32, then take a random 28x28 crop.
    train_transform = transforms.Compose([
        transforms.Resize(size=(32, 32)),
        transforms.RandomCrop(size=(28, 28)),
        transforms.ToTensor(),
    ])
    # Test images are only converted to tensors, with no augmentation.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_set = datasets.MNIST(root=data_root, train=True, download=True,
                               transform=train_transform)
    # train=False selects the held-out test split; evaluating on the training
    # split would report accuracy on images the model has already seen.
    test_set = datasets.MNIST(root=data_root, train=False, download=True,
                              transform=test_transform)

    return {
        'train': DataLoader(dataset=train_set, batch_size=batch_size,
                            shuffle=True, num_workers=4),
        # Evaluation metrics do not depend on sample order, so no shuffling.
        'test': DataLoader(dataset=test_set, batch_size=batch_size,
                           shuffle=False, num_workers=4),
    }
def main():
    """Parse the command line, build model and loaders, then run training."""
    cli_args = FLAGS.parse_args()
    mnist_dir = '/home/zwx/MyProjects/Datasets/MNIST'

    net = SimpleNet()
    trainer = Manager(net, cli_args, dataloader(32, mnist_dir))

    # Restore a previous checkpoint (if any) before the optimizer is created.
    resume_accuracy = trainer.load_model()
    trainer.train(optim.Adam(net.parameters(), lr=0.001), resume_accuracy)


if __name__ == '__main__':
    main()
manager.py
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.autograd import Variable
import os
class Manager(object):
    """Drive training, evaluation, checkpointing and logging for one model.

    Args:
        model: Network to train; it is called as ``model(inputs)`` and its
            output is fed to ``nn.CrossEntropyLoss``.
        args: Parsed options. The attributes read are ``cuda``, ``epochs``,
            ``eval_frequency``, ``train_log``, ``test_log``,
            ``save_model_name`` and ``load_model_name``.
        data_loader: Mapping with ``'train'`` and ``'test'`` entries, each an
            iterable of ``(inputs, labels)`` batches.
    """

    def __init__(self, model, args, data_loader):
        self.model = model
        self.args = args
        self.train_loader = data_loader['train']
        self.test_loader = data_loader['test']
        self.cuda = args.cuda
        self.criterion = nn.CrossEntropyLoss()

    @staticmethod
    def _summarize(total_loss, total_correct, num_batches, num_samples):
        """Turn running totals into ``(accuracy_percent, mean_batch_loss)``.

        Accuracy is normalised by the number of samples (not batches) and
        scaled to a percentage, matching the ``%`` sign in the log messages.
        Empty loaders yield ``(0.0, 0.0)`` instead of dividing by zero.
        """
        accuracy = 100.0 * total_correct / max(num_samples, 1)
        loss = total_loss / max(num_batches, 1)
        return accuracy, loss

    def eval(self):
        """Evaluate the model on the test loader.

        The result is printed and appended to ``args.test_log``. The model is
        switched back to training mode before returning.

        Returns:
            tuple: ``(accuracy, loss)`` where accuracy is a percentage and
            loss is the mean of the per-batch losses, both Python floats.
        """
        self.model.eval()
        if self.cuda:
            self.model.cuda()
            print('Eval by GPU ...')
        else:
            print('Eval by CPU ...')

        total_correct = 0
        total_loss = 0.0
        num_samples = 0
        num_batches = 0
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for inputs, labels in tqdm(self.test_loader, desc='Eval'):
                if self.cuda:
                    inputs, labels = inputs.cuda(), labels.cuda()
                scores = self.model(inputs)
                # .item() keeps the running total a plain float.
                total_loss += self.criterion(scores, labels).item()
                total_correct += self.get_accuracy(scores, labels)
                num_samples += labels.size(0)
                num_batches += 1

        accuracy, loss = self._summarize(total_loss, total_correct,
                                         num_batches, num_samples)
        info = 'Test accuracy: @{:.3f}%, loss: @{:.3f}'.format(accuracy, loss)
        self.save_log(info, self.args.test_log)
        print(info)
        print('-' * 40)
        self.model.train()
        return accuracy, loss

    def train(self, optimizer, best_accuracy=None):
        """Train for ``args.epochs`` epochs.

        After each epoch the training accuracy and loss are printed and
        logged, a checkpoint is written whenever the training accuracy
        improves on the best value so far, and the test set is evaluated
        every ``args.eval_frequency`` epochs.

        Args:
            optimizer: Optimizer already bound to the model's parameters.
            best_accuracy: Accuracy (percent) a new epoch must exceed before
                a checkpoint is written; ``None`` starts from ``0.0``.
        """
        self.model.train()
        if self.cuda:
            self.model.cuda()
        if best_accuracy is None:
            best_accuracy = 0.0
        eval_frequency = self.args.eval_frequency

        for epoch_index in range(1, self.args.epochs + 1):
            total_loss = 0.0
            total_correct = 0
            num_samples = 0
            num_batches = 0
            progress = tqdm(self.train_loader,
                            desc='Train epoch: {}'.format(epoch_index))
            for inputs, labels in progress:
                if self.cuda:
                    inputs, labels = inputs.cuda(), labels.cuda()
                optimizer.zero_grad()
                scores = self.model(inputs)
                batch_loss = self.criterion(scores, labels)
                batch_loss.backward()
                optimizer.step()
                # Accumulate a float, not the tensor: summing the tensor
                # would keep every batch's autograd graph alive all epoch.
                total_loss += batch_loss.item()
                total_correct += self.get_accuracy(scores, labels)
                num_samples += labels.size(0)
                num_batches += 1

            accuracy, loss = self._summarize(total_loss, total_correct,
                                             num_batches, num_samples)
            info = 'Epoch: {} train accuracy: @{:.3f}%, loss: @{:.3f}'.format(
                epoch_index, accuracy, loss)
            print(info)
            self.save_log(info, self.args.train_log)

            if accuracy > best_accuracy:
                print('Best model so far, best accuracy @{:.3f} -> @{:.3f}'.format(
                    best_accuracy, accuracy))
                best_accuracy = accuracy
                # Argument order follows save_model(epoch, loss, best_accuracy, ...).
                self.save_model(epoch_index, loss, best_accuracy,
                                self.args.save_model_name)

            # A missing or zero frequency disables periodic evaluation.
            if eval_frequency and epoch_index % eval_frequency == 0:
                self.eval()

    def get_accuracy(self, scores, labels):
        """Count the correctly classified samples in one batch.

        Args:
            scores: Tensor of per-class scores; the prediction is the argmax
                along dimension 1.
            labels: Tensor of target class indices.

        Returns:
            int: Number of samples whose prediction equals the label (a
            count, not a ratio).
        """
        predictions = torch.argmax(scores, dim=1)
        return int((predictions == labels).sum().item())

    def save_model(self, epoch, loss, best_accuracy, save_name):
        """Write a checkpoint holding the model weights and training metadata.

        Args:
            epoch: Epoch number at which the checkpoint is taken.
            loss: Training loss of that epoch.
            best_accuracy: Best training accuracy so far.
            save_name: Destination file; its directory is created if missing.

        Returns:
            ``best_accuracy``, unchanged.
        """
        save_dir = os.path.dirname(save_name)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        # Weights are saved from CPU so the file does not reference a GPU.
        if self.cuda:
            self.model.cpu()
        try:
            ckpt = {
                'epoch': epoch,
                'loss': loss,
                'best_accuracy': best_accuracy,
                'state_dict': self.model.state_dict(),
            }
            torch.save(ckpt, save_name)
        finally:
            # Move the model back to the GPU even if saving failed, so that
            # training can continue.
            if self.cuda:
                self.model.cuda()
        return best_accuracy

    def load_model(self):
        """Restore weights from ``args.load_model_name`` if that file exists.

        Returns:
            The ``best_accuracy`` stored in the checkpoint, or ``None`` when
            no path is configured or the file does not exist.
        """
        model_path = self.args.load_model_name
        if not model_path or not os.path.exists(model_path):
            print('Train from beginning ...')
            return None

        print('Model load state dict from {}'.format(model_path))
        # map_location lets a checkpoint be loaded on a machine without a GPU.
        ckpt = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict(ckpt['state_dict'])
        epoch = ckpt['epoch']
        loss = ckpt['loss']
        best_accuracy = ckpt['best_accuracy']
        print('Load successful! model saved at {} epoch, best accuracy: {:.3f}, loss: {:.3f}'.format(
            epoch, best_accuracy, loss))
        return best_accuracy

    def save_log(self, info, log_file):
        """Append ``info`` as one line to ``log_file``.

        Nothing is written when ``log_file`` is empty or ``None``. The log
        directory is created if it does not exist.
        """
        if not log_file:
            return
        log_dir = os.path.dirname(log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        with open(log_file, 'a') as f:
            f.write('{}\n'.format(info))
run.sh
#!/usr/bin/env bash
# Launch training through main.py with GPU enabled.
# Logs go to ../logs and checkpoints to ../models, relative to this script.
set -euo pipefail

# The relative paths below (and main.py itself) are resolved from the
# directory that contains this script, so run from there regardless of where
# the script was invoked.
cd "$(dirname "${BASH_SOURCE[0]}")"

LOG_ROOT='../logs'
# No trailing slash: the paths below insert their own separator.
MODEL_ROOT='../models'
EPOCHS=100
EVAL_FREQUENCY=5
MODEL_NAME='residual_attention_network'
TRAIN_LOG="${LOG_ROOT}/${MODEL_NAME}_train.log"
TEST_LOG="${LOG_ROOT}/${MODEL_NAME}_test.log"
SAVE_MODEL_NAME="${MODEL_ROOT}/${MODEL_NAME}_${EPOCHS}.ckpt"
# Checkpoint to resume from; it is looked up in the models directory.
LOAD_MODEL_NAME="${MODEL_ROOT}/model.ckpt"

# Make sure the output directories exist before training starts.
mkdir -p "${LOG_ROOT}" "${MODEL_ROOT}"

python main.py --cuda \
    --epochs "${EPOCHS}" \
    --train_log "${TRAIN_LOG}" \
    --test_log "${TEST_LOG}" \
    --save_model_name "${SAVE_MODEL_NAME}" \
    --load_model_name "${LOAD_MODEL_NAME}" \
    --eval_frequency "${EVAL_FREQUENCY}"