代码解读:train.py
训练函数
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and periodically print running statistics.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Network whose forward pass returns three outputs, unpacked
            here as ``(output, global_c, local_c)``.
        criterion: Loss callable applied to each of the three outputs.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch index; only used in the log line.
    """
    batch_time = AverageMeter()  # wall time per iteration
    data_time = AverageMeter()   # time spent waiting for the data loader
    losses = AverageMeter()      # combined loss
    top1 = AverageMeter()        # top-1 precision
    top5 = AverageMeter()        # top-5 precision

    # Switch to training mode.
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # Everything since the previous iteration ended counts as data loading.
        data_time.update(time.time() - end)

        # Move the batch to the GPU. Tensors are passed to the model directly;
        # the torch.autograd.Variable wrapper is deprecated and adds nothing.
        target = target.cuda()
        images = images.cuda()
        batch_size = images.size(0)

        # Forward pass: each of the three outputs gets its own loss term, and
        # the two auxiliary terms are weighted by 0.5.
        output, global_c, local_c = model(images)
        concate_loss = criterion(output, target)
        global_loss = criterion(global_c, target)
        local_loss = criterion(local_c, target)
        loss = concate_loss + 0.5 * (global_loss + local_loss)

        # Precision is measured on the first output only; detach it so the
        # metric computation is not tracked by autograd.
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record the full iteration time and restart the clock.
        batch_time.update(time.time() - end)
        end = time.time()

        # Log every ``args.print_freq`` iterations.
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
验证函数
def validate(val_loader, model, criterion):
    """Evaluate the model on a validation set and return the mean top-1 precision.

    Args:
        val_loader: Iterable yielding ``(input, target)`` batches.
        model: Network whose forward pass returns three outputs, unpacked
            here as ``(output, global_c, local_c)``.
        criterion: Loss callable applied to each of the three outputs.

    Returns:
        The running average of top-1 precision over all batches.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Switch to evaluation mode.
    model.eval()

    end = time.time()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # Move the batch to the GPU.
            target = target.cuda()
            images = images.cuda()
            batch_size = images.size(0)

            # Same loss composition as in training so the numbers are comparable.
            output, global_c, local_c = model(images)
            concate_loss = criterion(output, target)
            global_loss = criterion(global_c, target)
            local_loss = criterion(local_c, target)
            loss = concate_loss + 0.5 * (global_loss + local_loss)

            # Precision is measured on the first output only.
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            # Record the iteration time and restart the clock.
            batch_time.update(time.time() - end)
            end = time.time()

            # Log every ``args.print_freq`` iterations.
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1, top5=top5))

    # Summary over the whole validation set.
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg
保存模型和调整学习率
def save():
    """Write the weights of the module-level ``vgg16`` model to ``args.pathModelParams``."""
    checkpoint_path = args.pathModelParams
    weights = vgg16.state_dict()
    torch.save(weights, checkpoint_path)
    print('Checkpoint saved to {}'.format(checkpoint_path))
class AverageMeter(object):
    """Track the latest value and the weighted running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0    # most recent value passed to update()
        self.avg = 0    # sum / count
        self.sum = 0    # weighted sum of all values seen
        self.count = 0  # total weight seen

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. the batch size).

        Args:
            val: The new measurement.
            n: Weight of the measurement; defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. an empty first batch) would otherwise raise
        # ZeroDivisionError; report 0 in that case, matching the reset state.
        self.avg = self.sum / self.count if self.count else 0
def adjust_learning_rate(optimizer, epoch):
    """Decay the learning rate by a factor of 10 every 40 epochs.

    The base rate is ``args.lr * 0.1 ** (epoch // 40)``. Parameter groups 0-1
    receive the base rate; groups 2-4 receive 1% of it.

    Args:
        optimizer: Optimizer with at least five parameter groups.
        epoch: Current epoch index.

    Raises:
        IndexError: If the optimizer has fewer than five parameter groups.
    """
    lr = args.lr * (0.1 ** (epoch // 40))

    # Write to the optimizer's live ``param_groups``. The dicts returned by
    # ``optimizer.state_dict()['param_groups']`` are packed copies, so editing
    # them leaves the rate actually used by ``optimizer.step()`` unchanged.
    live_groups = optimizer.param_groups
    live_groups[0]['lr'] = lr
    live_groups[1]['lr'] = lr
    live_groups[2]['lr'] = lr * 0.01
    live_groups[3]['lr'] = lr * 0.01
    live_groups[4]['lr'] = lr * 0.01

    # Print the packed view (parameters shown as indices rather than tensors)
    # so the log stays as compact as before while reflecting the new rates.
    for param_group in optimizer.state_dict()['param_groups']:
        print(param_group)
def accuracy(output, target, topk=(1,)):
    """Return precision@k, as a percentage, for every k in ``topk``.

    Args:
        output: Score tensor; the top-k search runs along dimension 1.
        target: Ground-truth labels, one per row of ``output``.
        topk: Iterable of k values to evaluate.

    Returns:
        A list of scalar tensors, one per entry of ``topk``, in the same order.
    """
    largest_k = max(topk)
    num_samples = target.size(0)

    # Indices of the highest-scoring classes, transposed so that row r holds
    # the (r+1)-th ranked prediction for every sample.
    ranked = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
    hits = ranked.eq(target.view(1, -1).expand_as(ranked))

    percent_per_hit = 100.0 / num_samples
    return [
        hits[:k].contiguous().view(-1).float().sum(0).mul_(percent_per_hit)
        for k in topk
    ]
训练循环
# Main loop: for every epoch, refresh the learning rate, train, evaluate,
# and checkpoint whenever the validation top-1 precision improves.
best_prec1 = 0
for epoch in range(0, args.epoch):
    adjust_learning_rate(optimizer, epoch)

    train(train_loader, vgg16, criterion, optimizer, epoch)
    prec1 = validate(test_loader, vgg16, criterion)

    # Save the weights only on a strict improvement over the best so far.
    is_best = prec1 > best_prec1
    if is_best:
        save()
    best_prec1 = max(prec1, best_prec1)