pytorch training EfficientnetV2


foreword

  Not long ago I reproduced the EfficientNetV2 network structure in PyTorch, but other work got in the way, so this article on the training part was delayed until now. For reference, here are some related articles on EfficientNet:
EfficientnetV1 training
Flask Deploy EfficientnetV1 classification network
pytorch to build EfficientnetV2 network structure
The training code in this article is also based on the V1 training code, so it differs from the official implementation.


1. Data placement

  For the placement of training data, please refer to the EfficientnetV1 training article, which is roughly as follows:
insert image description here
The train and val folders each contain the data for every class. Taking cats and dogs as an example, the layout is as follows:
insert image description here

2. Training

  The training code is given directly below, with some comments added. If you look closely you will see that it is similar to the V1 training code; only the way the code is organized and a few parameters have changed. Note that you need to copy the EfficientNetV2 network structure reproduced in PyTorch (see the article mentioned above) into model.py and place it as shown below.
insert image description here
The code details are as follows:

from model import EfficientnetV2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
import os,time,argparse

# Device used for the input batches: the GPU when CUDA is available, otherwise the CPU.
device="cuda" if torch.cuda.is_available() else "cpu"
# Data loading
def process(opt):
    """Build the training and validation DataLoaders.

    Images are read with ``ImageFolder`` from ``<opt.img_dir>/train`` and
    ``<opt.img_dir>/val``.

    :param opt: options object; ``img_dir``, ``imgsz`` and ``batch_size`` are read
    :return: ``(trainx, valx, b)`` - the training loader, the validation loader
        and the training set's ``class_to_idx`` mapping (class name -> index)
    """
    # ImageNet channel statistics, shared by both pipelines
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # NOTE: images are centre-cropped without a prior resize, so whatever lies
    # outside the central imgsz x imgsz window is discarded.
    data_transforms = {
        'train': transforms.Compose([
            transforms.CenterCrop((opt.imgsz, opt.imgsz)),  # centre crop
            transforms.RandomRotation(10),  # random rotation within [-10, 10] degrees
            transforms.RandomHorizontalFlip(p=0.2),  # horizontal mirror with probability 0.2
            transforms.ToTensor(),
            normalize,
        ]),
        "val": transforms.Compose([
            transforms.CenterCrop((opt.imgsz, opt.imgsz)),  # centre crop
            transforms.ToTensor(),
            normalize,
        ]),
    }

    image_datasets = {
        x: datasets.ImageFolder(os.path.join(opt.img_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }

    trainx = DataLoader(image_datasets["train"], batch_size=opt.batch_size, shuffle=True, drop_last=True)
    # Validation must see every sample exactly once: no shuffling, and the
    # last partial batch is kept so the accuracy covers the whole set (and a
    # validation set smaller than one batch still yields a batch).
    valx = DataLoader(image_datasets["val"], batch_size=opt.batch_size, shuffle=False, drop_last=False)

    b = image_datasets["train"].class_to_idx  # class name -> class index
    print(b)
    return trainx,valx,b
# Training
def train(opt):
    """Train an EfficientnetV2 classifier and keep the best checkpoint.

    For every epoch the learning rate is set by ``lrfn``, the model is trained
    on the training loader and then evaluated on the validation loader. When
    the validation accuracy improves, the whole model object is saved to
    ``<opt.save_dir>/<MMDD_HHMM>/best.pth``.

    :param opt: options object; ``save_dir``, ``model_type``, ``class_num``,
        ``lr``, ``m`` and ``epochs`` are read here, the rest by ``process``
    :raises ValueError: if the training or validation loader yields no batches
    :return: None
    """
    start_time = time.strftime("%m%d_%H%M", time.localtime())
    # os.path.join keeps an empty save_dir relative to the working directory
    # instead of producing a path at the filesystem root.
    save_weight = os.path.join(opt.save_dir, start_time)
    os.makedirs(save_weight, exist_ok=True)
    # Follow the module-level device so the script also runs without CUDA.
    model = EfficientnetV2(opt.model_type, opt.class_num).to(device)
    best_acc = 0
    best_epoch = 0  # epoch that produced the best validation accuracy
    optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.m, weight_decay=0.0004)
    cross = nn.CrossEntropyLoss()
    trainx, valx, _ = process(opt)
    if len(trainx) == 0 or len(valx) == 0:
        raise ValueError("train/val loader is empty; check img_dir and batch_size")

    for ech in range(opt.epochs):
        # lrfn updates the learning rate in place and returns the same optimizer.
        optimizer = lrfn(ech, optimizer, opt.lr)

        print("----------Start Train Epoch %d----------" % (ech + 1))
        # Validation switches to eval mode, so re-enable training mode each epoch.
        model.train()
        run_loss = 0.0  # summed batch losses
        run_correct = 0  # correctly classified training samples so far
        count = 0  # training samples seen so far

        for i, (inputs, label) in enumerate(trainx):
            inputs, label = inputs.to(device), label.to(device)

            optimizer.zero_grad()
            output = model(inputs)
            loss = cross(output, label)
            loss.backward()
            optimizer.step()

            run_loss += loss.item()
            pred = output.detach().argmax(dim=1)
            count += label.size(0)
            run_correct += pred.eq(label).sum().item()
            # Every 500 batches print the running accuracy
            # (correct so far / samples seen so far).
            if (i + 1) % 500 == 0:
                print('[Epoch:{}__iter:{}/{}] | Acc:{}'.format(ech + 1, i + 1, len(trainx), run_correct / count))

        train_acc = run_correct / count
        print('Epoch:{} | Loss:{} | Acc:{}'.format(ech + 1, run_loss / len(trainx), train_acc))

        # Validate after every epoch, in eval mode and without gradients.
        print("----------Waiting Test Epoch {}----------".format(ech + 1))
        model.eval()
        correct = 0  # correctly classified validation samples
        total = 0  # validation samples seen
        with torch.no_grad():
            for inputs, labels in valx:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                pred = outputs.argmax(dim=1)  # index of the highest-scoring class
                total += labels.size(0)
                correct += pred.eq(labels).sum().item()
        test_acc = correct / total
        print("批次%d的验证集准确率:" % (ech + 1), test_acc)
        if best_acc < test_acc:
            best_acc = test_acc
            best_epoch = ech + 1
            torch.save(model, os.path.join(save_weight, "best.pth"))
        print(f'best epoch : {best_epoch}, best accuracy : {best_acc}')

# Learning-rate schedule
def lrfn(num_epoch,optim,lr):
    """Set the learning rate for ``num_epoch`` on every parameter group.

    The schedule has three phases: a linear warm-up from ``lr`` towards 0.01
    during the first 5 epochs, 10 epochs held at 0.01, then an exponential
    decay (factor 0.8 per epoch) back towards ``lr``.

    :param num_epoch: zero-based epoch index
    :param optim: optimizer whose ``param_groups`` are updated in place
    :param lr: initial learning rate, also the floor of the decay phase
    :return: the same optimizer object
    """
    peak_lr = 0.01
    warmup_epochs = 5  # epochs of linear increase
    hold_epochs = 10  # epochs held at the peak
    decay_rate = .8  # per-epoch decay factor

    if num_epoch >= warmup_epochs + hold_epochs:
        # exponential decay
        current = (peak_lr - lr) * decay_rate ** (num_epoch - warmup_epochs - hold_epochs) + lr
    elif num_epoch >= warmup_epochs:
        # plateau
        current = peak_lr
    else:
        # linear warm-up
        current = (peak_lr - lr) / warmup_epochs * num_epoch + lr

    for group in optim.param_groups:
        group['lr'] = current
    return optim


def parse_opt():
    """Parse the training options from the command line.

    Arguments the parser does not recognise are ignored rather than rejected.

    :return: namespace with ``model_type``, ``img_dir``, ``imgsz``, ``epochs``,
        ``batch_size``, ``class_num``, ``lr``, ``m`` and ``save_dir``
    """
    # (flag, type, default, help) for every option, in registration order
    option_table = (
        ("--model_type", str, "S", "Model type"),  # model variant; the original note lists s, m, l in either case
        ("--img-dir", str, "", "train image path"),  # dataset root
        ("--imgsz", int, 480, "image size"),  # image size
        ("--epochs", int, 100, "train epochs"),  # number of training epochs
        ("--batch-size", int, 16, "train batch-size"),  # batch size
        ("--class_num", int, 2, "class num"),  # number of classes
        ("--lr", float, 0.0001, "Init lr"),  # initial learning rate
        ("--m", float, 0.9, "optimer momentum"),  # SGD momentum
        ("--save_dir", str, "", "save models dir"),  # where models are saved
    )

    parser = argparse.ArgumentParser()
    for flag, kind, fallback, description in option_table:
        parser.add_argument(flag, type=kind, default=fallback, help=description)

    known, _ignored = parser.parse_known_args()
    return known



if __name__ == '__main__':
    # Parse the command-line options, then run training with them.
    opt=parse_opt()
    # train() has no return statement, so `models` is always None.
    models=train(opt)

In train.py, you only need to choose a model type in parse_opt() (such as S, M or L) and then set the dataset path, the number of classes, and the save path to your own values.
Note: process() calls print(b), which prints the mapping from class names to class indices. Be sure to record it and fill in class_list in the test script in the same order; otherwise the predicted class names may be wrong.

2. Test

The test code is given directly below. Set img_dir (the test data path) and the model path passed to torch.load() to your own paths. The code is as follows:

import torch
import torchvision
from PIL import Image
import cv2,glob,os,time
import shutil
from pathlib import Path

def expend_img(img,img_size=480,expand_pix=0):
    '''
    Pad an image to a square canvas, then resize it to img_size x img_size.

    The canvas side is the largest of the image height, the image width and
    img_size, and the image is centred on it. Padding to a square first keeps
    the aspect ratio of the content when the image is resized.

    :param img: image array whose first two dimensions are height and width
    :param img_size: side length of the square output, in pixels
    :param expand_pix: border fill value, default 0 (black); a single number
        is applied to every channel, any other value is passed to OpenCV as-is
    :return: the padded and resized image
    '''
    h, w = img.shape[:2]
    side = max(h, w, img_size)

    # Split each padding amount so both halves add up exactly; an odd
    # difference would otherwise leave the canvas one pixel short of square.
    pad_w = side - w
    pad_h = side - h
    left_expand = pad_w // 2
    right_expand = pad_w - left_expand
    top_expand = pad_h // 2
    bottom_expand = pad_h - top_expand

    # NOTE(review): OpenCV appears to read a bare number as the first channel
    # only (the others become 0), so broadcast it to every channel.
    if isinstance(expand_pix, (int, float)):
        fill = (expand_pix,) * 4
    else:
        fill = expand_pix

    if pad_w or pad_h:
        new_img = cv2.copyMakeBorder(img, top_expand, bottom_expand, left_expand, right_expand,
                                     cv2.BORDER_CONSTANT, value=fill)
    else:
        # already square and at least img_size on each side
        new_img = img

    new_img = cv2.resize(new_img, (img_size, img_size))
    return new_img

# Classify every .jpg image under img_dir with a saved model
if __name__ == '__main__':
    img_dir="" # path to the test images
    model_path="" # path to the saved model
    img_size=480 # side length of the network input, in pixels
    img_list=glob.glob(os.path.join(img_dir,"*.jpg"))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Load the model.
    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    # NOTE(review): recent PyTorch releases may need weights_only=False to load
    # a fully pickled model object - confirm against the installed version.
    # map_location lets a model saved on a GPU machine load on a CPU-only one.
    model=torch.load(model_path, map_location=device).to(device)
    model.eval()

    # Class names indexed by class id; while this is left empty the predicted
    # class index is printed instead of a name.
    class_list=[]

    # The preprocessing pipeline is the same for every image, so build it once.
    data_transorform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    for imgpath in img_list:
        img=cv2.imread(imgpath)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            print(f"{imgpath} could not be read, skipped")
            continue
        s=time.time()
        img=expend_img(img,img_size)

        # OpenCV loads BGR; convert to RGB before handing the image to PIL
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # add the batch dimension: (3, H, W) -> (1, 3, H, W)
        pred_img = data_transorform(img).unsqueeze(0).to(device)
        # inference only, so skip building the autograd graph
        with torch.no_grad():
            pred = torch.softmax(model(pred_img)[0], dim=0)
        score, pred_id = torch.max(pred, dim=0)
        pred_id = pred_id.item()
        # predicted class
        pred_class = class_list[pred_id] if class_list else pred_id
        e=time.time()

        print(f"{imgpath} is {pred_class},score is {score.item()},inference time is {e-s}")
    print("Finished!")



3. Training and Test Results

Only part of the results are given here, as follows:
I interrupted the training after 12 epochs because I needed the server for something else. The accuracy at that point was:
insert image description here
When testing 3000 images with this model, the result was:
insert image description here
Note: the image size is set to 480 in both the training and test code; change it according to your own needs.


Summary

  The above is the entire content of this article. If you have any questions, please feel free to communicate in the comment area.

Guess you like

Origin blog.csdn.net/qq_55068938/article/details/130952629