PyTorch advanced learning (4): training your data with different classification models (AlexNet, ResNet, VGG, etc.)

Course resources: "5. I have written more than ten classification models for you, and you can run them directly" [Pytorch for elementary school students] - bilibili

 

Table of contents

1. Project introduction

 1. Dataset preparation

2. Run CreateDataset.py

3. Run TrainModal.py 

4. How to switch the graphics card model

2. The code

1. CreateDataset.py

2. TrainModal.py

3. Running results


1. Project introduction

 1. Dataset preparation

The dataset is under the data folder.

 

2. Run CreateDataset.py

Run CreateDataset.py to generate the dataset files train.txt and test.txt.
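Each line in these files is an image path and its class index separated by a tab; this is the format the data loader later expects. The folder and file names below are hypothetical placeholders, just to show the layout:

data/class_a/img_001.jpg	0
data/class_a/img_002.jpg	0
data/class_b/img_014.jpg	1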

 

3. Run TrainModal.py 

To train the model, I imported several network models (alexnet, vgg, resnet) from torchvision.models; to switch models, just uncomment the corresponding line. For example, here I am training vgg11.

    # Do not use pretrained parameters
    # model = alexnet(pretrained=False, num_classes=5).to(device)  # 29.3%

    '''        VGG series    '''
    model = vgg11(weights=False, num_classes=5).to(device)   #  23.1%
    # model = vgg13(weights=False, num_classes=5).to(device)   # 30.0%
    # model = vgg16(weights=False, num_classes=5).to(device)


    '''        ResNet series    '''
    # model = resnet18(weights=False, num_classes=5).to(device)    # 43.6%
    # model = resnet34(weights=False, num_classes=5).to(device)
    # model = resnet50(weights=False, num_classes=5).to(device)
    # model = resnet101(weights=False, num_classes=5).to(device)   #  26.2%
    # model = resnet152(weights=False, num_classes=5).to(device)

Note that the weights parameter of vgg11 is set to False. Looking at the definition of vgg, weights controls whether pretrained parameters are loaded; setting it to False (newer torchvision versions spell this weights=None) means we do not use pretrained parameters. The VGG pretrained weights were trained for 1000 classes, and our own dataset has far fewer classes, so no pretraining is used here.
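As a side note (not part of the original workflow): if you did want to start from the ImageNet pretrained weights despite the 1000-class head, a common approach with torchvision >= 0.13 is to load the pretrained backbone and then replace the final classifier layer with a 5-class one. A minimal sketch:

from torch import nn
from torchvision.models import vgg11, VGG11_Weights

# Load the 1000-class ImageNet weights, then swap in a new 5-class output layer
model = vgg11(weights=VGG11_Weights.DEFAULT)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 5)
model = model.to(device)  # device as defined in TrainModal.py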

 

In the last line, change the file name of the generated .pth file to match the corresponding network, e.g. model_vgg11.pth.

    # Save the trained model
    torch.save(model.state_dict(), "model_vgg11.pth")
    print("Saved PyTorch Model Success!")

4. How to switch the graphics card model

During training I ran into a torch.cuda.OutOfMemoryError: the GPU did not have enough video memory. Use the following command to check GPU usage on the server:

nvidia-smi

It shows that I had been using the default GPU 0, whose utilization had reached 100%, while GPU 1 was only at 67% and still had memory available, so the following code switches from GPU 0 to GPU 1.

# Select GPU 1
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
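One caveat worth adding: CUDA_VISIBLE_DEVICES only takes effect if it is set before PyTorch initializes CUDA, i.e. before any tensor or model is moved to the GPU (in TrainModal.py it is set right after the imports). Once set, the selected physical card is exposed to PyTorch as device 0, which a quick sanity check can confirm:

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'  # must happen before any CUDA call

import torch
print(torch.cuda.device_count())      # expected: 1, only the selected card is visible
print(torch.cuda.get_device_name(0))  # physical card 1, now addressed as cuda:0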

 

2. The code

1. CreateDataset.py

'''
Generate the training set and the test set, saved in txt files
'''
# These txt files act as the model's input: the DataLoader later reads its data from them
import os
import random  # used to shuffle the data

# 60% is used as the training set
train_ratio = 0.6

# the rest is used as the test set
test_ratio = 1-train_ratio

rootdata = r"data"  #数据的根目录

train_list, test_list = [],[]  # sample lists for the training set and the test set
data_list = []

# Generate train.txt and test.txt
# class_flag starts at -1 because the first os.walk entry is the root folder itself (assumed to contain no image files);
# it is incremented at the end of each iteration, so the class sub-folders are labelled 0, 1, 2, ...
class_flag = -1
for a,b,c in os.walk(rootdata):
    print(a)
    for i in range(len(c)):
        data_list.append(os.path.join(a,c[i]))

    for i in range(0,int(len(c)*train_ratio)):
        train_data = os.path.join(a, c[i])+'\t'+str(class_flag)+'\n'
        train_list.append(train_data)

    for i in range(int(len(c) * train_ratio),len(c)):
        test_data = os.path.join(a, c[i]) + '\t' + str(class_flag)+'\n'
        test_list.append(test_data)

    class_flag += 1

print(train_list)
random.shuffle(train_list)  # shuffle the order
random.shuffle(test_list)

with open('train.txt','w',encoding='UTF-8') as f:
    for train_img in train_list:
        f.write(str(train_img))

with open('test.txt','w',encoding='UTF-8') as f:
    for test_img in test_list:
        f.write(test_img)

2. TrainModal.py

'''
    Load a model that ships with PyTorch (torchvision) and train it from scratch on our own data
'''
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from utils import LoadData

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'   # use physical GPU 1 (see section 4 above)


from torchvision.models import alexnet  # the simplest model
from torchvision.models import vgg11, vgg13, vgg16, vgg19   # VGG series
from torchvision.models import resnet18, resnet34, resnet50, resnet101, resnet152    # ResNet series
from torchvision.models import inception_v3     # Inception series

# Define the training function
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    # Read each batch from the data loader: batch (batch index), X (image data), y (ground-truth labels)
    for batch, (X, y) in enumerate(dataloader):
        # Move the data to the GPU
        X, y = X.cuda(), y.cuda()

        # Get the prediction pred
        pred = model(X)

        # Compute the prediction loss
        # print(pred,y)
        loss = loss_fn(pred, y)

        # Backpropagate and update the model parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the current progress every 10 batches
        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test(dataloader, model):
    size = len(dataloader.dataset)
    # Put the model into evaluation mode
    model.eval()
    # Initialize test_loss and correct, used to accumulate the loss and the number of correct predictions
    test_loss, correct = 0, 0
    # Model parameters are not updated during testing, so use no_grad()
    # (not training; used only during inference)
    with torch.no_grad():
        # Iterate over the data loader to get X (image data) and y (ground-truth labels)
        for X, y in dataloader:
            # Move the data to the GPU
            X, y = X.cuda(), y.cuda()
            # Feed the images into the model to get the prediction pred
            pred = model(X)
            # Accumulate the gap between the prediction pred and the ground truth y
            test_loss += loss_fn(pred, y).item()
            # Count the number of correct predictions
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    # Average the loss over the number of batches and the accuracy over the number of samples
    test_loss /= len(dataloader)
    correct /= size
    print(f"correct = {correct}, Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")




if __name__=='__main__':
    batch_size = 8

    # Create a data loader for the training set and the test set respectively
    train_data = LoadData("train.txt", True)
    valid_data = LoadData("test.txt", False)


    train_dataloader = DataLoader(dataset=train_data, num_workers=4, pin_memory=True, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(dataset=valid_data, num_workers=4, pin_memory=True, batch_size=batch_size)

    # Train on the GPU if one is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")


    '''
        As the model gets deeper, the number of parameters to train increases, so for the same number of training epochs the accuracy climbs more slowly
    '''

    # Do not use pretrained parameters
    # model = alexnet(pretrained=False, num_classes=5).to(device)  # 29.3%

    '''        VGG series    '''
    model = vgg11(weights=False, num_classes=5).to(device)   #  23.1%
    # model = vgg13(weights=False, num_classes=5).to(device)   # 30.0%
    # model = vgg16(weights=False, num_classes=5).to(device)


    '''        ResNet series    '''
    # model = resnet18(weights=False, num_classes=5).to(device)    # 43.6%
    # model = resnet34(weights=False, num_classes=5).to(device)
    # model = resnet50(weights=False, num_classes=5).to(device)
    # model = resnet101(weights=False, num_classes=5).to(device)   #  26.2%
    # model = resnet152(weights=False, num_classes=5).to(device)




    print(model)
    # Define the loss function (cross-entropy), which measures how far the predictions are from the labels
    loss_fn = nn.CrossEntropyLoss()

    # Define the optimizer (stochastic gradient descent), used to update the model parameters during training
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # initial learning rate


    # Train for 1 epoch in total
    epochs = 1
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        time_start = time.time()
        train(train_dataloader, model, loss_fn, optimizer)
        time_end = time.time()
        print(f"train time: {(time_end-time_start)}")
        test(test_dataloader, model)
    print("Done!")

    # Save the trained model
    torch.save(model.state_dict(), "model_vgg11.pth")
    print("Saved PyTorch Model Success!")

3. Running results

Running results for vgg11: you can see that the final accuracy is 24.8%. Because no pretrained model is used, the accuracy is quite low.

 


Origin blog.csdn.net/weixin_45662399/article/details/130087279