Image classification - use pytorch to build an AlexNet network model to train your own data set (cat and dog classification)

1 Data preparation

        Many image classification tutorials use handwritten digits as their example. I think this is a poor way to teach. In deep learning, much of the real work is preparing the dataset, but because many frameworks ship the handwritten-digit dataset built in and already preprocessed, it can be fed directly into the neural network. As a result, after finishing the whole experiment we still don't know what the data actually looks like, let alone how to train on a dataset of our own.

        Here I use the dataset of cat and dog classification, as shown in the following figure: 

        Use the following script to divide the dataset into training set and validation set

import os
from shutil import copy
import random


def mkfile(file):
    """Create the directory ``file``, including missing parents, if needed.

    Args:
        file: Path of the directory to create.

    Raises:
        FileExistsError: If ``file`` already exists but is not a directory.
    """
    # exist_ok=True replaces the racy "check, then create" sequence and
    # surfaces the case where the path is occupied by a regular file.
    os.makedirs(file, exist_ok=True)


# Source folder: every sub-directory is one class and holds that class's images.
file_path = 'D:/PycharmProjects/pytorch_test/test/data_name'
# Only directories count as classes; stray files in the root are skipped.
flower_class = [cla for cla in os.listdir(file_path)
                if os.path.isdir(os.path.join(file_path, cla))]

# Create data/train/<class> and data/val/<class> for every class.
for subset in ('train', 'val'):
    mkfile('data/' + subset)
    for cla in flower_class:
        mkfile('data/' + subset + '/' + cla)

# Fraction of each class sent to the validation set (train : val = 9 : 1).
split_rate = 0.1

# Walk every class and copy each image into either the train or the val tree.
for cla in flower_class:
    cla_path = file_path + '/' + cla + '/'  # sub-directory of this class
    images = os.listdir(cla_path)  # names of all entries in that directory
    num = len(images)
    # Randomly pick the validation images; a set keeps the membership test
    # below O(1) per image instead of scanning a list every time.
    eval_index = set(random.sample(images, k=int(num * split_rate)))
    for index, image in enumerate(images):
        subset = 'val' if image in eval_index else 'train'
        copy(cla_path + image, 'data/' + subset + '/' + cla)
        print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # processing bar
    print()

print("processing done!")

        The divided dataset is shown in the figure below, with a certain percentage of cat and dog photos in both the training set and the validation set.

 2 Use pytorch to build an AlexNet network model

import torch
from torch import nn
import torch.nn.functional as F


class MyAlexNet(nn.Module):
    """AlexNet-style CNN that maps 3x224x224 images to 2 class logits.

    The layer and attribute names are kept stable so that existing
    ``state_dict`` checkpoints continue to load.

    NOTE(review): the fully connected layers f6-f9 have no non-linearity
    between them (as in the original code). Adding one would change what
    already-trained checkpoints compute, so it is left as is.
    """

    def __init__(self):
        super().__init__()
        # Shapes in the comments assume a 3x224x224 input.
        self.c1 = nn.Conv2d(in_channels=3, out_channels=48, kernel_size=11, stride=4, padding=2)  # 48x55x55
        self.ReLU = nn.ReLU()
        self.c2 = nn.Conv2d(in_channels=48, out_channels=128, kernel_size=5, stride=1, padding=2)  # 128x55x55
        self.s2 = nn.MaxPool2d(2)  # 128x27x27
        self.c3 = nn.Conv2d(in_channels=128, out_channels=192, kernel_size=3, stride=1, padding=1)  # 192x27x27
        self.s3 = nn.MaxPool2d(2)  # 192x13x13
        self.c4 = nn.Conv2d(in_channels=192, out_channels=192, kernel_size=3, stride=1, padding=1)  # 192x13x13
        self.c5 = nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, stride=1, padding=1)  # 128x13x13
        self.s5 = nn.MaxPool2d(kernel_size=3, stride=2)  # 128x6x6
        self.flatten = nn.Flatten()  # 128 * 6 * 6 = 4608
        self.f6 = nn.Linear(4608, 2048)
        self.f7 = nn.Linear(2048, 2048)
        self.f8 = nn.Linear(2048, 1000)

        self.f9 = nn.Linear(1000, 2)

        # A Dropout module (unlike a bare F.dropout call) follows
        # model.train() / model.eval(), so it is disabled at inference time.
        # It has no parameters, so the state_dict layout is unchanged.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Float tensor of shape (batch, 3, 224, 224); the first linear
                layer expects exactly 4608 flattened features.

        Returns:
            Tensor of shape (batch, 2) with unnormalized class scores.
        """
        x = self.ReLU(self.c1(x))
        x = self.s2(self.ReLU(self.c2(x)))
        x = self.s3(self.ReLU(self.c3(x)))
        x = self.ReLU(self.c4(x))
        x = self.s5(self.ReLU(self.c5(x)))
        x = self.flatten(x)
        x = self.dropout(self.f6(x))
        x = self.dropout(self.f7(x))
        x = self.dropout(self.f8(x))
        # No dropout on the output layer: zeroing logits at random would
        # corrupt both the loss and the predictions.
        return self.f9(x)

# Smoke test: push one random 224x224 RGB image through the network when this
# file is executed directly ("__main__" was previously misspelled, so this
# never ran).
if __name__ == "__main__":
    x = torch.rand([1, 3, 224, 224])
    model = MyAlexNet()
    y = model(x)





 3 Training code for the network model

import torch
from torch import nn
from net import MyAlexNet
import numpy as np

from torch.optim import lr_scheduler
import os

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

# Work around Chinese text rendering in matplotlib figures.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

ROOT_TRAIN = r'D:/PycharmProjects/pytorch_test/test/data/train'
ROOT_TEST = r'D:/PycharmProjects/pytorch_test/test/data/val'


# Subtract 0.5 from each RGB channel and divide by 0.5, which maps pixel
# values from [0, 1] to [-1, 1].
normalize = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),    # resize (not crop) to 224x224
    transforms.RandomVerticalFlip(),  # random vertical flip (augmentation)
    transforms.ToTensor(),            # 0-255 pixels -> float tensor in [0, 1]
    normalize])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize])

train_dataset = ImageFolder(ROOT_TRAIN, transform=train_transform)
val_dataset = ImageFolder(ROOT_TEST, transform=val_transform)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Validation needs no shuffling; a fixed order keeps per-epoch metrics
# comparable.
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Train on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the network defined in net.py and move it to the chosen device.
model = MyAlexNet().to(device)

# Loss function: cross entropy.
loss_fn = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Halve the learning rate every 10 epochs (gamma=0.5).
# NOTE: this rebinds the imported `lr_scheduler` module name to the scheduler
# instance; the training loop below uses that name.
lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# Training function
def train(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: Network to optimize; batches are moved to its device.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar
            tensor; assumed to be the batch mean (e.g. the default
            ``nn.CrossEntropyLoss``).
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        Tuple ``(train_loss, train_acc)``: mean loss and accuracy over all
        samples of the epoch.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    # val() leaves the model in eval mode; switch back so train-only layers
    # such as dropout are active in every epoch, not just the first one.
    model.train()

    # Follow the model's own device rather than a module-level global.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    loss_sum, correct, seen = 0.0, 0, 0
    for x, y in dataloader:
        if target is not None:
            x, y = x.to(target), y.to(target)

        # Forward pass
        output = model(x)
        cur_loss = loss_fn(output, y)

        # Backward pass and parameter update
        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the
        # epoch averages.
        batch_size = y.shape[0]
        loss_sum += cur_loss.item() * batch_size
        correct += (output.argmax(dim=1) == y).sum().item()
        seen += batch_size

    train_loss = loss_sum / seen
    train_acc = correct / seen
    print('train_loss:' + str(train_loss))
    print('train_acc:' + str(train_acc))
    return train_loss, train_acc

# Validation function
def val(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: Network to evaluate; batches are moved to its device.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar
            tensor; assumed to be the batch mean (e.g. the default
            ``nn.CrossEntropyLoss``).

    Returns:
        Tuple ``(val_loss, val_acc)``: mean loss and accuracy over all
        evaluated samples.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    # Put the model into evaluation mode.
    model.eval()

    # Follow the model's own device rather than a module-level global.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    loss_sum, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for x, y in dataloader:
            if target is not None:
                x, y = x.to(target), y.to(target)
            output = model(x)
            cur_loss = loss_fn(output, y)

            # Weight by batch size so a smaller final batch does not skew
            # the averages.
            batch_size = y.shape[0]
            loss_sum += cur_loss.item() * batch_size
            correct += (output.argmax(dim=1) == y).sum().item()
            seen += batch_size

    val_loss = loss_sum / seen
    val_acc = correct / seen
    print('val_loss:' + str(val_loss))
    print('val_acc:' + str(val_acc))
    return val_loss, val_acc

# Plotting helpers
def matplot_loss(train_loss, val_loss):
    """Show the training and validation loss curves in one figure.

    Args:
        train_loss: Sequence of per-epoch training losses.
        val_loss: Sequence of per-epoch validation losses.
    """
    curves = ((train_loss, 'train_loss'), (val_loss, 'val_loss'))
    for values, name in curves:
        plt.plot(values, label=name)
    plt.title("训练集和验证集loss值对比图")
    plt.xlabel('epoch', fontsize=12)
    plt.ylabel('loss', fontsize=12)
    plt.legend(loc='best')
    plt.show()

def matplot_acc(train_acc, val_acc):
    """Show the training and validation accuracy curves in one figure.

    Args:
        train_acc: Sequence of per-epoch training accuracies.
        val_acc: Sequence of per-epoch validation accuracies.
    """
    curves = ((train_acc, 'train_acc'), (val_acc, 'val_acc'))
    for values, name in curves:
        plt.plot(values, label=name)
    plt.title("训练集和验证集精确度值对比图")
    plt.xlabel('epoch', fontsize=12)
    plt.ylabel('acc', fontsize=12)
    plt.legend(loc='best')
    plt.show()



# Start training
# Per-epoch metric history, plotted once training has finished.
loss_train = []
acc_train = []
loss_val = []
acc_val = []

epoch = 100
best_acc = 0  # best validation accuracy seen so far

# Create the checkpoint folder up front so that saving last_model.pth cannot
# fail when no epoch ever improves on the initial best accuracy.
folder = 'save_model'
os.makedirs(folder, exist_ok=True)

for t in range(epoch):
    print(f"epoch{t+1}\n--------------")
    train_loss, train_acc = train(train_dataloader, model, loss_fn, optimizer)
    val_loss, val_acc = val(val_dataloader, model, loss_fn)
    # Advance the schedule only after this epoch's optimizer steps; stepping
    # first shifts the whole learning-rate schedule by one epoch.
    lr_scheduler.step()

    loss_train.append(train_loss)
    acc_train.append(train_acc)
    loss_val.append(val_loss)
    acc_val.append(val_acc)

    # Save the weights of the best model so far.
    if val_acc > best_acc:
        best_acc = val_acc
        print(f'save best model,第{t+1}轮')
        torch.save(model.state_dict(), 'save_model/best_model.pth')
    # Save the weights from the final epoch.
    if t == epoch - 1:
        torch.save(model.state_dict(), 'save_model/last_model.pth')
print('Done!')

matplot_loss(loss_train, loss_val)
matplot_acc(acc_train, acc_val)






4 Test code

import torch
from net import MyAlexNet
import numpy as np
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.transforms import ToPILImage
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

ROOT_TRAIN = r'D:/PycharmProjects/pytorch_test/test/data/train'
ROOT_TEST = r'D:/PycharmProjects/pytorch_test/test/data/val'

# Subtract 0.5 from each RGB channel and divide by 0.5, which maps pixel
# values from [0, 1] to [-1, 1] (same preprocessing as in training).
normalize = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize])

# Only the validation images are needed for inference; the training dataset
# and the data loaders were built here but never used, so they are gone.
val_dataset = ImageFolder(ROOT_TEST, transform=val_transform)

# Run on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the network defined in net.py and move it to the chosen device.
model = MyAlexNet().to(device)

# Load the weights produced by the training script. map_location lets a
# checkpoint that was saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(
    'D:/PycharmProjects/pytorch_test/test/save_model/best_model.pth',
    map_location=device))

# Class names come from the dataset itself so that label index -> name always
# matches the folder layout (the hard-coded list had a 'dag' typo).
classes = val_dataset.classes

# Converts a tensor back to a PIL image for display.
show = ToPILImage()

# Switch to evaluation mode.
model.eval()

# Run inference on (at most) the first 50 validation images.
for i in range(min(50, len(val_dataset))):
    x, y = val_dataset[i]
    # Undo the [-1, 1] normalization before display; ToPILImage expects
    # float tensors in [0, 1].
    show((x * 0.5 + 0.5).clamp(0, 1)).show()
    x = x.unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        pred = model(x)
    predicted, actual = classes[torch.argmax(pred[0]).item()], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Guess you like

Origin blog.csdn.net/didiaopao/article/details/120717525