[Image Recognition] Panda recognition based on the VGG16 model

Notes

Experimental environment: Python 3.10, with PyTorch installed via: conda install pytorch torchvision torchaudio cpuonly -c pytorch
This case study uses the VGG16 model in PyTorch to distinguish giant pandas from red pandas.

1. Data preparation

The data set structure is as follows:

dataset
	|-test
		|-bear
		|-pandas
	|-train
		|-bear
		|-pandas
	|-val
		|-bear
		|-pandas
#test: test set
#train: training set
#val: validation set
#bear: contains the red panda images
#pandas: contains the giant panda images
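
If your raw images all start in one folder per class, a small script can build this directory layout automatically. The sketch below is only an illustration: it assumes hypothetical source folders raw/bear and raw/pandas and an 80/10/10 split; adjust the paths and ratios to your own data.

import os
import random
import shutil

# Hypothetical source layout: raw/bear/*.jpg, raw/pandas/*.jpg
SRC = 'raw'
DST = 'dataset'
SPLITS = {'train': 0.8, 'val': 0.1, 'test': 0.1}  # assumed 80/10/10 split

random.seed(0)
for cls in ['bear', 'pandas']:
    files = os.listdir(os.path.join(SRC, cls))
    random.shuffle(files)
    n_train = int(len(files) * SPLITS['train'])
    n_val = int(len(files) * SPLITS['val'])
    parts = {
        'train': files[:n_train],
        'val': files[n_train:n_train + n_val],
        'test': files[n_train + n_val:],
    }
    for split, names in parts.items():
        out_dir = os.path.join(DST, split, cls)
        os.makedirs(out_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(SRC, cls, name), os.path.join(out_dir, name))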

Part of the bear (red panda) data set is shown below:
(sample images omitted)
Part of the pandas (giant panda) data set is shown below:
(sample images omitted)

2. Data preprocessing

(1) Import required packages

import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from torchvision import datasets
from torch.autograd import Variable
from torchvision.models import vgg16
import os
from PIL import Image
from torchvision import transforms as T
import shutil
import matplotlib.pyplot as plt
import random

(2) Set global parameters

# Set global parameters
modellr = 1e-4
BATCH_SIZE = 32
EPOCHS = 10
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE
  • Run result: the device resolves to the CPU in this case; no GPU is used for training.

(3) Data preprocessing

# Data preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
Labels = {'bear': 0, 'pandas': 1}  # class labels (ImageFolder assigns the same mapping below)
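
Before moving on, it can be worth applying the transform to a single image and checking the result: with mean and std both 0.5, ToTensor followed by Normalize maps pixel values from [0, 1] to roughly [-1, 1]. The file name below is only a placeholder.

# Optional sanity check on one image (replace the path with any real file from your data set)
sample = Image.open('dataset/train/pandas/example.jpg').convert('RGB')
x = transform(sample)
print(x.shape)           # expected: torch.Size([3, 224, 224])
print(x.min(), x.max())  # roughly within [-1, 1] after normalization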

(4) Read the data set

# Read the data sets
dataset_train = datasets.ImageFolder('dataset/train', transform)
print(dataset_train.imgs)
# label assigned to each folder
print(dataset_train.class_to_idx)
dataset_test = datasets.ImageFolder('dataset/val', transform_test)
# label assigned to each folder
print(dataset_test.class_to_idx)
# Build the data loaders
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
  • Run result: (screenshot of the printed image list and class_to_idx mapping omitted)
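
To confirm the loaders work before starting training, one option is to pull a single batch and inspect its shape; a minimal sketch:

# Optional: inspect one batch from the training loader
images, labels = next(iter(train_loader))
print(images.shape)  # e.g. torch.Size([32, 3, 224, 224]) for a full batch
print(labels[:10])   # class indices assigned by ImageFolder (bear=0, pandas=1)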

3. Model training

(1) Model settings

# Instantiate the model and move it to the selected device
criterion = nn.CrossEntropyLoss()
model_ft = vgg16(pretrained=False)
# Replace the classifier head so the final layer outputs 2 classes
model_ft.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 2),
)
model_ft.to(DEVICE)
# Use the simple Adam optimizer with a low learning rate
optimizer = optim.Adam(model_ft.parameters(), lr=modellr)

def adjust_learning_rate(optimizer, epoch):
    # Decay the learning rate by 10x every 50 epochs
    modellrnew = modellr * (0.1 ** (epoch // 50))
    print("lr:", modellrnew)
    for param_group in optimizer.param_groups:
        param_group['lr'] = modellrnew
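
This post trains VGG16 from scratch (pretrained=False). A common variant, not used here, is to start from ImageNet weights and freeze the convolutional layers so that only the new classifier head is trained; a sketch of that idea (newer torchvision versions use the weights argument instead of pretrained):

# Optional variant: transfer learning from ImageNet weights (not used in this post)
model_pre = vgg16(pretrained=True)            # newer torchvision: vgg16(weights='IMAGENET1K_V1')
for param in model_pre.features.parameters():
    param.requires_grad = False               # freeze the convolutional feature extractor
model_pre.classifier[6] = nn.Linear(4096, 2)  # replace the final layer for 2 classes
optimizer_pre = optim.Adam(
    filter(lambda p: p.requires_grad, model_pre.parameters()), lr=modellr)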

(2) Define the training and validation procedures

# Training procedure
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    sum_loss = 0
    train_acc = 0
    total_num = len(train_loader.dataset)
    # print(total_num, len(train_loader))
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data).to(device), Variable(target).to(device)
        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print_loss = loss.data.item()
        sum_loss += print_loss

        out_t = output.argmax(dim=1)  # index of the highest-scoring class
        num_correct = (out_t == target).sum().item()
        acc = num_correct / data.shape[0]
        train_acc += acc

        if (batch_idx + 1) % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                       100. * (batch_idx + 1) / len(train_loader), loss.item()))

    ave_loss = sum_loss / len(train_loader)
    ave_acc = train_acc / len(train_loader)
    print('epoch:{}, train_acc: {}, loss:{}'.format(epoch, ave_acc, ave_loss))
    return ave_acc, ave_loss

# Validation procedure
def val(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    total_num = len(test_loader.dataset)
    with torch.no_grad():
        for data, target in test_loader:
            data, target = Variable(data).to(device), Variable(target).to(device)
            output = model(data)
            loss = criterion(output, target)
            _, pred = torch.max(output.data, 1)
            correct += torch.sum(pred == target)
            print_loss = loss.data.item()
            test_loss += print_loss
        correct = correct.data.item()
        acc = correct / total_num
        avgloss = test_loss / len(test_loader)
        print('Val set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            avgloss, correct, len(test_loader.dataset), 100 * acc))
    return acc, avgloss

(3) Model training

# Training loop
train_acc_list, train_loss_list, val_acc_list, val_loss_list = [], [], [], []
for epoch in range(1, EPOCHS + 1):
    adjust_learning_rate(optimizer, epoch)
    train_acc, train_loss = train(model_ft, DEVICE, train_loader, optimizer, epoch)
    val_acc, val_loss = val(model_ft, DEVICE, test_loader)
    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)
    train_loss_list.append(train_loss)
    val_loss_list.append(val_loss)
# Save the whole model (architecture + weights)
torch.save(model_ft, 'model.pth')

  • Run result: (training log screenshot omitted)
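
torch.save(model_ft, 'model.pth') stores the whole module with pickle. An alternative worth knowing, though not what the inference code below relies on, is to save only the weights and rebuild the architecture before loading:

# Alternative: save/load only the state dict instead of the whole module
torch.save(model_ft.state_dict(), 'model_weights.pth')

model_reloaded = vgg16(pretrained=False)
model_reloaded.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096), nn.ReLU(True), nn.Dropout(),
    nn.Linear(4096, 4096), nn.ReLU(True), nn.Dropout(),
    nn.Linear(4096, 2),
)
model_reloaded.load_state_dict(torch.load('model_weights.pth', map_location=DEVICE))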

(4) Model evaluation

epochs_range = range(EPOCHS)
print(epochs_range, train_acc_list)

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_acc_list, label='Training Accuracy')
plt.plot(epochs_range, val_acc_list, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_loss_list, label='Training Loss')
plt.plot(epochs_range, val_loss_list, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.savefig('./acc_loss.jpg')

  • Resulting plot: (acc_loss.jpg screenshot omitted)

(5) Model inference

# Define the class labels
classes = ('bear', 'pandas')
# Test-time preprocessing (same as for validation)
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the saved model; map_location keeps this working on a CPU-only machine
model = torch.load("model.pth", map_location=DEVICE)
model.eval()
model.to(DEVICE)
dataset_test = datasets.ImageFolder('dataset/test', transform_test)
print(len(dataset_test))

y_true, y_score = [], []
for index in range(len(dataset_test)):
    img, label = dataset_test[index]
    img.unsqueeze_(0)  # add a batch dimension: [3, 224, 224] -> [1, 3, 224, 224]
    data = Variable(img).to(DEVICE)
    output = model(data)
    _, pred = torch.max(output.data, 1)
    y_true.append(label)
    y_score.append(pred.data.item())
    print('Image Name:{}, label:{}, predict:{}'.format(
        dataset_test.imgs[index][0], classes[label], classes[pred.data.item()]))

  • Inference results: (screenshot of the per-image predictions omitted)
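
The loop above also fills y_true and y_score, which are otherwise unused; they can be turned into an overall test accuracy and a simple 2x2 confusion count with plain Python:

# Summarize the predictions collected during inference
correct = sum(t == p for t, p in zip(y_true, y_score))
print('Test accuracy: {:.2%}'.format(correct / len(y_true)))

confusion = [[0, 0], [0, 0]]  # rows: true class, columns: predicted class
for t, p in zip(y_true, y_score):
    confusion[t][p] += 1
for i, cls in enumerate(classes):
    print('true {:6s} -> predicted bear: {}, pandas: {}'.format(cls, confusion[i][0], confusion[i][1]))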

(6) Visualization of inference results

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render CJK characters (optional with English titles)
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly

path = 'dataset/test/bear/'
testList = os.listdir(path)
for file in testList:
    img = Image.open(path + file).convert('RGB')
    img = transform_test(img)
    img.unsqueeze_(0)
    img = Variable(img).to(DEVICE)
    out = model(img)
    # Predict
    _, pred = torch.max(out.data, 1)
    # Plot the original image with its predicted class as the title
    plt.subplot(1, 1, 1)
    image = Image.open(path + file)
    plt.imshow(image, cmap='gray', interpolation='none')
    plt.title("Predicted: {}".format(classes[pred.data.item()]))
    plt.xticks([])
    plt.yticks([])
    plt.savefig('./result_test.jpg')  # each iteration overwrites the previous image
if os.path.exists('./result_test.jpg'):
    print('Recognition finished; the result is saved in the current directory as result_test.jpg')
else:
    print('Recognition failed!')
  • Visualization result: (result_test.jpg screenshot omitted)
