PyTorch: From scratch, using transfer learning, ten steps to achieve a vision project

Most blogs on the Internet explain the principle of a particular model and how to implement it, but few explain how to build a project from scratch. Below, I will start from scratch and use transfer learning to implement a vision project in ten steps.

1. Import related packages

import torch
import torch.nn as nn
from torchvision.models import resnet34, resnet50, resnet101, resnet152, vgg16, vgg19, inception_v3
from torch.nn import functional as F
import torch.optim as optim
import cv2
from torchvision import transforms
from torch.utils import data
from torch.autograd import Variable
import os

2. Read the picture

def get_pic(img_path):
    """Read an image from disk with OpenCV.

    Args:
        img_path: Path to the image file.

    Returns:
        The image as an ndarray in BGR channel order (OpenCV's native order).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail loudly here so bad paths don't surface later as cryptic errors.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return img

3. Process the image

Introduction to torchvision.transforms

def process_pic(img):
    """Convert a BGR OpenCV image into a normalized tensor for the model.

    Args:
        img: Image as an ndarray in BGR channel order (as returned by
            cv2.imread).

    Returns:
        A float tensor of shape (3, 64, 64), normalized with ImageNet
        statistics.
    """
    # cv2.imread yields BGR, but ToPILImage and the ImageNet-pretrained
    # weights expect RGB -- convert first, otherwise the red and blue
    # channels are swapped throughout training and inference.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),        # downstream transforms expect a PIL image
        transforms.Resize([64, 64]),    # resize to exactly 64x64 (a [h, w] pair does NOT preserve aspect ratio)
        transforms.CenterCrop(64),      # no-op after the fixed-size resize above; kept for pipeline clarity
        transforms.ToTensor(),          # (H, W, C) uint8 in [0, 255] -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean / std
    ])
    img_tensor = transform(img)

    return img_tensor

4. Get images and labels

def get_data(type="Training", num=num):
    """Load up to `num` classes of images and their integer labels.

    Args:
        type: Dataset split directory name ("Training" or "Test").
        num: Number of class sub-directories to read (defaults to the
            module-level `num`).

    Returns:
        Tuple (img_list, label_list): per-image (C, H, W) float arrays and
        the matching integer label, which is the index of the class
        directory in the sorted listing.
    """
    pic_path = "../data/Fruit360/archive/fruits-360_dataset/fruits-360/" + type

    # Images and labels are kept in separate parallel lists.
    img_list = []
    label_list = []
    # Sort the class directories so label indices are deterministic across
    # runs and machines (raw os.listdir order is arbitrary).
    classes = sorted(os.listdir(pic_path))[:num]
    for index, cls in enumerate(classes):
        cls_dir = os.path.join(pic_path, cls)
        # NOTE: the original reused the name `img_path` for both the listing
        # and each file path; distinct names avoid that shadowing.
        for fname in os.listdir(cls_dir):
            img = get_pic(os.path.join(cls_dir, fname))
            img = process_pic(img)
            img_list.append(img.numpy())
            label_list.append(index)

    return img_list, label_list

5. Load data

TensorDataset and DataLoader

train_img_list, train_label_list = get_data("Training")
test_img_list, test_label_list = get_data("Test")

# Stack the per-image arrays into one image tensor and one label tensor
# for each split.
X_train = torch.Tensor(train_img_list).float()
y_train = torch.tensor(train_label_list)
X_test = torch.Tensor(test_img_list).float()
y_test = torch.tensor(test_label_list)

# TensorDataset pairs image and label tensors sample by sample,
# much like Python's zip.
train_set = data.TensorDataset(X_train, y_train)
test_set = data.TensorDataset(X_test, y_test)

# DataLoader wraps a dataset and yields one shuffled batch at a time.
train_loader = data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = data.DataLoader(test_set, batch_size=32, shuffle=True)

# iter() gives an iterator over the loader; next() pulls a single batch,
# handy for a quick sanity check of shapes.
train_sample = next(iter(train_loader))
test_sample = next(iter(test_loader))

6. Transfer Learning

transfer learning

# Build ResNet-34 without downloading ImageNet weights: the local
# checkpoint loaded right below supplies the pretrained parameters anyway,
# so pretrained=True would just trigger a redundant download.
model = resnet34(pretrained=False)
model_weight_path = '../checkpoints/resnet34-pre.pth'
missing_keys, unexpected_keys = model.load_state_dict(torch.load(model_weight_path), strict=False)

# Freeze the backbone so only the new classifier head is trained. The
# original loop set requires_grad=True on fc, which is already the default
# and therefore left the entire network trainable.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully-connected layer to output `num` classes; a newly
# constructed layer has requires_grad=True by default, so it stays trainable.
dim = model.fc.in_features
model.fc = nn.Linear(dim, num)

model = model.to(device)  # moves to GPU when available; harmless on CPU
criterion = nn.CrossEntropyLoss()  # multi-class classification loss
# Give the optimizer only the parameters that still require gradients
# (i.e. the new fc head), so Adam does not track frozen weights.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01)

7. Training

def train():
    """Run one training epoch over train_loader.

    Returns:
        The mean training loss per batch for this epoch.
    """
    model.train()
    total_loss = 0

    for image, label in train_loader:
        # Variable is deprecated since PyTorch 0.4; tensors carry autograd
        # state themselves, so a plain .to(device) is all that's needed.
        image = image.to(device)
        label = label.to(device)

        # Clear gradients left over from the previous batch; otherwise
        # .backward() would accumulate them across batches.
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        # autograd computes gradients for every trainable parameter.
        loss.backward()
        # Apply one gradient-descent update with the fresh gradients.
        optimizer.step()

        total_loss += loss.item()

    return total_loss / float(len(train_loader))

8. Evaluation

def evaluate():
    """Evaluate the model on test_loader.

    Returns:
        Tuple (mean loss per batch, number of correct predictions,
        accuracy in percent, total number of samples).
    """
    model.eval()
    corrects = eval_loss = total_num = 0

    # Disable gradient tracking during inference: the original built the
    # autograd graph for every eval batch (hence its torch.cuda.empty_cache
    # workaround); no_grad removes that memory pressure at the source.
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)

            pred = model(image)
            loss = criterion(pred, label)

            eval_loss += loss.item()
            # argmax over the class dimension, compared against ground truth.
            corrects += (torch.max(pred, 1)[1].view(label.size()) == label).sum()
            total_num += len(label)

    return eval_loss / float(len(test_loader)), corrects, corrects * 100.0 / total_num, total_num

9. Output training results

try:
    print('-' * 90)
    train_loss = []
    valid_loss = []
    accuracy = []
    for epoch in range(1, 11):
        loss = train()
        # Record the epoch's training loss (the original created train_loss
        # but never appended to it).
        train_loss.append(loss)
        # NOTE: the original f-strings were broken across lines (a syntax
        # error); reconstructed here as single-line literals.
        print(f"第{epoch}个周期. 损失值为{loss}")

        loss, corrects, acc, size = evaluate()
        valid_loss.append(loss * 1000.)
        accuracy.append(acc)

        print('-' * 10)
        print(f"第{epoch}个周期, 正确率为{acc}%")
        print('-' * 10)
except KeyboardInterrupt:
    # Allow Ctrl-C to stop training early while still reaching the save below.
    print('-' * 90)
    print("Exiting from training early")

# Persist the fine-tuned weights for the demo script.
torch.save(model.state_dict(), "../checkpoints/resnet34.pth")

10. Code summary

import torch
import torch.nn as nn
from torchvision.models import resnet34, resnet50, resnet101, resnet152, vgg16, vgg19, inception_v3
import numpy as np
import pandas as pd
from torch.nn import functional as F
import torch.optim as optim
import cv2
from torchvision import transforms
import torchvision.datasets as dsets
from torch.utils import data
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os


# Select GPU 0 when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Number of fruit classes (class sub-directories) to load from the dataset.
num = 25


def get_pic(img_path):
    """Read an image from disk with OpenCV.

    Args:
        img_path: Path to the image file.

    Returns:
        The image as an ndarray in BGR channel order (OpenCV's native order).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail loudly here so bad paths don't surface later as cryptic errors.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return img


def process_pic(img):
    """Convert a BGR OpenCV image into a normalized tensor for the model.

    Args:
        img: Image as an ndarray in BGR channel order (as returned by
            cv2.imread).

    Returns:
        A float tensor of shape (3, 64, 64), normalized with ImageNet
        statistics.
    """
    # cv2.imread yields BGR, but ToPILImage and the ImageNet-pretrained
    # weights expect RGB -- convert first, otherwise the red and blue
    # channels are swapped throughout training and inference.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),        # downstream transforms expect a PIL image
        transforms.Resize([64, 64]),    # resize to exactly 64x64 (a [h, w] pair does NOT preserve aspect ratio)
        transforms.CenterCrop(64),      # no-op after the fixed-size resize above; kept for pipeline clarity
        transforms.ToTensor(),          # (H, W, C) uint8 in [0, 255] -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean / std
    ])
    img_tensor = transform(img)

    return img_tensor


def get_data(type="Training", num=num):
    """Load up to `num` classes of images and their integer labels.

    Args:
        type: Dataset split directory name ("Training" or "Test").
        num: Number of class sub-directories to read (defaults to the
            module-level `num`).

    Returns:
        Tuple (img_list, label_list): per-image (C, H, W) float arrays and
        the matching integer label, which is the index of the class
        directory in the sorted listing.
    """
    pic_path = "../data/Fruit360/archive/fruits-360_dataset/fruits-360/" + type

    # Images and labels are kept in separate parallel lists.
    img_list = []
    label_list = []
    # Sort the class directories so label indices are deterministic across
    # runs and machines (raw os.listdir order is arbitrary).
    classes = sorted(os.listdir(pic_path))[:num]
    for index, cls in enumerate(classes):
        cls_dir = os.path.join(pic_path, cls)
        # NOTE: the original reused the name `img_path` for both the listing
        # and each file path; distinct names avoid that shadowing.
        for fname in os.listdir(cls_dir):
            img = get_pic(os.path.join(cls_dir, fname))
            img = process_pic(img)
            img_list.append(img.numpy())
            label_list.append(index)

    return img_list, label_list


train_img_list, train_label_list = get_data("Training")
test_img_list, test_label_list = get_data("Test")

# Stack the per-image arrays into one image tensor and one label tensor
# for each split.
X_train = torch.Tensor(train_img_list).float()
y_train = torch.tensor(train_label_list)
X_test = torch.Tensor(test_img_list).float()
y_test = torch.tensor(test_label_list)

# TensorDataset pairs image and label tensors sample by sample,
# much like Python's zip.
train_set = data.TensorDataset(X_train, y_train)
test_set = data.TensorDataset(X_test, y_test)

# DataLoader wraps a dataset and yields one shuffled batch at a time.
train_loader = data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = data.DataLoader(test_set, batch_size=32, shuffle=True)

# iter() gives an iterator over the loader; next() pulls a single batch,
# handy for a quick sanity check of shapes.
train_sample = next(iter(train_loader))
test_sample = next(iter(test_loader))


# Build ResNet-34 without downloading ImageNet weights: the local
# checkpoint loaded right below supplies the pretrained parameters anyway,
# so pretrained=True would just trigger a redundant download.
model = resnet34(pretrained=False)
model_weight_path = '../checkpoints/resnet34-pre.pth'
missing_keys, unexpected_keys = model.load_state_dict(torch.load(model_weight_path), strict=False)

# Freeze the backbone so only the new classifier head is trained. The
# original loop set requires_grad=True on fc, which is already the default
# and therefore left the entire network trainable.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully-connected layer to output `num` classes; a newly
# constructed layer has requires_grad=True by default, so it stays trainable.
dim = model.fc.in_features
model.fc = nn.Linear(dim, num)

model = model.to(device)  # moves to GPU when available; harmless on CPU
criterion = nn.CrossEntropyLoss()  # multi-class classification loss
# Give the optimizer only the parameters that still require gradients
# (i.e. the new fc head), so Adam does not track frozen weights.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01)


def train():
    """Run one training epoch over train_loader.

    Returns:
        The mean training loss per batch for this epoch.
    """
    model.train()
    total_loss = 0

    for image, label in train_loader:
        # Variable is deprecated since PyTorch 0.4; tensors carry autograd
        # state themselves, so a plain .to(device) is all that's needed.
        image = image.to(device)
        label = label.to(device)

        # Clear gradients left over from the previous batch; otherwise
        # .backward() would accumulate them across batches.
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        # autograd computes gradients for every trainable parameter.
        loss.backward()
        # Apply one gradient-descent update with the fresh gradients.
        optimizer.step()

        total_loss += loss.item()

    return total_loss / float(len(train_loader))


def evaluate():
    """Evaluate the model on test_loader.

    Returns:
        Tuple (mean loss per batch, number of correct predictions,
        accuracy in percent, total number of samples).
    """
    model.eval()
    corrects = eval_loss = total_num = 0

    # Disable gradient tracking during inference: the original built the
    # autograd graph for every eval batch (hence its torch.cuda.empty_cache
    # workaround); no_grad removes that memory pressure at the source.
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)

            pred = model(image)
            loss = criterion(pred, label)

            eval_loss += loss.item()
            # argmax over the class dimension, compared against ground truth.
            corrects += (torch.max(pred, 1)[1].view(label.size()) == label).sum()
            total_num += len(label)

    return eval_loss / float(len(test_loader)), corrects, corrects * 100.0 / total_num, total_num


try:
    print('-' * 90)
    train_loss = []
    valid_loss = []
    accuracy = []
    for epoch in range(1, 11):
        loss = train()
        # Record the epoch's training loss (the original created train_loss
        # but never appended to it).
        train_loss.append(loss)
        # NOTE: the original f-strings were broken across lines (a syntax
        # error); reconstructed here as single-line literals.
        print(f"第{epoch}个周期. 损失值为{loss}")

        loss, corrects, acc, size = evaluate()
        valid_loss.append(loss * 1000.)
        accuracy.append(acc)

        print('-' * 10)
        print(f"第{epoch}个周期, 正确率为{acc}%")
        print('-' * 10)
except KeyboardInterrupt:
    # Allow Ctrl-C to stop training early while still reaching the save below.
    print('-' * 90)
    print("Exiting from training early")

# Persist the fine-tuned weights for the demo script.
torch.save(model.state_dict(), "../checkpoints/resnet34.pth")

Create a separate demo file to test the model on a single image

import torch
from torchvision.models import resnet34
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import cv2
import matplotlib.pyplot as plt


num = 25  # number of classes the fine-tuned head was trained with

# No ImageNet download needed here: the fine-tuned checkpoint loaded below
# overwrites every parameter anyway.
model = resnet34(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, num)  # match the trained head size
# Use the GPU when available, otherwise the CPU; map_location lets a
# GPU-saved checkpoint load on a CPU-only machine instead of crashing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Load the fine-tuned parameters saved by the training script.
model.load_state_dict(torch.load("./checkpoints/resnet34.pth", map_location=device))
model.eval()  # inference mode: freezes batch-norm running statistics


def get_pic(img_path):
    """Read an image from disk with OpenCV.

    Args:
        img_path: Path to the image file.

    Returns:
        The image as an ndarray in BGR channel order (OpenCV's native order).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail loudly here so bad paths don't surface later as cryptic errors.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return img


def process_pic(img):
    """Convert a BGR OpenCV image into a normalized tensor for the model.

    Args:
        img: Image as an ndarray in BGR channel order (as returned by
            cv2.imread).

    Returns:
        A float tensor of shape (3, 64, 64), normalized with ImageNet
        statistics.
    """
    # cv2.imread yields BGR, but ToPILImage and the ImageNet-pretrained
    # weights expect RGB -- convert first, otherwise the red and blue
    # channels are swapped throughout training and inference.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),        # downstream transforms expect a PIL image
        transforms.Resize([64, 64]),    # resize to exactly 64x64 (a [h, w] pair does NOT preserve aspect ratio)
        transforms.CenterCrop(64),      # no-op after the fixed-size resize above; kept for pipeline clarity
        transforms.ToTensor(),          # (H, W, C) uint8 in [0, 255] -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean / std
    ])
    img_tensor = transform(img)

    return img_tensor


# Path to one test image from the "Banana Lady Finger" class.
pic_path = "./data/Fruit360/archive/fruits-360_dataset/fruits-360/Test/Banana Lady Finger"
image = get_pic(pic_path + "/33_100.jpg")
image = process_pic(image)

with torch.no_grad():
    # Add a batch dimension: (C, H, W) -> (1, C, H, W) before the forward pass.
    batch = torch.unsqueeze(image, 0).cuda()
    logits = model(batch)
    # Softmax turns the logits into class probabilities; argmax picks the
    # most likely class index.
    probs = F.softmax(logits, dim=1)
    predict_cla = torch.argmax(probs).cpu().numpy()

print(str(predict_cla + 1))

Guess you like

Origin blog.csdn.net/Bat_Reality/article/details/120292123