[Image classification] Train your own classification model with PyTorch and deploy it with Flask

Main reference: the official PyTorch transfer learning tutorial, https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

1. Classification model training

Import the dataset

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir='train_cls/23_0421_text_cls2'
# data_dir='/data2/zengxingyu2/code/23_0420_cls_text/23_0421_text_cls2'
batch_size=8
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
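
datasets.ImageFolder expects one sub-directory per class under each of the train and val splits; the sub-directory names become class_names. A layout along the following lines is assumed (the class folder names below are only placeholders):

train_cls/23_0421_text_cls2/
    train/
        class_0/        # one folder per class; the folder name becomes the label
        class_1/
    val/
        class_0/
        class_1/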

Training function

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model, 'best_model_text_cls2.pth')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

Visualize training results

def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

Main function

if __name__ == '__main__':
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # Here the size of each output sample is set to 2.
    # Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))``.
    model_ft.fc = nn.Linear(num_ftrs, 2)

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^
    #
    # It should take around 15-25 min on CPU. On GPU though, it takes less than a
    # minute.
    #

    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=25)


    torch.save(model_ft, 'best_model_text_cls2.pth')
    # visualize_model(model_ft)

When the training script runs, torchvision downloads the pretrained ResNet-18 weights automatically; depending on your network this download may fail.

If it does, download the file from the URL shown in the prompt and put it at the path given in the prompt. On both Windows and Linux this is under the current user's .cache directory, for example:

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
cp /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth ~/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
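
If the automatic download is not possible at all, an alternative is to fetch resnet18-f37072fd.pth manually and load it from a local file. A minimal sketch, assuming the checkpoint sits next to the training script (the local path is an assumption):

import torch
import torch.nn as nn
from torchvision import models

# Build the architecture without downloading weights, then load the local checkpoint
model_ft = models.resnet18(pretrained=False)
state_dict = torch.load('resnet18-f37072fd.pth', map_location='cpu')  # hypothetical local path
model_ft.load_state_dict(state_dict)

# Replace the classifier head for the 2-class task, as in the training script
model_ft.fc = nn.Linear(model_ft.fc.in_features, 2)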

2. Flask deployment code

# -*- coding: utf-8 -*-
# @Time : 2023/4/21 16:01
# @Author : XyZeng


import io
import traceback

import requests
from flask import Flask, jsonify, request
import torch
import flask
import torchvision.transforms as transforms
from PIL import Image


app = Flask(__name__)

# Transform applied to input images (ImageNet normalization, same as validation)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def request_get(img_path, url="http://127.0.0.1:8421/cls_text?"):
    params = {'img_path': img_path}
    response = requests.get(url, params=params)
    print(response.text)
    return response.text



@app.before_first_request
def load_model():
    global model
    # Load the saved model  /data2/zengxingyu2/code/23_0330_pick_img_ocr/best_model_text_cls2.pth
    model = torch.load('best_model_text_cls2.pth', map_location=torch.device('cuda'))
    # Set the model to evaluation mode
    model.eval()


# Run inference on an input image
def get_prediction(image_path):
    # Open the image file as an RGB PIL image
    image = Image.open(image_path).convert('RGB')
    # Apply the preprocessing transform and add a batch dimension
    image = transform(image).unsqueeze(0).to('cuda')
    # ...

    # Run the model on the image
    with torch.no_grad():
        output = model(image)
        predicted_class = torch.argmax(output, dim=1).item()
    # Return the predicted class index as a string
    return str(predicted_class)

@app.route('/cls_text', methods=['GET'])
def predict():
    try:
        if flask.request.args.get("img_path"):
            img_path = flask.request.args.get("img_path")
            print("cls get img_path:", img_path)

        # Run inference on the input image
        prediction = get_prediction(img_path)
        # print('type',type(prediction))

        # Return the predicted class index as the response body
        return prediction
    except:
        traceback.print_exc()
        # results['Err']=traceback.format_exc()
        return traceback.format_exc()


if __name__ == '__main__':


    # python /data2/zengxingyu2/code/23_0420_cls_text/flask_torch_cls.py
    app.run(debug=False, host='0.0.0.0', port=8421)
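
To test the running service, the request_get helper defined above can be called from another Python process; the image path below is only a placeholder:

# Assumes the Flask app is listening on 127.0.0.1:8421
request_get('/path/to/some_image.jpg')   # prints and returns the predicted class index, e.g. "0" or "1"

The endpoint expects an img_path query parameter pointing to an image file readable on the server.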

3. Appendix: complete training code

# -*- coding: utf-8 -*-
# @Time : 2023/4/21 14:58


# License: BSD
# Author: Sasank Chilamkurthy
'''

https://github.com/pytorch/tutorials/blob/main/beginner_source/transfer_learning_tutorial.py
https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
'''

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

cudnn.benchmark = True
plt.ion()   # interactive mode




# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

'''
dataload
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth

cp  /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth  ~/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth

'''

data_dir='train_cls/23_0421_text_cls2'
# data_dir='/data2/zengxingyu2/code/23_0420_cls_text/23_0421_text_cls2'
batch_size=8
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes



def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated



def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model, 'best_model_text_cls2.pth')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model




def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

if __name__ == '__main__':
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # Here the size of each output sample is set to 2.
    # Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))``.
    model_ft.fc = nn.Linear(num_ftrs, 2)

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^
    #
    # It should take around 15-25 min on CPU. On GPU though, it takes less than a
    # minute.
    #

    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=25)


    torch.save(model_ft, 'best_model_text_cls2.pth')
    # visualize_model(model_ft)

Reprinted from: blog.csdn.net/imwaters/article/details/130492266