Main reference:
- Official tutorial: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
- https://github.com/pytorch/tutorials/blob/main/beginner_source/transfer_learning_tutorial.py
1. Classification model training
Import the dataset
# Per-phase preprocessing pipelines.
# 'train': random crop + random horizontal flip (augmentation), then tensor
#          conversion and per-channel normalization.
# 'val':   deterministic resize + center crop, then the same tensor
#          conversion and normalization (no augmentation).
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]),
)
# Dataset root; one sub-directory per phase ('train' and 'val') is read below.
data_dir = 'train_cls/23_0421_text_cls2'
# Alternative (absolute) dataset location:
# data_dir='/data2/zengxingyu2/code/23_0420_cls_text/23_0421_text_cls2'
batch_size = 8

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# One ImageFolder dataset per phase, each with its matching transform.
image_datasets = {
    phase: datasets.ImageFolder(
        os.path.join(data_dir, phase),
        data_transforms[phase],
    )
    for phase in ('train', 'val')
}

# One loader per phase; both phases are shuffled.
dataloaders = {
    phase: torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for phase, dataset in image_datasets.items()
}

# Number of samples per phase, used to average loss/accuracy per epoch.
dataset_sizes = {
    phase: len(dataset) for phase, dataset in image_datasets.items()
}

# Class labels as discovered by ImageFolder in the training directory.
class_names = image_datasets['train'].classes
Training-related code
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and per-epoch metrics are averaged with ``dataset_sizes``.
    Whenever validation accuracy improves, the weights are snapshotted in
    memory and the whole model object is saved to
    ``best_model_text_cls2.pth``.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose gradients are zeroed before every batch
            and which is stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the best-validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass; gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that dividing by
                # the dataset size below gives a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the best-performing weights and persist the model.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model, 'best_model_text_cls2.pth')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m '
          f'{time_elapsed % 60:.0f}s')
    # ``.4f`` (precision), not ``4f`` (field width): print four decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights.
    model.load_state_dict(best_model_wts)
    return model
Visualize training results
def visualize_model(model, num_images=6):
    """Plot predictions for up to ``num_images`` validation images.

    Images are drawn from the module-level ``dataloaders['val']`` into a
    two-column grid of subplots, each titled with the predicted class name
    taken from ``class_names``.  The model is switched to eval mode for the
    duration of the call and its previous train/eval mode is restored on
    exit, including when an exception is raised.

    Args:
        model: Network used for prediction; called as ``model(inputs)``.
        num_images: Maximum number of images to plot.  Values <= 0 plot
            nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Two columns; round the row count up so an odd ``num_images`` still fits
    # in the grid (identical to ``num_images // 2`` for even values).
    num_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(mode=was_training)
Main function
if __name__ == '__main__':
    # Start from an ImageNet-pretrained ResNet-18.
    # NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=`` - confirm against the installed version.
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # Replace the classification head with one output per class found in the
    # training folder, instead of hard-coding 2 outputs.
    model_ft.fc = nn.Linear(num_ftrs, len(class_names))

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized.
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^
    #
    # (The upstream tutorial quotes 15-25 min on CPU and under a minute on
    # GPU for its own dataset; timing here depends on the dataset.)
    #
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=25)

    # train_model returns the model with its best weights loaded; store the
    # whole model object so it can be reloaded with a plain torch.load().
    torch.save(model_ft, 'best_model_text_cls2.pth')
    # visualize_model(model_ft)
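Running the script may trigger an automatic download of the pretrained model; this can be slow or fail because of network problems.
In that case, download the model manually from the URL shown in the message and place it at the path shown in the message. On both Windows and Linux this path is under the current user's .cache directory.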
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
cp /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth ~/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
2. Flask deployment code
# -*- coding: utf-8 -*-
# @Time : 2023/4/21 16:01
# @Author : XyZeng
# -*- coding: utf-8 -*-
# @Time : 2023/4/7 10:24
# @Author : XyZeng
import io
import traceback
import requests
from flask import Flask, jsonify, request
import torch
import flask
import torchvision.transforms as transforms
from PIL import Image
app = Flask(__name__)

# Preprocessing applied to every input image before inference.
# It mirrors the 'val' pipeline of the training script (Resize(256) followed
# by CenterCrop(224)), so the served model sees inputs prepared the same way
# as the ones its validation accuracy was measured on.  A direct
# Resize((224, 224)) would distort the aspect ratio instead.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def request_get(img_path, url="http://127.0.0.1:8421/cls_text?"):
    """Query the classification endpoint for ``img_path``.

    Sends a GET request to ``url`` with ``img_path`` as a query parameter,
    prints the response body and returns it.

    Args:
        img_path: Value sent as the ``img_path`` query parameter.
        url: Endpoint to call.

    Returns:
        The response body as text.
    """
    reply = requests.get(url, params={'img_path': img_path})
    body = reply.text
    print(body)
    return body
def load_model():
    """Load the trained classifier into the module-level ``model``.

    The checkpoint ``best_model_text_cls2.pth`` is mapped onto the GPU when
    one is available and onto the CPU otherwise, then put into eval mode.
    """
    global model
    # Fall back to the CPU instead of failing on hosts without CUDA.
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load the model, e.g. from
    # /data2/zengxingyu2/code/23_0330_pick_img_ocr/best_model_text_cls2.pth
    # NOTE(review): the checkpoint is a whole pickled model object, so only
    # load files from a trusted source; recent torch versions may also need
    # ``weights_only=False`` for this kind of file - confirm.
    model = torch.load('best_model_text_cls2.pth', map_location=target_device)
    # Switch the model to evaluation mode.
    model.eval()


# Register the loader to run before the first request where Flask still
# provides that hook; on Flask versions where ``before_first_request`` no
# longer exists, load the model eagerly at import time instead.
if hasattr(app, 'before_first_request'):
    app.before_first_request(load_model)
else:
    load_model()
# Run inference on a single input image.
def get_prediction(image_path):
    """Classify the image at ``image_path`` with the module-level ``model``.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The index of the highest-scoring class, as a string.
    """
    # Run on whatever device the model's weights live on rather than
    # assuming CUDA.
    model_device = next(model.parameters()).device

    # Open the image and convert it to RGB; the context manager closes the
    # underlying file once the pixel data has been converted.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Preprocess and add a leading batch dimension.
    batch = transform(image).unsqueeze(0).to(model_device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model(batch)
    predicted_class = torch.argmax(output, dim=1).item()

    # Return the predicted class as a string.
    return str(predicted_class)
@app.route('/cls_text', methods=['GET'])
def predict():
    """Handle ``GET /cls_text?img_path=...`` and return the predicted class.

    Returns:
        The predicted class as a plain-text body on success; a 400 response
        when ``img_path`` is missing or empty; a 500 response carrying the
        traceback text when inference fails.
    """
    img_path = flask.request.args.get("img_path")
    if not img_path:
        # Returning nothing here would make Flask fail with an invalid
        # response error, so answer with an explicit client error.
        return "missing required query parameter: img_path", 400

    print("cls get img_path:", img_path)
    try:
        # NOTE(review): ``img_path`` comes straight from the request and is
        # opened as a local file; restrict it to a trusted directory if this
        # service is reachable by untrusted clients.
        # Run inference and return the predicted class as the response body.
        return get_prediction(img_path)
    except Exception:
        traceback.print_exc()
        # NOTE(review): the traceback is sent to the client as before, now
        # with an error status; consider a generic message in production.
        return traceback.format_exc(), 500
if __name__ == '__main__':
    # Example invocation:
    #   python /data2/zengxingyu2/code/23_0420_cls_text/flask_torch_cls.py
    # Listen on all interfaces, port 8421, with the debugger disabled.
    app.run(host='0.0.0.0', port=8421, debug=False)
Appendix: complete code
# -*- coding: utf-8 -*-
# @Time : 2023/4/21 14:58
# License: BSD
# Author: Sasank Chilamkurthy
'''
https://github.com/pytorch/tutorials/blob/main/beginner_source/transfer_learning_tutorial.py
https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
'''
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
cudnn.benchmark = True
plt.ion()  # interactive mode

# Per-phase preprocessing pipelines.
# 'train': random crop + random horizontal flip (augmentation), then tensor
#          conversion and per-channel normalization.
# 'val':   deterministic resize + center crop, then the same tensor
#          conversion and normalization (no augmentation).
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]),
)
'''
dataload
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
cp /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth ~/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
'''
# Dataset root; one sub-directory per phase ('train' and 'val') is read below.
data_dir = 'train_cls/23_0421_text_cls2'
# Alternative (absolute) dataset location:
# data_dir='/data2/zengxingyu2/code/23_0420_cls_text/23_0421_text_cls2'
batch_size = 8

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# One ImageFolder dataset per phase, each with its matching transform.
image_datasets = {
    phase: datasets.ImageFolder(
        os.path.join(data_dir, phase),
        data_transforms[phase],
    )
    for phase in ('train', 'val')
}

# One loader per phase; both phases are shuffled.
dataloaders = {
    phase: torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for phase, dataset in image_datasets.items()
}

# Number of samples per phase, used to average loss/accuracy per epoch.
dataset_sizes = {
    phase: len(dataset) for phase, dataset in image_datasets.items()
}

# Class labels as discovered by ImageFolder in the training directory.
class_names = image_datasets['train'].classes
def imshow(inp, title=None):
    """Show a normalized image tensor with matplotlib.

    The tensor is converted to a NumPy array, its first axis is moved last,
    and the per-channel normalization (same mean/std constants as the data
    transforms) is reversed before the values are clipped to [0, 1].

    Args:
        inp: Image tensor exposing ``.numpy()``; a 3-axis layout is implied
            by the ``(1, 2, 0)`` transpose.
        title: Optional title drawn above the image.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the first axis to the end so the three constants broadcast over it.
    pixels = np.transpose(inp.numpy(), (1, 2, 0))
    # Undo the normalization, then keep values in the displayable range.
    pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

    plt.imshow(pixels)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and per-epoch metrics are averaged with ``dataset_sizes``.
    Whenever validation accuracy improves, the weights are snapshotted in
    memory and the whole model object is saved to
    ``best_model_text_cls2.pth``.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose gradients are zeroed before every batch
            and which is stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the best-validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass; gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that dividing by
                # the dataset size below gives a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the best-performing weights and persist the model.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model, 'best_model_text_cls2.pth')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m '
          f'{time_elapsed % 60:.0f}s')
    # ``.4f`` (precision), not ``4f`` (field width): print four decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights.
    model.load_state_dict(best_model_wts)
    return model
def visualize_model(model, num_images=6):
    """Plot predictions for up to ``num_images`` validation images.

    Images are drawn from the module-level ``dataloaders['val']`` into a
    two-column grid of subplots, each titled with the predicted class name
    taken from ``class_names``.  The model is switched to eval mode for the
    duration of the call and its previous train/eval mode is restored on
    exit, including when an exception is raised.

    Args:
        model: Network used for prediction; called as ``model(inputs)``.
        num_images: Maximum number of images to plot.  Values <= 0 plot
            nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Two columns; round the row count up so an odd ``num_images`` still fits
    # in the grid (identical to ``num_images // 2`` for even values).
    num_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(mode=was_training)
if __name__ == '__main__':
    # Start from an ImageNet-pretrained ResNet-18.
    # NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=`` - confirm against the installed version.
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # Replace the classification head with one output per class found in the
    # training folder, instead of hard-coding 2 outputs.
    model_ft.fc = nn.Linear(num_ftrs, len(class_names))

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized.
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^
    #
    # (The upstream tutorial quotes 15-25 min on CPU and under a minute on
    # GPU for its own dataset; timing here depends on the dataset.)
    #
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=25)

    # train_model returns the model with its best weights loaded; store the
    # whole model object so it can be reloaded with a plain torch.load().
    torch.save(model_ft, 'best_model_text_cls2.pth')
    # visualize_model(model_ft)