ONNX Runtime deploys ImageNet pre-trained image classification model (Brother Tongji Zihao’s study notes)

Installation and configuration environment
code running cloud GPU platform: public account artificial intelligence tips reply gpu
Tongji Zihao brother 2022-8-22 2023-4-28 2023-5-8
Install Pytorch

pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113

Install ONNX

pip install onnx -i https://pypi.tuna.tsinghua.edu.cn/simple

Install the inference engine ONNX Runtime

pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple

Install other third-party toolkits

pip install numpy pandas matplotlib tqdm opencv-python pillow -i https://pypi.tuna.tsinghua.edu.cn/simple

Verify installation configuration is successful

import torch
import onnx
import onnxruntime as ort
#验证安装配置成功
print('PyTorch版本：',torch.__version__)
print('ONNX版本：',onnx.__version__)
print('ONNX Runtime版本：',ort.__version__)

Download material files

https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20220716-mmclassification/dataset/imagenet/imagenet_class_index.csv
https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20220716-mmclassification/test/banana1.jpg
https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20220716-mmclassification/test/video_4.mp4

Download ONNX model files

https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20220716-mmclassification/dataset/fruit30/onnx/resnet18_imagenet.onnx

Deployment of the inference engine ONNX Runtime - Predicting a single image
. Use the inference engine ONNX Runtime to read the model file in the ONNX format and predict the single image file.
Brother Tongji Zihao https://space.bilibili.com/1900783
2022-8-22 2023-5-8
Application Scenario
The following code runs on the hardware that needs to be deployed (local PC, embedded development board, Raspberry Pi, Jetson Nano, server)
Just send the onnx model file to the deployment hardware, and install the ONNX Runtime environment. Use the following lines of code to run the model.

import  onnxruntime
import torch
import torch.nn.functional as F
import pandas as pd
from PIL import Image # 用pillow载入
from torchvision import transforms


# 载入onnx模型，获取ONNX Runtime推理器
ort_session=onnxruntime.InferenceSession('resnet18_imagenet.onnx',
                                         providers=['CUDAExecutionProvider'])
# # 构造随随机输入，获取输出结果
# x=torch.rand(1,3,256,256).numpy()
# print('random:',x.shape)
# # onnx runtime 输入
# ort_inputs={'input':x}
# # onnx runtime 输出
# ort_output=ort_session.run(['output'],ort_inputs)[0]
# # 注意：输入输出张量的名称需要和torch.onnx.export中设置的输入输出名对应
# print('random ort_output:',ort_output.shape)

# 载入一张真正的测试图像
img_path='banana1.jpg'
img_pil=Image.open(img_path)
# img_pil.show() #显示这张图象
# 测试集图像预处理-RCNN：缩放旋转、转Tensor、归一化
test_transform=transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(256),
                                   transforms.ToTensor(),
                                   transforms.Normalize(
                                       mean=[0.485,0.456,0.406],
                                       std=[0.229,0.224,0.225])
                                   ])
# 运行预处理
input_img=test_transform(img_pil)
# print('input_img_shape:',input_img.shape)
input_tensor=input_img.unsqueeze(0).numpy()
# print('input_img_tensor:',input_tensor.shape)

# 推理预测
# ONN Runtime 输入
ort_inputs={
    
    'input':input_tensor}
# ONN Runtime 输出
pred_logits=ort_session.run(['output'],ort_inputs)[0]
pred_logits=torch.tensor(pred_logits)
# print('pred_logits:',pred_logits.shape)
# 对logit分数做softmax运算，得到置信度概率
pred_softmax=F.softmax(pred_logits,dim=1)
# print('pre_softmax:',pred_softmax.shape)
# 解析预测结果
# 取置信度最高的前n个结果
n=3
top_n=torch.topk(pred_softmax,n)
# print('top_n:',top_n)
# 预测结果
pred_ids=top_n.indices.numpy()[0]
# print('pre_ids:',pred_ids)
# 预测置信度
confs=top_n.values.numpy()[0]
# print('confs：',confs)

# 载入ID和 类别名称 对应关系
df=pd.read_csv('imagenet_class_index.csv')
idx_to_labels={
    
    }
for idx,row in df.iterrows():
    idx_to_labels[row['ID']]=row['class'] # 英文
    # idx_to_labels[row['ID']] = row['Chinese']# 中文
# print('idx_to_labels:',idx_to_labels)

# 分别用英文和中文打印预测结果
for i in range(n):
    class_name=idx_to_labels[pred_ids[i]] # 获取类别名称
    confidence=confs[i]*100
    text='{:<20}{:>.3f}%'.format(class_name,confidence)
    print(text)

ImageNet-ONNX Runtime Deployment - Camera and Video - English
Use the ONNX Runtime inference engine to load the ImageNet pre-trained image classification onnx model to predict the real-time camera footage.
Brother Tongji Zihao: https://space.bilibili.com/1900783
Test running environment: Macbook Pro
Notes
This code needs to be run locally where the camera is connected and cannot be run on the cloud GPU platform.
run locally

pip install onnxruntime

Install onnx runtime and prepare onnx model files.
Video processing English template frame by frame

from PIL import Image
import onnxruntime
import torch
import torch.nn.functional as F
from torchvision import transforms
import pandas as pd
import cv2
import time
from tqdm import tqdm

# 处理帧函数
def process_frame(img_bgr):
    '''
    输入摄像头拍摄画面bgr-array，输出图像分类预测结果bgr-array
    '''
    # 载入 onnx 模型，获取 ONNX Runtime 推理器

    cuda = torch.cuda.is_available()
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    ort_session = onnxruntime.InferenceSession('resnet18_imagenet.onnx',None,providers=providers)
    # 载入ImageNet 1000图像分类标签
    df = pd.read_csv('imagenet_class_index.csv')
    idx_to_labels = {
    
    }
    for idx, row in df.iterrows():
        idx_to_labels[row['ID']] = row['class']
    # 图像预处理
    # 测试集图像预处理-RCTN：缩放裁剪、转 Tensor、归一化
    test_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(256),
                                         transforms.ToTensor(),
                                         transforms.Normalize(
                                             mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
                                         ])

    # 记录该帧开始处理的时间
    start_time = time.time()

    ## 画面转成 RGB 的 Pillow 格式
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # BGR转RGB
    img_pil = Image.fromarray(img_rgb)  # array 转 PIL

    ## 预处理
    input_img = test_transform(img_pil)  # 预处理
    input_tensor = input_img.unsqueeze(0).numpy()

    ## onnx runtime 预测
    ort_inputs = {
    
    'input': input_tensor}  # onnx runtime 输入
    pred_logits = ort_session.run(['output'], ort_inputs)[0]  # onnx runtime 输出
    pred_logits = torch.tensor(pred_logits)
    pred_softmax = F.softmax(pred_logits, dim=1)  # 对 logit 分数做 softmax 运算

    ## 解析top-n预测结果的类别和置信度
    top_n = torch.topk(pred_softmax, 5)  # 取置信度最大的 n 个结果
    pred_ids = top_n[1].cpu().detach().numpy().squeeze()  # 解析预测类别
    confs = top_n[0].cpu().detach().numpy().squeeze()  # 解析置信度

    # 在图像上写英文
    for i in range(len(confs)):
        pred_class = idx_to_labels[pred_ids[i]]

        # 写字：图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
        text = '{:<15} {:>.3f}'.format(pred_class, confs[i])
        img_bgr = cv2.putText(img_bgr, text, (50, 160 + 80 * i), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4,
                              cv2.LINE_AA)

    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1 / (end_time - start_time)
    # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
    img_bgr = cv2.putText(img_bgr, 'FPS  ' + str(int(FPS)), (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 255), 4,
                          cv2.LINE_AA)

    return img_bgr

def generate_video(input_path):
    filehead = input_path.split('/')[-1]
    output_path = "out-" + filehead

    print('视频开始处理', input_path)

    # 获取视频总帧数
    cap = cv2.VideoCapture(input_path)
    frame_count = 0
    while (cap.isOpened()):
        success, frame = cap.read()
        frame_count += 1
        if not success:
            break
    cap.release()
    print('视频总帧数为', frame_count)

    # cv2.namedWindow('Crack Detection and Measurement Video Processing')
    cap = cv2.VideoCapture(input_path)
    frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)

    out = cv2.VideoWriter(output_path, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))

    # 进度条绑定视频总帧数
    with tqdm(total=frame_count - 1) as pbar:
        try:
            while (cap.isOpened()):
                success, frame = cap.read()
                if not success:
                    break

                # 处理帧
                # frame_path = './temp_frame.png'
                # cv2.imwrite(frame_path, frame)
                try:
                    frame = process_frame(frame)
                except:
                    print('报错！', error)
                    pass

                if success == True:
                    # cv2.imshow('Video Processing', frame)
                    out.write(frame)

                    # 进度条更新一帧
                    pbar.update(1)

                # if cv2.waitKey(1) & 0xFF == ord('q'):
                # break
        except:
            print('中途中断')
            pass

    cv2.destroyAllWindows()
    out.release()
    cap.release()
    print('视频已保存', output_path)

generate_video(input_path='video_4.mp4')

Call the camera to obtain the English template of each frame

from PIL import Image
import onnxruntime
import torch
import torch.nn.functional as F
from torchvision import transforms
import pandas as pd
import cv2
import time

# 处理帧函数
def process_frame(img_bgr):
    '''
    输入摄像头拍摄画面bgr-array，输出图像分类预测结果bgr-array
    '''
    # 载入 onnx 模型，获取 ONNX Runtime 推理器

    cuda = torch.cuda.is_available()
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    ort_session = onnxruntime.InferenceSession('resnet18_imagenet.onnx',None,providers=providers)
    # 载入ImageNet 1000图像分类标签
    df = pd.read_csv('imagenet_class_index.csv')
    idx_to_labels = {
    
    }
    for idx, row in df.iterrows():
        idx_to_labels[row['ID']] = row['class']
    # 图像预处理
    # 测试集图像预处理-RCTN：缩放裁剪、转 Tensor、归一化
    test_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(256),
                                         transforms.ToTensor(),
                                         transforms.Normalize(
                                             mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
                                         ])

    # 记录该帧开始处理的时间
    start_time = time.time()

    ## 画面转成 RGB 的 Pillow 格式
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # BGR转RGB
    img_pil = Image.fromarray(img_rgb)  # array 转 PIL

    ## 预处理
    input_img = test_transform(img_pil)  # 预处理
    input_tensor = input_img.unsqueeze(0).numpy()

    ## onnx runtime 预测
    ort_inputs = {
    
    'input': input_tensor}  # onnx runtime 输入
    pred_logits = ort_session.run(['output'], ort_inputs)[0]  # onnx runtime 输出
    pred_logits = torch.tensor(pred_logits)
    pred_softmax = F.softmax(pred_logits, dim=1)  # 对 logit 分数做 softmax 运算

    ## 解析top-n预测结果的类别和置信度
    top_n = torch.topk(pred_softmax, 5)  # 取置信度最大的 n 个结果
    pred_ids = top_n[1].cpu().detach().numpy().squeeze()  # 解析预测类别
    confs = top_n[0].cpu().detach().numpy().squeeze()  # 解析置信度

    # 在图像上写英文
    for i in range(len(confs)):
        pred_class = idx_to_labels[pred_ids[i]]

        # 写字：图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
        text = '{:<15} {:>.3f}'.format(pred_class, confs[i])
        img_bgr = cv2.putText(img_bgr, text, (50, 160 + 80 * i), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4,
                              cv2.LINE_AA)

    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1 / (end_time - start_time)
    # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
    img_bgr = cv2.putText(img_bgr, 'FPS  ' + str(int(FPS)), (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 255), 4,
                          cv2.LINE_AA)

    return img_bgr


cap = cv2.VideoCapture(1)
# 打开cap
cap.open(0)
# 无限循环，直到break被触发
while cap.isOpened():
    # 获取画面
    success, frame = cap.read()
    if not success:  # 如果获取画面不成功，则退出
        print('获取画面不成功，退出')
        break
    ## 逐帧处理
    frame = process_frame(frame)
    # 展示处理后的三通道图像
    cv2.imshow('my_window', frame)
    key_pressed = cv2.waitKey(60)  # 每隔多少毫秒毫秒，获取键盘哪个键被按下
    # print('键盘上被按下的键：', key_pressed)
    if key_pressed in [ord('q'), 27]:  # 按键盘上的q或esc退出（在英文输入法下）
        break
# 关闭摄像头
cap.release()
# 关闭图像窗口
cv2.destroyAllWindows()
# 按键盘上的q键退出

ImageNet-ONNX Runtime Deployment-Camera and Video-Chinese
Use ImageNet to pre-train the image classification model to predict the real-time camera footage.
This code needs to be run locally where the camera is connected and cannot be run on the cloud GPU platform.
Brother Tongji Zihao: https://space.bilibili.com/1900783
Test running environment: Macbook Pro
imports Chinese fonts

https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20220716-mmclassification/dataset/SimHei.ttf

Video frame by frame processing Chinese template

from PIL import Image, ImageFont, ImageDraw
import onnxruntime
import torch
import torch.nn.functional as F
from torchvision import transforms
import pandas as pd
import cv2
import numpy as np
import time
from tqdm import tqdm

# 处理帧函数
def process_frame(img_bgr):
    '''
    输入摄像头拍摄画面bgr-array，输出图像分类预测结果bgr-array
    '''
    # 导入中文字体，指定字体大小
    font = ImageFont.truetype('SimHei.ttf', 32)
    # 载入 onnx 模型，获取 ONNX Runtime 推理器

    cuda = torch.cuda.is_available()
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    ort_session = onnxruntime.InferenceSession('resnet18_imagenet.onnx',None,providers=providers)
    # 载入ImageNet 1000图像分类标签
    df = pd.read_csv('imagenet_class_index.csv')
    idx_to_labels = {
    
    }
    for idx, row in df.iterrows():
        idx_to_labels[row['ID']] = row['Chinese']
    # 图像预处理
    # 测试集图像预处理-RCTN：缩放裁剪、转 Tensor、归一化
    test_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(256),
                                         transforms.ToTensor(),
                                         transforms.Normalize(
                                             mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
                                         ])

    # 记录该帧开始处理的时间
    start_time = time.time()

    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # BGR转RGB
    img_pil = Image.fromarray(img_rgb)  # array 转 PIL

    ## 预处理
    input_img = test_transform(img_pil)  # 预处理
    input_tensor = input_img.unsqueeze(0).numpy()

    ## onnx runtime 预测
    ort_inputs = {
    
    'input': input_tensor}  # onnx runtime 输入
    pred_logits = ort_session.run(['output'], ort_inputs)[0]  # onnx runtime 输出
    pred_logits = torch.tensor(pred_logits)
    pred_softmax = F.softmax(pred_logits, dim=1)  # 对 logit 分数做 softmax 运算

    ## 解析图像分类预测结果
    n = 5
    top_n = torch.topk(pred_softmax, n)  # 取置信度最大的 n 个结果
    pred_ids = top_n[1].cpu().detach().numpy().squeeze()  # 解析出类别
    confs = top_n[0].cpu().detach().numpy().squeeze()  # 解析出置信度

    ## 在图像上写中文
    draw = ImageDraw.Draw(img_pil)
    for i in range(len(confs)):
        pred_class = idx_to_labels[pred_ids[i]]

        # 写中文：文字坐标，中文字符串，字体，rgba颜色
        text = '{:<15} {:>.3f}'.format(pred_class, confs[i])  # 中文字符串
        draw.text((50, 100 + 50 * i), text, font=font, fill=(255, 0, 0, 1))

    img_rgb = np.array(img_pil)  # PIL 转 array
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)  # RGB转BGR

    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1 / (end_time - start_time)
    # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
    img_bgr = cv2.putText(img_bgr, 'FPS  ' + str(int(FPS)), (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 255), 4,
                          cv2.LINE_AA)
    return img_bgr

def generate_video(input_path='videos/robot.mp4'):
    filehead = input_path.split('/')[-1]
    output_path = "out-" + filehead

    print('视频开始处理', input_path)

    # 获取视频总帧数
    cap = cv2.VideoCapture(input_path)
    frame_count = 0
    while (cap.isOpened()):
        success, frame = cap.read()
        frame_count += 1
        if not success:
            break
    cap.release()
    print('视频总帧数为', frame_count)

    # cv2.namedWindow('Crack Detection and Measurement Video Processing')
    cap = cv2.VideoCapture(input_path)
    frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)

    out = cv2.VideoWriter(output_path, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))

    # 进度条绑定视频总帧数
    with tqdm(total=frame_count - 1) as pbar:
        try:
            while (cap.isOpened()):
                success, frame = cap.read()
                if not success:
                    break

                # 处理帧
                # frame_path = './temp_frame.png'
                # cv2.imwrite(frame_path, frame)
                try:
                    frame = process_frame(frame)
                except:
                    print('报错！', error)
                    pass

                if success == True:
                    # cv2.imshow('Video Processing', frame)
                    out.write(frame)

                    # 进度条更新一帧
                    pbar.update(1)

                # if cv2.waitKey(1) & 0xFF == ord('q'):
                # break
        except:
            print('中途中断')
            pass

    cv2.destroyAllWindows()
    out.release()
    cap.release()
    print('视频已保存', output_path)

generate_video(input_path='video_4.mp4')

Call the camera to obtain the Chinese template of each frame

from PIL import Image, ImageFont, ImageDraw
import onnxruntime
import torch
import torch.nn.functional as F
from torchvision import transforms
import pandas as pd
import cv2
import numpy as np
import time
from tqdm import tqdm

# 处理帧函数
def process_frame(img_bgr):
    '''
    输入摄像头拍摄画面bgr-array，输出图像分类预测结果bgr-array
    '''
    # 导入中文字体，指定字体大小
    font = ImageFont.truetype('SimHei.ttf', 32)
    # 载入 onnx 模型，获取 ONNX Runtime 推理器

    cuda = torch.cuda.is_available()
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    ort_session = onnxruntime.InferenceSession('resnet18_imagenet.onnx',None,providers=providers)
    # 载入ImageNet 1000图像分类标签
    df = pd.read_csv('imagenet_class_index.csv')
    idx_to_labels = {
    
    }
    for idx, row in df.iterrows():
        idx_to_labels[row['ID']] = row['Chinese']
    # 图像预处理
    # 测试集图像预处理-RCTN：缩放裁剪、转 Tensor、归一化
    test_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(256),
                                         transforms.ToTensor(),
                                         transforms.Normalize(
                                             mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
                                         ])

    # 记录该帧开始处理的时间
    start_time = time.time()

    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # BGR转RGB
    img_pil = Image.fromarray(img_rgb)  # array 转 PIL

    ## 预处理
    input_img = test_transform(img_pil)  # 预处理
    input_tensor = input_img.unsqueeze(0).numpy()

    ## onnx runtime 预测
    ort_inputs = {
    
    'input': input_tensor}  # onnx runtime 输入
    pred_logits = ort_session.run(['output'], ort_inputs)[0]  # onnx runtime 输出
    pred_logits = torch.tensor(pred_logits)
    pred_softmax = F.softmax(pred_logits, dim=1)  # 对 logit 分数做 softmax 运算

    ## 解析图像分类预测结果
    n = 5
    top_n = torch.topk(pred_softmax, n)  # 取置信度最大的 n 个结果
    pred_ids = top_n[1].cpu().detach().numpy().squeeze()  # 解析出类别
    confs = top_n[0].cpu().detach().numpy().squeeze()  # 解析出置信度

    ## 在图像上写中文
    draw = ImageDraw.Draw(img_pil)
    for i in range(len(confs)):
        pred_class = idx_to_labels[pred_ids[i]]

        # 写中文：文字坐标，中文字符串，字体，rgba颜色
        text = '{:<15} {:>.3f}'.format(pred_class, confs[i])  # 中文字符串
        draw.text((50, 100 + 50 * i), text, font=font, fill=(255, 0, 0, 1))

    img_rgb = np.array(img_pil)  # PIL 转 array
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)  # RGB转BGR

    # 记录该帧处理完毕的时间
    end_time = time.time()
    # 计算每秒处理图像帧数FPS
    FPS = 1 / (end_time - start_time)
    # 图片，添加的文字，左上角坐标，字体，字体大小，颜色，线宽，线型
    img_bgr = cv2.putText(img_bgr, 'FPS  ' + str(int(FPS)), (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 255), 4,
                          cv2.LINE_AA)
    return img_bgr

# 获取摄像头，传入0表示获取系统默认摄像头
cap = cv2.VideoCapture(1)
# 打开cap
cap.open(0)
# 无限循环，直到break被触发
while cap.isOpened():
    # 获取画面
    success, frame = cap.read()
    if not success:  # 如果获取画面不成功，则退出
        print('获取画面不成功，退出')
        break
    ## 逐帧处理
    frame = process_frame(frame)
    # 展示处理后的三通道图像
    cv2.imshow('my_window', frame)
    key_pressed = cv2.waitKey(60)  # 每隔多少毫秒毫秒，获取键盘哪个键被按下
    # print('键盘上被按下的键：', key_pressed)
    if key_pressed in [ord('q'), 27]:  # 按键盘上的q或esc退出（在英文输入法下）
        break
# 关闭摄像头
cap.release()
# 关闭图像窗口
cv2.destroyAllWindows()
# 按键盘上的q键退出

ONNX Runtime deploys ImageNet pre-trained image classification model (Brother Tongji Zihao’s study notes)

Guess you like