Previous article: YOLOv7 to RKNN
This article:
1. Covers conversion of the detection model only; classification, pose, and segmentation will be written up later.
2. ☆ Supports quantization, and fixes the issue where all confidences were 0 after quantization.
3. Fixes some errors raised during the conversion process, mainly the array axis size exceeding the limit.
1. Training
1. Switch to the right version
ultralytics-8.0.186
git clone https://github.com/ultralytics/ultralytics
cd ultralytics
git checkout eb976f5ad20d7779e82f733af4ebe592beaa89b5
2. Training
For v8 training, please refer to: xxx (not written yet; manual work is fun).
The official documentation is also very detailed; I will post a link here.
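Until then, here is a minimal sketch of what a v8 training run looks like with the Ultralytics API (the dataset yaml and the model choice are assumptions, not the exact setup used for this article):

from ultralytics import YOLO

# Minimal YOLOv8 training sketch; 'VisDrone2019.yaml' is a hypothetical
# dataset config matching the files used later in this article.
model = YOLO('yolov8n.pt')  # start from a pretrained checkpoint
model.train(data='VisDrone2019.yaml', epochs=100, imgsz=608)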
2. pt2onnx
Note: opset_version=12.
imgsz=(h, w): pay attention to the order of h and w.
from ultralytics import YOLO

def yolov8_export():
    # Load a trained model and export it to ONNX
    model = YOLO(model="./runs/detect/bbb2/weights/best.pt")
    model.export(format='onnx', imgsz=(608, 608), opset=12, simplify=True)

if __name__ == '__main__':
    yolov8_export()
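To double-check the exported opset and input size, you can inspect the resulting file (a quick sketch, assuming the onnx Python package is installed):

import onnx

m = onnx.load('./runs/detect/bbb2/weights/best.onnx')  # path produced by the export above
print(m.opset_import[0].version)                # expect 12
print(m.graph.input[0].type.tensor_type.shape)  # expect 1 x 3 x 608 x 608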
3. onnx2rknn
1. RK3588 virtual environment setup
rknn-toolkit2 1.5.0
git clone https://github.com/rockchip-linux/rknn-toolkit2
cd rknn-toolkit2
conda create -n rknn-toolkit2 python=3.6
conda activate rknn-toolkit2
pip install -r doc/requirements_cp36-*.txt
pip install packages/rknn_toolkit2-*-cp36-*.whl
If you want to use the latest whl: https://eyun.baidu.com/s/3eTDMk6Y (extraction password: rknn),
navigate RK_NPU_SDK -> RK_NPU_SDK_1.5.0 -> develop -> rknn-toolkit2-1.5.1b24 -> packages,
download the latest whl, and install it.
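A quick sanity check that the toolkit is importable in the new environment (minimal sketch):

from rknn.api import RKNN

# If this import and constructor succeed, the wheel installed correctly.
rknn = RKNN()
print('rknn-toolkit2 is ready')
rknn.release()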
2. Convert + test a single image
The full code is as follows:
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17 13:44:51
@Author : tm1
@IDE : PyCharm
@Project: onnx2rknn_YOLOv8
@Disc : Manually pick the ONNX output nodes.
Differences: 1. The ONNX post-processing that gets cut off must be re-implemented by hand;
             2. Quantization becomes possible.
"""
import cv2
import numpy as np
import yaml
from rknn.api import RKNN
ONNX_MODEL = './onnx_model/VisDrone2019/best.onnx'
RKNN_MODEL = './onnx_model/VisDrone2019/best.rknn'
DATASET = './onnx_model/VisDrone2019/quantize.txt'
dataset = './onnx_model/VisDrone2019/VisDrone2019.yaml'
QUANTIZE_ON = True

# CLASSES = {0: "hogcote"}  # classes used during training
CLASSES = {}  # classes used during training
if CLASSES == {}:
    with open(dataset, 'r') as f:
        CLASSES = yaml.safe_load(f)['names']

nmsThresh = 0.45  # the larger the value, the more overlap is allowed
objectThresh = 0.5

# Note: adjust to the input size of the ONNX model.
model_h = 608
model_w = 608

color_palette = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    dw /= 2  # divide padding into 2 sides
    dh /= 2
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
def draw_detections(img, box, score, class_id):
    """
    Draws bounding boxes and labels on the input image based on the detected objects.
    Args:
        img: The input image to draw detections on.
        box: Detected bounding box.
        score: Corresponding detection score.
        class_id: Class ID for the detected object.
    Returns:
        None
    """
    # Extract the coordinates of the bounding box
    x1, y1, w, h = box
    # Retrieve the color for the class ID
    color = color_palette[class_id]
    # Draw the bounding box on the image
    cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
    # Create the label text with class name and score
    label = f'{CLASSES[class_id]}: {score:.2f}'
    # Calculate the dimensions of the label text
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    # Calculate the position of the label text
    label_x = x1
    label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
    # Draw a filled rectangle as the background for the label text
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
                  cv2.FILLED)
    # Draw the label text on the image
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def postprocess(input_image, outputs):
    img_h, img_w = input_image.shape[:2]
    boxes0 = np.transpose(np.squeeze(outputs[0]))
    scores0 = np.transpose(np.squeeze(outputs[1]))
    if len(scores0.shape) == 1:
        scores0 = np.expand_dims(scores0, axis=1)
    # The Sigmoid was cut out of the ONNX graph, so apply it here.
    scores = sigmoid(scores0)
    max_scores = np.max(scores, axis=1)  # highest score across classes
    max_indices = np.argmax(scores, axis=1)
    t = np.where(max_scores >= objectThresh)[0]  # indices above the threshold
    boxes = boxes0[t]
    scores = max_scores[t]
    class_ids = max_indices[t]
    # Sort by score, descending
    sorted_indices = np.argsort(scores)[::-1]
    boxes = boxes[sorted_indices]
    scores = scores[sorted_indices]
    class_ids = class_ids[sorted_indices]
    print(boxes)
    print(scores)
    print(class_ids)
    # Get the number of rows in the outputs array
    rows = boxes.shape[0]
    # Lists to store the bounding boxes, scores, and class IDs of the detections
    boxes_ = []
    scores_ = []
    class_ids_ = []
    # Calculate the scaling factors for the bounding box coordinates
    x_factor = img_w / model_w
    y_factor = img_h / model_h
    # Iterate over each detection
    for i in range(rows):
        # scores[i] is already the per-detection maximum score
        max_score = scores[i]
        # If the maximum score is above the confidence threshold
        if max_score >= objectThresh:
            # Use the class id computed before filtering; scores is 1-D here,
            # so taking argmax of scores[i] would always return 0.
            class_id = class_ids[i]
            # Extract the bounding box coordinates (cx, cy, w, h)
            x, y, w, h = boxes[i]
            # Calculate the scaled coordinates of the bounding box
            left = int((x - w / 2) * x_factor)
            top = int((y - h / 2) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            # Add the class ID, score, and box coordinates to the respective lists
            class_ids_.append(class_id)
            scores_.append(max_score)
            boxes_.append([left, top, width, height])
    print(boxes_)
    print(scores_)
    print(class_ids_)
    # Apply non-maximum suppression to filter out overlapping bounding boxes
    indices = cv2.dnn.NMSBoxes(boxes_, scores_, score_threshold=objectThresh, nms_threshold=nmsThresh)
    # Iterate over the selected indices after non-maximum suppression
    for i in indices:
        # Get the box, score, and class ID corresponding to the index
        box = boxes_[i]
        score = scores_[i]
        class_id = class_ids_[i]
        # Draw the detection on the input image
        draw_detections(input_image, box, score, class_id)
    return input_image
def export_rknn():
    rknn = RKNN(verbose=True)
    rknn.config(
        # see: ultralytics/yolo/data/utils.py
        mean_values=[[0, 0, 0]],
        std_values=[[255, 255, 255]],
        # TODO: with the mean/std below, the results were worse:
        # mean_values=[[123.675, 116.28, 103.53]],  # IMAGENET_MEAN = 0.485, 0.456, 0.406
        # std_values=[[58.395, 57.12, 57.375]],  # IMAGENET_STD = 0.229, 0.224, 0.225
        quantized_algorithm='normal',
        quantized_method='channel',
        # optimization_level=2,
        compress_weight=False,  # compress the weights to reduce the rknn model size; default is False
        # single_core_mode=True,
        # model_pruning=False,  # prune the model to reduce its size; default is False
        target_platform='rk3588'
    )
    rknn.load_onnx(
        model=ONNX_MODEL,
        outputs=[
            '/model.22/Mul_2_output_0', '/model.22/Split_output_1',
        ]
    )
    rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1)
    rknn.export_rknn(RKNN_MODEL)
    # # Accuracy analysis
    # rknn.accuracy_analysis(
    #     inputs=['/home/tm1/D/workspace/onnx2rknn_YOLOv8/onnx_model/official/zidane.jpg'],
    #     output_dir="./snapshot",
    #     target=None
    # )
    rknn.init_runtime()
    return rknn
if __name__ == '__main__':
    # Prepare the data
    img_path = 'onnx_model/VisDrone2019/img.png'
    orig_img = cv2.imread(img_path)
    # img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
    img = orig_img
    img_h, img_w = img.shape[:2]
    resized_img, ratio, (dw, dh) = letterbox(img, new_shape=(model_h, model_w))  # padding resize
    # resized_img = cv2.resize(img, (model_w, model_h), interpolation=cv2.INTER_LINEAR)  # direct resize
    input = np.expand_dims(resized_img, axis=0)
    # Convert the model
    rknn = export_rknn()
    # Inference
    outputs = rknn.inference(inputs=[input], data_format="nhwc")
    # Post-processing
    result_img = postprocess(resized_img, outputs)
    # Save the result
    cv2.imwrite('./onnx_model/VisDrone2019/img_result.jpg', result_img)
    # Release
    rknn.release()
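One note on the DATASET file: quantize.txt is a plain text file with one calibration image path per line (the rknn-toolkit2 dataset format). A sketch to generate it, where the calib/ folder is a hypothetical location for your calibration images:

import glob

# Write one calibration image path per line, as rknn.build() expects.
with open('./onnx_model/VisDrone2019/quantize.txt', 'w') as f:
    for path in sorted(glob.glob('./onnx_model/VisDrone2019/calib/*.jpg')):  # hypothetical folder
        f.write(path + '\n')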
3. Explanation of the key parts of the code
3.1 In the export_rknn() function
rknn.load_onnx(
    model=ONNX_MODEL,
    outputs=[
        '/model.22/Mul_2_output_0', '/model.22/Split_output_1',
    ]
)
Where the nodes /model.22/Mul_2_output_0 and /model.22/Split_output_1 come from:
open the converted ONNX model with this website,
then scroll down to the end of the graph:
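If you would rather find the node names programmatically than in a graph viewer, a small sketch using the onnx package:

import onnx

model = onnx.load('./onnx_model/VisDrone2019/best.onnx')
targets = {'/model.22/Mul_2_output_0', '/model.22/Split_output_1'}
# Print the nodes that produce the tensors we cut the graph at.
for node in model.graph.node:
    if targets & set(node.output):
        print(node.name, node.op_type, list(node.output))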
3.2 In the postprocess() function
As you can see in the image above, the Sigmoid that follows the node /model.22/Split_output_1 has been dropped.
Why were the quantized confidences all 0? The sigmoid's output range is (0, 1), which collapses to 0 after int8 quantization. So the Sigmoid is removed from the graph and applied manually in post-processing.
Reference 1, Reference 2
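A toy numeric sketch of this collapse (this is not the RKNN quantizer itself; the shared scale is an assumption meant to mimic an output tensor whose range is dominated by large box coordinates):

import numpy as np

# If probabilities in (0, 1) share one int8 scale with values up to ~608
# (box coordinates), the scale is about 608 / 255 ~= 2.4 and every
# probability rounds to 0:
scale = 608 / 255
probs = np.array([0.10, 0.50, 0.90, 0.99])
print(np.round(probs / scale).astype(np.int8))  # -> [0 0 0 0]

# Raw logits quantized over their own range keep usable resolution,
# which is why the Sigmoid is cut out and applied in postprocess():
logits = np.array([-4.0, 0.0, 2.0, 4.0])
logit_scale = 10 / 255  # assumed logit range of [-5, 5]
print(np.round(logits / logit_scale).astype(np.int8))  # distinct values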
3.3 The array axis size exceeds the limit
As you can see in the image above, the axis size of the Reshape, Softmax, and Transpose operations is 7581 (= 76*76 + 38*38 + 19*19). My input image is 608 x 608; with a larger input image, this would exceed RK's limit.
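The 7581 comes from the three detection grids that a 608 x 608 input produces with strides 8, 16 and 32; a quick check:

# Number of predictions for a 608x608 input with YOLOv8 strides 8, 16, 32:
h = w = 608
anchors = sum((h // s) * (w // s) for s in (8, 16, 32))
print(anchors)  # 76*76 + 38*38 + 19*19 = 5776 + 1444 + 361 = 7581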
The RK restrictions are as follows:
For details, see rknn-toolkit2/doc/RKNN_Compiler_Support_Operator_List_v1.5.0.pdf