Model quantization (FP32, FP16, INT8) and evaluation

import nncf
from openvino.tools import mo
from openvino.runtime import serialize
# from multiprocessing import process

# This code must run under the ``__main__`` guard, otherwise a process error is raised.
# It is a consequence of how multiprocessing works on Windows: a child process
# automatically imports the file that started it, and importing executes every
# top-level statement. Without the ``__main__`` guard, child processes would be
# created recursively without end and the run would fail.
if __name__ == "__main__":
    # Name of the ONNX model to read.
    MODEL_NAME = "best.onnx"

    # Directory that contains the ONNX model.
    MODEL_PATH = "runs/train/jintan_wu_727/weights"

    # Base name of the OpenVINO IR files to save.
    # IR_MODEL_NAME = "yolov5s"
    IR_MODEL_NAME = "yolo_WuZhuangSeCha"

    # Full path of the ONNX model.
    onnx_path = f"{MODEL_PATH}/{MODEL_NAME}"

    # FP32 IR model.
    fp32_path = f"{MODEL_PATH}/FP32_openvino_model/{IR_MODEL_NAME}_fp32.xml"
    print(f"Export ONNX to Openvino FP32 IR to:{fp32_path}")
    model = mo.convert_model(onnx_path)
    serialize(model, fp32_path)

    # FP16 IR model: convert the ONNX file again with FP16 compression enabled.
    fp16_path = f"{MODEL_PATH}/FP16_openvino_model/{IR_MODEL_NAME}_fp16.xml"
    print(f"Export onnx to openvino FP16 IR to:{fp16_path}")
    model = mo.convert_model(onnx_path, compress_to_fp16=True)
    serialize(model, fp16_path)

    # Prepare the dataset used for quantization calibration.
    from utils.datasets import create_dataloader
    from utils.general import check_dataset

    # Dataset configuration file.
    DATASET_CONFIG = "./data/coco128.yaml"

    def create_data_source():
        """Build the dataloader over the ``val`` split named in DATASET_CONFIG."""
        data = check_dataset(DATASET_CONFIG)
        # create_dataloader returns a sequence; its first element is used as the loader.
        val_dataloader = create_dataloader(
            data["val"], imgsz=640, batch_size=1, stride=32, pad=0.5, workers=1
        )[0]
        return val_dataloader

    data_source = create_data_source()

    def transform_fn(data_item):
        """Turn one dataloader item into the array fed to the model.

        Takes the first element of ``data_item`` (presumably the image batch
        tensor -- confirm against the dataloader), converts it to float,
        scales it by 1/255 and returns it as a NumPy array.
        """
        images = data_item[0]
        images = images.float()
        images = images / 255.0
        return images.cpu().detach().numpy()

    # Calibration dataset consumed by NNCF.
    nncf_calibration_dataset = nncf.Dataset(data_source, transform_fn)

    # Quantization settings.
    subset_size = 1000
    preset = nncf.QuantizationPreset.MIXED

    # Run the optimization. ``serialize`` is already imported at the top of the
    # file, so only ``Core`` needs to be imported here.
    from openvino.runtime import Core

    core = Core()
    # NOTE(review): quantization starts from the FP16-compressed IR; confirm
    # this is intended rather than starting from the FP32 IR (fp32_path).
    ov_model = core.read_model(fp16_path)
    quantized_model = nncf.quantize(
        ov_model, nncf_calibration_dataset, preset=preset, subset_size=subset_size
    )
    nncf_int8_path = f"{MODEL_PATH}/NNCF_INT8_openvino_model/{IR_MODEL_NAME}_int8.xml"
    serialize(quantized_model, nncf_int8_path)
    print(f"Export onnx to openvino Int8 IR to: {nncf_int8_path}")
######################################################需要模型量化时释放以上代码########################################################


    #比较FP32,FP16和INT8模型的准确性
    # from pathlib import Path
    # import val
    #
    # #验证集的配置文件
    # DATASET_CONFIG = "./data/coco128.yaml"
    #
    # # 要读取的onnx模型名称
    # MODEL_NAME = "best.onnx"
    #
    # # onnx模型所在的路径名称
    # MODEL_PATH = f"runs/train/exp12/weights"
    #
    # # 要保存的openvino的模型名称
    # # IR_MODEL_NAME = "yolov5s"
    # IR_MODEL_NAME = "yolo_WuZhuangSeCha"
    #
    # # 将onnx模型的路径连起来
    # onnx_path = f"{MODEL_PATH}/{MODEL_NAME}"
    #
    # # #fp16 IR model 的保存路径
    # fp32_path = f"{MODEL_PATH}/FP32_openvino_model/{IR_MODEL_NAME}_fp32.xml "
    #
    # fp16_path = f"{MODEL_PATH}/FP16_openvino_model/{IR_MODEL_NAME}_fp16.xml"
    #
    # nncf_int8_path = f"{MODEL_PATH}/NNCF_INT8_openvino_model/{IR_MODEL_NAME}_int8.xml"
    #
    # print("checking the accuracy of the fp32 model:")
    # a = Path(fp32_path).parent
    # fp32_metrics = val.run(
    #     data = DATASET_CONFIG,
    #     weights = Path(fp32_path).parent,
    #     batch_size = 1,
    #     workers = 1,
    #     plots = False,
    #     device = 'cpu',
    #     iou_thres = 0.65,
    # )
    #
    # fp32_ap5 = fp32_metrics[0][2]
    # fp32_ap_full = fp32_metrics[0][3]
    # print(f"mAP@.5 = {fp32_ap5}")
    # print(f"mAP@.5:.95 = {fp32_ap_full}")
    #
    # print("checking the accuracy of the fp16 model:")
    # fp16_metrics = val.run(
    #     data = DATASET_CONFIG,
    #     weights = Path(fp16_path).parent,
    #     batch_size = 1,
    #     workers = 1,
    #     plots = False,
    #     device = 'cpu',
    #     iou_thres = 0.65,
    # )
    #
    # fp16_ap5 = fp16_metrics[0][2]
    # fp16_ap_full = fp16_metrics[0][3]
    # print(f"mAP@.5 = {fp16_ap5}")
    # print(f"mAP@.5:.95 = {fp16_ap_full}")
    #
    # print("checking the accuracy of the NNCF int8 model:")
    # int8_metrics = val.run(
    #     data=DATASET_CONFIG,
    #     weights=Path(nncf_int8_path).parent,
    #     batch_size=1,
    #     workers=1,
    #     plots=False,
    #     device='cpu',
    #     iou_thres=0.65,
    # )
    #
    # nncf_int8_path = int8_metrics[0][2]
    # nncf_int8_ap_full = int8_metrics[0][3]
    # print(f"mAP@.5 = {nncf_int8_path}")
    # print(f"mAP@.5:.95 = {nncf_int8_ap_full}")
    # print('\n')
    # print("++++++++FP32,FP16,int8模型指标对比+++++++++++++\n")
    #
    # print(f"mAP@.5:{fp32_ap5}")
    # print(f"mAP@.5:.95:{fp32_ap_full}")
    # print('\n\n')
    # print(f"mAP@.5:{fp16_ap5}")
    # print(f"mAP@.5:.95:{fp16_ap_full}")
    # print('\n\n')
    # print(f"mAP@.5:{nncf_int8_path}")
    # print(f"mAP@.5:.95:{nncf_int8_ap_full}")

In this script, the code above the dividing line performs the quantization; uncomment the code below the dividing line when you want to evaluate the models.

OpenVINO inference on the Python side

import numpy as np
import torch
from PIL import Image
from utils.datasets import letterbox
from utils.plots import plot_images

from typing import List, Tuple, Dict
from utils.general import scale_coords, non_max_suppression
from openvino.runtime import Model
from pathlib import Path
from utils.plots import Annotator
import cv2
import time
import glob


def preprocess_image(img0: np.ndarray):
    """Letterbox an image and reorder its axes for the network.

    Args:
        img0: Source image array (presumably height x width x channels --
            confirm against the caller).

    Returns:
        A ``(img, img0)`` tuple: the letterboxed image with its last axis
        moved to the front, stored contiguously in memory, and the untouched
        input image.
    """
    # Resize: only the first element of letterbox's result is used.
    letterboxed = letterbox(img0, auto=False)[0]

    # Move the last axis to the front, then copy into contiguous memory so
    # that later processing runs faster.
    channels_first = np.ascontiguousarray(letterboxed.transpose(2, 0, 1))
    return channels_first, img0

def prepare_input_tensor(image: np.ndarray):
    """Convert an image array into a float32 tensor scaled by 1/255.

    Args:
        image: Array to convert; it is copied, never modified in place.

    Returns:
        A float32 array holding ``image / 255``. A 3-dimensional input gains
        a leading axis of length 1; any other rank is returned unchanged.
    """
    scaled = image.astype(np.float32) / np.float32(255.0)
    # Add the leading (batch) axis only when the input has exactly three axes.
    return scaled[np.newaxis, ...] if scaled.ndim == 3 else scaled

def detect(model:Model, image_path:Path, conf_thres: float=0.25, iou_thres: float=0.45, classes: List[int]=None, agnostic_nms: bool=False):
    """Run the model on one image file and apply non-maximum suppression.

    Args:
        model: Callable OpenVINO model; its first output is read.
        image_path: Path of the image to load (converted to RGB).
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: IoU threshold forwarded to ``non_max_suppression``.
        classes: Optional class filter forwarded to ``non_max_suppression``.
        agnostic_nms: Forwarded to ``non_max_suppression`` as ``agnostic``.

    Returns:
        A tuple ``(pred, orig_img, input_shape, cost_time)``: the result of
        ``non_max_suppression``, the original RGB image array, the shape of
        the tensor fed to the model, and the measured inference time in
        seconds.
    """
    output_blob = model.output(0)
    # Use a context manager so the image file handle is always released.
    with Image.open(image_path) as pil_image:
        img = np.array(pil_image.convert("RGB"))
    preprocessed_img, orig_img = preprocess_image(img)
    input_tensor = prepare_input_tensor(preprocessed_img)

    # perf_counter is monotonic and high-resolution, which suits timing short
    # intervals better than the wall clock. As before, the measured span
    # covers the model call plus the conversion of its output to a tensor.
    start = time.perf_counter()
    predictions = torch.from_numpy(model(input_tensor)[output_blob])
    cost_time = time.perf_counter() - start
    print(f"耗时：{cost_time * 1000}ms")

    pred = non_max_suppression(predictions, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
    return pred, orig_img, input_tensor.shape, cost_time

def draw_boxes(predictions: np.ndarray, input_shape:Tuple[int], image: np.ndarray, names:List[str], colors:Dict[str, int]):
    """Draw labelled detection boxes on an image.

    Args:
        predictions: Detections for one image, one row per box, unpacked as
            ``x1, y1, x2, y2, conf, cls``. The first four columns are
            rescaled in place.
        input_shape: Shape of the tensor fed to the model; elements from
            index 2 onwards are passed to ``scale_coords`` as the source size.
        image: Image to draw on.
        names: Class names indexed by class id.
        colors: Mapping from class name to the box colour.

    Returns:
        The image with boxes drawn, or ``image`` unchanged when there are no
        predictions.
    """
    # Nothing to draw: skip building the annotator altogether.
    if not len(predictions):
        return image

    annotator = Annotator(image, line_width=15, example=str(names))
    # Map box coordinates from the network input size back to the image size.
    predictions[:, :4] = scale_coords(input_shape[2:], predictions[:,:4], image.shape).round()

    # Write results.
    for *xyxy, conf, cls in reversed(predictions):
        class_name = names[int(cls)]
        # NOTE(review): name and score are joined without a separator,
        # exactly as before -- confirm whether a space was intended.
        label = f'{class_name}{conf:.2f}'
        annotator.box_label(xyxy, label, color=colors[class_name])

    # Return the annotator's own image rather than ``image``: if the annotator
    # draws on an internal copy, ``image`` itself would carry no boxes.
    return annotator.result()

if __name__ =="__main__":
    from openvino.runtime import Core
    core = Core()

    NAMES = ["xx"]
    # One random colour (three channel values) per class name.
    COLORS = {name: [np.random.randint(0, 255) for _ in range(3)] for name in NAMES}

    # Read the converted model.
    model_path = 'xxx'
    model = core.read_model(model_path)

    # Load the model on the CPU device.
    compiled_model = core.compile_model(model, 'CPU')

    # image_path = 'data/mydata/images/8.png'

    total_time = 0
    # Only referenced by the commented-out fixed-count benchmark loop below.
    count = 20

    # Inference over every matching image.
    images_path = glob.iglob(r"xxx/*.png")
    index = 0
    for image_path in images_path:
        boxes, image, input_shape, cost_time = detect(compiled_model, image_path)
        total_time += cost_time
        index += 1

        # Post-processing.
        image_with_boxes = draw_boxes(boxes[0], input_shape, image, NAMES, COLORS)
        # cv2.imwrite(f"xxx\\{index}.jpg", image_with_boxes)

        # detect() loads the image as RGB, while OpenCV displays arrays as
        # BGR; convert first so the colours are not swapped on screen.
        cv2.namedWindow("test", cv2.WINDOW_NORMAL)
        cv2.imshow("test", cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))
        cv2.waitKey(0)
        # print("++++++++")
        # print(image_path)

    # Average inference time; guarded so that an empty input folder does not
    # raise ZeroDivisionError.
    if index:
        average_infer_time = float(total_time / index)
        print(f"{index}次推理中，平均一张图的推理耗时是{(average_infer_time) * 1000}ms")
    else:
        print("No images matched the input pattern; nothing to average.")

    cv2.destroyAllWindows()


    # for i in range (count):
    #     boxes, image, input_shape, cost_time = detect(compiled_model, image_path)
    #     total_time += cost_time
    #
    #
    # #计算平均推理耗时
    # average_infer_time = float(total_time / count)
    # print(f"{count}次推理中，平均一张图的推理耗时是{(average_infer_time) * 1000}ms")
    #
    #
    # #后处理
    # image_with_boxes = draw_boxes(boxes[0], input_shape, image, NAMES, COLORS)
    #
    # cv2.namedWindow("test", cv2.WINDOW_NORMAL)
    # cv2.imshow("test", image_with_boxes)
    # cv2.waitKey(0)

    #visualize results(没看到可视化的检测图)
    # Image.fromarray(image_with_boxes)

    # cv2.imshow("test", Image)
    # cv2.waitKey(0)

This part loads the OpenVINO IR (.xml) model and runs inference in Python.

Stitch and save the inference results

import os
import glob
from PIL import Image
import cv2

def _save_grid(tiles, sheet_id):
    """Paste up to nine 200x300 tiles row by row on a 600x900 canvas and save it."""
    sheet = Image.new('RGB', (600, 900))
    for slot, tile in enumerate(tiles):
        row, col = divmod(slot, 3)
        sheet.paste(tile, (col * 200, row * 300))
    # Export the stitched image.
    sheet.save(f"xxx\\{sheet_id}.jpg")


if __name__ == "__main__":
    # Snapshot the file list before writing anything, so that sheets saved
    # during the run cannot be picked up as inputs; sorting makes the tile
    # order reproducible.
    image_paths = sorted(glob.glob(r"xxx/*.jpg"))

    count = 0  # number of images consumed so far; also names the saved sheet
    images = []
    for image_path in image_paths:
        # Every image is appended before the batch check, so none is skipped
        # (previously the image arriving when a batch was full was dropped).
        with Image.open(image_path) as source:
            images.append(source.resize((200, 300)))
        count += 1

        if len(images) == 9:
            _save_grid(images, count)
            images = []

    # Save the trailing images as a partially filled sheet instead of
    # discarding them; unused cells stay black.
    if images:
        _save_grid(images, count)

YOLOv5 (object detection only, not instance segmentation): NNCF quantization, evaluation, and OpenVINO inference on the Python side

YOLOv5: NNCF quantization, evaluation, and OpenVINO inference on the Python side

Model quantization (FP32, FP16, INT8) and evaluation

OpenVINO inference on the Python side

Stitch and save the inference results

Guess you like