yolov8 の TensorRT デプロイメント (C++ バージョン)

1. 環境構築

        CUDAおよびCUDNN環境を設定し、TensorRT環境をインストールする必要があります。以下のブログを参照してください。

【Ubuntu版】TensorRTインストールチュートリアル(tarパッケージ方法)_ubuntuインストールtensorrt - CSDNブログ

2. モデルの準備

        まず、ONNX モデル ファイルが必要です。ここでは PyTorch -> ONNX の順に変換します。

from ultralytics import YOLO

# Load the YOLOv8 model from the local weights file ./yolov8n.pt.
model = YOLO("./yolov8n.pt")

# Only run the export when this file is executed as a script.
if __name__ == '__main__':

    # Export the loaded model in ONNX format.
    model.export(format="onnx")

        次に、yolov8n.onnx ファイルを取得し、続いて ONNX->trt を実行します。

TensorRT のインストール場所を見つける

# Build a TensorRT engine from the ONNX model; --saveEngine writes the serialized engine to disk.
TensorRT-8.6.4.3/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt

これにより、TensorRT が使用できるモデル ファイルが生成されます。

3. yolov8の出力結果の分析

        yolov8 の出力 output0 の shape は 1x84x8400 です。

8400 は候補ボックス (予測ボックス) の数で、yolov5 の 25200 よりもはるかに少ないです。

84 個の値のうち最初の 4 個はボックスの xywh (中心座標 + 幅と高さ) で、yolov5 と比べると objectness (オブジェクト性) の 1 項目がありません。yolov8 では、objectness = 残りの 80 クラスの信頼度のうちの最大値、として扱います。

最終スコア = objectness (オブジェクト性) * confidence (クラス信頼度)

4. メインコード

detect.cpp

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <cuda_runtime_api.h>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>

#include "NvInfer.h"
#include "processing.hpp"
//#include "logging.h"

using namespace nvinfer1;
using namespace std;

// Width and height, in pixels, of the letterboxed image that main() feeds to the network.
const int model_width = 640;
const int model_height = 640;

class MyLogger : public nvinfer1::ILogger
{
    public:
    explicit MyLogger(nvinfer1::ILogger::Severity severity =nvinfer1::ILogger::Severity::kWARNING) : severity_(severity) {}

    void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
    {
        if (severity <= severity_) {
            std::cerr << msg << std::endl;
        }
    }
    nvinfer1::ILogger::Severity severity_;
};

int main()
{
//一、图像处理
    string image_path = "/home/hitcrt/code/tensorrt/TRT_test/street.jpg";  //填写自己图片路径(需要绝对路径)
    cv::Mat input_image = cv::imread(image_path);

    float* input_blob = new float[model_height * model_width * 3];
    cv::Mat resize_image;
	//比例
    const float _ratio = std::min(model_width / (input_image.cols * 1.0f),
                            model_height / (input_image.rows * 1.0f));
    // 等比例缩放
    const int border_width = input_image.cols * _ratio;
    const int border_height = input_image.rows * _ratio;
    // 计算偏移值
    const int x_offset = (model_width - border_width) / 2;
    const int y_offset = (model_height - border_height) / 2;

    //将输入图像缩放至resize_image
    cv::resize(input_image, resize_image, cv::Size(border_width, border_height));
    //复制图像并且制作边界
    cv::copyMakeBorder(resize_image, resize_image, y_offset, y_offset, x_offset,
                        x_offset, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
    // 转换为RGB格式
    cv::cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
    
    //归一化
    const int channels = resize_image.channels();
    const int width = resize_image.cols;
    const int height = resize_image.rows;
    for (int c = 0; c < channels; c++) {
        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                input_blob[c * width * height + h * width + w] =
                    resize_image.at<cv::Vec3b>(h, w)[c] / 255.0f;  //at<Vec3b> 是 OpenCV 中用于访问图像像素的一种方法,使用 at<Vec3b> 获取彩色图像中特定位置的像素颜色值
            }
        }
    }

//二、模型反序列化
    MyLogger logger;
    //读取trt信息
    const std::string engine_file_path = "/home/hitcrt/code/tensorrt/TRT_test/yolov8n.trt";  //填写自己trt文件路径(需要绝对路径)
    std::stringstream engine_file_stream;
    engine_file_stream.seekg(0, engine_file_stream.beg);  //从起始位置偏移0个字节,指针移动到文件流的开头
    std::ifstream ifs(engine_file_path);
    engine_file_stream << ifs.rdbuf();  //将读取到的数据流交给engine_file_stream
    ifs.close();

    engine_file_stream.seekg(0, std::ios::end); //先把文件输入流指针定位到文档末尾来获取文档的长度
    const int model_size = engine_file_stream.tellg();  //获取文件流的总长度
    engine_file_stream.seekg(0, std::ios::beg);
    void *model_mem = malloc(model_size);               //开辟一样长的空间
    engine_file_stream.read(static_cast<char *>(model_mem), model_size);    //将内容读取到model_mem中

    nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(model_mem, model_size);

    free(model_mem);

//三、模型推理
    nvinfer1::IExecutionContext *context = engine->createExecutionContext();

    void *buffers[2];
    // 获取模型输入尺寸并分配GPU内存
    nvinfer1::Dims input_dim = engine->getBindingDimensions(0);
    int input_size = 1;
    for (int j = 0; j < input_dim.nbDims; ++j) {
        if(input_dim.d[j] < 0)
            input_size *= -input_dim.d[j];
        else
            input_size *= input_dim.d[j];
    }
    cudaMalloc(&buffers[0], input_size * sizeof(float));

    // 获取模型输出尺寸并分配GPU内存
    nvinfer1::Dims output_dim = engine->getBindingDimensions(1);

    int output_size = 1;
    for (int j = 0; j < output_dim.nbDims; ++j) {
        if(output_dim.d[j] < 0)
            output_size *= -output_dim.d[j];
        else
            output_size *= output_dim.d[j];
    }
    cudaMalloc(&buffers[1], output_size * sizeof(float));

    // 给模型输出数据分配相应的CPU内存
    float *output_buffer = new float[output_size];
    //数据投入
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // 拷贝输入数据
    cudaMemcpyAsync(buffers[0], input_blob, input_size * sizeof(float),
                    cudaMemcpyHostToDevice, stream);
    // 执行推理
    if(context->enqueueV2(buffers, stream, nullptr))
    {
        cout << "enqueueV2执行推理成功" << endl;
    }
    else{
        cout << "enqueueV2执行推理失败" << endl;
        return -1;
    }
    // 拷贝输出数据
    cudaMemcpyAsync(output_buffer, buffers[1], output_size * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);

    cudaStreamSynchronize(stream);

    delete context;
    delete engine;
    delete runtime;
    delete[] input_blob;

//四、输出结果output_buffer,放入objs  xywh为中心点坐标 和宽高
    float *ptr = output_buffer;     // 1x84x8400  =  705600
    vector<vector<float>> temp(84, vector<float>(8400));
    vector<vector<float>> outVec(8400, vector<float>(84));
    for(int i = 0; i < 705600; i++)
    {
        temp[i/8400][i%8400] = *ptr;
        ptr++;
    }
    for(int i = 0; i < 84; i++)
    {
        for(int j = 0; j < 8400; j++)
        {
            outVec[j][i] = temp[i][j];
        }
    }
    std::vector<Object> objs;
    for (int i = 0; i < 8400; ++i)
    {
        const float objectness = *(std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83));
        if (objectness >= 0.45f)
        {
            const int label = std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83) - (outVec[i].begin() + 4);  //std::max_element返回范围内的最大元素
            const float confidence = outVec[i][label + 4] * objectness;
            if (confidence >= 0.25f) {
                const float bx = outVec[i][0];
                const float by = outVec[i][1];
                const float bw = outVec[i][2];
                const float bh = outVec[i][3];
                Object obj;
                // 还原图像尺寸中box的尺寸比例,这里要减掉偏移值,并把box中心点坐标xy转成左上角坐标xy
                obj.box.x = (bx - bw * 0.5f - x_offset) / _ratio;
                obj.box.y = (by - bh * 0.5f - y_offset) / _ratio;
                obj.box.width = bw / _ratio;
                obj.box.height = bh / _ratio;
                obj.label = label;
                obj.confidence = confidence;
                objs.push_back(std::move(obj));
            }
        }
    }  // i loop

//五、NMS非极大值抑制
    vector<Object> output;
    hardNMS(objs, output, 0.6, 10);

//六、画框
    vector<Object>::iterator it = output.begin();
    while(it != output.end()){
        cv::Point topLeft(it->box.x, it->box.y);
        cv::Point bottomRight(it->box.x + it->box.width, it->box.y + it->box.height);
        cv::rectangle(input_image, topLeft, bottomRight, cv::Scalar(0, 0, 255), 2);
        std::stringstream buff;
        buff.precision(2);  //覆盖默认精度,置信度保留2位小数
        buff.setf(std::ios::fixed);
        buff << it->confidence;
        string text =names[it->label] + " " + buff.str();
        cv::putText(input_image, text, topLeft, 0, 1, cv::Scalar(0, 255, 0), 2);
        it++;
    }
    cv::imwrite("detected.jpg", input_image);

    return 0;
}

processing.hpp

#include <iostream>
#include <vector>
#include <list>
using namespace std;

//以coco数据集为例
// Class names indexed by label id, using the COCO dataset (80 classes) as the example.
// Every entry is the bare name with no embedded quote characters, so captions built
// from it read e.g. "skis 0.87". `static const` gives each translation unit its own
// read-only copy, so including this header from several .cpp files does not produce
// duplicate definitions.
static const std::string names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"};

// Axis-aligned rectangle stored as an origin (x, y) plus its extent.
struct BOX
{
    float x, y;           // origin of the rectangle
    float width, height;  // extent along x and y
};

// One detection: a bounding box, a class label id and a score.
struct Object
{
    BOX box;    // left-upper corner point plus width/height
    int label;  // class id
    float confidence;  // "confidence" here actually means the score, i.e. objectness * confidence
};

bool cmp(Object &obj1, Object &obj2){
    return obj1.confidence > obj2.confidence;
}

// Intersection-over-union of the boxes of two detections.
// Each box is read as a left-upper corner (x, y) plus width/height.
// Returns intersection_area / (area1 + area2 - intersection_area), a value in [0, 1];
// returns 0 when the boxes do not overlap or when the union has no area.
// `inline` because the function is defined in a header.
inline float iou_of(const Object &obj1, const Object &obj2)
{
    // Left-upper (lu) and right-bottom (rb) corners of both boxes.
    const float x1_lu = obj1.box.x;
    const float y1_lu = obj1.box.y;
    const float x1_rb = x1_lu + obj1.box.width;
    const float y1_rb = y1_lu + obj1.box.height;
    const float x2_lu = obj2.box.x;
    const float y2_lu = obj2.box.y;
    const float x2_rb = x2_lu + obj2.box.width;
    const float y2_rb = y2_lu + obj2.box.height;

    // Intersection extent, clamped at zero: without the clamp two boxes that are
    // disjoint on both axes would yield a positive product of two negative extents.
    const float i_w = std::max(0.0f, std::min(x1_rb, x2_rb) - std::max(x1_lu, x2_lu));
    const float i_h = std::max(0.0f, std::min(y1_rb, y2_rb) - std::max(y1_lu, y2_lu));
    const float intersection = i_w * i_h;

    // Union area by inclusion-exclusion (not the area of the enclosing rectangle).
    const float area1 = std::max(0.0f, obj1.box.width) * std::max(0.0f, obj1.box.height);
    const float area2 = std::max(0.0f, obj2.box.width) * std::max(0.0f, obj2.box.height);
    const float union_area = area1 + area2 - intersection;

    if (union_area <= 0.0f)
        return 0.0f;  // degenerate boxes: avoid dividing by zero
    return intersection / union_area;
}

std::vector<int> hardNMS(std::vector<Object> &input, std::vector<Object> &output, float iou_threshold, unsigned int topk)
{  //Object只有confidence和label
    const unsigned int box_num = input.size(); 
    std::vector<int> merged(box_num, 0);
    std::vector<int> indices;

    if (input.empty())
        return indices;
    std::vector<Object> res;
    //先对bboxs按照conf进行排序
    std::sort(input.begin(), input.end(),
            [](const Object &a, const Object &b)
            { return a.confidence > b.confidence; });   //[]表示C++中的lambda函数
    
    unsigned int count = 0;
    for (unsigned int i = 0; i < box_num; ++i)
    {   //按照conf依次遍历bbox
        if (merged[i])
            continue;
        //如果已经被剔除,continue
        Object buf;
        buf = input[i];
        merged[i] = 1; //剔除当前bbox

        //由于后面的置信度低,只需要考虑当前bbox后面的即可
        for (unsigned int j = i + 1; j < box_num; ++j)
        {
            if (merged[j])
                continue;

            float iou = static_cast<float>(iou_of(input[j], input[i]));
            //计算iou
            if (iou > iou_threshold)
            { //超过阈值认为重合,剔除第j个bbox,
                merged[j] = 1;
            }
        }
        indices.push_back(i);
        res.push_back(buf); //将最高conf的bbox填入结果

        // keep top k
        //获取前k个输出,这个应该是针对密集输出的情况,此时input已经做了conf剔除
        count += 1;
        if (count >= topk)
            break;
    }
    output.swap(res);

    return indices;
}

// Logistic function 1 / (1 + e^-x); the intermediate sum and division are done
// in double precision before the result is narrowed to float.
float sigmoid(float x)
{
    const double denominator = exp(-x) + 1.0;
    return 1.0 / denominator;
}

CMakeLists.txt

# Builds the Demo executable against CUDA, TensorRT and OpenCV.
# Minimum raised from 2.6: current CMake releases deprecate or reject
# compatibility with versions older than 3.5.
cmake_minimum_required(VERSION 3.5)
project(Demo)

set(CMAKE_BUILD_TYPE "Debug")    # for debugging with gdb

# C++11, requested once through the standard variables (14?).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)    # -std=c++11 rather than -std=gnu++11

# option() takes <variable> "<help text>" [value]; must precede find_package(CUDA).
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)    # may not be necessary

find_package(CUDA REQUIRED)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message("embed_platform on")
    include_directories(/usr/local/cuda/targets/aarch64-linux/include)
    link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
    message("embed_platform off")
    include_directories(/usr/local/cuda/include)
    link_directories(/usr/local/cuda/lib64)
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

find_package(OpenCV REQUIRED)

# TensorRT install prefix; override with -DTENSORRT_ROOT=/path/to/TensorRT.
set(TENSORRT_ROOT "/home/home_expand/TensorRT-8.6.1.6" CACHE PATH "TensorRT installation directory")
include_directories(${OpenCV_INCLUDE_DIRS} ${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

# Thread support is resolved through the Threads package so the flag reaches both the
# compile and the link step.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# Appended after CMAKE_CXX_FLAGS on the compile line, as the original -O2 was.
add_compile_options(-O2)

# NOTE(review): the source file name must match the .cpp file actually on disk.
add_executable(Demo segment.cpp)

target_link_libraries(Demo nvinfer cudart ${OpenCV_LIBRARIES} Threads::Threads)

おすすめ

転載: blog.csdn.net/liujiahao123987/article/details/133892746