Using the OpenCV C++ dnn module with YOLOv5 and the Intel RealSense D435 depth camera for object detection.

1. Code

#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <librealsense2/rs.hpp> // Include RealSense Cross Platform API

using namespace cv;
using namespace dnn;
using namespace std;
using namespace rs2;

// Class name array; replace with the classes your YOLO model actually detects
// (the number of entries must match the model's class count)
const char* classNames[] = {"object1", "object2", "object3", "object4"};

int main(int argc, char** argv)
{
    // Path to the ONNX file containing the trained YOLO model weights and network definition
    String model = "yolov5.onnx";  // replace with the actual model file path

    // Load the pre-trained ONNX model into a DNN network
    Net net = readNetFromONNX(model);
    // Use the OpenCV inference backend and run on the CPU
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);

    // Declare depth colorizer for pretty visualization of depth data
    colorizer color_map;

    // Declare RealSense pipeline, encapsulating the actual device and sensors
    pipeline p;
    // Request a 640x480 BGR color stream so the buffer matches the cv::Mat created below
    // (the default profile may deliver RGB8, which would swap the red and blue channels)
    config cfg;
    cfg.enable_stream(RS2_STREAM_COLOR, 640, 480, RS2_FORMAT_BGR8, 30);
    // Start streaming with this configuration
    p.start(cfg);

    // Loop until the user presses a key in the display window
    while (waitKey(1) < 0) {
        // Wait for the next set of frames from the camera
        frameset frames = p.wait_for_frames();
        // Get a frame from the RGB camera
        frame color = frames.get_color_frame();

        // Wrap the RealSense color buffer in an OpenCV matrix (640x480, BGR, no copy)
        Mat frame(Size(640, 480), CV_8UC3, (void*)color.get_data(), Mat::AUTO_STEP);

        Mat blob;   // holds the preprocessed image in the network's input format

        // Convert the frame to a network input blob: scale to [0,1], resize to the
        // resolution the ONNX model was exported with (416x416 here) and swap BGR->RGB
        blobFromImage(frame, blob, 1/255.0, cv::Size(416, 416), Scalar(0,0,0), true, false);

        // Set the blob as the network's input
        net.setInput(blob);

        // Run a forward pass to get the outputs of the unconnected output layers
        vector<Mat> outs;
        net.forward(outs, net.getUnconnectedOutLayersNames());

        // YOLOv5 ONNX output is a [1 x N x (5 + numClasses)] tensor: each row is
        // [cx, cy, w, h, objectness, class scores...] in network-input (416x416) pixel units
        float xScale = frame.cols / 416.0f;
        float yScale = frame.rows / 416.0f;
        for (size_t i = 0; i < outs.size(); ++i) {
            // View the 3D output tensor as an N x (5 + numClasses) matrix (no copy)
            Mat detections(outs[i].size[1], outs[i].size[2], CV_32F, outs[i].data);
            for (int j = 0; j < detections.rows; ++j) {
                float objectness = detections.at<float>(j, 4);
                Mat scores = detections.row(j).colRange(5, detections.cols);
                Point classIdPoint;
                double confidence;

                // Pick the best-scoring class for this candidate box
                minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
                confidence *= objectness;

                if (confidence > 0.5) {
                    // Rescale the box from network-input pixels to the original frame
                    int centerX = (int)(detections.at<float>(j, 0) * xScale);
                    int centerY = (int)(detections.at<float>(j, 1) * yScale);
                    int width = (int)(detections.at<float>(j, 2) * xScale);
                    int height = (int)(detections.at<float>(j, 3) * yScale);
                    int left = centerX - width / 2;
                    int top = centerY - height / 2;

                    // Draw the box; overlapping detections are not merged here
                    // (cv::dnn::NMSBoxes could be applied for non-maximum suppression)
                    rectangle(frame, Rect(left, top, width, height), Scalar(0, 255, 0), 2);
                    int classIdx = static_cast<int>(classIdPoint.x);
                    string classLabel = string(classNames[classIdx]);
                    string label = classLabel + ":" + format("%.2f", confidence);

                    // Draw the class name and confidence on a filled background
                    int baseLine;
                    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                    top = max(top, labelSize.height);
                    rectangle(frame, Point(left, top - labelSize.height), Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
                    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
                }
            }
        }

        // Show the processed frame
        imshow("YOLOv5", frame);
    }

    return 0;
}

Note: since I do not have the camera on hand, I only researched the information and wrote the code based on the documentation; I have not tested it in practice.
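The listing above only consumes the color stream, even though the D435 is a depth camera. A natural extension is to align the depth frame to the color frame and read the distance at the center of each detected box. The sketch below is likewise untested; the helper name distanceAtBoxCenter is just illustrative, and it assumes a depth stream was also enabled on the pipeline (e.g. cfg.enable_stream(RS2_STREAM_DEPTH, 640, 480, RS2_FORMAT_Z16, 30)). It uses the rs2::align and rs2::depth_frame::get_distance APIs from librealsense2.

#include <librealsense2/rs.hpp>
#include <opencv2/core.hpp>

// Sketch: query the depth (in meters) at the center of a detected bounding box.
// The depth frame is first aligned to the color frame so pixel coordinates match.
float distanceAtBoxCenter(const rs2::frameset& frames, const cv::Rect& box)
{
    // Align depth to the color stream's viewpoint
    static rs2::align alignToColor(RS2_STREAM_COLOR);
    rs2::frameset aligned = alignToColor.process(frames);

    rs2::depth_frame depth = aligned.get_depth_frame();
    int cx = box.x + box.width / 2;
    int cy = box.y + box.height / 2;

    // get_distance() returns the range at pixel (x, y) in meters
    return depth.get_distance(cx, cy);
}

Inside the detection loop, calling this with the current frameset and Rect(left, top, width, height) would give the object's range, which could then be appended to the label string.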

2. Installation packages

You need to install OpenCV and the librealsense2 library.

Link: Intel.RealSense.SDK.zip in the CSDN resource library
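If it is unclear whether both libraries are visible to the compiler, a small check program (a minimal sketch assuming a standard installation of both packages) can print their versions and list any connected RealSense devices:

#include <iostream>
#include <opencv2/core/version.hpp>
#include <librealsense2/rs.hpp>

int main()
{
    // Version strings provided by each library at compile time
    std::cout << "OpenCV version: " << CV_VERSION << std::endl;
    std::cout << "librealsense2 version: " << RS2_API_VERSION_STR << std::endl;

    // Enumerate RealSense devices currently connected over USB
    rs2::context ctx;
    rs2::device_list devices = ctx.query_devices();
    std::cout << "RealSense devices found: " << devices.size() << std::endl;
    for (auto&& dev : devices)
        std::cout << "  " << dev.get_info(RS2_CAMERA_INFO_NAME) << std::endl;

    return 0;
}

When building, link against both libraries (for example -lrealsense2 plus the OpenCV libraries reported by pkg-config --libs opencv4 on Linux).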

Origin blog.csdn.net/qq_58060770/article/details/136082485