I. Introducción
Anteriormente, la detección de objetos se lograba usando el modelo directamente desde programas escritos en Python. Por eso quiero aprender a usar el lenguaje C++ para la detección de objetos, lo que además me ayuda a familiarizarme mejor con la sintaxis de C++. En aras de la simplicidad, utilizar el módulo dnn de OpenCV para llamar al modelo YOLO es una buena elección.
II. Configuración del entorno
Ejecuto el programa con VS2019 sobre el sistema Win10, por lo que necesito realizar la configuración necesaria del entorno, como instalar OpenCV y añadir OpenCV al proyecto de VS2019. Para obtener más información, consulte el siguiente artículo:
https://blog.csdn.net/qq321772514/article/details/90514538
III. Implementación de código
El siguiente código proviene del siguiente artículo (el enlace de GitHub del proyecto también está en él):
https://blog.csdn.net/nihate/article/details/108850477
1 、 yolo.h
#pragma once
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
using namespace std;
// Configuration bundle for one YOLO variant. NOTE: this struct is
// aggregate-initialized positionally (see yolo_nets below), so the
// declaration order of the fields must not change.
struct Net_config
{
float confThreshold; // Confidence threshold
float nmsThreshold; // Non-maximum suppression threshold
int inpWidth; // Width of network's input image
int inpHeight; // Height of network's input image
string classesFile; // Path to text file with one class name per line
string modelConfiguration; // Path to the Darknet .cfg file
string modelWeights; // Path to the Darknet .weights file
string netname; // Display name; copied into YOLO::netname (char[20])
};
// YOLO object detector: loads a Darknet model through OpenCV's dnn
// module and annotates frames in place with boxes, labels and timing.
class YOLO
{
public:
YOLO(Net_config config); // loads class names and the network
void detect(Mat& frame); // runs inference and draws results onto frame
private:
float confThreshold; // minimum class score to keep a detection
float nmsThreshold; // IoU threshold used by NMSBoxes
int inpWidth; // network input width (blob size)
int inpHeight; // network input height (blob size)
char netname[20]; // fixed buffer; filled from config in the constructor
vector<string> classes; // class names read from classesFile
Net net; // the loaded dnn network
// Filters raw network outputs by confidence, applies NMS, draws survivors.
void postprocess(Mat& frame, const vector<Mat>& outs);
// Draws one bounding box plus its "class:confidence" label.
void drawPred(int classId, float conf,
int left, int top, int right, int bottom, Mat& frame);
};
// One ready-made configuration per supported model. Initializers are
// positional and must follow Net_config's field order:
// confThreshold, nmsThreshold, inpWidth, inpHeight,
// classesFile, modelConfiguration, modelWeights, netname.
Net_config yolo_nets[4] = {
{
0.5, 0.4, 416, 416,"coco.names",
"yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},
{
0.5, 0.4, 608, 608,"coco.names",
"yolov4/yolov4.cfg", "yolov4/yolov4.weights", "yolov4"},
{
0.5, 0.4, 320, 320,"coco.names",
"yolo-fastest/yolo-fastest-xl.cfg",
"yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},
{
0.5, 0.4, 320, 320,"coco.names",
"yolobile/csdarknet53s-panet-spp.cfg",
"yolobile/yolobile.weights", "yolobile"}
};
2 、 main_yolo.cpp
#include "yolo.h"
// Constructs the detector from a Net_config: copies thresholds and the
// network input size, loads the class-name list, and reads the Darknet
// model, forcing the plain OpenCV backend on CPU.
YOLO::YOLO(Net_config config)
{
    cout << "Net use " << config.netname << endl;
    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;
    this->inpWidth = config.inpWidth;
    this->inpHeight = config.inpHeight;
    // strcpy_s invokes the invalid-parameter handler (aborting the program
    // on MSVC) when the source does not fit; truncate instead so a long
    // netname cannot crash the constructor.
    strncpy_s(this->netname, sizeof(this->netname),
        config.netname.c_str(), _TRUNCATE);

    // Load one class label per line. The original silently produced an
    // empty label list when the file was missing; warn instead so the
    // misconfiguration is visible (boxes are still drawn, unlabeled).
    ifstream ifs(config.classesFile.c_str());
    if (!ifs.is_open())
        cerr << "Warning: cannot open classes file: " << config.classesFile << endl;
    string line;
    while (getline(ifs, line)) this->classes.push_back(line);

    this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
    this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
    this->net.setPreferableTarget(DNN_TARGET_CPU);
}
void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)
// Remove the bounding boxes with low confidence using non-maxima suppression
{
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
// Different models produce different output layouts. YOLO's outs is
// [[[x,y,w,h,...],[],...[]]]; the extra outer dimension exists because the
// network input is 4-D — its first dimension is the batch size, which is 1
// when a single image is inferred.
for (size_t i = 0; i < outs.size(); ++i)
{
// Scan through all the bounding boxes output from the network and keep only the
// ones with high confidence scores. Assign the box's class label as the class
// with the highest score for the box.
// data is a raw pointer into the output Mat; each iteration advances it by
// one row, i.e. from one candidate box's record to the next.
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
// Columns 0-4 are x,y,w,h,objectness; columns 5..cols-1 are class scores.
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
// Get the value and location of the maximum score.
// The index of the max score maps directly onto the class index
// (e.g. the 80 COCO classes).
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > this->confThreshold)
{
// Box coordinates are normalized [0,1]; scale to pixel units and
// convert center/size to the top-left corner Rect form.
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
// Perform non maximum suppression to eliminate redundant overlapping boxes with
// lower confidences
vector<int> indices;
NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);
}
}
// Draws one predicted bounding box on frame, with a "class:confidence"
// label at its top-left corner (confidence only if no class list loaded).
void YOLO::drawPred(int classId, float conf,
    int left, int top, int right, int bottom, Mat& frame)
{
    // Draw a rectangle displaying the bounding box (red, thickness 3).
    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);

    // Get the label for the class name and its confidence.
    string label = format("%.2f", conf);
    if (!this->classes.empty())
    {
        CV_Assert(classId < (int)this->classes.size());
        label = this->classes[classId] + ":" + label;
    }

    // Display the label at the top of the bounding box. Use a single font
    // scale for both measuring and drawing: the original measured the text
    // at scale 0.5 but drew it at 0.75, so the clamp below used a height
    // smaller than the rendered text.
    const double fontScale = 0.75;
    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, fontScale, 1, &baseLine);
    // Keep the label baseline inside the image when the box touches the top edge.
    top = max(top, labelSize.height);
    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, fontScale,
        Scalar(0, 255, 0), 1);
}
// Runs one forward pass of the network on frame and annotates it in
// place with the surviving detections plus the measured inference time.
void YOLO::detect(Mat& frame)
{
    // Preprocess: scale pixels by 1/255, resize to the network's input
    // size, swap BGR->RGB (swapRB = true), no mean subtraction, no crop.
    Mat inputBlob;
    blobFromImage(frame, inputBlob, 1 / 255.0,
        Size(this->inpWidth, this->inpHeight),
        Scalar(0, 0, 0), true, false);
    this->net.setInput(inputBlob);

    // Forward through every unconnected (i.e. output) layer, then filter
    // and draw the results.
    vector<Mat> outputs;
    this->net.forward(outputs, this->net.getUnconnectedOutLayersNames());
    this->postprocess(frame, outputs);

    // Overlay the inference time; getPerfProfile reports ticks, so divide
    // by ticks-per-millisecond to get milliseconds.
    vector<double> perLayerTimes;
    double ticksPerMs = getTickFrequency() / 1000;
    double elapsedMs = net.getPerfProfile(perLayerTimes) / ticksPerMs;
    string banner = format("%s Inference time : %.2f ms", this->netname, elapsedMs);
    putText(frame, banner, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
    //imwrite(format("%s_out.jpg", this->netname), frame);
}
int main()
{
YOLO yolo_model(yolo_nets[0]);
string imgpath = "bus.jpg";
Mat srcimg = imread(imgpath);
yolo_model.detect(srcimg);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(0);
destroyAllWindows();
}