C++ Study Notes 16: Using OpenCV (C++) to call a YOLO model for object detection

I. Introduction

Previously I did object detection in Python, calling a pre-trained model directly. Now I want to learn how to do the same in C++, which also helps me get more familiar with C++ syntax. For simplicity, using OpenCV's dnn module to load and run a YOLO model is a good choice.
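In outline, the dnn workflow is: load the Darknet network, convert the image to a blob, run a forward pass over the output layers, then filter the raw detections with confidence thresholding and non-maximum suppression. Below is a minimal sketch of that flow (same model paths and input size as the full code in section III); the complete, commented implementation follows later.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
	// Load the Darknet cfg/weights pair (files assumed to be next to the executable)
	cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov3/yolov3.cfg",
	                                               "yolov3/yolov3.weights");
	cv::Mat img = cv::imread("bus.jpg");

	// Convert the image to a 4-D blob: scale to [0,1], resize to the network input, swap R/B
	cv::Mat blob;
	cv::dnn::blobFromImage(img, blob, 1 / 255.0, cv::Size(416, 416),
	                       cv::Scalar(), true, false);
	net.setInput(blob);

	// Forward pass over all unconnected output layers
	std::vector<cv::Mat> outs;
	net.forward(outs, net.getUnconnectedOutLayersNames());
	// outs now holds the raw detections; thresholding and NMS come next (see postprocess below)
	return 0;
}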

II. Environment setup

I run the program with VS2019 on Windows 10, so some environment setup is needed first: installing OpenCV and adding the OpenCV include/library paths to the VS2019 project. For details, see the following article:

https://blog.csdn.net/qq321772514/article/details/90514538
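A quick way to check that the include paths and linker settings are correct (my own minimal check, not part of the referenced article) is to build and run a small program that prints the OpenCV version:

#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>

int main()
{
	// If this compiles, links, and runs, the VS2019 include/library/.dll setup is working
	std::cout << "OpenCV version: " << CV_VERSION << std::endl;
	std::cout << cv::getBuildInformation() << std::endl;
	return 0;
}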

III. Code implementation

The following code comes from the article below (the GitHub link for the project can also be found there):

https://blog.csdn.net/nihate/article/details/108850477

1. yolo.h

#pragma once
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace dnn;
using namespace std;

struct Net_config
{
	float confThreshold; // Confidence threshold
	float nmsThreshold;  // Non-maximum suppression threshold
	int inpWidth;  // Width of network's input image
	int inpHeight; // Height of network's input image
	string classesFile;        // Text file with one class name per line
	string modelConfiguration; // Darknet .cfg file
	string modelWeights;       // Darknet .weights file
	string netname;            // Short name used for logging and output
};

class YOLO
{
public:
	YOLO(Net_config config);
	void detect(Mat& frame);
private:
	float confThreshold;
	float nmsThreshold;
	int inpWidth;
	int inpHeight;
	char netname[20];
	vector<string> classes;
	Net net;
	void postprocess(Mat& frame, const vector<Mat>& outs);
	void drawPred(int classId, float conf, 
				  int left, int top, int right, int bottom, Mat& frame);
};

Net_config yolo_nets[4] = {
	{0.5, 0.4, 416, 416, "coco.names",
	 "yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},

	{0.5, 0.4, 608, 608, "coco.names",
	 "yolov4/yolov4.cfg", "yolov4/yolov4.weights", "yolov4"},

	{0.5, 0.4, 320, 320, "coco.names",
	 "yolo-fastest/yolo-fastest-xl.cfg",
	 "yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},

	{0.5, 0.4, 320, 320, "coco.names",
	 "yolobile/csdarknet53s-panet-spp.cfg",
	 "yolobile/yolobile.weights", "yolobile"}
};
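All four entries share the same classesFile. It is expected to be a plain text file with one class name per line (80 lines for the COCO-trained models above); the standard coco.names begins like this:

person
bicycle
car
...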

2. main_yolo.cpp

#include "yolo.h"

YOLO::YOLO(Net_config config)
{
	cout << "Net use " << config.netname << endl;
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;
	this->inpWidth = config.inpWidth;
	this->inpHeight = config.inpHeight;
	strcpy_s(this->netname, config.netname.c_str());

	ifstream ifs(config.classesFile.c_str());
	string line;
	while (getline(ifs, line)) this->classes.push_back(line);

	this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
	this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
	this->net.setPreferableTarget(DNN_TARGET_CPU);
}

void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)
// Remove the bounding boxes with low confidence using non-maxima suppression
{
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	// Different models may produce different output shapes; here YOLO's outs is [[[x,y,w,h,...],[],...[]]].
	// The extra dimension exists because the blob fed to the network is four-dimensional: the first
	// dimension is the batch size, which is 1 when inferring on a single image. Each row of an output is
	// [center_x, center_y, width, height, objectness, class scores...], with coordinates normalized to [0, 1].
	for (size_t i = 0; i < outs.size(); ++i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		// data is a pointer; each iteration it advances from one box's data to the next box's
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			// Get the value and location of the maximum score
			// Find the index of the maximum class score, which corresponds to one of the 80 COCO classes
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > this->confThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
	}
}

void YOLO::drawPred(int classId, float conf, 
	int left, int top, int right, int bottom, Mat& frame)   
// Draw the predicted bounding box
{
	//Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);

	//Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	if (!this->classes.empty())
	{
		CV_Assert(classId < (int)this->classes.size());
		label = this->classes[classId] + ":" + label;
	}

	//Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, 
			Scalar(0, 255, 0), 1);
}

void YOLO::detect(Mat& frame)
{
	Mat blob;
	blobFromImage(frame, blob, 1 / 255.0, 
				  Size(this->inpWidth, this->inpHeight), 
				  Scalar(0, 0, 0), true, false);
	
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	this->postprocess(frame, outs);

	vector<double> layersTimes;
	double freq = getTickFrequency() / 1000;
	double t = net.getPerfProfile(layersTimes) / freq;
	string label = format("%s Inference time : %.2f ms", this->netname, t);
	putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
	//imwrite(format("%s_out.jpg", this->netname), frame);
}

int main()
{
	YOLO yolo_model(yolo_nets[0]);
	string imgpath = "bus.jpg";
	Mat srcimg = imread(imgpath);
	yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
}
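The main() above processes a single image. A straightforward extension (my own sketch, not part of the original project) is to replace it with a cv::VideoCapture loop so the same detector runs on a webcam or a video file:

#include "yolo.h"

int main()
{
	YOLO yolo_model(yolo_nets[0]);
	VideoCapture cap(0);            // 0 = default camera; a video file path also works
	if (!cap.isOpened()) return -1;

	Mat frame;
	static const string kWinName = "YOLO video detection";
	namedWindow(kWinName, WINDOW_NORMAL);
	while (cap.read(frame))
	{
		yolo_model.detect(frame);   // draws boxes and the timing label onto frame
		imshow(kWinName, frame);
		if (waitKey(1) == 27) break; // press Esc to quit
	}
	destroyAllWindows();
	return 0;
}

Note that inference runs on the CPU (DNN_TARGET_CPU), so with the full yolov3 model the frame rate will be low; the yolo-fastest configuration in yolo_nets is a better fit for real-time use.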


Source: blog.csdn.net/qq_39507748/article/details/113063822