C++ reads the yolov5 model for target detection (reads the real-time monitoring of the camera)

article introduction

This article is a project based on the yolov5 model. It mainly uses c++ to call and test the yolov5 model, so as to realize the target detection task. There are two main points in the task process, the first version problem, and the second configuration problem

1. Required software and version

      Training part pytorch==1.13.0 opencv==3.4.1 other direct pip

      c++ deployment 

       vs2019 or vs2022    

        libtorch-1.13.0

        opencv==3.4.1 Link: https://pan.baidu.com/s/1XPWUNfS7PTFiDkHTG8yvcQ 
Extraction code: d9g4

        Some may need cmake anyway, I am useless Link: https://pan.baidu.com/s/1-eLo7ecgQg94Mjtw-pQcXw 
Extraction code: rg0x

Second, install vs

Official website address:
Visual Studio Older Downloads - 2019, 2017, 2015 and previous versions

 The appeal link may recommend installing Visual Studio Installer 2019 or 2022 for 2017

Environment configuration and task preparation can learn from my previous article

Libtorch-yolov5 deploys pytorch version_It's time to wake up~'s blog-CSDN Blog

Okay, enough bullshit, the text begins

First create new file in vs

Create a new cpp file in the source file, and create a .h header file in the header file

download yolov5-libtorch

File link: https://pan.baidu.com/s/1oIP1btJd10gQddxAHijg7w 
Extraction code: lntf

  • Paste the code in src/YoloV5.cpp into the above YoloV5.cpp file
  • Paste the code in nclude/YoloV5.h into the above YoloV5.h file
  • Change the import method of the header file in YoloV5.cpp to "YoloV5.h
  • "

changed to

 Create a new main.cpp file in the source file. This file is used to call yolov5

Copy the code into main.cpp

This is the real-time monitoring of the read camera

#include "YoloV5.h"

int main()
{
	// 第二个参数为是否启用 cuda 详细用法可以参考 YoloV5.h 文件
	YoloV5 yolo("C:/Users/hwx/Documents/Github/YoloV5-LibTorch/test/yolov5s.cuda.pt", true);
	// 读取分类标签(我们用的官方的所以这里是 coco 中的分类)
	// 其实这些代码无所谓哪 只是后面预测出来的框没有标签罢了
	std::ifstream f("C:/Users/hwx/Documents/Github/YoloV5-LibTorch/test/coco.txt");
	std::string name = "";
	int i = 0;
	std::map<int, std::string> labels;
	while (std::getline(f, name))
	{
		labels.insert(std::pair<int, std::string>(i, name));
		i++;
	}
	// 用 OpenCV 打开摄像头读取文件(你随便咋样获取图片都OK哪)
	cv::VideoCapture cap = cv::VideoCapture(0);
	// 设置宽高 无所谓多宽多高后面都会通过一个算法转换为固定宽高的
	// 固定宽高值应该是你通过YoloV5训练得到的模型所需要的
	// 传入方式是构造 YoloV5 对象时传入 width 默认值为 640,height 默认值为 640
	cap.set(cv::CAP_PROP_FRAME_WIDTH, 1000);
	cap.set(cv::CAP_PROP_FRAME_HEIGHT, 800);
	cv::Mat frame;
	while (cap.isOpened())
	{
		// 读取一帧
		cap.read(frame);
		if (frame.empty())
		{
			std::cout << "Read frame failed!" << std::endl;
			break;
		}
		// 预测
		// 简单吧,两行代码预测结果就出来了,封装的还可以吧 嘚瑟
		std::vector<torch::Tensor> r = yolo.prediction(frame);
		// 画框根据你自己的项目调用相应的方法,也可以不画框自己处理
		frame = yolo.drawRectangle(frame, r[0], labels);
		// show 图片
		cv::imshow("", frame);
		if (cv::waitKey(1) == 27) break;
	}
	return 0;
}

This is to read all the pictures in the folder

#if 0
#include "YOLOv5.h"
#include"Ex.h"
#include<opencv2\opencv.hpp>
#include<io.h>
#include<iostream>

int main()
{
	YoloV5 yolo("D:\\Besktop\\best.torchscript.pt", true);
	// 读取分类标签
	std::ifstream f("D:\\Besktop\\voc.txt");
	std::string name = "";
	int i = 0;

	std::map<int, std::string> labels;
	while (std::getline(f, name))
	{
		labels.insert(std::pair<int, std::string>(i, name));
		std::cout << labels << std::endl;
		i++;
	}
	//cv::Mat frame = cv::imread("D:\\Besktop\\000\\划伤_2023032218553818.bmp");
	string path = "D:\\Besktop\\000\\";
	String dest = "D:\\Besktop\\1\\";
	String savedfilename;
	int len = path.length();
	vector<cv::String> filenames;
	cv::glob(path, filenames);
	for (int i = 0; i < filenames.size(); i++) 
	{
		Mat frame;
		frame = imread(filenames[i], i);
		//frame = 255 - frame;   //对每一张图片取反
		savedfilename = dest + filenames[i].substr(len);
		cout << savedfilename << endl;
		// 预测
		std::vector<torch::Tensor> r = yolo.prediction(frame);
		std::cout << r << std::endl;
		// 画框
		frame = yolo.drawRectangle(frame, r[0], labels);
		//bool is = yolo.existencePrediction(r);
		//std::cout << is << std::endl;
		// show 图片
		//cv::imshow("", frame);
		//imwrite(fileName, frame);
		imwrite(savedfilename, frame);
		cv::waitKey(0);
		//if (cv::waitKey(1) == 27);
		
	}
	return 0;
}
#endif // 0

read a picture


#if 1
#include "YoloV5.h"

int main()
{
	YoloV5 yolo("../dataset/best.torchscript.pt", true);
	// 读取分类标签
	std::ifstream f("../dataset/voc.txt");
	std::string name = "";
	int i = 0;
	std::map<int, std::string> labels;
	while (std::getline(f, name))
	{
		labels.insert(std::pair<int, std::string>(i, name));
		i++;
	}
	// 用 OpenCV 打开摄像头读取文件
	//cv::VideoCapture cap = cv::VideoCapture(0);
	//cap.set(cv::CAP_PROP_FRAME_WIDTH, 1000);
	//cap.set(cv::CAP_PROP_FRAME_HEIGHT, 800);
	//cv::Mat frame;
	//while (cap.isOpened())
	//{
	//	// 读取一帧
	//	cap.read(frame);
	//	if (frame.empty())
	//	{
	//		std::cout << "Read frame failed!" << std::endl;
	//		break;
	//	}
		cv::Mat frame = cv::imread("D:\\Besktop\\000\\断栅_2.bmp");
		// 预测
		std::vector<torch::Tensor> r = yolo.prediction(frame);
		std::cout << r << std::endl;
		// 画框处理
		frame = yolo.drawRectangle(frame, r[0], labels);
		// show 图片
		cv::imshow("", frame);
		cv::waitKey(0);
		//if (cv::waitKey(1) == 27) break;
		return 0;
	}
	

#endif // 1

Add header files in VC++ directory/include directory

 

Add the .lib library in the VC++ directory/library directory, add it if it exists, or add it if it is not

Add the lib library name in input/additional dependencies, add it if it exists, and don't add it if it doesn't

The path is inside the lib folder in your libtorch and opencv files

torch.lib
torch_cuda.lib
torch_cuda_cu.lib
torch_cuda_cpp.lib
torch_cpu.lib
c10_cuda.lib
caffe2_nvrtc.lib
c10.lib
kineto.lib
dnnl.lib
fbgemm.lib
asmjit.lib
XNNPACK.lib
cpuinfo.lib
clog.lib
libprotoc.lib
pthreadpool.lib
libprotobuf.lib
libprotobuf-lite.lib
opencv_world341.lib

Note Copy the .dll files under the libtorch and opencv folders to the environment under your project file

Open the libtorch .dll file and you can see it

Opencv's hiding is relatively deep .......opencv3.4.1\opencv\build\x64\vc15\lib

My project file adds path as....active2\x64\Release

 /INCLUDE:"?ignore_this_library_placeholder@@YAHXZ" 

Then modify the model path in the main function and the label path below and finally run it

Finally, let's check again

There will be these files in the program, only need to see main.cpp tesst.cpp YOLOV5.h Yolov5.cpp

these files

If main.cpp is above, it will not be pasted here

tesst.cpp

#if 0
#include "YOLOv5.h"
#include"Ex.h"
#include<opencv2\opencv.hpp>
#include<io.h>
#include<iostream>

int main()
{
	YoloV5 yolo("D:\\Besktop\\best.torchscript.pt", true);
	// 读取分类标签
	std::ifstream f("D:\\Besktop\\voc.txt");
	std::string name = "";
	int i = 0;

	std::map<int, std::string> labels;
	while (std::getline(f, name))
	{
		labels.insert(std::pair<int, std::string>(i, name));
		std::cout << labels << std::endl;
		i++;
	}
	//cv::Mat frame = cv::imread("D:\\Besktop\\000\\划伤_2023032218553818.bmp");
	string path = "D:\\Besktop\\000\\";
	String dest = "D:\\Besktop\\1\\";
	String savedfilename;
	int len = path.length();
	vector<cv::String> filenames;
	cv::glob(path, filenames);
	for (int i = 0; i < filenames.size(); i++) 
	{
		Mat frame;
		frame = imread(filenames[i], i);
		//frame = 255 - frame;   //对每一张图片取反
		savedfilename = dest + filenames[i].substr(len);
		cout << savedfilename << endl;
		// 预测
		std::vector<torch::Tensor> r = yolo.prediction(frame);
		std::cout << r << std::endl;
		// 画框
		frame = yolo.drawRectangle(frame, r[0], labels);
		//bool is = yolo.existencePrediction(r);
		//std::cout << is << std::endl;
		// show 图片
		//cv::imshow("", frame);
		//imwrite(fileName, frame);
		imwrite(savedfilename, frame);
		cv::waitKey(0);
		//if (cv::waitKey(1) == 27);
		
	}
	return 0;
}
#endif // 0

yolov5.h

#pragma once
#include <torch/torch.h>
#include <torch/script.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <ctime>
/**
 * ImageResizeData 图片处理过后保存图片的数据结构
 */
class ImageResizeData
{
public:
	// 添加处理过后的图片
	void setImg(cv::Mat img);
	// 获取处理过后的图片
	cv::Mat getImg();
	// 当原始图片宽高比大于处理过后图片宽高比时此函数返回 true
	bool isW();
	// 当原始图片高宽比大于处理过后图片高宽比时此函数返回 true
	bool isH();
	// 添加处理之后图片的宽
	void setWidth(int width);
	// 获取处理之后图片的宽
	int getWidth();
	// 添加处理之后图片的高
	void setHeight(int height);
	// 获取处理之后图片的高
	int getHeight();
	// 添加原始图片的宽
	void setW(int w);
	// 获取原始图片的宽
	int getW();
	// 添加原始图片的高
	void setH(int h);
	// 获取原始图片的高
	int getH();
	// 添加从原始图片到处理过后图片所添加黑边大小
	void setBorder(int border);
	// 获取从原始图片到处理过后图片所添加黑边大小
	int getBorder();
private:
	// 处理过后图片高
	int height;
	// 处理过后图片宽
	int width;
	// 原始图片宽
	int w;
	// 原始图片高
	int h;
	// 从原始图片到处理图片所添加的黑边大小
	int border;
	// 处理过后的图片
	cv::Mat img;
};

/**
 * YoloV5 的实现类
 */
class YoloV5
{
public:
	/**
	 * 构造函数
	 * @param ptFile yoloV5 pt文件路径
	 * @param isCuda 是否使用 cuda 默认不起用
	 * @param height yoloV5 训练时图片的高
	 * @param width yoloV5 训练时图片的宽
	 * @param confThres 非极大值抑制中的 scoreThresh
	 * @param iouThres 非极大值抑制中的 iouThresh
	 */
	YoloV5(std::string ptFile, bool isCuda = false, bool isHalf = false, int height = 640, int width = 640, float confThres = 0.25, float iouThres = 0.45);
	/**
	 * 预测函数
	 * @param data 语言预测的数据格式 (batch, rgb, height, width)
	 */
	std::vector<torch::Tensor> prediction(torch::Tensor data);
	/**
	 * 预测函数
	 * @param filePath 需要预测的图片路径
	 */
	std::vector<torch::Tensor> prediction(std::string filePath);
	/**
	 * 预测函数
	 * @param img 需要预测的图片
	 */
	std::vector<torch::Tensor> prediction(cv::Mat img);
	/**
	 * 预测函数
	 * @param imgs 需要预测的图片集合
	 */
	std::vector<torch::Tensor> prediction(std::vector <cv::Mat> imgs);
	/**
	 * 改变图片大小的函数
	 * @param img 原始图片
	 * @param height 要处理成的图片的高
	 * @param width 要处理成的图片的宽
	 * @return 封装好的处理过后图片数据结构
	 */
	static ImageResizeData resize(cv::Mat img, int height, int width);
	/**
	 * 改变图片大小的函数
	 * @param img 原始图片
	 * @return 封装好的处理过后图片数据结构
	 */
	ImageResizeData resize(cv::Mat img);
	/**
	 * 改变图片大小的函数
	 * @param imgs 原始图片集合
	 * @param height 要处理成的图片的高
	 * @param width 要处理成的图片的宽
	 * @return 封装好的处理过后图片数据结构
	 */
	static std::vector<ImageResizeData> resize(std::vector <cv::Mat> imgs, int height, int width);
	/**
	 * 改变图片大小的函数
	 * @param imgs 原始图片集合
	 * @return 封装好的处理过后图片数据结构
	 */
	std::vector<ImageResizeData> resize(std::vector <cv::Mat> imgs);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param imgs 原始图片集合
	 * @param rectangles 通过预测函数处理好的结果
	 * @param labels 类别标签
	 * @param thickness 线宽
	 * @return 画好框的图片
	 */
	std::vector<cv::Mat> drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, std::map<int, std::string> labels, int thickness = 2);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param imgs 原始图片集合
	 * @param rectangles 通过预测函数处理好的结果
	 * @param thickness 线宽
	 * @return 画好框的图片
	 */
	std::vector<cv::Mat> drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, int thickness = 2);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param imgs 原始图片集合
	 * @param rectangles 通过预测函数处理好的结果
	 * @param colors 每种类型对应颜色
	 * @param labels 类别标签
	 * @return 画好框的图片
	 */
	std::vector<cv::Mat> drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, std::map<int, cv::Scalar> colors, std::map<int, std::string> labels, int thickness = 2);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param img 原始图片
	 * @param rectangle 通过预测函数处理好的结果
	 * @param thickness 线宽
	 * @return 画好框的图片
	 */
	cv::Mat	drawRectangle(cv::Mat img, torch::Tensor rectangle, int thickness = 2);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param img 原始图片
	 * @param rectangle 通过预测函数处理好的结果
	 * @param labels 类别标签
	 * @param thickness 线宽
	 * @return 画好框的图片
	 */
	cv::Mat	drawRectangle(cv::Mat img, torch::Tensor rectangle, std::map<int, std::string> labels, int thickness = 2);
	/**
	 * 根据输出结果在给定图片中画出框
	 * @param img 原始图片
	 * @param rectangle 通过预测函数处理好的结果
	 * @param colos 每种类型对应颜色
	 * @param labels 类别标签
	 * @param thickness 线宽
	 * @return 画好框的图片
	 */
	cv::Mat	drawRectangle(cv::Mat img, torch::Tensor rectangle, std::map<int, cv::Scalar> colors, std::map<int, std::string> labels, int thickness = 2);
	/**
	 * 用于判断给定数据是否存在预测
	 * @param clazz 通过预测函数处理好的结果
	 * @return 如果图片中存在给定某一种分类返回 true
	 */
	bool existencePrediction(torch::Tensor clazz);
	/**
	 * 用于判断给定数据是否存在预测
	 * @param classs 通过预测函数处理好的结果
	 * @return 如果图片集合中存在给定某一种分类返回 true
	 */
	bool existencePrediction(std::vector<torch::Tensor> classs);

private:
	// 是否启用 cuda
	bool isCuda;
	// 是否使用半精度
	bool isHalf;
	// 非极大值抑制中的第一步数据清理
	float confThres;
	// 非极大值抑制中 iou
	float iouThres;
	// 模型所需要的图片的高
	float height;
	// 模型所需要的图片的宽
	float width;
	// 画框颜色 map
	std::map<int, cv::Scalar> mainColors;
	// 模型
	torch::jit::script::Module model;
	// 随机获取一种颜色
	cv::Scalar getRandScalar();
	// 图片通道转换为 rgb
	cv::Mat img2RGB(cv::Mat img);
	// 图片变为 Tensor
	torch::Tensor img2Tensor(cv::Mat img);
	// (center_x center_y w h) to (left, top, right, bottom)
	torch::Tensor xywh2xyxy(torch::Tensor x);
	// 非极大值抑制算法
	torch::Tensor nms(torch::Tensor bboxes, torch::Tensor scores, float thresh);
	// 预测出来的框根据原始图片还原算法
	std::vector<torch::Tensor> sizeOriginal(std::vector<torch::Tensor> result, std::vector<ImageResizeData> imgRDs);
	// 非极大值抑制算法整体
	std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float confThres = 0.25, float iouThres = 0.45);
};

 yolov5.cpp

#include "YoloV5.h"

YoloV5::YoloV5(std::string ptFile, bool isCuda, bool isHalf, int height, int width, float confThres, float iouThres)
{
	model = torch::jit::load(ptFile);
	if (isCuda)
	{
		model.to(torch::kCUDA);
	}
	if (isHalf)
	{
		model.to(torch::kHalf);
	}
	this->height = height;
	this->width = width;
	this->isCuda = isCuda;
	this->iouThres = iouThres;
	this->confThres = confThres;
	this->isHalf = isHalf;
	model.eval();
	unsigned seed = time(0);
	std::srand(seed);
}

std::vector<torch::Tensor> YoloV5::non_max_suppression(torch::Tensor prediction, float confThres, float iouThres)
{
	torch::Tensor xc = prediction.select(2, 4) > confThres;
	int maxWh = 4096;
	int maxNms = 30000;
	std::vector<torch::Tensor> output;
	for (int i = 0; i < prediction.size(0); i++)
	{
		output.push_back(torch::zeros({ 0, 6 }));
	}
	for (int i = 0; i < prediction.size(0); i++)
	{
		torch::Tensor x = prediction[i];
		x = x.index_select(0, torch::nonzero(xc[i]).select(1, 0));
		if (x.size(0) == 0) continue;

		x.slice(1, 5, x.size(1)).mul_(x.slice(1, 4, 5));
		torch::Tensor box = xywh2xyxy(x.slice(1, 0, 4));
		std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(x.slice(1, 5, x.size(1)), 1, true);
		x = torch::cat({ box, std::get<0>(max_tuple), std::get<1>(max_tuple) }, 1);
		x = x.index_select(0, torch::nonzero(std::get<0>(max_tuple) > confThres).select(1, 0));
		int n = x.size(0);
		if (n == 0)
		{
			continue;
		}
		else if (n > maxNms)
		{
			x = x.index_select(0, x.select(1, 4).argsort(0, true).slice(0, 0, maxNms));
		}
		torch::Tensor c = x.slice(1, 5, 6) * maxWh;
		torch::Tensor boxes = x.slice(1, 0, 4) + c, scores = x.select(1, 4);
		torch::Tensor ix = nms(boxes, scores, iouThres).to(x.device());
		output[i] = x.index_select(0, ix).cpu();
	}
	return output;
}

cv::Scalar YoloV5::getRandScalar()
{
	return cv::Scalar(std::rand() % 256, std::rand() % 256, std::rand() % 256);
}

cv::Mat YoloV5::img2RGB(cv::Mat img)
{
	int imgC = img.channels();
	if (imgC == 1)
	{
		cv::cvtColor(img, img, cv::COLOR_GRAY2RGB);
	}
	else
	{
		cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
	}
	return img;
}

torch::Tensor YoloV5::img2Tensor(cv::Mat img)
{
	torch::Tensor data = torch::from_blob(img.data, { (int)height, (int)width, 3 }, torch::kByte);
	data = data.permute({ 2, 0, 1 });
	data = data.toType(torch::kFloat);
	data = data.div(255);
	data = data.unsqueeze(0);
	return data;
}

torch::Tensor YoloV5::xywh2xyxy(torch::Tensor x)
{
	torch::Tensor y = x.clone();
	y.select(1, 0) = x.select(1, 0) - x.select(1, 2) / 2;
	y.select(1, 1) = x.select(1, 1) - x.select(1, 3) / 2;
	y.select(1, 2) = x.select(1, 0) + x.select(1, 2) / 2;
	y.select(1, 3) = x.select(1, 1) + x.select(1, 3) / 2;
	return y;
}

torch::Tensor YoloV5::nms(torch::Tensor bboxes, torch::Tensor scores, float thresh)
{
	auto x1 = bboxes.select(1, 0);
	auto y1 = bboxes.select(1, 1);
	auto x2 = bboxes.select(1, 2);
	auto y2 = bboxes.select(1, 3);
	auto areas = (x2 - x1) * (y2 - y1);
	auto tuple_sorted = scores.sort(0, true);
	auto order = std::get<1>(tuple_sorted);

	std::vector<int> keep;
	while (order.numel() > 0)
	{
		if (order.numel() == 1)
		{
			auto i = order.item();
			keep.push_back(i.toInt());
			break;
		}
		else
		{
			auto i = order[0].item();
			keep.push_back(i.toInt());
		}

		auto order_mask = order.narrow(0, 1, order.size(-1) - 1);

		auto xx1 = x1.index({ order_mask }).clamp(x1[keep.back()].item().toFloat(), 1e10);
		auto yy1 = y1.index({ order_mask }).clamp(y1[keep.back()].item().toFloat(), 1e10);
		auto xx2 = x2.index({ order_mask }).clamp(0, x2[keep.back()].item().toFloat());
		auto yy2 = y2.index({ order_mask }).clamp(0, y2[keep.back()].item().toFloat());
		auto inter = (xx2 - xx1).clamp(0, 1e10) * (yy2 - yy1).clamp(0, 1e10);

		auto iou = inter / (areas[keep.back()] + areas.index({ order.narrow(0,1,order.size(-1) - 1) }) - inter);
		auto idx = (iou <= thresh).nonzero().squeeze();
		if (idx.numel() == 0)
		{
			break;
		}
		order = order.index({ idx + 1 });
	}
	return torch::tensor(keep);
}

std::vector<torch::Tensor> YoloV5::sizeOriginal(std::vector<torch::Tensor> result, std::vector<ImageResizeData> imgRDs)
{
	std::vector<torch::Tensor> resultOrg;
	for (int i = 0; i < result.size(); i++)
	{

		torch::Tensor data = result[i];
		ImageResizeData imgRD = imgRDs[i];
		for (int j = 0; j < data.size(0); j++)
		{
			torch::Tensor tensor = data.select(0, j);
			// (left, top, right, bottom)
			if (imgRD.isW())
			{
				tensor[1] -= imgRD.getBorder();
				tensor[3] -= imgRD.getBorder();
				tensor[0] *= (float)imgRD.getW() / (float)imgRD.getWidth();
				tensor[2] *= (float)imgRD.getW() / (float)imgRD.getWidth();
				tensor[1] *= (float)imgRD.getH() / (float)(imgRD.getHeight() - 2 * imgRD.getBorder());
				tensor[3] *= (float)imgRD.getH() / (float)(imgRD.getHeight() - 2 * imgRD.getBorder());
			}
			else
			{
				tensor[0] -= imgRD.getBorder();
				tensor[2] -= imgRD.getBorder();
				tensor[1] *= (float)imgRD.getH() / (float)imgRD.getHeight();
				tensor[3] *= (float)imgRD.getH() / (float)imgRD.getHeight();
				tensor[0] *= (float)imgRD.getW() / (float)(imgRD.getWidth() - 2 * imgRD.getBorder());
				tensor[2] *= (float)imgRD.getW() / (float)(imgRD.getWidth() - 2 * imgRD.getBorder());
			}
			// 加了黑边之后预测结果可能在黑边上,就会造成结果为负数
			for (int k = 0; k < 4; k++)
			{
				if (tensor[k].item().toFloat() < 0)
				{
					tensor[k] = 0;
				}
			}
		}

		resultOrg.push_back(data);
	}
	return resultOrg;
}

std::vector<torch::Tensor> YoloV5::prediction(torch::Tensor data)
{
	if (!data.is_cuda() && this->isCuda)
	{
		data = data.cuda();
	}
	if (data.is_cuda() && !this->isCuda)
	{
		data = data.cpu();
	}
	if (this->isHalf)
	{
		data = data.to(torch::kHalf);
	}
	torch::Tensor pred = model.forward({ data }).toTuple()->elements()[0].toTensor();
	return non_max_suppression(pred, confThres, iouThres);
}

std::vector<torch::Tensor> YoloV5::prediction(std::string filePath)
{
	cv::Mat img = cv::imread(filePath);
	return prediction(img);
}

std::vector<torch::Tensor> YoloV5::prediction(cv::Mat img)
{
	ImageResizeData imgRD = resize(img);
	cv::Mat reImg = img2RGB(imgRD.getImg());
	torch::Tensor data = img2Tensor(reImg);
	std::vector<torch::Tensor> result = prediction(data);
	std::vector<ImageResizeData> imgRDs;
	imgRDs.push_back(imgRD);
	return sizeOriginal(result, imgRDs);
}

std::vector<torch::Tensor> YoloV5::prediction(std::vector<cv::Mat> imgs)
{
	std::vector<ImageResizeData> imageRDs;
	std::vector<torch::Tensor> datas;
	for (int i = 0; i < imgs.size(); i++)
	{
		ImageResizeData imgRD = resize(imgs[i]);
		imageRDs.push_back(imgRD);
		cv::Mat img = img2RGB(imgRD.getImg());
		datas.push_back(img2Tensor(img));
	}
	torch::Tensor data = torch::cat(datas, 0);
	std::vector<torch::Tensor> result = prediction(data);
	return sizeOriginal(result, imageRDs);
}

ImageResizeData YoloV5::resize(cv::Mat img, int height, int width)
{
	ImageResizeData imgResizeData;
	int w = img.cols, h = img.rows;
	imgResizeData.setH(h);
	imgResizeData.setW(w);
	imgResizeData.setHeight(height);
	imgResizeData.setWidth(width);
	bool isW = (float)w / (float)h > (float)width / (float)height;

	cv::resize(img, img, cv::Size(
		isW ? width : (int)((float)height / (float)h * w),
		isW ? (int)((float)width / (float)w * h) : height));

	w = img.cols, h = img.rows;
	if (isW)
	{
		imgResizeData.setBorder((height - h) / 2);
		cv::copyMakeBorder(img, img, (height - h) / 2, height - h - (height - h) / 2, 0, 0, cv::BORDER_CONSTANT);
	}
	else
	{
		imgResizeData.setBorder((width - w) / 2);
		cv::copyMakeBorder(img, img, 0, 0, (width - w) / 2, width - w - (width - w) / 2, cv::BORDER_CONSTANT);
	}
	imgResizeData.setImg(img);
	return imgResizeData;
}

ImageResizeData YoloV5::resize(cv::Mat img)
{
	return YoloV5::resize(img, height, width);
}

std::vector<ImageResizeData> YoloV5::resize(std::vector<cv::Mat> imgs, int height, int width)
{
	std::vector<ImageResizeData> imgRDs;
	for (int i = 0; i < imgs.size(); i++)
	{
		imgRDs.push_back(YoloV5::resize(imgs[i], height, width));
	}
	return imgRDs;
}

std::vector<ImageResizeData> YoloV5::resize(std::vector<cv::Mat> imgs)
{
	return YoloV5::resize(imgs, height, width);
}

std::vector<cv::Mat> YoloV5::drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, std::map<int, std::string> labels, int thickness)
{
	std::map<int, cv::Scalar> colors;
	return drawRectangle(imgs, rectangles, colors, labels, thickness);
}

std::vector<cv::Mat> YoloV5::drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, int thickness)
{
	std::map<int, cv::Scalar> colors;
	std::map<int, std::string> labels;
	return drawRectangle(imgs, rectangles, colors, labels, thickness);
}

std::vector<cv::Mat> YoloV5::drawRectangle(std::vector<cv::Mat> imgs, std::vector<torch::Tensor> rectangles, std::map<int, cv::Scalar> colors, std::map<int, std::string> labels, int thickness)
{
	std::vector<cv::Mat> results;
	for (int i = 0; i < imgs.size(); i++)
	{
		results.push_back(drawRectangle(imgs[i], rectangles[i], colors, labels, thickness));
	}
	return results;
}

cv::Mat YoloV5::drawRectangle(cv::Mat img, torch::Tensor rectangle, int thickness)
{
	std::map<int, cv::Scalar> colors;
	std::map<int, std::string> labels;
	return drawRectangle(img, rectangle, colors, labels, thickness);
}

cv::Mat YoloV5::drawRectangle(cv::Mat img, torch::Tensor rectangle, std::map<int, std::string> labels, int thickness)
{
	std::map<int, cv::Scalar> colors;
	return drawRectangle(img, rectangle, colors, labels, thickness);
}

cv::Mat YoloV5::drawRectangle(cv::Mat img, torch::Tensor rectangle, std::map<int, cv::Scalar> colors, std::map<int, std::string> labels, int thickness)
{
	std::map<int, cv::Scalar>::iterator it;
	std::map<int, std::string>::iterator labelIt;
	for (int i = 0; i < rectangle.size(0); i++)
	{
		int clazz = rectangle[i][5].item().toInt();
		it = colors.find(clazz);
		cv::Scalar color = NULL;
		if (it == colors.end())
		{
			it = mainColors.find(clazz);
			if (it == mainColors.end())
			{
				color = getRandScalar();
				mainColors.insert(std::pair<int, cv::Scalar>(clazz, color));
			}
			else
			{
				color = it->second;
			}
		}
		else
		{
			color = it->second;
		}
		cv::rectangle(img, cv::Point(rectangle[i][0].item().toInt(), rectangle[i][1].item().toInt()), cv::Point(rectangle[i][2].item().toInt(), rectangle[i][3].item().toInt()), color, thickness);
		labelIt = labels.find(clazz);

		std::ostringstream oss;

		if (labelIt != labels.end())
		{
			oss << labelIt->second << " ";
		}

		oss << rectangle[i][4].item().toFloat();
		std::string label = oss.str();

		cv::putText(img, label, cv::Point(rectangle[i][0].item().toInt(), rectangle[i][1].item().toInt()), cv::FONT_HERSHEY_PLAIN, 1, color, thickness);
	}
	return img;
}

bool YoloV5::existencePrediction(torch::Tensor clazz)
{
	return clazz.size(0) > 0 ? true : false;
}

bool YoloV5::existencePrediction(std::vector<torch::Tensor> classs)
{
	for (int i = 0; i < classs.size(); i++)
	{
		if (existencePrediction(classs[i]))
		{
			return true;
		}
	}
	return false;
}


void ImageResizeData::setImg(cv::Mat img)
{
	this->img = img;
}

cv::Mat ImageResizeData::getImg()
{
	return img;
}

bool ImageResizeData::isW()
{
	return (float)w / (float)h > (float)width / (float)height;
}

bool ImageResizeData::isH()
{
	return (float)h / (float)w > (float)height / (float)width;
}

void ImageResizeData::setWidth(int width)
{
	this->width = width;
}

int ImageResizeData::getWidth()
{
	return width;
}

void ImageResizeData::setHeight(int height)
{
	this->height = height;
}

int ImageResizeData::getHeight()
{
	return height;
}

void ImageResizeData::setW(int w)
{
	this->w = w;
}

int ImageResizeData::getW()
{
	return w;
}

void ImageResizeData::setH(int h)
{
	this->h = h;
}

int ImageResizeData::getH()
{
	return h;
}

void ImageResizeData::setBorder(int border)
{
	this->border = border;
}

int ImageResizeData::getBorder()
{
	return border;
}

 This effect is to read the camera image for test detection. In the blog post, there is a case of replacing the camera with a picture for detection.

 

Read it if you understand it, ask me in the comment area if you don’t understand it

Guess you like

Origin blog.csdn.net/qq_65356682/article/details/129799161