OpenCV-C++-深度神经网络(DNN)模块-使用GOTURN模型进行对象跟踪

版权声明:本文为博主原创文章,转载请附上博文链接! https://blog.csdn.net/Daker_Huang/article/details/87914764

关于GOTURN的来源,可以查看论文原文:http://davheld.github.io/GOTURN/GOTURN.pdf
在作者文章中提到,利用GOTURN进行神经网络训练,对跟踪的对象能够实时进行跟踪,且FPS能够达到100,但是只限于在GPU上,对于CPU,还是有点不理想。

下图中,我们先框定前一帧的对象,即下图中的“what to track”,然后在当前帧中搜索可能的对象,即“search region”,经过对比,两张图像并不是完全一致,将二者都经过卷积层,通过全连接层进行回归,框定最终对象,然后再将框定的对象作为前一帧,不断循环,直到视频播放结束为止。
(原文此处为插图,转载时图片缺失)
首先需要下载模型,模型下载地址为:GOTURN模型
实例:

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Paths to the pre-trained GOTURN caffe model weights and network definition.
String  goturn_model = "D:/test/dnn/goturn/goturn.caffemodel";
String goturn_prototxt = "D:/test/dnn/goturn/goturn.prototxt";

// The loaded GOTURN network; initialized once in main(), used by trackObjects().
Net net;

// Forward declaration: predicts the object's box in `frame` from its
// appearance in `prevFrame` (position taken from the global prevBB).
Rect trackObjects(Mat &frame, Mat &prevFrame);
Mat frame, prevFrame;  // current video frame and the previous one
Rect prevBB;           // object bounding box in prevFrame (updated every iteration)
int main(int argc, char** argv) {
	net = readNetFromCaffe(goturn_prototxt, goturn_model);
	VideoCapture capture;
	capture.open("D:/test/dog.avi");
	capture.read(frame);
	frame.copyTo(prevFrame);
	prevBB = selectROI(frame, true, true);
	namedWindow("frame", WINDOW_AUTOSIZE);
	while (capture.read(frame)) {
		
		Rect currentBB = trackObjects(frame, prevFrame);
		rectangle(frame, currentBB, Scalar(0, 0, 255), 2, 8, 0);

		// ready for next frame
		frame.copyTo(prevFrame);
		prevBB.x = currentBB.x;
		prevBB.y = currentBB.y;
		prevBB.width = currentBB.width;
		prevBB.height = currentBB.height;

		imshow("frame", frame);
		char c = waitKey(10);
		if (c == 27) {
			break;
		}
	}
}



Rect trackObjects(Mat& frame, Mat &prevFrame) {
	// Predict the tracked object's bounding box in `frame`, given its box in
	// `prevFrame` (global prevBB), by regressing with the GOTURN network.
	// Returns the predicted box in `frame` coordinates.
	Rect rect;
	const int INPUT_SIZE = 227;          // GOTURN expects 227x227 inputs
	Mat curFrame = frame.clone();
	Rect2d curBB;

	const float padTargetPatch = 2.0f;   // context patch = 2x the previous box
	Rect2f searchPatchRect, targetPatchRect;
	Point2f currCenter, prevCenter;
	Mat prevFramePadded, curFramePadded;
	Mat searchPatch, targetPatch;

	// Center of the previous bounding box.
	prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
	prevCenter.y = (float)(prevBB.y + prevBB.height / 2);

	// Context patch around the previous box. The x/y offsets include the
	// border size added below, so the rect is expressed in the padded
	// image's coordinates and the crop never leaves the image.
	targetPatchRect.width = (float)(prevBB.width*padTargetPatch);
	targetPatchRect.height = (float)(prevBB.height*padTargetPatch);
	targetPatchRect.x = (float)(prevCenter.x - prevBB.width*padTargetPatch / 2.0 + targetPatchRect.width);
	targetPatchRect.y = (float)(prevCenter.y - prevBB.height*padTargetPatch / 2.0 + targetPatchRect.height);

	// Replicate-pad both frames so the context crop is always fully inside.
	copyMakeBorder(prevFrame, prevFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	targetPatch = prevFramePadded(targetPatchRect).clone();

	copyMakeBorder(curFrame, curFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	searchPatch = curFramePadded(targetPatchRect).clone();

	// Preprocess: resize both patches to the network's input size.
	resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE));
	resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE));

	// Convert to float BEFORE mean subtraction. Subtracting 128 from an
	// 8-bit Mat saturates every pixel below 128 to 0 and corrupts the input;
	// the reference GOTURN implementation subtracts the mean in float.
	targetPatch.convertTo(targetPatch, CV_32F);
	searchPatch.convertTo(searchPatch, CV_32F);
	targetPatch = targetPatch - 128;
	searchPatch = searchPatch - 128;

	Mat targetBlob = blobFromImage(targetPatch);
	Mat searchBlob = blobFromImage(searchPatch);

	// The GOTURN prototxt declares two inputs: previous-frame appearance
	// ("data1") and current-frame search region ("data2").
	net.setInput(targetBlob, "data1");
	net.setInput(searchBlob, "data2");

	// "scale" layer outputs [x1, y1, x2, y2] in 0..INPUT_SIZE coordinates.
	Mat res = net.forward("scale");
	Mat resMat = res.reshape(1, 1);

	// Map the network output back to original-frame coordinates: scale by
	// patch size / INPUT_SIZE, then remove the padding offset baked into
	// targetPatchRect.x/.y.
	curBB.x = targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width;
	curBB.y = targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height;
	curBB.width = (resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE;
	curBB.height = (resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE;

	// Predicted bounding box (Rect2d -> Rect conversion truncates to int).
	Rect boundingBox = curBB;
	return boundingBox;
}

猜你喜欢

转载自blog.csdn.net/Daker_Huang/article/details/87914764