跑一跑HandPose

效果：（效果其实也一般）

下载模型文件和配置文件（sh脚本，如果脚本不方便就直接下载下面的打包文件）

# Download the OpenPose hand keypoint model weights (.caffemodel only).
# NOTE: the C++ sample further down loads "pose_deploy.prototxt" and
# "pose_iter_102000.caffemodel" from the working directory, while this script
# saves the weights under HAND_FOLDER and does not fetch the prototxt --
# move the file / adjust the paths accordingly.
OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/"
HAND_FOLDER="hand/"

# ------------------------- HAND MODELS -------------------------
# Path of the hand weights relative to OPENPOSE_URL.
HAND_MODEL="${HAND_FOLDER}pose_iter_102000.caffemodel"
# -c resumes a partial download; -P stores the file under HAND_FOLDER.
# Expansions are quoted so the URL and folder are never word-split or globbed.
wget -c "${OPENPOSE_URL}${HAND_MODEL}" -P "${HAND_FOLDER}"

所有文件打包（设置了最低的1分）：包含代码和模型以及测试视频的打包

配置最新版的 OpenCV（需要带有 DNN 模块，3.4 以上版本）

然后就是简单粗暴的代码：

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Keypoint index pairs that are joined by a line when the hand skeleton is
// drawn. One row per finger; every finger chain starts at keypoint 0.
const int POSE_PAIRS[20][2] =
{
	{ 0, 1 },   { 1, 2 },   { 2, 3 },   { 3, 4 },     // thumb
	{ 0, 5 },   { 5, 6 },   { 6, 7 },   { 7, 8 },     // index finger
	{ 0, 9 },   { 9, 10 },  { 10, 11 }, { 11, 12 },   // middle finger
	{ 0, 13 },  { 13, 14 }, { 14, 15 }, { 15, 16 },   // ring finger
	{ 0, 17 },  { 17, 18 }, { 18, 19 }, { 19, 20 }    // little finger
};

// Caffe network definition and trained weights handed to readNetFromCaffe();
// both paths are relative to the working directory.
std::string protoFile("pose_deploy.prototxt");
std::string weightsFile("pose_iter_102000.caffemodel");

// Number of network output maps scanned for a peak on every frame.
// NOTE(review): POSE_PAIRS only references indices 0-20; presumably the extra
// map is a background channel -- confirm against the model.
int nPoints{ 22 };

int main()
{
	float thresh = 0.01;

	cv::VideoCapture cap("asl.mp4");

	if (!cap.isOpened())
	{
		cerr << "Unable to connect to camera" << endl;
		return 1;
	}

	Mat frame, frameCopy;
	int frameWidth = cap.get(CAP_PROP_FRAME_WIDTH);
	int frameHeight = cap.get(CAP_PROP_FRAME_HEIGHT);
	float aspect_ratio = frameWidth / (float)frameHeight;
	int inHeight = 368;
	int inWidth = (int(aspect_ratio*inHeight) * 8) / 8;

	cout << "inWidth = " << inWidth << " ; inHeight = " << inHeight << endl;

	VideoWriter video("Output-Skeleton.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), 10, Size(frameWidth, frameHeight));

	Net net = readNetFromCaffe(protoFile, weightsFile);

	double t = 0;
	while (1)
	{
		double t = (double)cv::getTickCount();

		cap >> frame;
		frameCopy = frame.clone();
		Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false);

		net.setInput(inpBlob);

		Mat output = net.forward();

		int H = output.size[2];
		int W = output.size[3];

		// find the position of the body parts
		vector<Point> points(nPoints);
		for (int n = 0; n < nPoints; n++)
		{
			// Probability map of corresponding body's part.
			Mat probMap(H, W, CV_32F, output.ptr(0, n));
			resize(probMap, probMap, Size(frameWidth, frameHeight));

			Point maxLoc;
			double prob;
			minMaxLoc(probMap, 0, &prob, 0, &maxLoc);
			if (prob > thresh)
			{
				circle(frameCopy, cv::Point((int)maxLoc.x, (int)maxLoc.y), 8, Scalar(0, 255, 255), -1);
				cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)maxLoc.x, (int)maxLoc.y), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2);

			}
			points[n] = maxLoc;
		}

		int nPairs = sizeof(POSE_PAIRS) / sizeof(POSE_PAIRS[0]);

		for (int n = 0; n < nPairs; n++)
		{
			// lookup 2 connected body/hand parts
			Point2f partA = points[POSE_PAIRS[n][0]];
			Point2f partB = points[POSE_PAIRS[n][1]];

			if (partA.x <= 0 || partA.y <= 0 || partB.x <= 0 || partB.y <= 0)
				continue;

			line(frame, partA, partB, Scalar(0, 255, 255), 8);
			circle(frame, partA, 8, Scalar(0, 0, 255), -1);
			circle(frame, partB, 8, Scalar(0, 0, 255), -1);
		}

		t = ((double)cv::getTickCount() - t) / cv::getTickFrequency();
		cout << "Time Taken for frame = " << t << endl;
		cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2);
		// imshow("Output-Keypoints", frameCopy);
		imshow("Output-Skeleton", frame);
		video.write(frame);
		char key = waitKey(1);
		if (key == 27)
			break;
	}
	// When everything done, release the video capture and write object
	cap.release();
	video.release();

	return 0;
}

Reference:

https://www.learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/

猜你喜欢