基本思想:编译opencv包含cuda版本,然后进行解码播放显示帧
一、首先本机配置了cuda和cudnn的环境,参考:https://sxj731533730.blog.csdn.net/article/details/115064903 。然后下载解码组件(NVIDIA Video Codec SDK),为重新编译opencv做准备
ubuntu@ubuntu:~/Downloads$ wget https://developer.nvidia.com/downloads/designworks/video-codec-sdk/secure/12.1/video_codec_sdk_12.1.14.zip
然后解压(unzip video_codec_sdk_12.1.14.zip),解压之后把SDK的头文件拷贝到cuda的include目录
ubuntu@ubuntu:~/Downloads$ sudo cp Video_Codec_SDK_12.1.14/Interface/* /usr/local/cuda/include
然后下载opencv进行编译,我用的是当时的最新版本4.6.0 (2023-5-25)
ubuntu@ubuntu:~$ git clone https://github.com/opencv/opencv.git
ubuntu@ubuntu:~$ git clone https://github.com/opencv/opencv_contrib.git
ubuntu@ubuntu:~$ mkdir -p opencv/build && cd opencv/build
ubuntu@ubuntu:~/opencv/build$ cmake -D CMAKE_BUILD_TYPE=RELEASE \
-D CMAKE_INSTALL_PREFIX=/usr/local \
-D WITH_TBB=ON \
-D BUILD_TBB=ON \
-D ENABLE_FAST_MATH=1 \
-D CUDA_FAST_MATH=1 \
-D WITH_CUBLAS=1 \
-D WITH_V4L=ON \
-D WITH_LIBV4L=ON \
-D WITH_CUDA=ON \
-D WITH_CUDNN=ON \
-D WITH_CUDEV=ON \
-D WITH_GTK_2_X=ON \
-D WITH_NVCUVID=ON \
-D CUDA_ARCH_BIN=8.6 \
-D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
-D WITH_QT=ON \
-D WITH_OPENGL=ON \
-D WITH_FFMPEG=ON \
-D OPENCV_GENERATE_PKGCONFIG=ON \
..
ubuntu@ubuntu:~/opencv/build$ make -j$(nproc)
ubuntu@ubuntu:~/opencv/build$ sudo make install
ubuntu@ubuntu:~/opencv/build$ sudo ldconfig
#直接跳到第二步进行测试一下,要是没问题,下面这些不必操作
ubuntu@ubuntu:~/opencv/build$ cd /usr/local/lib
ubuntu@ubuntu:/usr/local/lib$ sudo mkdir -p pkgconfig && cd pkgconfig
ubuntu@ubuntu:/usr/local/lib/pkgconfig$ sudo gedit opencv.pc
----------
# Package Information for pkg-config
prefix=/usr/local
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include/opencv4
Name: OpenCV
Description: Open Source Computer Vision Library
Version: 4.6.0
Libs: -L${exec_prefix}/lib -lopencv_gapi -lopencv_ml -lopencv_video -lopencv_highgui -lopencv_videoio -lopencv_imgcodecs -lopencv_objdetect -lopencv_dnn -lopencv_stitching -lopencv_calib3d -lopencv_features2d -lopencv_flann -lopencv_photo -lopencv_imgproc -lopencv_core
Libs.private: -ldl -lm -lpthread -lrt
Cflags: -I${includedir}
----------
ubuntu@ubuntu:~/opencv/build$ sudo ln -s opencv.pc opencv4.pc
ubuntu@ubuntu:~/opencv/build$ pkg-config --libs --cflags opencv4
二、测试安装
ubuntu@ubuntu:~/opencv/build$ pkg-config --libs --cflags opencv4
-I/usr/local/include/opencv4 -L/usr/local/lib -lopencv_gapi -lopencv_stitching -lopencv_alphamat -lopencv_aruco -lopencv_barcode -lopencv_bgsegm -lopencv_bioinspired -lopencv_ccalib -lopencv_cudabgsegm -lopencv_cudafeatures2d -lopencv_cudaobjdetect -lopencv_cudastereo -lopencv_cvv -lopencv_dnn_objdetect -lopencv_dnn_superres -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy -lopencv_hdf -lopencv_hfs -lopencv_img_hash -lopencv_intensity_transform -lopencv_line_descriptor -lopencv_mcc -lopencv_quality -lopencv_rapid -lopencv_reg -lopencv_rgbd -lopencv_saliency -lopencv_stereo -lopencv_structured_light -lopencv_phase_unwrapping -lopencv_superres -lopencv_cudacodec -lopencv_surface_matching -lopencv_tracking -lopencv_highgui -lopencv_datasets -lopencv_text -lopencv_plot -lopencv_videostab -lopencv_cudaoptflow -lopencv_optflow -lopencv_cudalegacy -lopencv_videoio -lopencv_cudawarping -lopencv_viz -lopencv_wechat_qrcode -lopencv_xfeatures2d -lopencv_shape -lopencv_ml -lopencv_ximgproc -lopencv_video -lopencv_xobjdetect -lopencv_objdetect -lopencv_calib3d -lopencv_imgcodecs -lopencv_features2d -lopencv_dnn -lopencv_flann -lopencv_xphoto -lopencv_photo -lopencv_cudaimgproc -lopencv_cudafilters -lopencv_imgproc -lopencv_cudaarithm -lopencv_core -lopencv_cudev
三、测试单帧速度
cmakelists.txt
cmake_minimum_required(VERSION 3.16)
project(untitled8)
# Request C++11 through the standard CMake variables instead of overwriting
# CMAKE_CXX_FLAGS, which would discard any flags supplied by the user or toolchain.
# Extensions are disabled so the compiler still receives -std=c++11 (not gnu++11).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# Locate OpenCV (4.0.0 or newer); this provides OpenCV_INCLUDE_DIRS and OpenCV_LIBS.
find_package(OpenCV 4.0.0 REQUIRED)
add_executable(untitled8 main.cpp)
# Add the OpenCV header directories for this target only.
target_include_directories(untitled8 PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link against the OpenCV libraries.
target_link_libraries(untitled8 PRIVATE ${OpenCV_LIBS})
main.cpp
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/cudacodec.hpp>
#include <chrono>
#include "stdio.h"
#include "string"
using namespace cv;
int gpu_test()
{
cv::cuda::printCudaDeviceInfo(cv::cuda::getDevice());
int count = cv::cuda::getCudaEnabledDeviceCount();
printf("GPU Device Count : %d \n", count);
const std::string filename = "/home/ubuntu/untitled8/mot17_demo.mp4";
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(filename);
cv::cuda::GpuMat gpu_frame;
int frame_id = 0;
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
while (reader->nextFrame(gpu_frame))
{
frame_id = frame_id + 1;
}
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::chrono::duration<double> time_useds = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
double time_ms = time_useds.count() * 1000.0f;
double fps = double(frame_id) / time_ms * 1000.0f;
printf("GPU test took time: %f ms, frames: %d , FPS: %f\n", time_ms, frame_id, fps);
reader.release();
return 0;
}
int cpu_test()
{
const std::string filename = "/home/ubuntu/untitled8/mot17_demo.mp4";
cv::VideoCapture capture;
capture.open(filename);
if (!capture.isOpened())
{
printf("Open video failed !!! \n");
return -1;
}
int width = (int)capture.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("src video width: %d , %d \n", width, height);
cv::Mat frame;
int frame_id = 0;
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
while (capture.read(frame))
{
frame_id = frame_id + 1;
}
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::chrono::duration<double> time_useds = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
double time_ms = time_useds.count() * 1000.0f;
double fps = double(frame_id) / time_ms * 1000.0f;
printf("CPU test took time: %f ms, frames: %d , FPS: %f\n", time_ms, frame_id, fps);
capture.release();
return 0;
}
// Runs the GPU benchmark followed by the CPU benchmark.
// Both benchmarks always run; the exit code is 0 only if both returned 0,
// so a failed run is visible to the caller instead of being reported as success.
int main(int argc, char const *argv[])
{
    (void)argc; // command-line arguments are not used
    (void)argv;
    const int gpu_status = gpu_test();
    const int cpu_status = cpu_test();
    return (gpu_status == 0 && cpu_status == 0) ? 0 : 1;
}
测试数据
/home/ubuntu/untitled8/cmake-build-debug/untitled8
*** CUDA Device Query (Runtime API) version (CUDART static linking) ***
Device count: 1
Device 0: "NVIDIA GeForce RTX 3050 Laptop GPU"
CUDA Driver Version / Runtime Version 12.0 / 11.10
CUDA Capability Major/Minor version number: 8.6
Total amount of global memory: 3902 MBytes (4091478016 bytes)
GPU Clock Speed: 1.50 GHz
Max Texture Dimension Size (x,y,z) 1D=(131072), 2D=(131072,65536), 3D=(16384,16384,16384)
Max Layered Texture Size (dim) x layers 1D=(32768) x 2048, 2D=(32768,32768) x 2048
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per block: 1024
Maximum sizes of each dimension of a block: 1024 x 1024 x 64
Maximum sizes of each dimension of a grid: 2147483647 x 65535 x 65535
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and execution: Yes with 2 copy engine(s)
Run time limit on kernels: Yes
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Concurrent kernel execution: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support enabled: No
Device is using TCC driver mode: No
Device supports Unified Addressing (UVA): Yes
Device PCI Bus ID / PCI location ID: 1 / 0
Compute Mode:
Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 12.0, CUDA Runtime Version = 11.10, NumDevs = 1
GPU Device Count : 1
GPU test took time: 329.191769 ms, frames: 200 , FPS: 607.548605
src video width: 1920 , 1080
CPU test took time: 375.254235 ms, frames: 200 , FPS: 532.972000
Process finished with exit code 0
四、测试视频播放
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/cudacodec.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <chrono>
#include <iomanip>
#include <opencv2/highgui/highgui.hpp>
#include "string"
using namespace cv;
int gpu_test()
{
cv::cuda::printCudaDeviceInfo(cv::cuda::getDevice());
int count = cv::cuda::getCudaEnabledDeviceCount();
printf("GPU Device Count : %d \n", count);
const std::string filename = "/home/ubuntu/untitled8/palace.mp4";
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(filename);
cv::cuda::GpuMat gpu_frame;
cv::Mat frame;
int frame_id = 0;
auto startTime = std::chrono::steady_clock::now();
int counter = 0;
float fps = 0;
while (reader->nextFrame(gpu_frame))
{
counter++;
auto currentTime = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::duration<float>>(currentTime - startTime);
if (elapsed > std::chrono::seconds(1)) {
fps = counter / elapsed.count();
counter = 0;
startTime = currentTime;
}
gpu_frame.step = gpu_frame.cols * gpu_frame.channels();
gpu_frame.download(frame);
std::stringstream fpsStr;
fpsStr << "fps: " << std::fixed << std::setprecision(2) << fps;
cv::putText(frame, fpsStr.str(), cv::Point(2, 20), cv::FONT_HERSHEY_TRIPLEX, 0.4, cv::Scalar(0,255,0));
cv::imshow("frame",frame);
cv::waitKey(1);
}
reader.release();
return 0;
}
int cpu_test()
{
const std::string filename = "/home/ubuntu/untitled8/palace.mp4";
cv::VideoCapture capture;
capture.open(filename);
if (!capture.isOpened())
{
printf("Open video failed !!! \n");
return -1;
}
int width = (int)capture.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("src video width: %d , %d \n", width, height);
cv::Mat frame;
auto startTime = std::chrono::steady_clock::now();
int counter = 0;
float fps = 0;
while (capture.read(frame))
{
counter++;
auto currentTime = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::duration<float>>(currentTime - startTime);
if (elapsed > std::chrono::seconds(1)) {
fps = counter / elapsed.count();
counter = 0;
startTime = currentTime;
}
std::stringstream fpsStr;
fpsStr << "fps: " << std::fixed << std::setprecision(2) << fps;
cv::putText(frame, fpsStr.str(), cv::Point(2, 20), cv::FONT_HERSHEY_TRIPLEX, 0.4, cv::Scalar(0,255,0));
cv::imshow("frame",frame);
cv::waitKey(1);
}
capture.release();
return 0;
}
// Entry point of the playback demo. Runs the CPU playback; switch the two
// calls to play through the GPU decoder instead.
// The exit code is 0 on success and 1 when the selected test reports failure,
// so a failed run is not silently reported as success.
int main(int argc, char const *argv[])
{
    (void)argc; // command-line arguments are not used
    (void)argv;
    // gpu_test();
    return cpu_test() == 0 ? 0 : 1;
}
先研究opencv+cuda的程序开发
附录简单灰度图
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/cudacodec.hpp>
#include <chrono>
#include <opencv2/imgproc.hpp>
#include "stdio.h"
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "string"
using namespace cv;
using namespace cv::cuda;
// Minimal OpenCV + CUDA example: loads "../0.jpg", converts it to grayscale on
// the GPU with cuda::cvtColor and shows both the source and the result.
// Returns 0 on success, 1 when no CUDA device is usable or the image cannot
// be read.
int main(int argc, char const *argv[])
{
    (void)argc; // command-line arguments are not used
    (void)argv;

    // Check the device count before touching any other CUDA API.
    int count = cuda::getCudaEnabledDeviceCount();
    if (count <= 0)
    {
        printf("GPU Device Count : %d \n", count);
        printf("No usable CUDA device !!! \n");
        return 1;
    }
    cuda::printCudaDeviceInfo(cuda::getDevice());
    // The original format string ended in " n": the backslash of "\n" was lost.
    printf("GPU Device Count : %d \n", count);

    // Grayscale conversion
    Mat src_host = imread("../0.jpg");
    if (src_host.empty())
    {
        // imread returns an empty Mat on failure; uploading/converting it would fail later.
        printf("Read image failed !!! \n");
        return 1;
    }
    GpuMat src, gray;
    src.upload(src_host);                          // host -> device
    cuda::cvtColor(src, gray, cv::COLOR_BGR2GRAY); // conversion runs on the GPU
    Mat gray_host;
    gray.download(gray_host);                      // device -> host for display
    imshow("src", src_host);
    imshow("gray", gray_host);
    waitKey(0); // block until a key is pressed
    return 0;
}