画像処理アルゴリズムの実行効率を最適化する方法についてのメモ(2022-10-16)

画像処理シリーズ記事ディレクトリ

序文

画像処理における効率最適化のプロセスをいくつか記録します。
参考リンク:
https://blog.csdn.net/libaineu2004/article/details/104129127
https://blog.csdn.net/qq_27278957/article/details/84646948

1. 画像ピクセルの走査方法の比較

例として、すべてのピクセルを走査して画像の階調を反転(ネガポジ反転:各チャンネルの値 v を 255 − v に置き換える)します。

#include "ImageProcess.h"

// Prints the elapsed time between two tick readings in milliseconds.
//   t1: tick count captured before the measured work
//   t2: tick count captured after the measured work
// The tick difference is divided by getTickFrequency() and scaled by 1000,
// then written to cout as "time: <value>".
void PrintCostTime(double& t1, double& t2)
{
	const double elapsedTicks = t2 - t1;
	const double elapsedMs = (elapsedTicks / getTickFrequency()) * 1000;
	cout << "time: " << elapsedMs << endl;
}
// Inverts every pixel (v -> 255 - v) of a copy of _src using Mat::at<>()
// element access, prints the elapsed time through PrintCostTime, and shows
// the result in the "result" window until a key is pressed.
//   _src: input image; it is cloned first, so the caller's pixels are not
//         modified by this function.
// Only 1-channel and 3-channel images are processed; for any other channel
// count the copy is displayed unchanged.
// NOTE(review): pixels are accessed as uchar / Vec3b, so an 8-bit depth is
// assumed -- confirm against callers.
void method_at(Mat& _src)
{
	Mat src = _src.clone();
	double t1 = getTickCount();
	const int w = src.cols;
	const int h = src.rows;
	const int dim = src.channels();
	// The channel count is the same for every pixel, so branch once here
	// instead of once per pixel inside the loops.
	if (dim == 3) {
		for (int row = 0; row < h; row++)
		{
			for (int col = 0; col < w; col++)
			{
				// Bind a reference so each pixel needs a single at<>() lookup.
				Vec3b& bgr = src.at<Vec3b>(row, col);
				bgr[0] = static_cast<uchar>(255 - bgr[0]);
				bgr[1] = static_cast<uchar>(255 - bgr[1]);
				bgr[2] = static_cast<uchar>(255 - bgr[2]);
			}
		}
	}
	else if (dim == 1) {
		for (int row = 0; row < h; row++)
		{
			for (int col = 0; col < w; col++)
			{
				uchar& pixel = src.at<uchar>(row, col);
				// 255 - pixel is always within [0, 255], so neither a float
				// round trip nor saturation is required.
				pixel = static_cast<uchar>(255 - pixel);
			}
		}
	}
	double t2 = getTickCount();
	PrintCostTime(t1, t2);
	imshow("result", src);
	waitKey(0);
}

// Inverts every pixel (v -> 255 - v) of a copy of _src, walking each row
// through the typed row pointer returned by Mat::ptr<>(). Prints the elapsed
// time through PrintCostTime and shows the result in the "result" window
// until a key is pressed.
//   _src: input image; only its clone is modified.
// Only 1-channel and 3-channel images are processed.
// NOTE(review): rows are read as uchar / Vec3b, so an 8-bit depth is
// assumed -- confirm against callers.
void method_Matptr(Mat& _src)
{
	Mat image = _src.clone();
	double tickStart = getTickCount();
	const int width = image.cols;
	const int height = image.rows;
	const int channels = image.channels();
	switch (channels) {
	case 3:
		for (int y = 0; y < height; ++y)
		{
			Vec3b* const line = image.ptr<cv::Vec3b>(y);
			for (int x = 0; x < width; ++x)
			{
				Vec3b& px = line[x];
				px[0] = 255 - px[0];
				px[1] = 255 - px[1];
				px[2] = 255 - px[2];
			}
		}
		break;
	case 1:
		for (int y = 0; y < height; ++y)
		{
			uchar* const line = image.ptr<uchar>(y);
			for (int x = 0; x < width; ++x)
			{
				line[x] = 255 - line[x];
			}
		}
		break;
	default:
		// Other channel counts are left untouched.
		break;
	}
	double tickEnd = getTickCount();
	PrintCostTime(tickStart, tickEnd);
	imshow("result", image);
	waitKey(0);
}

// Inverts every pixel (v -> 255 - v) of a copy of _src by addressing the raw
// buffer directly: each row starts at data + row * step. Prints the elapsed
// time through PrintCostTime and shows the result in the "result" window
// until a key is pressed.
//   _src: input image; only its clone is modified.
// Only 1-channel and 3-channel images are processed.
// NOTE(review): the buffer is treated as one byte per channel, so an 8-bit
// depth is assumed -- confirm against callers.
void method_Dataptr(Mat& _src)
{
	Mat image = _src.clone();
	double tickStart = getTickCount();
	const int width = image.cols;
	const int height = image.rows;
	const int channels = image.channels();
	if (channels == 3) {
		for (int y = 0; y < height; ++y)
		{
			uchar* px = image.data + y * image.step;
			// Step three bytes per pixel across the row.
			for (int x = 0; x < width; ++x, px += 3)
			{
				px[0] = 255 - px[0];
				px[1] = 255 - px[1];
				px[2] = 255 - px[2];
			}
		}
	}
	else if (channels == 1) {
		for (int y = 0; y < height; ++y)
		{
			uchar* px = image.data + y * image.step;
			for (int x = 0; x < width; ++x, ++px)
			{
				*px = 255 - *px;
			}
		}
	}
	double tickEnd = getTickCount();
	PrintCostTime(tickStart, tickEnd);
	imshow("result", image);
	waitKey(0);
}

// Inverts every pixel (v -> 255 - v) of a copy of _src using Mat iterators,
// prints the elapsed time through PrintCostTime, and shows the result in the
// "result" window until a key is pressed.
//   _src: input image; it is cloned first, so the caller's pixels are not
//         modified by this function.
// Only 1-channel and 3-channel images are processed; for any other channel
// count the copy is displayed unchanged.
// NOTE(review): elements are iterated as uchar / Vec3b, so an 8-bit depth is
// assumed -- confirm against callers.
void method_iterator(Mat& _src)
{
	Mat src = _src.clone();
	double t1 = getTickCount();
	// Width and height are not needed: the iterators cover the whole image.
	const int dim = src.channels();
	if (dim == 3) {
		Mat_<Vec3b>::iterator it = src.begin<Vec3b>();
		Mat_<Vec3b>::iterator itend = src.end<Vec3b>();
		for (; it != itend; ++it)
		{
			// Dereference once per pixel and update the three channels in place.
			Vec3b& bgr = *it;
			bgr[0] = 255 - bgr[0];
			bgr[1] = 255 - bgr[1];
			bgr[2] = 255 - bgr[2];
		}
	}
	// The two channel cases are mutually exclusive, hence else-if.
	else if (dim == 1) {
		Mat_<uchar>::iterator it = src.begin<uchar>();
		Mat_<uchar>::iterator itend = src.end<uchar>();
		for (; it != itend; ++it)
		{
			(*it) = 255 - (*it);
		}
	}
	double t2 = getTickCount();
	PrintCostTime(t1, t2);
	imshow("result", src);
	waitKey(0);
}

ご覧のとおり、ポインターを使用する方法が最も高速です。
ここに画像の説明を挿入

2. OpenMP を使用して高速化する

ここに画像の説明を挿入
Windows の Visual Studio では、プロジェクトのプロパティ(構成プロパティ → C/C++ → 言語 → 「OpenMP のサポート」)で OpenMP を有効にします。Linux では、CMakeLists.txt に次の設定を追加します。

# Locate OpenMP support; REQUIRED makes configuration stop with an error if it is not found.
find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
	message("OPENMP FOUND")
    # Append the OpenMP compiler flags to the global C and C++ compile flags.
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    # NOTE(review): OpenMP_EXE_LINKER_FLAGS does not appear among the result
    # variables documented for FindOpenMP -- confirm it is set; if it is empty,
    # this line leaves the linker flags unchanged.
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

コードを変更し、並列化したい for ループの直前に #pragma omp parallel for num_threads(4) を追加します。

#include "ImageProcess.h"

// Reports how long a measured section took, in milliseconds.
//   t1: tick count taken before the section
//   t2: tick count taken after the section
// Converts the tick difference with getTickFrequency(), scales it by 1000,
// and writes it to cout as "time: <value>".
void PrintCostTime(double& t1, double& t2)
{
	const double tickDelta = t2 - t1;
	const double milliseconds = (tickDelta / getTickFrequency()) * 1000;
	cout << "time: " << milliseconds << endl;
}
// Inverts every pixel (v -> 255 - v) of a copy of _src using Mat::at<>()
// element access, with the row loop parallelised by OpenMP. Prints
// "method_at " followed by the elapsed time, then shows the result in the
// "result" window until a key is pressed.
//   _src: input image; it is cloned first, so the caller's pixels are not
//         modified by this function.
// Only 1-channel and 3-channel images are processed; for any other channel
// count the copy is displayed unchanged.
// NOTE(review): pixels are accessed as uchar / Vec3b, so an 8-bit depth is
// assumed -- confirm against callers.
void method_at(Mat& _src)
{
	Mat src = _src.clone();
	double t1 = getTickCount();
	const int w = src.cols;
	const int h = src.rows;
	const int dim = src.channels();
	// The channel count is the same for every pixel, so branch once here
	// instead of once per pixel inside the parallel loops.
	if (dim == 3) {
		// Each iteration writes only to its own row, so rows can run concurrently.
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int row = 0; row < h; row++)
		{
			for (int col = 0; col < w; col++)
			{
				// Bind a reference so each pixel needs a single at<>() lookup.
				Vec3b& bgr = src.at<Vec3b>(row, col);
				bgr[0] = static_cast<uchar>(255 - bgr[0]);
				bgr[1] = static_cast<uchar>(255 - bgr[1]);
				bgr[2] = static_cast<uchar>(255 - bgr[2]);
			}
		}
	}
	else if (dim == 1) {
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int row = 0; row < h; row++)
		{
			for (int col = 0; col < w; col++)
			{
				uchar& pixel = src.at<uchar>(row, col);
				// 255 - pixel is always within [0, 255], so neither a float
				// round trip nor saturation is required.
				pixel = static_cast<uchar>(255 - pixel);
			}
		}
	}
	double t2 = getTickCount();
	cout << "method_at ";
	PrintCostTime(t1, t2);
	imshow("result", src);
	waitKey(0);
}

// Inverts every pixel (v -> 255 - v) of a copy of _src, walking each row
// through the typed row pointer returned by Mat::ptr<>(), with the row loop
// parallelised by OpenMP. Prints "method_Matptr " followed by the elapsed
// time, then shows the result in the "result" window until a key is pressed.
//   _src: input image; only its clone is modified.
// Only 1-channel and 3-channel images are processed.
// NOTE(review): rows are read as uchar / Vec3b, so an 8-bit depth is
// assumed -- confirm against callers.
void method_Matptr(Mat& _src)
{
	Mat image = _src.clone();
	double tickStart = getTickCount();
	const int width = image.cols;
	const int height = image.rows;
	const int channels = image.channels();
	if (channels == 3) {
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int y = 0; y < height; y++)
		{
			Vec3b* const line = image.ptr<cv::Vec3b>(y);
			for (int x = 0; x < width; ++x)
			{
				Vec3b& px = line[x];
				px[0] = 255 - px[0];
				px[1] = 255 - px[1];
				px[2] = 255 - px[2];
			}
		}
	}
	else if (channels == 1) {
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int y = 0; y < height; y++)
		{
			uchar* const line = image.ptr<uchar>(y);
			for (int x = 0; x < width; ++x)
			{
				line[x] = 255 - line[x];
			}
		}
	}
	double tickEnd = getTickCount();
	cout << "method_Matptr ";
	PrintCostTime(tickStart, tickEnd);
	imshow("result", image);
	waitKey(0);
}

// Inverts every pixel (v -> 255 - v) of a copy of _src by addressing the raw
// buffer directly (each row starts at data + row * step), with the row loop
// parallelised by OpenMP. Prints "method_Dataptr " followed by the elapsed
// time, then shows the result in the "result" window until a key is pressed.
//   _src: input image; only its clone is modified.
// Only 1-channel and 3-channel images are processed.
// NOTE(review): the buffer is treated as one byte per channel, so an 8-bit
// depth is assumed -- confirm against callers.
void method_Dataptr(Mat& _src)
{
	Mat image = _src.clone();
	double tickStart = getTickCount();
	const int width = image.cols;
	const int height = image.rows;
	const int channels = image.channels();
	if (channels == 3) {
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int y = 0; y < height; y++)
		{
			uchar* px = image.data + y * image.step;
			// Step three bytes per pixel across the row.
			for (int x = 0; x < width; ++x, px += 3)
			{
				px[0] = 255 - px[0];
				px[1] = 255 - px[1];
				px[2] = 255 - px[2];
			}
		}
	}
	else if (channels == 1) {
#pragma omp parallel for num_threads(4)  // use 4 threads
		for (int y = 0; y < height; y++)
		{
			uchar* px = image.data + y * image.step;
			for (int x = 0; x < width; ++x, ++px)
			{
				*px = 255 - *px;
			}
		}
	}
	double tickEnd = getTickCount();
	cout << "method_Dataptr ";
	PrintCostTime(tickStart, tickEnd);
	imshow("result", image);
	waitKey(0);
}

もう一度速度を計測すると、わずかに高速化されていることがわかります。改善幅が小さいのは、この例では for ループ 1 回あたりの処理時間が非常に短いためです。ループ内の処理がより重い実際のアルゴリズムに適用すれば、より大きな高速化が期待できます。筆者のプロジェクトでは、この最適化によって処理時間が 700 ms 超から 120 ms に短縮され、非常に良好な効果が得られました。
ここに画像の説明を挿入

まとめ

以上、画像処理における効率最適化についての簡単な記録でした。

おすすめ

転載: blog.csdn.net/zengwubbb/article/details/127353054