画像処理シリーズ記事ディレクトリ
序文
画像処理における効率最適化のプロセスをいくつか記録します。
参考リンク:
https://blog.csdn.net/libaineu2004/article/details/104129127
https://blog.csdn.net/qq_27278957/article/details/84646948
1. 画像ピクセルの走査方法の比較
例として、すべてのピクセルを走査して画像を反転します。
#include "ImageProcess.h"
/// Prints the time elapsed between two tick counts, in milliseconds.
///
/// @param t1 Tick count sampled at the start of the measured region.
/// @param t2 Tick count sampled at the end of the measured region.
void PrintCostTime(double& t1, double& t2)
{
    const double elapsedTicks = t2 - t1;
    // Ticks divided by the tick frequency gives seconds; scale to milliseconds.
    const double elapsedMs = (elapsedTicks / getTickFrequency()) * 1000;
    cout << "time: " << elapsedMs << endl;
}
/// Inverts every pixel of an image through Mat::at<>() element access and
/// prints how long the traversal took.
///
/// The input is cloned before timing starts, so the caller's image is left
/// untouched. The inverted copy is shown in a window named "result" and the
/// function then waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified;
///             any other channel count is displayed unchanged.
///             NOTE(review): the uchar/Vec3b element access assumes 8-bit
///             depth — confirm at the call sites.
void method_at(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int w = src.cols;
    const int h = src.rows;
    const int dim = src.channels();
    // The channel count never changes during the traversal, so branch on it
    // once here (as the pointer-based variants do) instead of once per pixel;
    // otherwise the comparison between the methods is skewed.
    if (dim == 3) {
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                Vec3b& bgr = src.at<Vec3b>(row, col);
                bgr[0] = 255 - bgr[0];
                bgr[1] = 255 - bgr[1];
                bgr[2] = 255 - bgr[2];
            }
        }
    }
    else if (dim == 1) {
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                // 255 - value always stays within [0, 255], so no float
                // round-trip or saturation is needed.
                uchar& pixel = src.at<uchar>(row, col);
                pixel = static_cast<uchar>(255 - pixel);
            }
        }
    }
    double t2 = getTickCount();
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
/// Inverts every pixel of an image using per-row pointers obtained from
/// Mat::ptr<>() and prints how long the traversal took.
///
/// Works on a clone of the input, shows the result in a window named
/// "result" and waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified.
///             NOTE(review): assumes 8-bit depth — confirm at the call sites.
void method_Matptr(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int width = src.cols;
    const int height = src.rows;
    const int channels = src.channels();
    if (channels == 3) {
        for (int y = 0; y < height; ++y)
        {
            // Typed row pointer: one Vec3b element per pixel.
            Vec3b* rowPixels = src.ptr<cv::Vec3b>(y);
            for (int x = 0; x < width; ++x)
            {
                Vec3b& px = rowPixels[x];
                px[0] = 255 - px[0];
                px[1] = 255 - px[1];
                px[2] = 255 - px[2];
            }
        }
    }
    else if (channels == 1) {
        for (int y = 0; y < height; ++y)
        {
            uchar* rowPixels = src.ptr<uchar>(y);
            for (int x = 0; x < width; ++x)
            {
                rowPixels[x] = 255 - rowPixels[x];
            }
        }
    }
    double t2 = getTickCount();
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
/// Inverts every pixel of an image by walking the raw Mat::data buffer row by
/// row and prints how long the traversal took.
///
/// Works on a clone of the input, shows the result in a window named
/// "result" and waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified.
///             NOTE(review): assumes 8-bit depth — confirm at the call sites.
void method_Dataptr(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int width = src.cols;
    const int height = src.rows;
    const int channels = src.channels();
    if (channels == 3) {
        for (int y = 0; y < height; ++y)
        {
            // Row start = buffer base + row index * row stride in bytes.
            uchar* px = src.data + y * src.step;
            const uchar* const rowEnd = px + 3 * width;
            for (; px != rowEnd; px += 3)
            {
                px[0] = 255 - px[0];
                px[1] = 255 - px[1];
                px[2] = 255 - px[2];
            }
        }
    }
    else if (channels == 1) {
        for (int y = 0; y < height; ++y)
        {
            uchar* px = src.data + y * src.step;
            const uchar* const rowEnd = px + width;
            for (; px != rowEnd; ++px)
            {
                *px = 255 - *px;
            }
        }
    }
    double t2 = getTickCount();
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
/// Inverts every pixel of an image using Mat iterators and prints how long
/// the traversal took.
///
/// Works on a clone of the input, shows the result in a window named
/// "result" and waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified;
///             any other channel count is displayed unchanged.
///             NOTE(review): assumes 8-bit depth — confirm at the call sites.
void method_iterator(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    // The iterators cover the whole image, so width/height are not needed.
    const int dim = src.channels();
    if (dim == 3) {
        Mat_<Vec3b>::iterator it = src.begin<Vec3b>();
        Mat_<Vec3b>::iterator itend = src.end<Vec3b>();
        for (; it != itend; ++it)
        {
            (*it)[0] = 255 - (*it)[0];
            (*it)[1] = 255 - (*it)[1];
            (*it)[2] = 255 - (*it)[2];
        }
    }
    // The two cases are mutually exclusive; chain them like the other variants.
    else if (dim == 1) {
        Mat_<uchar>::iterator it = src.begin<uchar>();
        Mat_<uchar>::iterator itend = src.end<uchar>();
        for (; it != itend; ++it)
        {
            (*it) = 255 - (*it);
        }
    }
    double t2 = getTickCount();
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
ご覧のとおり、ポインターを使用する方法が最も高速です。
2. OpenMP を使用して高速化する
Windows の Visual Studio ではプロジェクトのプロパティで OpenMP サポートを有効にし、Linux では CMakeLists.txt に次の設定を追加します。
# Locate OpenMP; REQUIRED makes configuration stop with an error if it is not found.
find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
message("OPENMP FOUND")
# Append the OpenMP compiler flags reported by find_package to the global C and C++ flags.
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
# NOTE(review): OpenMP_EXE_LINKER_FLAGS does not appear to be among the variables documented for FindOpenMP - confirm it is set; if it is empty this line appends nothing.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
コードを変更し、for ループの直前に #pragma omp parallel for num_threads(4) を追加します。
#include "ImageProcess.h"
/// Prints the time elapsed between two tick counts, in milliseconds.
///
/// @param t1 Tick count sampled at the start of the measured region.
/// @param t2 Tick count sampled at the end of the measured region.
void PrintCostTime(double& t1, double& t2)
{
    // Ticks divided by the tick frequency gives seconds; scale to milliseconds.
    const double seconds = (t2 - t1) / getTickFrequency();
    const double milliseconds = seconds * 1000;
    cout << "time: " << milliseconds << endl;
}
/// Inverts every pixel of an image through Mat::at<>() element access, with
/// the rows distributed over OpenMP threads, and prints the elapsed time.
///
/// The input is cloned before timing starts, so the caller's image is left
/// untouched. The inverted copy is shown in a window named "result" and the
/// function then waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified;
///             any other channel count is displayed unchanged.
///             NOTE(review): the uchar/Vec3b element access assumes 8-bit
///             depth — confirm at the call sites.
void method_at(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int w = src.cols;
    const int h = src.rows;
    const int dim = src.channels();
    // The channel count never changes during the traversal, so branch on it
    // once here (as the pointer-based variants do) instead of once per pixel;
    // otherwise the comparison between the methods is skewed.
    if (dim == 3) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                Vec3b& bgr = src.at<Vec3b>(row, col);
                bgr[0] = 255 - bgr[0];
                bgr[1] = 255 - bgr[1];
                bgr[2] = 255 - bgr[2];
            }
        }
    }
    else if (dim == 1) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int row = 0; row < h; row++)
        {
            for (int col = 0; col < w; col++)
            {
                // 255 - value always stays within [0, 255], so no float
                // round-trip or saturation is needed.
                uchar& pixel = src.at<uchar>(row, col);
                pixel = static_cast<uchar>(255 - pixel);
            }
        }
    }
    double t2 = getTickCount();
    cout << "method_at ";
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
/// Inverts every pixel of an image using per-row pointers obtained from
/// Mat::ptr<>(), with the rows distributed over OpenMP threads, and prints
/// the elapsed time.
///
/// Works on a clone of the input, shows the result in a window named
/// "result" and waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified.
///             NOTE(review): assumes 8-bit depth — confirm at the call sites.
void method_Matptr(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int width = src.cols;
    const int height = src.rows;
    const int channels = src.channels();
    if (channels == 3) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int y = 0; y < height; y++)
        {
            // Typed row pointer: one Vec3b element per pixel.
            Vec3b* rowPixels = src.ptr<cv::Vec3b>(y);
            for (int x = 0; x < width; ++x)
            {
                Vec3b& px = rowPixels[x];
                px[0] = 255 - px[0];
                px[1] = 255 - px[1];
                px[2] = 255 - px[2];
            }
        }
    }
    else if (channels == 1) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int y = 0; y < height; y++)
        {
            uchar* rowPixels = src.ptr<uchar>(y);
            for (int x = 0; x < width; ++x)
            {
                rowPixels[x] = 255 - rowPixels[x];
            }
        }
    }
    double t2 = getTickCount();
    cout << "method_Matptr ";
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
/// Inverts every pixel of an image by walking the raw Mat::data buffer row by
/// row, with the rows distributed over OpenMP threads, and prints the elapsed
/// time.
///
/// Works on a clone of the input, shows the result in a window named
/// "result" and waits for a key press.
///
/// @param _src Image to invert. Only 1- and 3-channel images are modified.
///             NOTE(review): assumes 8-bit depth — confirm at the call sites.
void method_Dataptr(Mat& _src)
{
    Mat src = _src.clone();
    double t1 = getTickCount();
    const int width = src.cols;
    const int height = src.rows;
    const int channels = src.channels();
    if (channels == 3) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int y = 0; y < height; y++)
        {
            // Row start = buffer base + row index * row stride in bytes.
            uchar* px = src.data + y * src.step;
            const uchar* const rowEnd = px + 3 * width;
            for (; px != rowEnd; px += 3)
            {
                px[0] = 255 - px[0];
                px[1] = 255 - px[1];
                px[2] = 255 - px[2];
            }
        }
    }
    else if (channels == 1) {
#pragma omp parallel for num_threads(4) // use 4 threads
        for (int y = 0; y < height; y++)
        {
            uchar* px = src.data + y * src.step;
            const uchar* const rowEnd = px + width;
            for (; px != rowEnd; ++px)
            {
                *px = 255 - *px;
            }
        }
    }
    double t2 = getTickCount();
    cout << "method_Dataptr ";
    PrintCostTime(t1, t2);
    imshow("result", src);
    waitKey(0);
}
もう一度速度をテストしてみると、少しだけ速くなっていることがわかります。改善幅が小さいのは、この例では for ループ 1 回あたりの処理が非常に軽く、並列化の効果が出にくいためです。ループ内の処理が重い独自のアルゴリズムに適用すれば、さらに大きな速度向上が期待できます。自分のプロジェクトで最適化を行ったところ、処理時間が 700ms 以上から 120ms になり、改善効果は非常に良好でした。
まとめ
画像処理工程の簡易記録