CUDA图像处理 | Sobel算子边缘提取

CUDA数组与纹理操作

CUDA数组与设备内存从相同的内存池中分配，但前者拥有一个细节不明（不透明）的布局：为2D和3D局部性做了优化。
CUDA数组不消耗CUDA地址空间
CUDA数组只能驻留在设备内存中，GPU在总线上传输数据时可以在这两种形式（线性设备内存与CUDA数组）之间转换。
纹理存储器是一种只读存储器。
kernel 函数通过纹理参照系从纹理内存中读取数据（Texture Fetching，纹理拾取）。
纹理参照系必须通过 cudaBindTexture 或者 cudaBindTextureToArray 进行纹理绑定（Texture Binding）。
用 cudaUnbindTexture 来解除绑定。

使用纹理存储器处理图像一般包括几个步骤：

声明纹理参照系：
```
texture<Type, Dim, ReadMode> tex;
```

声明CUDA数组，设置绑定参数，初始化CUDA数组：

cudaArray *array = NULL;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaMallocArray(&array,&desc,nWidth,nHeight);
cudaMemcpyToArray(array,0,0,h_pData,sizeof(Pixel)*nWidth*nHeight,cudaMemcpyHostToDevice);

纹理绑定：
```
cudaBindTextureToArray(tex,array);
```

kernel纹理拾取与其他图像处理操作

d_pData[y*width+x] = tex2D(tex,tu,tv);
// ...

解除绑定：
```
cudaUnbindTexture(tex);
```
释放CUDA数组
```
cudaFreeArray(array);
```

Sobel算子原理

Sobel 算子主要作用：边缘检测。它是一种离散性差分算子。
该算子包含横向和纵向两组 3x3 矩阵，将之与图像做平面卷积，即可得到横向和纵向的亮度差分近似值。
```
/*
1: Gx // 横向
-1  0   +1
-2  0   +2
-1  0   +1

2: Gy // 纵向
+1  +2  +1 
0   0   0
-1  -2  -1 
*/
```
G = sqrt(Gx^2+Gy^2)

|G| = |Gx|+|Gy|
如果梯度G大于某一个阈值，则认为该点（x，y）为边缘点。

扫描二维码关注公众号，回复： 3241642 查看本文章
梯度方向为：

θ = arctan(Gy/Gx)

GPU代码实现

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>  

// cuda
#include "cuda.h"   
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h" // 此头文件包含 __syncthreads ()函数

// cuda纹理
#ifndef __CUDACC__  
#define __CUDACC__  
#include "cuda_texture_types.h"  
#include "texture_indirect_functions.h"
#include "texture_fetch_functions.h"
#endif
#include <iostream>

using namespace std;
using namespace cv;

////////////////////////////////////////////////////////

// File-scope 2D texture reference with `unsigned char` texels; the SobelTex
// kernel samples it through tex2D().
texture<unsigned char, 2> tex;
// CUDA array that backs the texture. setupTexture() allocates and fills it,
// and cudaSobelFilter() binds it to `tex`.
cudaArray *array;
// Channel format descriptor chosen by setupTexture() and reused when binding.
cudaChannelFormatDesc desc;

// Allocates the file-scope CUDA array `array` and uploads a host image into it.
//
// Parameters:
//   iw, ih - image width and height in pixels (must be positive).
//   data   - host pointer to tightly packed rows of Bpp bytes per pixel.
//   Bpp    - bytes per pixel: 1 selects an `unsigned char` channel format,
//            4 selects `uchar4`. Other values are rejected because the byte
//            count would not match either channel format.
//
// Side effects: writes the globals `desc` and `array`. An array left over from
// a previous call is released first so repeated calls do not leak. On any
// failure a message is written to stderr and `array` is left NULL, so a later
// bind attempt fails cleanly instead of using a half-initialised array.
void setupTexture(int iw, int ih, unsigned char* data, int Bpp) {
    // Release the allocation from an earlier call, if any.
    if (array != NULL) {
        cudaFreeArray(array);
        array = NULL;
    }

    if (data == NULL || iw <= 0 || ih <= 0 || (Bpp != 1 && Bpp != 4)) {
        cerr << "setupTexture: invalid arguments (need data != NULL, iw > 0, "
                "ih > 0, Bpp == 1 or 4)" << endl;
        return;
    }

    if (Bpp == 1) {
        desc = cudaCreateChannelDesc<unsigned char>();
    }
    else {
        desc = cudaCreateChannelDesc<uchar4>();
    }

    cudaError_t err = cudaMallocArray(&array, &desc, iw, ih);
    if (err != cudaSuccess) {
        cerr << "setupTexture: cudaMallocArray failed: "
             << cudaGetErrorString(err) << endl;
        array = NULL;
        return;
    }

    // cudaMemcpyToArray is deprecated; the 2D variant takes the row size in
    // bytes explicitly. The host image is tightly packed, so the source pitch
    // equals the row size.
    const size_t rowBytes = (size_t)iw * (size_t)Bpp * sizeof(unsigned char);
    err = cudaMemcpy2DToArray(array, 0, 0, data, rowBytes, rowBytes,
                              (size_t)ih, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        cerr << "setupTexture: cudaMemcpy2DToArray failed: "
             << cudaGetErrorString(err) << endl;
        cudaFreeArray(array);
        array = NULL;
    }
}

// Combines a 3x3 pixel neighbourhood into a Sobel edge magnitude
// |Horz| + |Vert|, saturated to 255.
//
// The nine arguments are the neighbourhood in row-major order; the names
// suggest upper/middle/lower rows (first letter) and left/middle/right
// columns (second letter). The centre pixel `mm` has zero weight in both
// Sobel kernels and is therefore not read.
//
// Returns the magnitude clamped to [0, 255].
//
// Marked __host__ __device__ so a CPU reference implementation or test can
// reuse exactly the same arithmetic as the kernel.
__host__ __device__ unsigned char ComputeSobel(unsigned char ul, unsigned char um, unsigned char ur,
    unsigned char ml, unsigned char mm, unsigned char mr,
    unsigned char ll, unsigned char lm, unsigned char lr) {

    // Each gradient lies in [-1020, 1020], so int arithmetic cannot overflow.
    const int horz = ur + 2 * mr + lr - ul - 2 * ml - ll;
    const int vert = ul + 2 * um + ur - ll - 2 * lm - lr;

    // A sum of absolute values is never negative, so only the upper bound
    // needs clamping.
    const int magnitude = abs(horz) + abs(vert);
    return (unsigned char)(magnitude > 0xff ? 0xff : magnitude);
}

// Sobel edge-detection kernel that reads the source image through the
// file-scope texture reference `tex` and writes one byte per pixel.
//
// Launch layout: one block per image row (blockIdx.x is the row index); the
// threads of a block stride across that row's columns in steps of blockDim.x,
// so any 1D block size is valid.
//
// Parameters:
//   pSobelOrignal - device pointer to the output image.
//   Pitch         - distance in bytes between consecutive output rows.
//   w, h          - image width and height in pixels.
//   fScale        - accepted for interface compatibility; this kernel does not
//                   apply it.
//
// Neighbours outside the image (column -1 or w, row -1 or h) are fetched with
// whatever addressing mode the texture reference carries; this file never
// sets one, so the runtime default applies.
__global__ void SobelTex(unsigned char*pSobelOrignal, unsigned int Pitch, int w, int h, float fScale) {

    const int row = blockIdx.x;
    // Guard against a grid that has more blocks than image rows.
    if (row >= h) {
        return;
    }

    // Widen before multiplying so the byte offset cannot wrap for large images.
    unsigned char* pSobel = pSobelOrignal + (size_t)row * Pitch;
    const float y = (float)row;

    // The original increment `i + blockDim.x` never modified `i`, so every
    // thread with threadIdx.x < w looped forever; `+=` advances the column.
    for (int i = threadIdx.x; i < w; i += blockDim.x) {
        const float x = (float)i;

        unsigned char pix00 = tex2D(tex, x - 1, y - 1);
        unsigned char pix01 = tex2D(tex, x,     y - 1);
        unsigned char pix02 = tex2D(tex, x + 1, y - 1);
        unsigned char pix10 = tex2D(tex, x - 1, y);
        unsigned char pix11 = tex2D(tex, x,     y);
        unsigned char pix12 = tex2D(tex, x + 1, y);
        unsigned char pix20 = tex2D(tex, x - 1, y + 1);
        unsigned char pix21 = tex2D(tex, x,     y + 1);
        unsigned char pix22 = tex2D(tex, x + 1, y + 1);

        pSobel[i] = ComputeSobel(pix00, pix01, pix02, pix10, pix11, pix12, pix20, pix21, pix22);
    }
}

// Host entry point: runs the Sobel edge filter on an 8-bit single-channel image.
//
// Parameters (both pointers are HOST pointers despite the "GPU" suffix; they
// are used as the source/destination of host<->device copies):
//   pDestGPU - host buffer receiving nWidth*nHeight output bytes.
//   pSrcGPU  - host buffer holding nWidth*nHeight tightly packed input bytes.
//   nWidth, nHeight - image dimensions in pixels (must be positive).
//
// Returns 0 on success and -1 if the arguments are invalid or any CUDA call
// fails (a message is written to stderr in that case).
//
// The source image reaches the kernel only through the texture, so no linear
// device copy of the input is made. The CUDA array created by setupTexture()
// is released before returning so repeated calls do not leak device memory.
extern "C"
double cudaSobelFilter(unsigned char* pDestGPU, unsigned char*pSrcGPU, int nWidth, int nHeight) {

    if (pDestGPU == NULL || pSrcGPU == NULL || nWidth <= 0 || nHeight <= 0) {
        cerr << "cudaSobelFilter: invalid arguments" << endl;
        return -1.0;
    }

    // Compute the byte count in size_t to avoid int overflow on large images.
    const size_t imageBytes = (size_t)nWidth * (size_t)nHeight * sizeof(unsigned char);
    const float imgScale = 1.f;

    unsigned char* d_pDestGPU = NULL;
    bool textureBound = false;

    cudaError_t status = cudaMalloc((void**)&d_pDestGPU, imageBytes);
    if (status == cudaSuccess) {
        status = cudaMemset(d_pDestGPU, 0, imageBytes);
    }

    if (status == cudaSuccess) {
        // setupTexture() returns void, so detect its failures through the
        // runtime's last-error state: clear it first, then read it back.
        (void)cudaGetLastError();
        setupTexture(nWidth, nHeight, pSrcGPU, 1);
        status = cudaGetLastError();
    }

    if (status == cudaSuccess) {
        status = cudaBindTextureToArray(&tex, array, &desc);
        textureBound = (status == cudaSuccess);
    }

    if (status == cudaSuccess) {
        // One block per row; the output is a plain linear buffer, so its
        // pitch equals the width in bytes.
        SobelTex<<<nHeight, 256>>>(d_pDestGPU, nWidth, nWidth, nHeight, imgScale);
        status = cudaGetLastError();   // catches launch-configuration errors
    }

    if (status == cudaSuccess) {
        // Blocking copy: also waits for the kernel and reports its faults.
        status = cudaMemcpy(pDestGPU, d_pDestGPU, imageBytes, cudaMemcpyDeviceToHost);
    }

    // Cleanup runs on both the success and the failure path.
    if (textureBound) {
        cudaUnbindTexture(&tex);
    }
    if (array != NULL) {
        cudaFreeArray(array);
        array = NULL;
    }
    cudaFree(d_pDestGPU);

    if (status != cudaSuccess) {
        cerr << "cudaSobelFilter: CUDA error: " << cudaGetErrorString(status) << endl;
        return -1.0;
    }
    return 0;
}

/////////////////////////////////////////////////////////////
// Demo driver: loads a grayscale image, runs the CUDA Sobel filter on it and
// displays the result in an OpenCV window.
//
// Returns 0 on success, 1 if the image cannot be loaded or the filter fails.
int main() {

    Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);
    // imread returns an empty Mat on failure; without this check a null data
    // pointer and 0x0 dimensions would be handed to the CUDA code.
    if (image_source.empty()) {
        cerr << "Failed to load image: E:\\JZCHEN\\test\\lena.jpg" << endl;
        return 1;
    }

    // Output buffer with the same size and type as the input.
    Mat img;
    image_source.copyTo(img);

    if (cudaSobelFilter(img.data, image_source.data, image_source.cols, image_source.rows) != 0) {
        cerr << "cudaSobelFilter failed" << endl;
        return 1;
    }

    imshow("sobel",img);
    cv::waitKey(0);

    // Keeps the console window open (Windows shell command).
    system("pause");
    return 0;
}

CUDA图像处理 | Sobel算子边缘提取

CUDA图像处理 | Sobel算子边缘提取

CUDA数组与纹理操作

使用纹理存储器处理图像一般包括几个步骤：

Sobel算子原理

GPU代码实现

猜你喜欢