CUDA图像处理 | Sobel算子边缘提取

CUDA图像处理 | Sobel算子边缘提取

CUDA数组与纹理操作

  • CUDA数组与设备内存从相同的内存池中分配,但前者拥有一个细节不公开的布局,该布局针对2D和3D局部性做了优化。
  • CUDA数组不消耗CUDA地址空间
  • CUDA数组只能驻留在设备内存中,GPU在总线上传输数据时可以在这两种形式(CUDA数组与线性设备内存)之间转换。
  • 纹理存储器是一种只读存储器。
  • kernel 函数通过纹理参照系从纹理内存中读取数据(Texture Fetching,纹理拾取)。
  • 纹理参照系必须通过 cudaBindTexture 或者 cudaBindTextureToArray 进行纹理绑定(Texture Binding)。
  • 用 cudaUnbindTexture 来解除绑定。

使用纹理存储器处理图像一般包括几个步骤:

  1. 声明纹理参照系:

    texture<Type, Dim, ReadMode> tex;
  2. 声明CUDA数组,设置绑定参数,初始化CUDA数组:

    cudaArray *array = NULL;
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
    cudaMallocArray(&array,&desc,nWidth,nHeight);
    cudaMemcpyToArray(array,0,0,h_pData,sizeof(Pixel)*nWidth*nHeight,cudaMemcpyHostToDevice);
  3. 纹理绑定:

    cudaBindTextureToArray(tex,array);
  4. kernel纹理拾取与其他图像处理操作

    d_pData[y*width+x] = tex2D(tex,tu,tv);
    // ...
  5. 解除绑定:

    cudaUnbindTexture(tex);
  6. 释放CUDA数组

    cudaFreeArray(array);

Sobel算子原理

  • Sobel 算子 主要作用:边缘检测。它是一种离散性差分算子。

  • 该算子包含横向和纵向两个 3×3 卷积核,将之与图像做平面卷积,即可分别得到横向和纵向的亮度差分近似值。

    /*
    1: Gx // 横向
    -1  0   +1
    -2  0   +2
    -1  0   +1
    
    2: Gy // 纵向
    +1  +2  +1 
    0   0   0
    -1  -2  -1 
    */

    G = sqrt(Gx^2+Gy^2)

    |G| ≈ |Gx| + |Gy|  (常用的近似形式,下文代码即采用这种计算方式)

  • 如果梯度 G 大于某一个阈值,则认为该点 (x,y) 为边缘点。

    扫描二维码关注公众号,回复: 3241642 查看本文章
  • 梯度方向为:

    θ = arctan(Gy/Gx)

GPU代码实现

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>  

// cuda
#include "cuda.h"   
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h" // 此头文件包含 __syncthreads ()函数

// cuda纹理
#ifndef __CUDACC__  
#define __CUDACC__  
#include "cuda_texture_types.h"  
#include "texture_indirect_functions.h"
#include "texture_fetch_functions.h"
#endif
#include <iostream>

using namespace std;
using namespace cv;

////////////////////////////////////////////////////////

// 2D texture reference with unsigned char texels; SobelTex samples it via tex2D
// and cudaSobelFilter binds/unbinds it around the kernel launch.
texture<unsigned char, 2> tex;
// CUDA array that backs the texture; allocated and filled by setupTexture.
cudaArray *array;
// Channel format descriptor chosen by setupTexture and reused when binding.
cudaChannelFormatDesc desc;

// Texture setup: selects the channel format, allocates the file-level CUDA
// array and uploads the host image into it.
//
//   iw, ih : width and height handed to cudaMallocArray.
//   data   : host buffer copied with cudaMemcpyHostToDevice.
//   Bpp    : 1 selects an unsigned char channel descriptor, any other value
//            selects uchar4; it also scales the number of bytes uploaded.
//
// The results are stored in the globals `desc` and `array`. Return codes of
// the CUDA calls are not inspected here.
// NOTE(review): the file-level texture is declared for unsigned char texels,
// so the uchar4 path looks unused by this file - confirm before relying on it.
void setupTexture(int iw, int ih, unsigned char* data, int Bpp) {
    desc = (Bpp == 1) ? cudaCreateChannelDesc<unsigned char>()
                      : cudaCreateChannelDesc<uchar4>();

    cudaMallocArray(&array, &desc, iw, ih);

    // Same expression as before, just named: Bpp bytes per pixel, ih rows of iw pixels.
    const size_t uploadBytes = Bpp * sizeof(unsigned char) * ih * iw;
    cudaMemcpyToArray(array, 0, 0, data, uploadBytes, cudaMemcpyHostToDevice);
}

// Computes the Sobel response for one pixel from its 3x3 neighbourhood.
//
// Parameters are the nine neighbourhood samples, named by row (u = upper,
// m = middle, l = lower) and column (l = left, m = middle, r = right).
// `mm` (the centre pixel) has weight zero in both kernels and is ignored.
//
// Returns |Gx| + |Gy| saturated to the range [0, 255], where
//   Gx = (ur + 2*mr + lr) - (ul + 2*ml + ll)
//   Gy = (ul + 2*um + ur) - (ll + 2*lm + lr)
//
// Marked __host__ __device__ so the same routine can serve as a CPU reference;
// existing device-side callers are unaffected.
__host__ __device__ unsigned char ComputeSobel(unsigned char ul, unsigned char um, unsigned char ur,
    unsigned char ml, unsigned char mm, unsigned char mr,
    unsigned char ll, unsigned char lm, unsigned char lr) {

    (void)mm; // centre sample carries zero weight in both kernels

    // The operands promote to int, so keep the intermediates as int
    // (each gradient lies in [-1020, 1020]).
    const int horz = ur + 2 * mr + lr - ul - 2 * ml - ll;
    const int vert = ul + 2 * um + ur - ll - 2 * lm - lr;

    // A sum of absolute values is never negative, so only the upper bound
    // needs clamping.
    const int magnitude = (horz < 0 ? -horz : horz) + (vert < 0 ? -vert : vert);

    return (unsigned char)(magnitude > 0xff ? 0xff : magnitude);
}

// Sobel edge-detection kernel that samples the source image through the
// file-level 2D texture `tex` and writes one output byte per pixel.
//
// Launch layout: one block per image row (blockIdx.x is the row index); the
// threads of a block walk the columns of that row in steps of blockDim.x.
//
//   pSobelOrignal : base of the output image in device memory.
//   Pitch         : distance in bytes between consecutive output rows.
//   w, h          : image width and height in pixels.
//   fScale        : currently unused by the kernel body.
//
// Neighbour fetches one pixel outside the image are resolved by the texture's
// address mode, which this file leaves at its default.
__global__ void SobelTex(unsigned char*pSobelOrignal, unsigned int Pitch, int w, int h, float fScale) {

    // Ignore surplus blocks if the grid is taller than the image.
    const int row = (int)blockIdx.x;
    if (row >= h) {
        return;
    }

    // Widen before multiplying so large images cannot overflow 32-bit arithmetic.
    unsigned char* pSobel = (unsigned char*)(((char *)pSobelOrignal) + (size_t)blockIdx.x * Pitch);

    const float y = (float)row;

    // The step must be `+=`: the previous `i + blockDim.x` never advanced `i`,
    // so the loop never terminated.
    for (int i = threadIdx.x; i < w; i += blockDim.x) {

        const float x = (float)i;

        unsigned char pix00 = tex2D(tex, x - 1, y - 1);
        unsigned char pix01 = tex2D(tex, x,     y - 1);
        unsigned char pix02 = tex2D(tex, x + 1, y - 1);
        unsigned char pix10 = tex2D(tex, x - 1, y);
        unsigned char pix11 = tex2D(tex, x,     y);
        unsigned char pix12 = tex2D(tex, x + 1, y);
        unsigned char pix20 = tex2D(tex, x - 1, y + 1);
        unsigned char pix21 = tex2D(tex, x,     y + 1);
        unsigned char pix22 = tex2D(tex, x + 1, y + 1);

        pSobel[i] = ComputeSobel(pix00, pix01, pix02, pix10, pix11, pix12, pix20, pix21, pix22);
    }
}

// Core host routine for Sobel edge extraction.
//
// Uploads a single-channel 8-bit host image into a CUDA array, binds it to the
// file-level texture `tex`, runs SobelTex with one block per row, and copies
// the result back to the host.
//
//   pDestGPU : host buffer receiving nWidth*nHeight output bytes.
//   pSrcGPU  : host buffer holding nWidth*nHeight input bytes.
//   nWidth   : image width in pixels  (must be > 0).
//   nHeight  : image height in pixels (must be > 0).
//
// Returns 0 on success and -1 if the arguments are invalid or any CUDA call
// fails (the CUDA error string is written to stderr).
extern "C"
double cudaSobelFilter(unsigned char* pDestGPU, unsigned char*pSrcGPU, int nWidth, int nHeight) {

    // Reject inputs that would cause null dereferences or an empty launch.
    if (pDestGPU == NULL || pSrcGPU == NULL || nWidth <= 0 || nHeight <= 0) {
        return -1;
    }

    // Widen before multiplying so large images cannot overflow int.
    const size_t nBytes = (size_t)nWidth * (size_t)nHeight * sizeof(unsigned char);

    unsigned char* d_pDestGPU = NULL;
    bool bTexBound = false;

    // Start from a known state so the cleanup below never frees a stale handle.
    array = NULL;

    cudaError_t err = cudaMalloc((void**)&d_pDestGPU, nBytes);
    if (err == cudaSuccess) {
        err = cudaMemset(d_pDestGPU, 0, nBytes);
    }

    if (err == cudaSuccess) {
        // setupTexture returns void, so clear any stale error first and then
        // read back whatever its CUDA calls reported.
        (void)cudaGetLastError();
        setupTexture(nWidth, nHeight, pSrcGPU, 1);
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        err = cudaBindTextureToArray(&tex, array, &desc);
        bTexBound = (err == cudaSuccess);
    }

    if (err == cudaSuccess) {
        // Sobel edge detection: one block per row, 256 threads striding over columns.
        // The output buffer comes from plain cudaMalloc, so its pitch equals the width.
        float imgScale = 1.f;
        SobelTex<<<nHeight, 256>>>(d_pDestGPU, nWidth, nWidth, nHeight, imgScale);
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        // Blocking copy; also surfaces errors raised while the kernel was running.
        err = cudaMemcpy(pDestGPU, d_pDestGPU, nBytes, cudaMemcpyDeviceToHost);
    }

    // Release everything on every path.
    if (bTexBound) {
        cudaUnbindTexture(&tex);
    }
    if (array != NULL) {
        cudaFreeArray(array);
        array = NULL;
    }
    cudaFree(d_pDestGPU);

    if (err != cudaSuccess) {
        std::cerr << "cudaSobelFilter failed: " << cudaGetErrorString(err) << std::endl;
        return -1;
    }
    return 0;
}

/////////////////////////////////////////////////////////////
// Demo entry point: loads a grayscale image, runs the CUDA Sobel filter on it
// and shows the result in a window.
// Returns 0 on success, 1 if the image cannot be loaded or the filter fails.
int main() {

    Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);

    // imread returns an empty Mat when the file is missing or unreadable;
    // passing its null data pointer on would crash.
    if (image_source.empty()) {
        cerr << "Failed to load input image" << endl;
        return 1;
    }

    // The filter treats the pixels as one tightly packed buffer.
    if (!image_source.isContinuous()) {
        image_source = image_source.clone();
    }

    // Output buffer with the same size and type as the input.
    Mat img;
    image_source.copyTo(img);

    if (cudaSobelFilter(img.data, image_source.data, image_source.cols, image_source.rows) != 0) {
        cerr << "Sobel filtering failed" << endl;
        return 1;
    }

    imshow("sobel",img);
    cv::waitKey(0);

    system("pause");
    return 0;
}

猜你喜欢

转载自blog.csdn.net/qjh5606/article/details/82620828