CUDA(四)——图片处理


#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cpu_bitmap.h"
#include <stdio.h>
#include "cuda.h"
#include <math.h>
// Single-precision approximation of pi, used by kernel() for its sine pattern.
#define PI 3.1415926f
// Edge length passed to CPUBitmap in main(); also used there to size the launch grid.
const int DIM = 1024;
// Fills a 4-byte-per-pixel image with a sine-product pattern, one thread per pixel.
//
// Launch requirements:
//   - 2D grid of 2D blocks; blockDim must be exactly (16, 16) because the
//     shared tile below is statically sized 16x16 and indexed with 15 - threadIdx.
//   - Image width is taken to be blockDim.x * gridDim.x pixels, and ptr must
//     hold 4 bytes for every launched thread (there is no bounds check).
//   - Uses 16*16*sizeof(float) bytes of static shared memory per block.
//
// ptr: device buffer receiving 4 bytes per pixel (presumably RGBA -- confirm
//      against the consumer of the bitmap).
__global__ void kernel(unsigned char* ptr)
{
    const int TILE = 16;  // must match the block edge used at launch

    // Map this thread to its pixel position and linear pixel index.
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int offset = x + y * blockDim.x * gridDim.x;

    // Per-block scratch tile: each thread stores the value for its own pixel.
    __shared__ float shared[TILE][TILE];
    const float period = 128.0f;
    shared[threadIdx.x][threadIdx.y] =
        255 * (sinf(x * 2.0f * PI / period) + 1.0f) *
              (sinf(y * 2.0f * PI / period) + 1.0f) / 4.0f;

    // Block-wide barrier: every thread's write to the tile must be complete
    // before any thread reads another thread's slot below. (The host API
    // cudaThreadSynchronize() is not a device-side barrier.)
    __syncthreads();

    // Write the pixel: channel 1 takes the value computed by the thread at the
    // mirrored position inside the tile; channels 0 and 2 are zero, channel 3 is 255.
    ptr[offset * 4 + 0] = 0;
    ptr[offset * 4 + 1] =
        (unsigned char)shared[(TILE - 1) - threadIdx.x][(TILE - 1) - threadIdx.y];
    ptr[offset * 4 + 2] = 0;
    ptr[offset * 4 + 3] = 255;
}
// Prints a diagnostic to stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Renders the pattern produced by kernel() into a DIM x DIM bitmap and displays it.
// Returns 0 on success, 1 if any CUDA call fails.
int main()
{
    CPUBitmap bitmap(DIM, DIM);
    unsigned char* dev_bitmap = NULL;
    if (!cudaOk(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()), "cudaMalloc"))
        return 1;

    // One thread per pixel: 16x16 threads per block (required by kernel's
    // shared tile), DIM/16 blocks per axis. DIM is a multiple of 16.
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);

    // A launch does not return a status; launch-configuration errors are
    // reported through cudaGetLastError(). Execution errors surface at the
    // blocking cudaMemcpy that follows.
    bool ok = cudaOk(cudaGetLastError(), "kernel launch");
    ok = ok && cudaOk(cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(),
                                 cudaMemcpyDeviceToHost),
                      "cudaMemcpy");

    // The pixels now live in host memory, so the device buffer can be released
    // before displaying. NOTE(review): display_and_exit() may not return (name
    // suggests it exits) -- freeing first is safe either way.
    cudaFree(dev_bitmap);
    if (!ok)
        return 1;

    bitmap.display_and_exit();
    return 0;
}

猜你喜欢

转载自blog.csdn.net/u014413083/article/details/53228215