cuda编程---第一个cuda程序

版权声明:本文为博主原创文章,转载请注明原文链接,谢谢。 https://blog.csdn.net/shawncheer/article/details/77929728
前言:

1、参考: NVIDIA official tutorial

2、使用 nvcc 编译程序时加上 -g -G 参数,以保留调试信息,之后可以用 cuda-gdb 进行单步调试。

程序:

#include <iostream>
#include <math.h>

// CUDA kernel: element-wise in-place addition, y[i] = x[i] + y[i] for i in [0, n).
//
// Parameters:
//   n - number of elements to process; values <= 0 make the kernel a no-op.
//   x - input array with at least n elements, readable from the device.
//   y - input/output array with at least n elements, read and written on the device.
//
// Launch layout: only the .x components of blockIdx/blockDim/gridDim/threadIdx
// are used, so launch with a 1D grid of 1D blocks. Each thread starts at its
// global index and advances by the total number of launched threads, so any
// grid size (including <<<1,1>>>) covers all n elements.
// No shared memory is used.
__global__
void add(int n,float *x, float *y)
{
    // Guard first so the size_t conversion below never sees a negative value.
    if (n <= 0)
        return;

    // Do all index arithmetic in size_t: with int, blockDim.x*gridDim.x or
    // i += stride can exceed INT_MAX for large n / large grids, producing a
    // negative index that would still satisfy i < n.
    const size_t count  = static_cast<size_t>(n);
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
    const size_t first  = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t i = first; i < count; i += stride)
        y[i] = x[i] + y[i];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
// `what` is a short label naming the call that produced `status`.
static bool checkCuda(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Adds two 1M-element float arrays on the GPU and prints the maximum
// deviation from the expected value 3.0f.
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main(void){
    const int N = 1 << 20; // 1M elements.
    const size_t bytes = static_cast<size_t>(N) * sizeof(float);

    // Allocate Unified Memory -- accessible from CPU or GPU
    // (this replaces plain host allocations such as new float[N]).
    float *x = nullptr;
    float *y = nullptr;
    if (!checkCuda(cudaMallocManaged(&x, bytes), "cudaMallocManaged(x)"))
        return 1;
    if (!checkCuda(cudaMallocManaged(&y, bytes), "cudaMallocManaged(y)")) {
        cudaFree(x);
        return 1;
    }

    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    // One thread per element, rounded up to a whole number of blocks.
    const int blockSize = 256;
    const int numBlocks = (N + blockSize - 1) / blockSize;

    // Run the kernel on 1M elements on the GPU.
    add<<<numBlocks,blockSize>>>(N, x, y);

    // A kernel launch returns no status itself; launch-configuration errors
    // are retrieved with cudaGetLastError().
    bool ok = checkCuda(cudaGetLastError(), "add kernel launch");

    // Wait for the GPU to finish before accessing the results on the host;
    // errors raised while the kernel was running are reported here.
    ok = ok && checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    if (ok) {
        // Check for errors (all values should be 3.0f).
        float maxError = 0.0f;
        for (int i = 0; i < N; i++)
            maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
        std::cout << "Max error: " << maxError << std::endl;
    }

    // Free memory on every path, including after a failed launch.
    // Both frees are attempted even if the first one fails.
    const bool freedX = checkCuda(cudaFree(x), "cudaFree(x)");
    const bool freedY = checkCuda(cudaFree(y), "cudaFree(y)");
    ok = ok && freedX && freedY;

    return ok ? 0 : 1;
}

猜你喜欢

转载自blog.csdn.net/shawncheer/article/details/77929728