版权声明:本文为博主原创文章,转载请加入原文链接,谢谢。 https://blog.csdn.net/shawncheer/article/details/77929728
- 前言:
-
1、参考: NVIDIA official tutorial
- 2、使用 nvcc 编译程序时加上 -g -G 参数,以保存调试信息,之后可以用 cuda-gdb 进行单步调试。
程序:
#include <iostream>
#include <math.h>
// CUDA kernel: adds the elements of two arrays on the GPU, in place.
//
// For every i in [0, n) computes y[i] = x[i] + y[i].
//
// Parameters:
//   n - number of elements to process; n <= 0 performs no work.
//   x - input array, read only by this kernel; must hold at least n floats.
//   y - input/output array; must hold at least n floats.
//
// Launch layout: 1D grid of 1D blocks. The kernel uses a grid-stride loop,
// so any grid/block size produces the same result -- each thread starts at
// its global index and advances by the total number of launched threads.
__global__
void add(int n, float *x, float *y)
{
    // Index math is done in 64 bits: blockIdx.x * blockDim.x is evaluated as
    // 32-bit unsigned and then narrowed to int in the naive form, which can
    // wrap or go negative for oversized grids (passing the i < n test with an
    // out-of-range index), and i += stride can overflow int when n is close
    // to INT_MAX.
    const long long first =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long step =
        static_cast<long long>(blockDim.x) * gridDim.x;

    for (long long i = first; i < n; i += step)
        y[i] = x[i] + y[i];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Adds two 1M-element float arrays on the GPU using unified (managed) memory
// and prints the largest deviation of the result from the expected 3.0f.
//
// Returns 0 when every CUDA call and the kernel succeeded, 1 otherwise.
int main(void)
{
    const int N = 1 << 20; // 1M elements.

    // Allocate unified memory -- accessible from CPU or GPU.
    // The pointers start out null so that the cleanup at the end is valid even
    // when an allocation fails (cudaFree on a null pointer does nothing).
    float *x = 0;
    float *y = 0;
    bool ok = cudaSucceeded(cudaMallocManaged(&x, N * sizeof(float)),
                            "cudaMallocManaged(x)")
           && cudaSucceeded(cudaMallocManaged(&y, N * sizeof(float)),
                            "cudaMallocManaged(y)");

    if (ok) {
        // Initialize x and y arrays on the host.
        for (int i = 0; i < N; i++) {
            x[i] = 1.0f;
            y[i] = 2.0f;
        }

        // One thread per element, rounded up to a whole number of blocks.
        const int blockSize = 256;
        const int numBlocks = (N + blockSize - 1) / blockSize;

        // Run the kernel on 1M elements on the GPU.
        add<<<numBlocks, blockSize>>>(N, x, y);

        // A launch does not return an error itself: cudaGetLastError reports
        // bad launch configurations, and the synchronize call both waits for
        // the GPU to finish and reports faults raised while the kernel ran.
        ok = cudaSucceeded(cudaGetLastError(), "add kernel launch")
          && cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (ok) {
        // Check for errors (all values should be 3.0f).
        float maxError = 0.0f;
        for (int i = 0; i < N; i++)
            maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
        std::cout << "Max error: " << maxError << std::endl;
    }

    // Free memory (always attempted, even after an earlier failure).
    if (!cudaSucceeded(cudaFree(x), "cudaFree(x)"))
        ok = false;
    if (!cudaSucceeded(cudaFree(y), "cudaFree(y)"))
        ok = false;

    return ok ? 0 : 1;
}