关于CUDA 分配固定内存

可以使用

// Allocate nbytes of page-locked (pinned) host memory; h_a receives the pointer.
// CHECK is defined outside this snippet -- presumably an error-checking wrapper
// around the returned cudaError_t (confirm against its definition).
float *h_a;
    CHECK(cudaMallocHost ((float **)&h_a, nbytes));

来直接分配固定(页锁定,pinned)的主机内存,而不是可分页(pageable)的虚拟内存。好处是主机与设备之间的数据传输速度会更快一些;缺点是固定内存不能被操作系统换出,分配过多会减少系统可用的可分页内存,可能导致内存利用率和整体系统性能下降。

// Pinned-memory round trip: stage data in a page-locked host buffer, copy it
    // to the device, copy it straight back, then release both buffers.
    // CHECK, nbytes and isize come from outside this snippet.
    float *h_a;  // host-side buffer (page-locked)
    float *d_a;  // device-side buffer

    // Host buffer comes from cudaMallocHost, so it must later be released
    // with cudaFreeHost rather than free().
    CHECK(cudaMallocHost ((float **)&h_a, nbytes));

    // Matching device buffer of nbytes bytes.
    CHECK(cudaMalloc((float **)&d_a, nbytes));

    // Clear all nbytes of the host buffer, then set the first isize
    // elements to 100.10f.
    memset(h_a, 0, nbytes);
    for (int idx = 0; idx < isize; ++idx)
    {
        h_a[idx] = 100.10f;
    }

    // Host -> device copy (blocking cudaMemcpy).
    CHECK(cudaMemcpy(d_a, h_a, nbytes, cudaMemcpyHostToDevice));

    // Device -> host copy back into the same host buffer.
    CHECK(cudaMemcpy(h_a, d_a, nbytes, cudaMemcpyDeviceToHost));

    // Release the device allocation first, then the pinned host allocation.
    CHECK(cudaFree(d_a));
    CHECK(cudaFreeHost(h_a));

猜你喜欢

转载自blog.csdn.net/czw0723/article/details/88983702
今日推荐