CUDA实现高效查找--审核未通过?

#include <stdio.h>
#include <stdlib.h>

#include <iostream>
#include <vector>

using namespace std;

// Forward declaration of the search kernel defined directly below.
void __global__ bisearch(int *d, int *dval, int *pos, const int N);

/*
 * Parallel linear search for a single value.
 *
 * Despite its name, this kernel performs no bisection: each thread compares
 * exactly one element of `d` against the target value.
 *
 * Launch layout: one-dimensional grid and block providing at least N threads
 * in total; threads whose global index is >= N do nothing.
 *
 * d    - device array of N ints to search
 * dval - device pointer to the single int being searched for
 * pos  - device pointer to the result; the caller must store -1 there before
 *        the launch. It is left untouched when no element matches, otherwise
 *        it receives the smallest index i with d[i] == *dval.
 * N    - number of elements in d
 */
void __global__ bisearch(int *d, int *dval, int *pos, const int N)
{
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= N)
    {
        return;
    }

    if (d[tid] == *dval)
    {
        // Several threads may match when the value occurs more than once.
        // A plain store would let an arbitrary thread win; an atomic minimum
        // makes the result deterministic. Viewed as unsigned, the -1 sentinel
        // is 0xFFFFFFFF, which is larger than every valid index, so the first
        // match always replaces it.
        atomicMin(reinterpret_cast<unsigned int *>(pos),
                  static_cast<unsigned int>(tid));
    }
}


// Prints a diagnostic and terminates the program when a CUDA runtime call
// reports anything other than cudaSuccess.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Command-line driver: bisearch-demo <N> <value>
 *
 * Builds an N-element host array whose first three entries are 0, 1, 2 (the
 * rest are zero), copies it to the device, launches the bisearch kernel to
 * look for <value>, and prints the index found, or -1 when the value is
 * absent.
 *
 * Returns 0 on success and 1 on invalid command-line input; CUDA failures
 * terminate the process through checkCuda.
 */
int main(int argc, char *arcv[])
{
    if (argc < 3)
    {
        // Stop here: reading arcv[1] / arcv[2] without them is undefined.
        fprintf(stderr, "usage: %s <N> <value>\n",
                argc > 0 ? arcv[0] : "bisearch");
        return 1;
    }

    const int N = atoi(arcv[1]);
    int val = atoi(arcv[2]);

    if (N <= 0)
    {
        // A non-positive size would produce a bogus allocation size and an
        // empty (invalid) launch configuration.
        fprintf(stderr, "N must be a positive integer, got \"%s\"\n", arcv[1]);
        return 1;
    }

    // host memory
    int hpos = -1; // sentinel meaning "not found"; the kernel relies on it
    vector<int> vec(N, 0);
    for (int i = 0; i < N && i < 3; ++i)
    {
        vec[i] = i; // same sample data as before: 0, 1, 2, then zeros
    }
    int *h = &vec[0];

    const size_t bytes = sizeof(int) * static_cast<size_t>(N);

    // device memory
    int *d = NULL;
    int *dval = NULL;
    int *dpos = NULL;
    checkCuda(cudaMalloc(&d, bytes), "cudaMalloc(d)");
    checkCuda(cudaMalloc(&dval, sizeof(int)), "cudaMalloc(dval)");
    checkCuda(cudaMalloc(&dpos, sizeof(int)), "cudaMalloc(dpos)");

    // host --> device
    checkCuda(cudaMemcpy(d, h, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(d)");
    checkCuda(cudaMemcpy(dval, &val, sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(dval)");
    checkCuda(cudaMemcpy(dpos, &hpos, sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(dpos)");

    // threads and blocks
    const int block_size = 128;
    // Ceiling division written so that it cannot overflow for large N.
    const int grid_size = (N - 1) / block_size + 1;

    bisearch<<<grid_size, block_size>>>(d, dval, dpos, N);
    // A kernel launch returns nothing; configuration errors surface here.
    checkCuda(cudaGetLastError(), "bisearch launch");

    // device --> host; this blocking copy also waits for the kernel to finish
    // and reports any error raised while it was running.
    checkCuda(cudaMemcpy(&hpos, dpos, sizeof(int), cudaMemcpyDeviceToHost),
              "cudaMemcpy(hpos)");

    printf("Ans: %d\n", hpos);

    // free memory
    checkCuda(cudaFree(d), "cudaFree(d)");
    checkCuda(cudaFree(dval), "cudaFree(dval)");
    checkCuda(cudaFree(dpos), "cudaFree(dpos)");

    return 0;
}

总结

  对 vector 的用法还不太熟悉;使用 vector 需要先 `#include <vector>`,并且要么写 `using namespace std;`,要么直接写成 `std::vector`。

猜你喜欢

转载自blog.csdn.net/wulele2/article/details/118944799