#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <vector>
using namespace std;
/**
 * Parallel linear search for a single value (despite the name, no bisection
 * is performed and the input does not need to be sorted).
 *
 * Launch layout: 1-D grid of 1-D blocks; each thread inspects at most one
 * element, so gridDim.x * blockDim.x must be >= N for the whole array to be
 * examined. No shared memory is used.
 *
 * @param d    device pointer to N ints to search (only read)
 * @param dval device pointer to the single int being searched for (only read)
 * @param pos  device pointer to the result slot. The caller must initialise
 *             it to a negative "not found" sentinel (e.g. -1) before the
 *             launch. On completion it holds the smallest index i with
 *             d[i] == *dval, or is left untouched if there is no match.
 * @param N    number of elements in d
 */
void __global__ bisearch(int *d, int *dval, int *pos, const int N);
void __global__ bisearch(int *d, int *dval, int *pos, const int N)
{
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // The bounds test comes first so threads past the end never touch d[].
    if (tid < N && d[tid] == *dval)
    {
        // Several threads may match when d contains duplicates; a plain store
        // would let an arbitrary one win. Comparing as unsigned makes any
        // negative sentinel (-1 == 0xFFFFFFFF) larger than every real index,
        // so the atomic minimum deterministically keeps the first match.
        atomicMin(reinterpret_cast<unsigned int *>(pos),
                  static_cast<unsigned int>(tid));
    }
}
/**
 * Terminates the program with a diagnostic if a CUDA runtime call failed.
 *
 * @param status return code of the CUDA call being checked
 * @param what   short description of the call, used in the error message
 */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Usage: <program> N val
 *
 * Builds the host array {0, 1, ..., N-1}, copies it to the device, launches
 * bisearch to look for val, and prints "Ans: <index>" (-1 when val is not
 * present).
 *
 * @return 0 on success, 1 on bad command-line arguments; CUDA failures
 *         terminate the process with EXIT_FAILURE.
 */
int main(int argc, char *arcv[])
{
    // Both positional arguments are required; bail out before touching arcv[1..2].
    if (argc < 3)
    {
        fprintf(stderr, "usage: %s N val\n", argc > 0 ? arcv[0] : "bisearch");
        return 1;
    }
    const int N = atoi(arcv[1]);
    const int val = atoi(arcv[2]);
    // A non-positive N would request an empty allocation and a zero-block launch.
    if (N <= 0)
    {
        fprintf(stderr, "N must be a positive integer\n");
        return 1;
    }

    // host memory
    int hpos = -1; // "not found" sentinel, overwritten by the kernel on a match
    // Heap-backed storage instead of a stack VLA: valid C++ and safe for any N.
    std::vector<int> vec(N);
    for (int i = 0; i < N; i++)
    {
        vec[i] = i;
    }
    const int *h = &vec[0];
    const size_t bytes = sizeof(int) * static_cast<size_t>(N);

    // device memory
    int *d = NULL;
    int *dval = NULL;
    int *dpos = NULL;
    checkCuda(cudaMalloc(&d, bytes), "cudaMalloc(d)");
    checkCuda(cudaMalloc(&dval, sizeof(int)), "cudaMalloc(dval)");
    checkCuda(cudaMalloc(&dpos, sizeof(int)), "cudaMalloc(dpos)");

    // host --> device
    checkCuda(cudaMemcpy(d, h, bytes, cudaMemcpyHostToDevice), "copy of input array");
    checkCuda(cudaMemcpy(dval, &val, sizeof(int), cudaMemcpyHostToDevice), "copy of search value");
    checkCuda(cudaMemcpy(dpos, &hpos, sizeof(int), cudaMemcpyHostToDevice), "copy of result sentinel");

    // threads and blocks: one thread per element, rounded up to whole blocks.
    // (N - 1) / block_size + 1 is the ceiling division without the overflow
    // that N + block_size - 1 would hit for N close to INT_MAX.
    const int block_size = 128;
    const int grid_size = (N - 1) / block_size + 1;
    bisearch<<<grid_size, block_size>>>(d, dval, dpos, N);
    checkCuda(cudaGetLastError(), "bisearch launch");          // bad launch configuration
    checkCuda(cudaDeviceSynchronize(), "bisearch execution");  // faults inside the kernel

    // device --> host
    checkCuda(cudaMemcpy(&hpos, dpos, sizeof(int), cudaMemcpyDeviceToHost), "copy of result");
    printf("Ans: %d\n", hpos);

    // free memory
    checkCuda(cudaFree(d), "cudaFree(d)");
    checkCuda(cudaFree(dval), "cudaFree(dval)");
    checkCuda(cudaFree(dpos), "cudaFree(dpos)");
    return 0;
}
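// Summary:
// Still not comfortable with std::vector. The vector type lives in namespace
// std, so it has to be written as std::vector or be preceded by
// `using namespace std;`.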