cuda C使用boost库

本次实验的前提是cuda环境和boost环境都搭建好了。

使用nvcc编译含有boost库的代码。

nvcc cuda_Array.cu -o Array -I. -arch=sm_35

就可以了。

以下是我的代码，前提是当前目录下一定要有data.txt文件，要不然会出错。

#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <boost/timer.hpp>
#include <boost/progress.hpp>
#include <cuda_runtime.h>
#include <cmath>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h> 
#include <stdlib.h>

using namespace std;

// Element-wise kernel: for every index i in [0, n) writes
//     d_c[i] = 20 * log10(d_a[i] / d_b[i])
//
// d_a, d_b : input arrays, read at indices [0, n)
// d_c      : output array, written at indices [0, n)
// n        : number of elements to process
//
// Uses a grid-stride loop over a 1D launch: each thread starts at its global
// index and advances by the total number of launched threads, so any
// <<<blocks, threads>>> configuration covers all n elements.
__global__ void SNR_gpu(double* d_a,double* d_b,double* d_c,int n)
{
	// Total number of threads in the grid; this is the per-iteration step.
	const int stride = gridDim.x * blockDim.x;

	for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += stride)
	{
		d_c[idx] = 20 * log10(d_a[idx] / d_b[idx]);
	}
}

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
// `what` names the failing operation in the message.
static void checkCuda(cudaError_t status, const char* what)
{
	if (status != cudaSuccess)
	{
		std::cerr << "CUDA error (" << what << "): "
		          << cudaGetErrorString(status) << std::endl;
		exit(EXIT_FAILURE);
	}
}

// Reads a row x line matrix of doubles from "data.txt" in the working
// directory, then computes 20 * log10(V[0][i] / V[1][i]) on the GPU for the
// first two rows and copies the result back to the host.
//
// Returns 0 on success, 1 if the input file is missing or too short.
// Any CUDA failure terminates the process via checkCuda().
int main()
{
	boost::timer t;                     // wall-clock timer for the whole run
	const int row = 40;                 // number of rows in data.txt
	const int line = 8064;              // number of columns per row
	const int threads = 1024;           // threads per block
	const int blocks  = 8;              // blocks in the grid
	const int n = line;                 // elements processed by the kernel (one row)
	const size_t Nbytes = n * sizeof(double);

	// Host storage. std::vector releases itself on every exit path, so no
	// manual free() bookkeeping is needed.
	vector<vector<double> > V(row, vector<double>(line));
	vector<double> h_c(n);

	cout << "提取数据中..." << endl;

	// NOTE: the file must only be opened for reading. Opening it with an
	// ofstream as well would truncate it and make every extraction fail.
	ifstream data("data.txt");
	if (!data)
	{
		cerr << "error: cannot open data.txt" << endl;
		return 1;
	}

	boost::progress_display pd(row);    // one tick per row read
	for (int row_count = 0; row_count < row; row_count++)
	{
		for (int line_count = 0; line_count < line; line_count++)
		{
			if (!(data >> V[row_count][line_count]))
			{
				cerr << endl << "error: data.txt ended or is malformed at row "
				     << row_count << ", column " << line_count << endl;
				return 1;
			}
		}
		++pd;
	}
	data.close();

	cout << "拷贝数据中..."<<endl;
	double* d_a = NULL;
	double* d_b = NULL;
	double* d_c = NULL;
	checkCuda(cudaMalloc((void**)&d_a, Nbytes), "cudaMalloc d_a");
	checkCuda(cudaMalloc((void**)&d_b, Nbytes), "cudaMalloc d_b");
	checkCuda(cudaMalloc((void**)&d_c, Nbytes), "cudaMalloc d_c");

	// Only the first two rows are used as kernel inputs.
	checkCuda(cudaMemcpy(d_a, &V[0][0], Nbytes, cudaMemcpyHostToDevice), "copy row 0 to device");
	checkCuda(cudaMemcpy(d_b, &V[1][0], Nbytes, cudaMemcpyHostToDevice), "copy row 1 to device");

	cout << "GPU运算中..." << endl;
	SNR_gpu<<<blocks,threads>>>(d_a,d_b,d_c,n);
	// A launch reports configuration errors only through cudaGetLastError();
	// execution errors surface at the next synchronizing call.
	checkCuda(cudaGetLastError(), "SNR_gpu launch");
	checkCuda(cudaDeviceSynchronize(), "SNR_gpu execution");
	cout << "运算完成..."<< endl;

	checkCuda(cudaMemcpy(&h_c[0], d_c, Nbytes, cudaMemcpyDeviceToHost), "copy result to host");

	checkCuda(cudaFree(d_a), "cudaFree d_a");
	checkCuda(cudaFree(d_b), "cudaFree d_b");
	checkCuda(cudaFree(d_c), "cudaFree d_c");

	cout << "数据输出..."<<endl;
/*	for(int i = 0; i < n;i++)
	{
		cout << "h_c[" << i << "]=" << h_c[i]<< endl;
	}
*/
	std::cout << "now time elapsed:" << t.elapsed() << "s" << std::endl;
	return 0;
}

这是效果图。

猜你喜欢