OpenCL 的 Hello World：将输入和输出部分分开

这个版本把输入数据和输出数据分开处理。

test.cl

// Element-wise addition: each work-item handles the element whose index is
// its global id in dimension 0 and stores a[i] + b[i] into result[i].
__kernel void hello_kernel(__global const float* a,
                           __global const float* b,
                           __global float* result)
{
    const int idx = get_global_id(0);
    const float lhs = a[idx];
    const float rhs = b[idx];
    result[idx] = lhs + rhs;
}

头文件 myOpenCL.h

#pragma once
#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#ifdef __APPLE__
#include <opencl/cl.h>
#else
#include <CL/cl.h>
#endif

// Wrapper around a single OpenCL kernel that works on float arrays.
// The constructor only stores the configuration and host data; process()
// creates the OpenCL objects and runs the kernel, and the destructor
// releases the command queue, kernel, program and context.
class myOpenCL
{
public:
    // strOpenCLFileName    : path of the kernel source file read by createProgram().
    // strOpenCLKernalEntry : name of the kernel function passed to clCreateKernel().
    // sizeOfInputType      : number of input arrays (one device buffer each).
    // sizeOfInputObject    : number of values in each input array; also used as
    //                        the global work size.
    // sizeOfEachInputUnit  : size in bytes of one input value.
    // inputVec2            : host data, one inner vector per input array.
    // sizeOfOutputType / sizeOfOutputObject / sizeOfEachOutputUnit / outputVec2 :
    //                        the same four quantities for the output side.
    myOpenCL(std::string strOpenCLFileName,
        std::string strOpenCLKernalEntry,
        int sizeOfInputType,
        int sizeOfInputObject,
        int sizeOfEachInputUnit,
        std::vector<std::vector<float>> inputVec2,
        int sizeOfOutputType,
        int sizeOfOutputObject,
        int sizeOfEachOutputUnit,
        std::vector<std::vector<float>> outputVec2);
    ~myOpenCL();

    // The destructor releases the raw OpenCL handles, so a copied object
    // would release the same handles a second time. Copying is disabled.
    myOpenCL(const myOpenCL&) = delete;
    myOpenCL& operator=(const myOpenCL&) = delete;

public:
    // Runs the whole pipeline: context, command queue, program, kernel,
    // buffers, kernel launch and read-back.
    void process();
    // Returns a copy of the stored output vectors.
    std::vector<std::vector<float>> getResult();
    // Creates a context on the first platform, trying a GPU device type
    // first and a CPU device type second.
    cl_context createContext();
    // Picks the first device of `context`, stores it in `device` and
    // creates a command queue on it.
    cl_command_queue createCommandQueue(cl_context context, cl_device_id & device);
    // Loads the kernel source from `fileName`, then creates and builds a
    // program object from it.
    cl_program createProgram( const char* fileName);
    // Binds the memory object `theData` to kernel argument number `id`.
    cl_int setKernelParameter(int id, cl_mem theData);
    // Enqueues the kernel as a one-dimensional range on the command queue.
    cl_int setKernalQueue(size_t* globalWorkSize, size_t* localWorkSize);
    // Blocking read of one output array from `memObject` into `result`.
    cl_int readResult(cl_mem memObject, float * result);

public:
    // Returns the stored context handle (NULL until process() has set it).
    cl_context getContext();
private:
    std::string _strOpenCLFileName;     // kernel source file name
    std::string _strOpenCLKernalEntry;  // kernel entry-point name
    cl_context _theContext;             // OpenCL context
    cl_command_queue _commandQueue;     // command queue
    cl_device_id _device;               // selected device
    cl_program _theProgram;             // program object
    cl_kernel _theKernel;               // kernel object
    int _sizeOfInputType;               // number of input arrays
    int _sizeOfInputObject;             // number of values per input array
    int _sizeOfEachInputUnit;           // size in bytes of one input value
    std::vector<std::vector<float>> _inputVec2;   // host input data
    int _sizeOfOutputType;              // number of output arrays
    int _sizeOfOutputObject;            // number of values per output array
    int _sizeOfEachOutputUnit;          // size in bytes of one output value
    std::vector<std::vector<float>> _outputVec2;  // host output data

};

实现文件

#include "myOpenCL.h"
// Stores the kernel file name, the kernel entry-point name, the size
// parameters and the host input/output data. No OpenCL object is created
// here; every OpenCL handle starts out as NULL.
//
// The string and vector arguments are taken by value and moved into the
// members, so each is copied at most once. The initialisers are listed in
// member declaration order, and _device is now initialised as well (it was
// previously left indeterminate).
myOpenCL::myOpenCL(std::string strOpenCLFileName,
    std::string strOpenCLKernalEntry,
    int sizeOfInputType,
    int sizeOfInputObject,
    int sizeOfEachInputUnit,
    std::vector<std::vector<float>> inputVec2,
    int sizeOfOutputType,
    int sizeOfOutputObject,
    int sizeOfEachOutputUnit,
    std::vector<std::vector<float>> outputVec2 )
    : _strOpenCLFileName(std::move(strOpenCLFileName)),
      _strOpenCLKernalEntry(std::move(strOpenCLKernalEntry)),
      _theContext(NULL),
      _commandQueue(NULL),
      _device(NULL),
      _theProgram(NULL),
      _theKernel(NULL),
      _sizeOfInputType(sizeOfInputType),
      _sizeOfInputObject(sizeOfInputObject),
      _sizeOfEachInputUnit(sizeOfEachInputUnit),
      _inputVec2(std::move(inputVec2)),
      _sizeOfOutputType(sizeOfOutputType),
      _sizeOfOutputObject(sizeOfOutputObject),
      _sizeOfEachOutputUnit(sizeOfEachOutputUnit),
      _outputVec2(std::move(outputVec2))
{
}

// Empties the host-side data and releases every OpenCL handle that is not
// null, in the order: command queue, kernel, program, context.
myOpenCL::~myOpenCL()
{
    _inputVec2.clear();
    _outputVec2.clear();

    if (_commandQueue)
        clReleaseCommandQueue(_commandQueue);

    if (_theKernel)
        clReleaseKernel(_theKernel);

    if (_theProgram)
        clReleaseProgram(_theProgram);

    if (_theContext)
        clReleaseContext(_theContext);
}

// Creates an OpenCL context on the first available platform.
// A GPU context is tried first; if that fails, a CPU context is tried.
// Returns NULL (after reporting on std::cerr) when no platform is available
// or when neither context can be created.
cl_context myOpenCL::createContext()
{
    cl_platform_id firstPlatformId = 0;
    cl_uint numPlatforms = 0;
    // Only the first platform is requested.
    cl_int errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms == 0)
    {
        std::cerr << "myOpenCL::createContext: no OpenCL platform found (error "
                  << errNum << ")" << std::endl;
        return NULL;
    }

    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(firstPlatformId),
        0
    };

    cl_context context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        // No usable GPU context: fall back to a CPU device.
        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "myOpenCL::createContext: could not create a GPU or CPU context (error "
                      << errNum << ")" << std::endl;
            return NULL;
        }
    }
    return context;
}
// Selects the first device of `context` and creates a command queue on it.
// The queue is what kernels and read-backs are enqueued on.
//
// context : context whose device list is queried.
// device  : receives the first device of the context once the device list
//           has been retrieved; left untouched if the query fails.
// Returns the new command queue, or NULL (after reporting on std::cerr) if
// the device list cannot be read, is empty, or the queue cannot be created.
cl_command_queue myOpenCL::createCommandQueue(cl_context context, cl_device_id & device)
{
    // First call: ask only for the size in bytes of the device list.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS || deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "myOpenCL::createCommandQueue: no device available in the context (error "
                  << errNum << ")" << std::endl;
        return NULL;
    }

    // Second call: fetch the device list itself.
    std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
        devices.size() * sizeof(cl_device_id), &devices[0], NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "myOpenCL::createCommandQueue: could not read the device list (error "
                  << errNum << ")" << std::endl;
        return NULL;
    }

    device = devices[0];

    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &errNum);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "myOpenCL::createCommandQueue: clCreateCommandQueue failed (error "
                  << errNum << ")" << std::endl;
        return NULL;
    }
    return commandQueue;
}

// Loads the kernel source from `fileName`, creates a program object in the
// stored context and builds it for the context's devices.
//
// fileName : path of the kernel source file.
// Returns the built program, or NULL when the file cannot be opened, the
// program cannot be created, or the build fails. On a build failure the
// build log of the program's first device is printed to std::cerr and the
// program object is released.
cl_program myOpenCL::createProgram( const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "不能打开文件" << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into one string.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char * srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(_theContext, 1, &srcStr, NULL, &errNum);
    if (errNum != CL_SUCCESS || program == NULL)
    {
        std::cerr << "myOpenCL::createProgram: clCreateProgramWithSource failed (error "
                  << errNum << ")" << std::endl;
        return NULL;
    }

    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "myOpenCL::createProgram: clBuildProgram failed for " << fileName
                  << " (error " << errNum << ")" << std::endl;

        // Ask the program for its own device list so the log can be fetched
        // without relying on any member other than the context.
        size_t devicesBytes = 0;
        if (clGetProgramInfo(program, CL_PROGRAM_DEVICES, 0, NULL, &devicesBytes) == CL_SUCCESS
            && devicesBytes >= sizeof(cl_device_id))
        {
            std::vector<cl_device_id> devices(devicesBytes / sizeof(cl_device_id));
            if (clGetProgramInfo(program, CL_PROGRAM_DEVICES,
                    devices.size() * sizeof(cl_device_id), &devices[0], NULL) == CL_SUCCESS)
            {
                size_t logSize = 0;
                if (clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG,
                        0, NULL, &logSize) == CL_SUCCESS && logSize > 0)
                {
                    // One extra zero byte guarantees termination when printing.
                    std::vector<char> buildLog(logSize + 1, '\0');
                    if (clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG,
                            logSize, &buildLog[0], NULL) == CL_SUCCESS)
                    {
                        std::cerr << &buildLog[0] << std::endl;
                    }
                }
            }
        }

        clReleaseProgram(program);
        return NULL;
    }
    return program;
}

// Accessor for the stored OpenCL context handle.
cl_context myOpenCL::getContext()
{
    cl_context currentContext = _theContext;
    return currentContext;
}

// Binds the memory object `theData` to argument number `id` of the stored
// kernel and returns the status code of clSetKernelArg.
cl_int myOpenCL::setKernelParameter( int id, cl_mem theData)
{
    return clSetKernelArg(_theKernel, id, sizeof(cl_mem), &theData);
}

// Enqueues the stored kernel on the stored command queue as a
// one-dimensional range, with no global offset and no event dependencies.
// Returns the status code of clEnqueueNDRangeKernel.
cl_int myOpenCL::setKernalQueue(size_t* globalWorkSize, size_t* localWorkSize)
{
    const cl_uint workDimensions = 1;
    return clEnqueueNDRangeKernel(_commandQueue, _theKernel, workDimensions,
        NULL, globalWorkSize, localWorkSize,
        0, NULL, NULL);
}
// Blocking read of one output array from the device buffer `memObject`
// into the host memory at `result`.
//
// memObject : device buffer to read from, starting at offset 0.
// result    : host destination; must have room for
//             _sizeOfOutputObject * _sizeOfEachOutputUnit bytes.
// Returns the status code of clEnqueueReadBuffer.
cl_int myOpenCL::readResult(cl_mem memObject, float * result)
{
    // Multiply in size_t: the product of two ints could overflow before it
    // is converted to the byte count the API expects.
    const size_t bytesToRead = static_cast<size_t>(_sizeOfOutputObject)
        * static_cast<size_t>(_sizeOfEachOutputUnit);
    return clEnqueueReadBuffer(_commandQueue, memObject, CL_TRUE, 0,
        bytesToRead, result, 0, NULL, NULL);
}

//处理全过程
void myOpenCL::process()
{
   _theContext = this->createContext();
   _commandQueue = this->createCommandQueue(_theContext, _device);
   _theProgram = this->createProgram(_strOpenCLFileName.c_str());
   //创建opencl内核
   _theKernel = clCreateKernel(_theProgram, _strOpenCLKernalEntry.c_str(), NULL);

   std::vector<cl_mem> memInputVector;
   memInputVector.clear();
   memInputVector.resize(_sizeOfInputType);
   for (size_t i = 0; i < _sizeOfInputType; i++)
   {
       memInputVector[i] = 0;
   }
   //先读后写分配内存
   for (size_t i = 0; i < _sizeOfInputType; i++)
   {
       memInputVector[i] = clCreateBuffer(_theContext,
           CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
           _sizeOfEachInputUnit * _sizeOfInputObject,
           &_inputVec2[i][0], NULL);
   }
   std::vector<cl_mem> memOutputVector;
   memOutputVector.clear();
   memOutputVector.resize(_sizeOfOutputType);
   for (size_t i = 0; i < _sizeOfOutputType; i++)
   {
       memOutputVector[i] = 0;
   }
   //先读后写分配内存
   for (size_t i = 0; i < _sizeOfOutputType; i++)
   {
       memOutputVector[i] = clCreateBuffer(_theContext,
           CL_MEM_READ_WRITE,
           _sizeOfEachOutputUnit * _sizeOfOutputObject,
           NULL, NULL);
   }

   //建立内核参数
   for (size_t i = 0; i < _sizeOfInputType; i++)
   {
       this->setKernelParameter(i, memInputVector[i]);
   }
   for (size_t i = 0; i < _sizeOfOutputType; i++)
   {
       this->setKernelParameter(i + _sizeOfInputType, memOutputVector[i]);
   }
   //使用命令队列使将在设备上执行的内核排队
   size_t globalWorkSize[1] = { _sizeOfInputObject };
   size_t localWorkSize[1] = { 1 };
   this->setKernalQueue(globalWorkSize, localWorkSize);
   //从内核读回结果
   this->readResult(memInputVector[_sizeOfInputType - 1], &_outputVec2[_sizeOfOutputType - 1][0]);
   memInputVector.clear();
}

// Returns a copy of the stored output vectors.
std::vector<std::vector<float>> myOpenCL::getResult()
{
    std::vector<std::vector<float>> resultCopy(_outputVec2);
    return resultCopy;
}

调用

#include "myOpenCL.h"

// Number of float values in every input and output array.
const int ARRAY_SIZE = 1000000;

// Demo driver: builds two input arrays (i and 2*i), runs the "hello_kernel"
// kernel from test.cl through myOpenCL, and prints every result value,
// ten per line, separated by commas.
int main(int argc, char ** argv)
{
    const std::string strOpenCLFileName = "test.cl";
    const std::string strOpenCLKernalEntry = "hello_kernel";

    // Input side: two arrays of ARRAY_SIZE floats, zero-filled on creation.
    const int sizeOfInputType = 2;
    const int sizeOfInputObject = ARRAY_SIZE;
    const int sizeOfEachInputUnit = sizeof(float);
    std::vector<std::vector<float>> inputVec2(sizeOfInputType,
        std::vector<float>(sizeOfInputObject));

    const size_t inputCount = sizeOfInputObject;
    for (size_t idx = 0; idx < inputCount; ++idx)
    {
        inputVec2[0][idx] = static_cast<float>(idx);
        inputVec2[1][idx] = static_cast<float>(idx * 2);
    }

    // Output side: one zero-filled array of ARRAY_SIZE floats.
    const int sizeOfOutputType = 1;
    const int sizeOfOutputObject = ARRAY_SIZE;
    const int sizeOfEachOutputUnit = sizeof(float);
    std::vector<std::vector<float>> outputVec2(sizeOfOutputType,
        std::vector<float>(sizeOfOutputObject));

    myOpenCL theOpenCL(strOpenCLFileName,
        strOpenCLKernalEntry,
        sizeOfInputType,
        sizeOfInputObject,
        sizeOfEachInputUnit,
        inputVec2,
        sizeOfOutputType,
        sizeOfOutputObject,
        sizeOfEachOutputUnit,
        outputVec2);

    theOpenCL.process();

    // Print the results; a line break precedes every group of ten values.
    const std::vector<std::vector<float>> resultVec = theOpenCL.getResult();
    for (const std::vector<float>& row : resultVec)
    {
        size_t column = 0;
        for (const float theResult : row)
        {
            if (column % 10 == 0)
            {
                std::cout << std::endl;
            }
            std::cout << theResult << ",";
            ++column;
        }
    }

    return 0;
}

opencl的helloworld，将输入和输出部分分开

猜你喜欢