OpenCL 设备队列

▶ 按书上写的设备队列的代码,需要 OpenCL2.0 的平台和设备,先把代码堆上来

● 程序主要功能:用主机上的数组 Ahost 和 Bhost 创建设备缓冲区 Adevice 和 Bdevice,调用核函数 foo 及其子核函数 fooChild 计算 factor * Adevice .* Bdevice,结果写入 Cdevice,最后拷贝回主机数组 Chost 检查结果。

● 代码

 1 //deviceQueue.cl
 2 __kernel void fooChild(const int nElement, const float factor,
 3     __global const float *A, __global const float *B, __global float *C)
 4 {
 5     uint gid = get_global_id(0);
 6     if (gid < nElement)
 7         C[gid] = factor * A[gid] * B[gid];
 8 }
 9 
10 __kernel void foo(const int nElement, const float factor,
11     __global const float *A, __global const float *B, __global float *C)
12 {
13     uint gid = get_global_id(0), gsize = get_global_size(0);
14     uint childGsize = nElement / gsize, childOffset = gid * childGsize;
15 
16     __global const float *Achild = &A[childOffset];
17     __global const float *Bchild = &B[childOffset];
18     __global const float *Cchild = &C[childOffset];
19 
20     queue_t defQ = get_default_queue();
21     ndrange_t ndrange = ndrange_1D(childGsize);
22     void(^fooChildWrapper)(void) = ^{ fooChild(childGsize, factor, Achild, Bchild, Cchild); };
23     enqueue_kernel(defQ, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, saxpyDpChildWrapper);
24 }
  1 //main.c
  2 #include <stdio.h>  
  3 #include <stdlib.h>  
  4 #include <cl.h>
  5 
  6 const char *sourceCode = "D:/Code/deviceQueue.cl"; // path of the kernel source file that main() passes to readSource()
  7 
  /*
   * Read a whole file into a newly allocated, NUL-terminated buffer.
   *
   *   kernelPath  path of the file to read
   *
   * Returns a malloc'ed string holding the file contents; the caller owns it
   * and must free() it. On any failure (open, seek, tell, allocate, read) a
   * message is printed and the process exits with status -1.
   */
  char* readSource(const char* kernelPath)
  {
      FILE *fp = NULL;
      char *source;
      long int size;
      size_t nRead;

      //printf("readSource, Program file: %s\n", kernelPath);
  #ifdef _MSC_VER
      // MSVC flags plain fopen as deprecated; keep the original fopen_s there.
      if (fopen_s(&fp, kernelPath, "rb") != 0)
          fp = NULL;
  #else
      // fopen_s (Annex K) is optional and missing from most other C libraries.
      fp = fopen(kernelPath, "rb");
  #endif
      if (!fp)
      {
          printf("Open kernel file failed\n");
          exit(-1);
      }
      if (fseek(fp, 0, SEEK_END) != 0)
      {
          printf("Seek end of file failed\n");
          exit(-1);
      }
      if ((size = ftell(fp)) < 0)
      {
          printf("Get file position failed\n");
          exit(-1);
      }
      rewind(fp);
      if ((source = (char *)malloc((size_t)size + 1)) == NULL)
      {
          printf("Allocate space failed\n");
          exit(-1);
      }
      // A short read would otherwise leave uninitialised bytes in the source text.
      nRead = fread(source, 1, (size_t)size, fp);
      if (nRead != (size_t)size)
      {
          printf("Read kernel file failed\n");
          exit(-1);
      }
      fclose(fp);
      source[size] = '\0';
      return source;
  }
 41 
 42 int main()
 43 {
 44     const int nElement = 8196, nChildElement = 128, dataSize = nElement * sizeof(float);
 45     float factor = 2.3f;    
 46     char info[1024] = { 0 };
 47     int i;
 48 
 49     // 初始化平台
 50     cl_int status;
 51     cl_platform_id platform;
 52     status = clGetPlatformIDs(1, &platform, NULL);
 53     cl_device_id device;
 54     status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
 55     cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM,(cl_context_properties)(platform), 0 };
 56     cl_context context = clCreateContext(contextProp, 1, &device, NULL, contextProp, &status);
 57     cl_queue_properties queueProp[3] = { CL_QUEUE_PROPERTIES,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0 };
 58     cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, queueProp, &status);
 59     cl_event eventProducer, eventConsumer;
 60 
 61     const char* source = readSource(sourceCode);
 62     cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &status);
 63     status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
 64     if (status)
 65     {
 66         clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 1024, info, NULL);
 67         printf("Build log:\n%s\n", info);
 68     }
 69 
 70     cl_kernel foo = clCreateKernel(program, "foo", &status);    
 71     size_t globalSize = nElement / nChildElement, localSize = 1;// 每个父工作项调度 nChildElement 个子工作项
 72 
 73     float *Ahost = (float *)malloc(dataSize);
 74     float *Bhost = (float *)malloc(dataSize);
 75     float *Chost = (float *)malloc(dataSize);
 76     for (i = 0; i < nElement; Ahost[i] = i, Bhost[i] = i + 1, Chost[i] = 0.f, i++);
 77 
 78     cl_mem Adevice, Bdevice, Cdevice;
 79     Adevice = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, Ahost, &status);
 80     Bdevice = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, Bhost, &status);
 81     Cdevice = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status);
 82     
 83     clSetKernelArg(foo, 0, sizeof(int), (void*)&nElement);
 84     clSetKernelArg(foo, 1, sizeof(float), (void*)&factor);
 85     clSetKernelArg(foo, 2, sizeof(cl_mem), Adevice);
 86     clSetKernelArg(foo, 3, sizeof(cl_mem), Bdevice);
 87     clSetKernelArg(foo, 4, sizeof(cl_mem), Cdevice);
 88     
 89     clEnqueueNDRangeKernel(queue, foo, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer);
 90     clFinish(queue);
 91 
 92     clEnqueueReadBuffer(queue, Cdevice, CL_TRUE, dataSize, dataSize, Chost, 1, &eventConsumer, NULL);
 93     clFinish(queue);
 94 
 95     for (i = 0; i < nElement; i++)
 96     {
 97         if (Chost[i] != factor*i*(i + 1))
 98             break;
 99     }
100     printf("Output is %s.\n", (i == nElement) ? "correct" : "incorrect");
101 
102     free(Ahost);
103     free(Bhost);
104     free(Chost);
105     clReleaseContext(context);
106     clReleaseCommandQueue(queue);
107     clReleaseProgram(program);
108     clReleaseKernel(foo);
109     clReleaseMemObject(Adevice);
110     clReleaseMemObject(Bdevice);
111     clReleaseMemObject(Cdevice);
112     getchar();
113     return 0;
114 }

● 输出结果

■ 一直卡在函数 clCreateCommandQueueWithProperties 的调用上,返回值 -6(CL_OUT_OF_HOST_MEMORY),原因不明,Stack Overflow 上有人说换了显卡驱动就好了(https://stackoverflow.com/questions/39864947/opencl-cl-out-of-host-memory-on-clcreatecommandqueuewithproperties-with-minima),还有人说是设备位数的问题(https://stackoverflow.com/questions/45231329/opencl-clcreatecommandqueue-cl-out-of-host-memory-error),但是我更新了显卡驱动,工程改成 32 位(才发现显卡是 32 位的)还是不行。

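■ 强行忽略上面的问题(clCreateCommandQueueWithProperties 第四参数用 NULL)后,程序仍然编译失败,clBuildProgram 返回 -11(CL_BUILD_PROGRAM_FAILURE),原因是不能识别核函数中的 queue_t 和 ndrange_t 数据类型,后面的块语法就更别想了,应该是平台和设备不能完全支持 OpenCL2.0 所致。另外需要注意,clBuildProgram 没有传入编译选项 "-cl-std=CL2.0" 时,核函数默认按 OpenCL C 1.x 编译,同样无法识别这些类型。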

猜你喜欢

转载自www.cnblogs.com/cuancuancuanhao/p/9038952.html