OpenCL 管道

▶ 按书上写的管道代码,即使使用支持 OpenCL 2.0 的平台和设备,编译器仍然报错“不支持修饰符 pipe”,暂时不知道是什么问题,先把代码堆上来,以后换了新的设备再说

● 程序主要功能:用主机上的数组 srcHost 创建设备缓冲区 src,调用核函数 pipeProducer 将 src 分装到管道中,再调用核函数 pipeConsumer 将管道中的数据读到设备缓冲区 dst 中,最后拷贝回主机数组 dstHost 中检查结果。

● 代码

 1 //pipe.cl
 // Producer kernel: every work-item pushes one float from src into the pipe.
 //   src     - global input buffer, indexed by the work-item's global ID
 //   outPipe - pipe that receives one packet per work-item
 // A work-item whose reservation is invalid writes nothing.
 __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
 {
     int gid = get_global_id(0);
     float srcPipe = src[gid];                              // the element owned by this work-item
     reserve_id_t resID = reserve_write_pipe(outPipe, 1);   // reserve room for a single packet
     if (is_valid_reserve_id(resID))
     {
         if (write_pipe(outPipe, resID, 0, &srcPipe) != 0)  // index 0: the only slot in this reservation
             return;
         commit_write_pipe(outPipe, resID);
     }
 }
14 
// Consumer kernel: every work-item pops one float from the pipe into dst.
//   dst    - global output buffer, indexed by the work-item's global ID
//   inPipe - pipe from which one packet per work-item is read
// dst[gid] is written only when a packet was actually read, so a failed
// reservation or read never stores an uninitialised value.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0);
    float dstPipe;
    reserve_id_t resID = reserve_read_pipe(inPipe, 1);     // reserve a single packet
    if (is_valid_reserve_id(resID))
    {
        if (read_pipe(inPipe, resID, 0, &dstPipe) != 0)    // index 0: the only slot in this reservation
            return;
        commit_read_pipe(inPipe, resID);
        dst[gid] = dstPipe;
    }
}
  1 //main.c
  2 #include <stdio.h>  
  3 #include <stdlib.h>  
  4 #include <cl.h>
  5 
  6 const char *sourceCode = "D:/Code/pipe.cl";
  7 
  // Read a whole file into a newly allocated, NUL-terminated buffer.
  //   kernelPath - path of the file to load (opened in binary mode)
  // Returns a heap buffer holding the file bytes followed by '\0'; the caller
  // releases it with free(). Any failure prints a message and calls exit(-1).
  char* readSource(const char* kernelPath)
  {
      FILE *fp = NULL;
      char *source;
      long int size;
      //printf("readSource, Program file: %s\n", kernelPath);
  #if defined(_MSC_VER)
      fopen_s(&fp, kernelPath, "rb");
  #else
      fp = fopen(kernelPath, "rb");   // fopen_s is not available outside MSVC / Annex K
  #endif
      if (!fp)
      {
          printf("Open kernel file failed\n");
          exit(-1);
      }
      if (fseek(fp, 0, SEEK_END) != 0)
      {
          printf("Seek end of file failed\n");
          fclose(fp);
          exit(-1);
      }
      if ((size = ftell(fp)) < 0)
      {
          printf("Get file position failed\n");
          fclose(fp);
          exit(-1);
      }
      rewind(fp);
      if ((source = (char *)malloc((size_t)size + 1)) == NULL)   // +1 for the terminator
      {
          printf("Allocate space failed\n");
          fclose(fp);
          exit(-1);
      }
      // A short read would leave part of the buffer uninitialised, so treat it as an error.
      if (fread(source, 1, (size_t)size, fp) != (size_t)size)
      {
          printf("Read kernel file failed\n");
          free(source);
          fclose(fp);
          exit(-1);
      }
      fclose(fp);
      source[size] = '\0';
      return source;
  }
 41 
 42 int main()
 43 {
 44     const int nPacket = 1024, dataSize = nPacket * sizeof(float);
 45     char info[1024] = { 0 };
 46     int i;
 47 
 48     // 初始化平台
 49     cl_int status;    
 50     cl_platform_id platform;
 51     clGetPlatformIDs(1, &platform, NULL);    
 52     cl_device_id device;
 53     clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
 54     cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM,(cl_context_properties)(platform), 0 };
 55     cl_context context = clCreateContext(contextProp, 1, &device, NULL, contextProp, &status);
 56     cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, NULL, &status);    
 57     cl_event eventProducer, eventConsumer; 
 58 
 59     const char* source = readSource(sourceCode);
 60     cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &status);    
 61     status = clBuildProgram(program, 1, &device, "-w -g –cl-std=CL2.0", NULL, NULL);
 62 
 63     clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 1024, info, NULL);
 64     printf("Build log:\n%s\n", info);
 65 
 66     cl_kernel kernelProducer = clCreateKernel(program, "pipeProducer", &status);
 67     cl_kernel kernelConsumer = clCreateKernel(program, "pipeConsumer", &status);
 68     size_t globalSize = nPacket, localSize = 128;
 69 
 70     float *srcHost = (float *)malloc(dataSize);
 71     float *dstHost = (float *)malloc(dataSize);
 72     for (i = 0; i < nPacket; srcHost[i] = i, dstHost[i] = 0.0f, i++);
 73         
 74     cl_mem src, dst;
 75     src = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, dataSize, srcHost, &status);
 76     dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status);
 77    
 78     cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status);
 79 
 80     clSetKernelArg(kernelProducer, 0, sizeof(cl_mem),src);
 81     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
 82 
 83     clSetKernelArg(kernelProducer, 0, sizeof(cl_mem), dst);
 84     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
 85     
 86     clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer);
 87     clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer, &eventConsumer);        
 88     clEnqueueReadBuffer(queue, dst, CL_TRUE, dataSize, dataSize, dstHost, 1, &eventConsumer, NULL);
 89     clFinish(queue);
 90 
 91     for (i = 0; i < nPacket; i++)
 92     {
 93         if (dstHost[i] != i)
 94             break;
 95     }
 96     printf("Output is %s.\n", (i == nPacket) ? "correct" : "incorrect");
 97     clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL);
 98 
 99     free(srcHost);
100     free(dstHost);
101     clReleaseContext(context);    
102     clReleaseCommandQueue(queue);
103     clReleaseProgram(program);
104     clReleaseKernel(kernelProducer);
105     clReleaseKernel(kernelConsumer);
106     getchar();
107     return 0;
108 }

● 输出结果

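■ 使用编译参数 "-w -g –cl-std=CL2.0" 时返回 status 为 -43(CL_INVALID_BUILD_OPTIONS),不使用参数 "–cl-std=CL2.0" 的情况下返回 -11(CL_BUILD_PROGRAM_FAILURE),麻烦的是调用函数 clGetProgramBuildInfo 查询编译日志 info 始终都是空的,不知道出了什么问题。(注:该参数字符串中 cl-std 前面的字符是长破折号 “–”(U+2013),而不是 ASCII 的 “-”,编译器无法识别这个选项,这很可能就是返回 CL_INVALID_BUILD_OPTIONS 的直接原因;另外,用固定 1024 字节的缓冲区查询日志时,如果日志长度超过缓冲区,clGetProgramBuildInfo 会返回 CL_INVALID_VALUE 且不写入内容,日志为空可能与此有关,应先查询日志长度再分配缓冲区。)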

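■ 转机,去掉了修饰符 __write_only 和 __read_only(只用于图像类型的缓冲区),返回 status 为 -11,至少报错信息有了:【identifier "pipe" is undefined】和【invalid combination of type specifiers】(指在 float 上)。(注:按 OpenCL C 2.0 规范,__read_only / __write_only 同样可以修饰 pipe 参数;【identifier "pipe" is undefined】说明程序并没有按 OpenCL C 2.0 编译,pipe 没有被当作关键字,需要正确传入 -cl-std=CL2.0。)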

● 后续代码,但是上述代码都编译不了,下面的也暂时没用。(1)使用局部内存来统一工作组的写入

 1 //pipe2.cl
 // Producer kernel (shared-reservation variant): one reservation per work-group,
 // kept in local memory, with each work-item writing the slot given by its local ID.
 //   src     - global input buffer, indexed by the work-item's global ID
 //   outPipe - pipe that receives get_local_size(0) packets per work-group
 // NOTE(review): this relies on a reservation made by one work-item being usable
 // by the other work-items of the group - confirm against the target platform.
 __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
 {
     int gid = get_global_id(0), lid = get_local_id(0);
     __local reserve_id_t resID;
     if (lid == 0)
         resID = reserve_write_pipe(outPipe, get_local_size(0)); // first work-item reserves slots for the whole group
     barrier(CLK_LOCAL_MEM_FENCE);                               // make resID visible to every work-item

     float srcPipe = src[gid];
     // resID is shared, so the whole work-group takes the same branch and the
     // barrier inside it is reached by every work-item.
     if (is_valid_reserve_id(resID))
     {
         write_pipe(outPipe, resID, lid, &srcPipe);              // each work-item writes its own reserved slot
         barrier(CLK_GLOBAL_MEM_FENCE);                          // wait until every slot has been written
         if (lid == 0)
             commit_write_pipe(outPipe, resID);                  // the reservation is committed exactly once
     }
 }
18 
// Consumer kernel (shared-reservation variant): one reservation per work-group,
// kept in local memory, with each work-item reading the slot given by its local ID.
//   dst    - global output buffer, indexed by the work-item's global ID
//   inPipe - pipe from which get_local_size(0) packets per work-group are read
// dst[gid] is written only when this work-item's read succeeded.
// NOTE(review): this relies on a reservation made by one work-item being usable
// by the other work-items of the group - confirm against the target platform.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    __local reserve_id_t resID;
    if (lid == 0)
        resID = reserve_read_pipe(inPipe, get_local_size(0));   // first work-item reserves packets for the whole group
    barrier(CLK_LOCAL_MEM_FENCE);                               // make resID visible to every work-item

    // resID is shared, so the whole work-group takes the same branch and the
    // barrier inside it is reached by every work-item.
    if (is_valid_reserve_id(resID))
    {
        float dstPipe;
        int readOk = (read_pipe(inPipe, resID, lid, &dstPipe) == 0);
        barrier(CLK_GLOBAL_MEM_FENCE);                          // wait until every slot has been read
        if (lid == 0)
            commit_read_pipe(inPipe, resID);                    // the reservation is committed exactly once
        if (readOk)
            dst[gid] = dstPipe;
    }
}

● (2)使用工作组管道操作简化上述代码(只是干掉了一个 if 和一个同步)

 1 //pipe3.cl
 // Producer kernel (work-group pipe functions): the work-group reserves
 // get_local_size(0) slots collectively and each work-item fills the slot
 // given by its local ID.
 //   src     - global input buffer, indexed by the work-item's global ID
 //   outPipe - pipe that receives get_local_size(0) packets per work-group
 __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
 {
     int gid = get_global_id(0), lid = get_local_id(0);
     // Held in a private variable: a __local variable cannot carry an initialiser.
     // The work-group call does the branching and synchronisation itself.
     reserve_id_t resID = work_group_reserve_write_pipe(outPipe, get_local_size(0));

     float srcPipe = src[gid];
     if (is_valid_reserve_id(resID))
     {
         write_pipe(outPipe, resID, lid, &srcPipe);
         // A work-group reservation is committed with the work-group commit, which
         // every work-item must reach - so there is no early return before it.
         work_group_commit_write_pipe(outPipe, resID);
     }
 }
15 
// Consumer kernel (work-group pipe functions): the work-group reserves
// get_local_size(0) packets collectively and each work-item reads the slot
// given by its local ID.
//   dst    - global output buffer, indexed by the work-item's global ID
//   inPipe - pipe from which get_local_size(0) packets per work-group are read
// dst[gid] is written only when this work-item's read succeeded.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    // Held in a private variable: a __local variable cannot carry an initialiser.
    reserve_id_t resID = work_group_reserve_read_pipe(inPipe, get_local_size(0));

    if (is_valid_reserve_id(resID))
    {
        float dstPipe;
        int readOk = (read_pipe(inPipe, resID, lid, &dstPipe) == 0);
        // A work-group reservation is committed with the work-group commit, which
        // every work-item must reach - so there is no early return before it.
        work_group_commit_read_pipe(inPipe, resID);
        if (readOk)
            dst[gid] = dstPipe;
    }
}

 ● 书上原本的主函数的内容(关于数据缓冲区的部分),是用虚拟内存写的,由于办公室的电脑不支持,上面的代码中被我换成了普通缓冲区

 1     float *src = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0);
 2     float *dst = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0);
 3     if (src == NULL || dst == NULL)
 4     {
 5         printf("clSVMAlloc failed!\n");
 6         getchar();
 7         return 0;
 8     }
 9 
10     clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, src, dataSize, 0, NULL, NULL);
11     for (i = 0; i < nPacket; i++)
12         src[i] = i, dst[i] = 0.0f;
13     clEnqueueSVMUnmap(queue, src, 0, NULL, NULL);
14 
15     cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status);
16 
17     clSetKernelArgSVMPointer(kernelProducer, 0, src);
18     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
19 
20     clSetKernelArgSVMPointer(kernelProducer, 0, dst);
21     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
22     
23     clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer);
24     clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer,NULL);
25     clFinish(queue);
26     
27     clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, dst, dataSize, 0, NULL, NULL);    
28     for (i = 0; i < nPacket; i++)
29     {
30         if (dst[i] != i)
31             break;
32     }
33     printf("Output is %s.\n", (i == nPacket) ? "correct" : "incorrect");
34     clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL);

猜你喜欢

转载自www.cnblogs.com/cuancuancuanhao/p/9038655.html