OpenCL 使用函数 clCreateProgramWithBinary 来创建程序

▶ 函数 clCreateProgramWithSource 接收 OpenCL 源代码(与设备无关)来创建程序,而函数 clCreateProgramWithBinary 接收已经由函数 clBuildProgram 编译(build)过的二进制代码(与设备相关)来创建程序。

● 范例代码,还是计算两向量和

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cl.h>
  5 
  6 const int nElement = 2048;  
  7 const char *binaryFileName = "D:\\Code\\OpenCL\\OpenCLProjectTemp\\OpenCLProjectTemp\\vectorAdd.bin";
  8 const char *programSource = "                                              \
  9 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \
 10 {                                                                          \
 11     int idx = get_global_id(0);                                            \
 12     C[idx] = A[idx] + B[idx];                                              \
 13     return;                                                                \
 14 }                                                                          \
 15 ";
 16 
 17 int readSource(const char* kernelPath, unsigned char **output)// 读取文本文件,存储为字符串,返回字符数
 18 {
 19     FILE *pf;
 20     long int size;
 21     printf("readSource, Program file: %s\n", kernelPath);
 22     fopen_s(&pf, kernelPath, "rb");
 23     if (!pf)
 24     {
 25         printf("Open kernel file failed\n");
 26         exit(-1);
 27     }
 28     if (fseek(pf, 0, SEEK_END) != 0)
 29     {
 30         printf("Seek end of file faildd\n");
 31         exit(-1);
 32     }
 33     if ((size = ftell(pf)) < 0)
 34     {
 35         printf("Get file position failed\n");
 36         exit(-1);
 37     }
 38     rewind(pf);
 39     if ((*output = (unsigned char *)malloc(size + 1)) == NULL)
 40     {
 41         printf("Allocate space failed\n");
 42         exit(-1);
 43     }
 44     fread(*output, 1, size, pf);
 45     fclose(pf);
 46     (*output)[size] = '\0';
 47     return strlen((char*)*output);
 48 }
 49 
 50 int main()
 51 {
 52     const size_t datasize = sizeof(int) * nElement;
 53     int i, *A, *B, *C;
 54     cl_int status;
 55     unsigned char *programBinary;
 56     FILE *pf;
 57 
 58     A = (int*)malloc(datasize);
 59     B = (int*)malloc(datasize);
 60     C = (int*)malloc(datasize);
 61     for (i = 0; i < nElement; A[i] = B[i] = i, i++);
 62                                                
 63     cl_platform_id platform;
 64     clGetPlatformIDs(1, &platform, NULL);                                      
 65     cl_device_id device;
 66     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);                                                                                           
 67     cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);   
 68     cl_command_queue cmdQueue = clCreateCommandQueue(context, device, 0, &status);             
 69     cl_mem bufferA, bufferB, bufferC;
 70     bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);           
 71     bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
 72     bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status);
 73     clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 0, NULL, NULL);
 74     clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, B, 0, NULL, NULL);    
 75     cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status);
 76     clBuildProgram(program, 1, &device, NULL, NULL, NULL);
 77 
 78     // 抽取已经 build 的 program 的代码
 79     size_t programBinarySize;
 80     clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(cl_device_id), &programBinarySize, NULL); // 获取 build 的 program 的大小
 81     programBinary = (unsigned char *)malloc(sizeof(unsigned char)*programBinarySize);
 82     clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &programBinary, NULL);      // 获取代码
 83 
 84     // 将代码写入文件,再读回来,这说明可以从外部文件中直接读取已经 build 的 program 来使用
 85     fopen_s(&pf, binaryFileName, "w");
 86     fwrite(programBinary, 1, programBinarySize, pf);
 87     free(programBinary);
 88     fclose(pf);
 89     programBinarySize = readSource(binaryFileName, &programBinary);
 90 
 91     // 使用clCreateProgramWithBinary 来建立 program
 92     cl_program program2 = clCreateProgramWithBinary(context, 1, &device, &programBinarySize, (const unsigned char **)&programBinary, NULL, NULL);
 93     clBuildProgram(program2, 1, &device, NULL, NULL, NULL);
 94 
 95     cl_kernel kernel = clCreateKernel(program2, "vectorAdd", &status);
 96     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
 97     status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
 98     status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);
 99     size_t globalSize[1] = { nElement }, localSize[1] = { 256 }; 
100     status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL);                                                                                                     
101     clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, C, 0, NULL, NULL);
102 
103     for (i = 0; i < nElement; i++)
104     {
105         if (C[i] != i + i)
106             break;
107     }
108     printf("Output is %s.\n", (i == nElement) ? "correct" : "incorrect");
109 
110     free(A);
111     free(B);
112     free(C);
113     free(programBinary);
114     clReleaseContext(context);
115     clReleaseMemObject(bufferA);
116     clReleaseMemObject(bufferB);
117     clReleaseMemObject(bufferC);
118     clReleaseCommandQueue(cmdQueue);
119     clReleaseProgram(program);
120     clReleaseProgram(program2);
121     clReleaseKernel(kernel);
122     getchar();
123     return 0;
124 }

● 输出结果

readSource, Program file: D:\Code\OpenCL\OpenCLProjectTemp\OpenCLProjectTemp\vectorAdd.bin
Output is correct.

● 由上述代码中的向量加法内核生成的 program 二进制文件(vectorAdd.bin)的内容,在 NVIDIA 平台上为 PTX 文本

 1 //
 2 // Generated by NVIDIA NVVM Compiler
 3 //
 4 // Compiler Build ID: UNKNOWN
 5 // Driver 
 6 // Based on LLVM 3.4svn
 7 //
 8 
 9 .version 6.1
10 .target sm_61, texmode_independent
11 .address_size 64
12 
13     // .globl    vectorAdd
14 
15 .entry vectorAdd(
16     .param .u64 .ptr .global .align 4 vectorAdd_param_0,
17     .param .u64 .ptr .global .align 4 vectorAdd_param_1,
18     .param .u64 .ptr .global .align 4 vectorAdd_param_2
19 )
20 {
21     .reg .b32     %r<10>;
22     .reg .b64     %rd<8>;
23 
24 
25     ld.param.u64     %rd1, [vectorAdd_param_0];
26     ld.param.u64     %rd2, [vectorAdd_param_1];
27     ld.param.u64     %rd3, [vectorAdd_param_2];
28     mov.b32    %r1, %envreg3;
29     mov.u32     %r2, %ntid.x;
30     mov.u32     %r3, %ctaid.x;
31     mad.lo.s32     %r4, %r3, %r2, %r1;
32     mov.u32     %r5, %tid.x;
33     add.s32     %r6, %r4, %r5;
34     mul.wide.s32     %rd4, %r6, 4;
35     add.s64     %rd5, %rd1, %rd4;
36     ld.global.u32     %r7, [%rd5];
37     add.s64     %rd6, %rd2, %rd4;
38     ld.global.u32     %r8, [%rd6];
39     add.s32     %r9, %r8, %r7;
40     add.s64     %rd7, %rd3, %rd4;
41     st.global.u32     [%rd7], %r9;
42     ret;
43 }

猜你喜欢

转载自www.cnblogs.com/cuancuancuanhao/p/8903634.html