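▶ This example covers three ways of moving data between host and device: explicit copy, using buffers created with the CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, or CL_MEM_READ_WRITE flags together with the functions clEnqueueWriteBuffer and clEnqueueReadBuffer; implicit copy, using the CL_MEM_COPY_HOST_PTR flag (no copy function is called — the host data is copied when the buffer is created and is then used directly on the device); and the function clEnqueueMapBuffer, which maps a region of a device buffer into host memory and returns a host pointer to it.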
● Code
1 #include <cl.h> 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <time.h> 5 #include <iostream> 6 7 using namespace std; 8 const int nElement = 2048; 9 10 const char *programSource = " \ 11 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \ 12 { \ 13 int idx = get_global_id(0); \ 14 C[idx] = A[idx] + B[idx]; \ 15 return; \ 16 } \ 17 "; 18 19 int main(int argc, char* argv[]) 20 { 21 const size_t dataSize = sizeof(int) * nElement; 22 int i, *A, *B, *C, *returnC; 23 24 A = (int *)malloc(dataSize * sizeof(float)); 25 B = (int *)malloc(dataSize * sizeof(float)); 26 C = (int *)malloc(dataSize * sizeof(float)); 27 for (srand((unsigned)time(NULL)), i = 0; i < dataSize; A[i] = rand() % 65535, B[i] = rand() % 65535, C[i] = A[i] + B[i], i++); 28 29 cl_int status; 30 cl_platform_id platform; 31 clGetPlatformIDs(1, &platform, NULL); 32 cl_device_id device; 33 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); 34 cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); 35 cl_command_queue queue = clCreateCommandQueue(context, device, 0, & status); 36 cl_program program = clCreateProgramWithSource(context, 1 , &programSource, NULL, & status); 37 clBuildProgram(program, 1 , & device, NULL, NULL, NULL); 38 cl_kernel kernel = clCreateKernel(program, " vectorAdd " , NULL); 39 cl_event writeEvent, runEvent, mapEvent; 40 41 // Create three memory objects, implicitly copy A to clA, display B to clB, and clC receives the calculation result and maps it to returnC 42 cl_mem clA = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize * sizeof (cl_int), A, NULL); 43 cl_mem clB = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize * sizeof (cl_int), NULL, NULL); 44 cl_mem clC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize * sizeof (cl_int), NULL, NULL); 45 46 clEnqueueWriteBuffer(queue, clB , 1 , 0 , dataSize * sizeof (cl_int), B, 0 , 0 , & writeEvent); 47 48 clFlush(queue); // Submit all tasks in the 
queue before there is an event that needs to wait 49 clWaitForEvents( 1 , &writeEvent); // Wait for the specified event to complete 50 51 // 执行内核 52 size_t global_work_size = dataSize; 53 clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clA); 54 clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&clB); 55 clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&clC); 56 clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, &runEvent); 57 clFlush(queue); 58 clWaitForEvents(1, &runEvent); 59 60 // 结果拷回 61 returnC = (cl_int *)clEnqueueMapBuffer(queue, clC, CL_TRUE, CL_MAP_READ, 0, dataSize * sizeof(cl_int), 0, NULL, &mapEvent, NULL); 62 clFlush(queue); 63 clWaitForEvents(1, &mapEvent); 64 65 //结果验证 66 printf("Verify %s.\n", !memcmp(C, returnC, dataSize)?"passed":"failed");// 定义在 iostream 67 68 free(C); 69 free(A); 70 free(B); 71 clReleaseMemObject(clA); 72 clReleaseMemObject(clB); 73 clReleaseMemObject(clC); 74 clReleaseContext(context); 75 clReleaseCommandQueue(queue); 76 clReleaseProgram(program); 77 clReleaseEvent(writeEvent); 78 clReleaseEvent(runEvent); 79 clReleaseEvent(mapEvent); 80 getchar(); 81 return 0; 82 }
● output results
Verify passed.
● Note
■ A declared cl_event is not valid until it has been filled in by passing its address as the cl_event *event parameter of an enqueue function; only then can it be passed to clWaitForEvents or clReleaseEvent, otherwise an error is reported.