#define CL_TARGET_OPENCL_VERSION 300 #include #include int main() { // Connect to a compute device // int gpu = 1; cl_device_id deviceID; cl_int err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &deviceID, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to create a device group!\n"); return 1; } FILE *file = fopen("src/test.cl", "r"); fseek(file, 0, SEEK_END); size_t length = ftell(file); fseek(file, 0, SEEK_SET); char *buffer = calloc(1, length + 1); fread(buffer, length, 1, file); printf("%s", buffer); cl_context context = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &err); if(!context) { printf("Failed to create context\n"); return 1; } cl_command_queue queue = clCreateCommandQueueWithProperties(context, deviceID, NULL, &err); if(!queue) { printf("Failed to create command queue\n"); return 1; } cl_program program = clCreateProgramWithSource(context, 1, (const char **) &buffer, &length, &err); if(!program) { printf("Failed to create program\n"); return 1; } err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if(err != CL_SUCCESS) { printf("Failed to build program\n"); // clGetProgramBuildInfo... return 1; } cl_kernel kernel = clCreateKernel(program, "do_stuff", &err); if(!kernel) { printf("Failed to create kernel\n"); return 1; } unsigned int inputSize = 256000000; float *inputData = calloc(inputSize, sizeof(float)); for(unsigned int i = 0; i < inputSize; i++) { inputData[i] = i; } cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * inputSize, NULL, &err); cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * inputSize, NULL, &err); if(!input || !output) { printf("Failed to allocate input/output buffer\n"); return 1; } err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(float) * inputSize, inputData, 0, NULL, NULL); if(err != CL_SUCCESS) { printf("Failed to write to buffer\n"); return 1; } err = 0; err = clSetKernelArg(kernel, 0, sizeof(input), &input); err |= clSetKernelArg(kernel, 1, sizeof(output), &output); err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &inputSize); if(err != CL_SUCCESS) { printf("Failed to set kernel args\n"); return 1; } /*char *info = calloc(1, 1024); clGetProgramInfo(program, CL_PROGRAM_STRING_DEBUG_INFO, 1024, info, NULL); printf("INFO: %s\n", info);*/ size_t local; err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); if(err != CL_SUCCESS) { printf("Failed to get work group size\n"); return 1; } printf("Group size is %zu\n", local); size_t global = inputSize; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if(err != CL_SUCCESS) { printf("Failed to enqueue\n"); return 1; } clFinish(queue); float *outputData = calloc(inputSize, sizeof(float)); err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(float) * inputSize, outputData, 0, NULL, NULL); if(err != CL_SUCCESS) { printf("Failed to read from buffer\n"); return 1; } for(unsigned int i = 0; i < inputSize; i++) { if(i % 1000 != 0) continue; printf("%f: %f\n", inputData[i], outputData[i]); } }