129 lines
3.4 KiB
C
129 lines
3.4 KiB
C
#define CL_TARGET_OPENCL_VERSION 300
|
|
|
|
#include <CL/cl.h>

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
|
|
/*
 * Packed record type with no members declared yet.
 * NOTE(review): the name suggests a host-side mirror of a matrix struct used
 * by the kernel source -- confirm against src/mat.cl before adding fields.
 * An empty struct body is a GNU C extension, not ISO C.
 */
typedef struct __attribute__((packed)) {
} cl_GPUMat;
|
|
|
|
/**
 * Read the whole file at `path` into a newly allocated, NUL-terminated buffer.
 *
 * @param path  Path of the file to read (opened in text mode).
 * @return      Heap buffer holding the file contents followed by '\0', or
 *              NULL if the file cannot be opened, sized, read, or if the
 *              allocation fails. The caller owns the buffer and must free() it.
 */
char *loadFile(const char *path) {
    if (path == NULL) {
        return NULL;
    }

    FILE *file = fopen(path, "r");
    if (file == NULL) {
        return NULL;
    }

    char *buffer = NULL;
    long end = -1;

    /* Measure the file by seeking to its end, then rewind for the read. */
    if (fseek(file, 0, SEEK_END) == 0) {
        end = ftell(file);
    }
    if (end >= 0 && fseek(file, 0, SEEK_SET) == 0) {
        size_t length = (size_t) end;

        /* calloc zero-fills, so the buffer stays NUL-terminated even when a
         * text-mode read yields fewer bytes than ftell reported. */
        buffer = calloc(1, length + 1);
        if (buffer != NULL) {
            (void) fread(buffer, 1, length, file);
            if (ferror(file)) {
                free(buffer);
                buffer = NULL;
            }
        }
    }

    fclose(file);
    return buffer;
}
|
|
|
|
int main() {
|
|
// Connect to a compute device
|
|
int useGPU = true;
|
|
cl_device_id deviceID;
|
|
cl_int err = clGetDeviceIDs(NULL, useGPU ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &deviceID, NULL);
|
|
if (err != CL_SUCCESS) {
|
|
printf("Error: Failed to create a device group!\n");
|
|
return 1;
|
|
}
|
|
|
|
char *buffer = loadFile("src/mat.cl");
|
|
printf("%s", buffer);
|
|
|
|
cl_context context = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &err);
|
|
if(!context) {
|
|
printf("Failed to create context\n");
|
|
return 1;
|
|
}
|
|
|
|
cl_command_queue queue = clCreateCommandQueueWithProperties(context, deviceID, NULL, &err);
|
|
if(!queue) {
|
|
printf("Failed to create command queue\n");
|
|
return 1;
|
|
}
|
|
|
|
size_t length = strlen(buffer);
|
|
cl_program program = clCreateProgramWithSource(context, 1, (const char **) &buffer, &length, &err);
|
|
if(!program) {
|
|
printf("Failed to create program\n");
|
|
return 1;
|
|
}
|
|
|
|
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to build program\n");
|
|
// clGetProgramBuildInfo...
|
|
return 1;
|
|
}
|
|
|
|
cl_kernel kernel = clCreateKernel(program, "do_stuff", &err);
|
|
if(!kernel) {
|
|
printf("Failed to create kernel\n");
|
|
return 1;
|
|
}
|
|
|
|
unsigned int inputSize = 256000000;
|
|
float *inputData = calloc(inputSize, sizeof(float));
|
|
for(unsigned int i = 0; i < inputSize; i++) {
|
|
inputData[i] = i;
|
|
}
|
|
|
|
cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * inputSize, NULL, &err);
|
|
cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * inputSize, NULL, &err);
|
|
if(!input || !output) {
|
|
printf("Failed to allocate input/output buffer\n");
|
|
return 1;
|
|
}
|
|
|
|
err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(float) * inputSize, inputData, 0, NULL, NULL);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to write to buffer\n");
|
|
return 1;
|
|
}
|
|
|
|
err = 0;
|
|
err = clSetKernelArg(kernel, 0, sizeof(input), &input);
|
|
err |= clSetKernelArg(kernel, 1, sizeof(output), &output);
|
|
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &inputSize);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to set kernel args\n");
|
|
return 1;
|
|
}
|
|
|
|
/*char *info = calloc(1, 1024);
|
|
clGetProgramInfo(program, CL_PROGRAM_STRING_DEBUG_INFO, 1024, info, NULL);
|
|
printf("INFO: %s\n", info);*/
|
|
|
|
size_t local;
|
|
err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to get work group size\n");
|
|
return 1;
|
|
}
|
|
|
|
printf("Group size is %zu\n", local);
|
|
|
|
size_t global = inputSize;
|
|
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to enqueue\n");
|
|
return 1;
|
|
}
|
|
|
|
clFinish(queue);
|
|
|
|
float *outputData = calloc(inputSize, sizeof(float));
|
|
err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(float) * inputSize, outputData, 0, NULL, NULL);
|
|
if(err != CL_SUCCESS) {
|
|
printf("Failed to read from buffer\n");
|
|
return 1;
|
|
}
|
|
|
|
for(unsigned int i = 0; i < inputSize; i++) {
|
|
if(i % 1000 != 0) continue;
|
|
printf("%f: %f\n", inputData[i], outputData[i]);
|
|
}
|
|
}
|