From b58471adb807db065ff910795cb783316e89a9e5 Mon Sep 17 00:00:00 2001
From: MrLetsplay2003
Date: Thu, 20 Jul 2023 22:34:56 +0200
Subject: [PATCH] initial commit

---
 .gitignore            |   1 +
 .vscode/settings.json |  11 ++
 Makefile              |  15 +++
 src/clm.c             | 261 ++++++++++++++++++++++++++++++++++++++++++
 src/clm.h             |  57 +++++++++
 src/cltest.bak.c      | 171 +++++++++++++++++++++++++++
 src/cltest.c          |  28 +++++
 src/test.cl           |  11 ++
 8 files changed, 555 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .vscode/settings.json
 create mode 100644 Makefile
 create mode 100644 src/clm.c
 create mode 100644 src/clm.h
 create mode 100644 src/cltest.bak.c
 create mode 100644 src/cltest.c
 create mode 100644 src/test.cl

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..b67641b
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,11 @@
+{
+    "files.associations": {
+        "*.embeddedhtml": "html",
+        "clm.h": "c",
+        "clm.c": "c",
+        "cltest.c": "c",
+        "*.tcc": "c",
+        "string": "c",
+        "string_view": "c"
+    }
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..95abd95
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,15 @@
+SRC=src
+BUILD=build
+CFLAGS=-Wall -g
+# Libraries are listed after the sources: the linker resolves symbols in
+# command-line order. -lm provides exp(), which clm.c uses.
+LDLIBS=-lOpenCL -lm
+
+.PHONY: all
+all:
+	mkdir -p $(BUILD)
+	gcc $(CFLAGS) -o $(BUILD)/cltest $(SRC)/cltest.c $(SRC)/clm.c $(LDLIBS)
+
+.PHONY: run
+run: all
+	$(BUILD)/cltest
diff --git a/src/clm.c b/src/clm.c
new file mode 100644
index 0000000..bd28383
--- /dev/null
+++ b/src/clm.c
@@ -0,0 +1,261 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "clm.h"
+
+// Sentinel returned by matrix operations that fail.
+clm_Matrix INVALID_MATRIX = { .rows = 0, .cols = 0, .values = NULL };
+
+// Number of elements in `mat`, computed in size_t so that large dimensions do
+// not wrap around in unsigned int arithmetic.
+static size_t clm_matrixCount(clm_Matrix mat) {
+	return (size_t) mat.rows * mat.cols;
+}
+
+// Creates a zero-filled vector with `length` elements.
+// Returns an empty vector (values == NULL, length == 0) if `length` is not
+// positive or the allocation fails.
+clm_Vector clm_createVector(int length) {
+	clm_Vector vector = { .values = NULL, .length = 0 };
+	if(length <= 0) {
+		return vector;
+	}
+
+	vector.values = calloc((size_t) length, sizeof(float));
+	if(vector.values != NULL) {
+		vector.length = length;
+	}
+	return vector;
+}
+
+// Releases the storage owned by `vector`.
+void clm_freeVector(clm_Vector vector) {
+	free(vector.values);
+}
+
+// Placeholder: layer construction is not implemented yet.
+// Returns a layer whose weights and bias are both INVALID_MATRIX so that
+// callers never read indeterminate memory.
+clm_Layer clm_createLayer(unsigned int prevNumNeurons, unsigned int numLayers) {
+	(void) prevNumNeurons;
+	(void) numLayers;
+
+	clm_Layer layer = { .weights = INVALID_MATRIX, .bias = INVALID_MATRIX };
+	return layer;
+}
+
+// Creates a network with `numLayers` layers.
+// The layer array is zero-initialised, so every layer starts with empty
+// matrices; the layers are not built from `layerSizes` yet. The caller owns
+// nn.layers and releases it with free(). If `numLayers` is 0 or the allocation
+// fails, nn.layers is NULL and nn.numLayers is 0.
+clm_NN clm_createNN(unsigned int numLayers, unsigned int *layerSizes) {
+	(void) layerSizes; // TODO: build each layer from its size
+
+	clm_NN nn = { .layers = NULL, .numLayers = 0 };
+	if(numLayers == 0) {
+		return nn;
+	}
+
+	nn.layers = calloc(numLayers, sizeof(*nn.layers));
+	if(nn.layers != NULL) {
+		nn.numLayers = numLayers;
+	}
+	return nn;
+}
+
+// Creates a zero-filled rows x cols matrix.
+// Returns INVALID_MATRIX if the storage cannot be allocated.
+clm_Matrix clm_createMatrix(unsigned int rows, unsigned int cols) {
+	size_t count = (size_t) rows * cols;
+
+	clm_Matrix mat;
+	mat.rows = rows;
+	mat.cols = cols;
+	mat.values = calloc(count, sizeof(float));
+	// calloc(0, ...) may legitimately return NULL, so NULL only means failure
+	// when storage was actually requested.
+	if(mat.values == NULL && count != 0) {
+		printf("Failed to allocate matrix\n");
+		return INVALID_MATRIX;
+	}
+	return mat;
+}
+
+// Creates a rows x cols matrix filled with pseudo-random values in [-1, 1].
+// Values come from rand(), so the sequence depends on the srand() seed.
+clm_Matrix clm_createMatrixRandom(unsigned int rows, unsigned int cols) {
+	clm_Matrix mat = clm_createMatrix(rows, cols);
+
+	// Use the dimensions of the created matrix: they are 0 if the allocation
+	// failed, which keeps the loop away from a NULL buffer.
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] = ((float) rand() / RAND_MAX) * 2.0f - 1.0f;
+	}
+
+	return mat;
+}
+
+// Releases the storage owned by `mat`.
+void clm_freeMatrix(clm_Matrix mat) {
+	free(mat.values);
+}
+
+// Returns a newly allocated copy of `mat`; `mat` itself is left untouched.
+clm_Matrix clm_copyMatrix(clm_Matrix mat) {
+	clm_Matrix copy = clm_createMatrix(mat.rows, mat.cols);
+
+	// memcpy must not be handed a NULL pointer, even for a zero-byte copy.
+	if(copy.values != NULL && mat.values != NULL) {
+		memcpy(copy.values, mat.values, clm_matrixCount(copy) * sizeof(float));
+	}
+
+	return copy;
+}
+
+// Adds `scalar` to every element of `mat` in place and returns `mat`.
+clm_Matrix clm_matrixAddScalar(clm_Matrix mat, float scalar) {
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] += scalar;
+	}
+
+	return mat;
+}
+
+// Adds `other` to `mat` element-wise in place, frees `other` and returns `mat`.
+// If the dimensions differ, nothing is modified or freed and INVALID_MATRIX is
+// returned.
+clm_Matrix clm_matrixAddMatrix(clm_Matrix mat, clm_Matrix other) {
+	if(mat.cols != other.cols || mat.rows != other.rows) {
+		printf("Failed to add matrices\n");
+		return INVALID_MATRIX;
+	}
+
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] += other.values[i];
+	}
+
+	// The result lives in mat's buffer, so it must survive when both operands
+	// share the same storage.
+	if(other.values != mat.values) {
+		clm_freeMatrix(other);
+	}
+
+	return mat;
+}
+
+// Subtracts `other` from `mat` element-wise in place, frees `other` and returns
+// `mat`. If the dimensions differ, nothing is modified or freed and
+// INVALID_MATRIX is returned.
+clm_Matrix clm_matrixSubtractMatrix(clm_Matrix mat, clm_Matrix other) {
+	if(mat.cols != other.cols || mat.rows != other.rows) {
+		printf("Failed to subtract matrices\n");
+		return INVALID_MATRIX;
+	}
+
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] -= other.values[i];
+	}
+
+	// The result lives in mat's buffer, so it must survive when both operands
+	// share the same storage.
+	if(other.values != mat.values) {
+		clm_freeMatrix(other);
+	}
+
+	return mat;
+}
+
+// Returns a newly allocated transpose of `mat` and frees `mat`.
+clm_Matrix clm_matrixTranspose(clm_Matrix mat) {
+	clm_Matrix tr = clm_createMatrix(mat.cols, mat.rows);
+	for(unsigned int i = 0; i < tr.rows; i++) {
+		for(unsigned int j = 0; j < tr.cols; j++) {
+			tr.values[(size_t) i * tr.cols + j] = mat.values[(size_t) j * mat.cols + i];
+		}
+	}
+
+	clm_freeMatrix(mat);
+
+	return tr;
+}
+
+// Returns the newly allocated product a * b and frees both operands.
+// If a.cols != b.rows, nothing is freed and INVALID_MATRIX is returned.
+clm_Matrix clm_matrixMultiplyMatrix(clm_Matrix a, clm_Matrix b) {
+	if(a.cols != b.rows) {
+		printf("Cannot multiply matrices\n");
+		return INVALID_MATRIX;
+	}
+
+	clm_Matrix r = clm_createMatrix(a.rows, b.cols);
+	for(unsigned int i = 0; i < r.rows; i++) {
+		for(unsigned int j = 0; j < r.cols; j++) {
+			float sum = 0;
+			for(unsigned int k = 0; k < a.cols; k++) {
+				sum += a.values[(size_t) i * a.cols + k] * b.values[(size_t) k * b.cols + j];
+			}
+			r.values[(size_t) i * r.cols + j] = sum;
+		}
+	}
+
+	// Release b first and only if it is a distinct buffer: multiplying a
+	// matrix by itself must not free the same storage twice.
+	if(b.values != a.values) {
+		clm_freeMatrix(b);
+	}
+	clm_freeMatrix(a);
+
+	return r;
+}
+
+// Multiplies every element of `mat` by `scalar` in place and returns `mat`.
+clm_Matrix clm_matrixMultiplyScalar(clm_Matrix mat, float scalar) {
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] *= scalar;
+	}
+
+	return mat;
+}
+
+// Applies the logistic function 1 / (1 + e^-x) to every element of `mat` in
+// place and returns `mat`.
+clm_Matrix clm_matrixSigmoid(clm_Matrix mat) {
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		mat.values[i] = 1 / (1 + exp(-mat.values[i]));
+	}
+
+	return mat;
+}
+
+// Replaces every element v of `mat` by v * (1 - v) in place and returns `mat`.
+// This is the sigmoid derivative when `mat` already holds sigmoid outputs.
+clm_Matrix clm_matrixDSigmoid(clm_Matrix mat) {
+	size_t count = clm_matrixCount(mat);
+	for(size_t i = 0; i < count; i++) {
+		float v = mat.values[i];
+		mat.values[i] = v * (1 - v);
+	}
+
+	return mat;
+}
+
+// Prints `mat` to stdout, one row per line, enclosed in square brackets.
+void clm_matrixPrint(clm_Matrix mat) {
+	printf("[\n");
+	for(unsigned int i = 0; i < mat.rows; i++) {
+		for(unsigned int j = 0; j < mat.cols; j++) {
+			printf("%7.3f", mat.values[(size_t) i * mat.cols + j]);
+		}
+		printf("\n");
+	}
+	printf("]\n");
+}
diff --git a/src/clm.h b/src/clm.h
new file mode 100644
index 0000000..5167e76
--- /dev/null
+++ b/src/clm.h
@@ -0,0 +1,57 @@
+#ifndef CLM_H
+#define CLM_H
+
+// Row-major float matrix: element (i, j) is values[i * cols + j].
+typedef struct {
+	float *values;
+	unsigned int rows;
+	unsigned int cols;
+} clm_Matrix;
+
+// Array of `length` floats.
+typedef struct {
+	float *values;
+	int length;
+} clm_Vector;
+
+clm_Vector clm_createVector(int length);
+void clm_freeVector(clm_Vector vector);
+
+// Weight and bias matrices of a single layer.
+typedef struct {
+	clm_Matrix weights, bias;
+} clm_Layer;
+
+// Network made of `numLayers` layers.
+typedef struct {
+	clm_Layer *layers;
+	unsigned int numLayers;
+} clm_NN;
+
+clm_Layer clm_createLayer(unsigned int prevNumNeurons, unsigned int numLayers);
+clm_NN clm_createNN(unsigned int numLayers, unsigned int *layerSizes);
+
+// Allocation and release. Failed allocations yield a matrix with
+// values == NULL and rows == cols == 0.
+clm_Matrix clm_createMatrix(unsigned int rows, unsigned int cols);
+clm_Matrix clm_createMatrixRandom(unsigned int rows, unsigned int cols);
+clm_Matrix clm_copyMatrix(clm_Matrix mat);
+void clm_freeMatrix(clm_Matrix mat);
+
+// In-place operations: modify the storage of `mat` and return `mat`.
+clm_Matrix clm_matrixAddScalar(clm_Matrix mat, float scalar);
+clm_Matrix clm_matrixMultiplyScalar(clm_Matrix mat, float scalar);
+clm_Matrix clm_matrixSigmoid(clm_Matrix mat);
+clm_Matrix clm_matrixDSigmoid(clm_Matrix mat);
+
+// Consuming operations: on success the operands that are not returned are
+// freed. On a dimension mismatch nothing is freed and the result has
+// values == NULL.
+clm_Matrix clm_matrixAddMatrix(clm_Matrix mat, clm_Matrix other);
+clm_Matrix clm_matrixSubtractMatrix(clm_Matrix mat, clm_Matrix other);
+clm_Matrix clm_matrixTranspose(clm_Matrix mat);
+clm_Matrix clm_matrixMultiplyMatrix(clm_Matrix a, clm_Matrix b);
+
+void clm_matrixPrint(clm_Matrix mat);
+
+#endif
diff --git a/src/cltest.bak.c b/src/cltest.bak.c
new file mode 100644
index 0000000..91b7588
--- /dev/null
+++ b/src/cltest.bak.c
@@ -0,0 +1,171 @@
+#define CL_TARGET_OPENCL_VERSION 300
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <CL/cl.h>
+
+int main()
+{
+	// Connect to a compute device
+	//
+	int gpu = 1;
+	cl_device_id deviceID;
+	cl_int err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &deviceID, NULL);
+	if (err != CL_SUCCESS) {
+		printf("Error: Failed to create a device group!\n");
+		return 1;
+	}
+
+	// Load the kernel source into a NUL-terminated buffer
+	FILE *file = fopen("src/test.cl", "r");
+	if(!file) {
+		printf("Failed to open src/test.cl\n");
+		return 1;
+	}
+
+	fseek(file, 0, SEEK_END);
+	long fileSize = ftell(file);
+	fseek(file, 0, SEEK_SET);
+	if(fileSize < 0) {
+		printf("Failed to determine the size of src/test.cl\n");
+		fclose(file);
+		return 1;
+	}
+
+	char *buffer = calloc(1, (size_t) fileSize + 1);
+	if(!buffer) {
+		printf("Failed to allocate the kernel source buffer\n");
+		fclose(file);
+		return 1;
+	}
+
+	// The number of bytes actually read is the length of the source
+	size_t length = fread(buffer, 1, (size_t) fileSize, file);
+	fclose(file);
+
+	printf("%s", buffer);
+
+	cl_context context = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &err);
+	if(!context) {
+		printf("Failed to create context\n");
+		return 1;
+	}
+
+	cl_command_queue queue = clCreateCommandQueueWithProperties(context, deviceID, NULL, &err);
+	if(!queue) {
+		printf("Failed to create command queue\n");
+		return 1;
+	}
+
+	cl_program program = clCreateProgramWithSource(context, 1, (const char **) &buffer, &length, &err);
+	if(!program) {
+		printf("Failed to create program\n");
+		return 1;
+	}
+
+	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to build program\n");
+
+		// Print the compiler output so that kernel errors can be diagnosed
+		size_t logSize = 0;
+		clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+		char *buildLog = calloc(1, logSize + 1);
+		if(buildLog) {
+			clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL);
+			printf("%s\n", buildLog);
+			free(buildLog);
+		}
+		return 1;
+	}
+
+	cl_kernel kernel = clCreateKernel(program, "do_stuff", &err);
+	if(!kernel) {
+		printf("Failed to create kernel\n");
+		return 1;
+	}
+
+	unsigned int inputSize = 256000000;
+	float *inputData = calloc(inputSize, sizeof(float));
+	if(!inputData) {
+		printf("Failed to allocate input data\n");
+		return 1;
+	}
+	for(unsigned int i = 0; i < inputSize; i++) {
+		inputData[i] = i;
+	}
+
+	cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * inputSize, NULL, &err);
+	cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * inputSize, NULL, &err);
+	if(!input || !output) {
+		printf("Failed to allocate input/output buffer\n");
+		return 1;
+	}
+
+	err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(float) * inputSize, inputData, 0, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to write to buffer\n");
+		return 1;
+	}
+
+	err = clSetKernelArg(kernel, 0, sizeof(input), &input);
+	err |= clSetKernelArg(kernel, 1, sizeof(output), &output);
+	err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &inputSize);
+	if(err != CL_SUCCESS) {
+		printf("Failed to set kernel args\n");
+		return 1;
+	}
+
+	/*char *info = calloc(1, 1024);
+	clGetProgramInfo(program, CL_PROGRAM_STRING_DEBUG_INFO, 1024, info, NULL);
+	printf("INFO: %s\n", info);*/
+
+	size_t local;
+	err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to get work group size\n");
+		return 1;
+	}
+
+	printf("Group size is %zu\n", local);
+
+	// The global size has to be a multiple of the work-group size: round it up
+	// and let the kernel skip the padding work-items via its count argument
+	size_t global = ((inputSize + local - 1) / local) * local;
+	err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to enqueue\n");
+		return 1;
+	}
+
+	clFinish(queue);
+
+	float *outputData = calloc(inputSize, sizeof(float));
+	if(!outputData) {
+		printf("Failed to allocate output data\n");
+		return 1;
+	}
+
+	err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(float) * inputSize, outputData, 0, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to read from buffer\n");
+		return 1;
+	}
+
+	for(unsigned int i = 0; i < inputSize; i++) {
+		if(i % 1000 != 0) continue;
+		printf("%f: %f\n", inputData[i], outputData[i]);
+	}
+
+	// Release everything that was acquired on the way
+	free(outputData);
+	free(inputData);
+	free(buffer);
+	clReleaseMemObject(output);
+	clReleaseMemObject(input);
+	clReleaseKernel(kernel);
+	clReleaseProgram(program);
+	clReleaseCommandQueue(queue);
+	clReleaseContext(context);
+	return 0;
+}
diff --git a/src/cltest.c b/src/cltest.c
new file mode 100644
index 0000000..c723ecc
--- /dev/null
+++ b/src/cltest.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+
+#include "clm.h"
+
+// Smoke test: chains the matrix helpers and prints every intermediate result.
+int main() {
+	clm_Matrix mat = clm_createMatrixRandom(2, 1);
+	clm_matrixPrint(mat);
+
+	mat = clm_matrixTranspose(mat);
+	clm_matrixPrint(mat);
+
+	mat = clm_matrixAddScalar(mat, 1);
+	clm_matrixPrint(mat);
+
+	clm_Matrix b = clm_createMatrixRandom(2, 2);
+	clm_matrixPrint(b);
+
+	// The multiplication frees both mat (1x2) and b (2x2)
+	clm_Matrix mul = clm_matrixMultiplyMatrix(mat, b);
+	clm_matrixPrint(mul);
+
+	mul = clm_matrixMultiplyScalar(mul, 0.5);
+	clm_matrixPrint(mul);
+
+	clm_freeMatrix(mul);
+	return 0;
+}
diff --git a/src/test.cl b/src/test.cl
new file mode 100644
index 0000000..82c4361
--- /dev/null
+++ b/src/test.cl
@@ -0,0 +1,11 @@
+// Stores the square of each of the first `count` input elements in `output`.
+__kernel void do_stuff(__global float *input, __global float *output, unsigned int count) {
+	size_t i = get_global_id(0);
+	// The host rounds the global size up to a multiple of the work-group size,
+	// so work-items past the end of the buffers have nothing to do.
+	if(i >= count) {
+		return;
+	}
+	//printf("Task %d\n", i);
+	output[i] = input[i] * input[i];
+}