diff --git a/.gitignore b/.gitignore index 567609b..5354187 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ build/ +data/ + diff --git a/.vscode/settings.json b/.vscode/settings.json index b67641b..28c23cb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,6 +6,8 @@ "cltest.c": "c", "*.tcc": "c", "string": "c", - "string_view": "c" + "string_view": "c", + "stdint.h": "c", + "inttypes.h": "c" } } diff --git a/Makefile b/Makefile index 95abd95..179731b 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CFLAGS=-Wall -g .PHONY: all all: mkdir -p $(BUILD) - gcc -lOpenCL $(CFLAGS) -o $(BUILD)/cltest $(SRC)/cltest.c $(SRC)/clm.c + gcc -lOpenCL -lm $(CFLAGS) -o $(BUILD)/cltest $(SRC)/cltest.c $(SRC)/clm.c .PHONY: run run: all diff --git a/src/clm.c b/src/clm.c index bd28383..1771ce6 100644 --- a/src/clm.c +++ b/src/clm.c @@ -5,36 +5,8 @@ #include "clm.h" -clm_Matrix INVALID_MATRIX = { .rows = 0, .cols = 0, .values = NULL }; - -clm_Vector clm_createVector(int length) { - clm_Vector vector; - vector.values = calloc(length, sizeof(float)); - vector.length = length; - return vector; -} - -void clm_freeVector(clm_Vector vector) { - free(vector.values); -} - -clm_Layer clm_createLayer(unsigned int prevNumNeurons, unsigned int numLayers) { - clm_Layer layer; - for(unsigned int i = 0; i < numLayers; i++) { - - } - return layer; -} - -clm_NN clm_createNN(unsigned int numLayers, unsigned int *layerSizes) { - clm_NN nn; - nn.numLayers = numLayers; - for(unsigned int i = 0; i < numLayers; i++) { - clm_Layer layer; - - } - return nn; -} +const clm_Matrix INVALID_MATRIX = { .rows = 0, .cols = 0, .values = NULL }; +const clm_Vector INVALID_VECTOR = { .length = 0, .values = NULL }; clm_Matrix clm_createMatrix(unsigned int rows, unsigned int cols) { clm_Matrix mat; @@ -60,7 +32,7 @@ void clm_freeMatrix(clm_Matrix mat) { free(mat.values); } -clm_Matrix clm_copyMatrix(clm_Matrix mat) { +clm_Matrix clm_matrixCopy(clm_Matrix mat) { clm_Matrix copy = clm_createMatrix(mat.rows, mat.cols); memcpy(copy.values, mat.values, mat.rows * mat.cols * sizeof(float)); @@ -86,7 +58,7 @@ clm_Matrix clm_matrixAddMatrix(clm_Matrix mat, clm_Matrix other) { mat.values[i] += other.values[i]; } - clm_freeMatrix(other); + //clm_freeMatrix(other); return mat; } @@ -101,7 +73,7 @@ clm_Matrix clm_matrixSubtractMatrix(clm_Matrix mat, clm_Matrix other) { mat.values[i] -= other.values[i]; } - clm_freeMatrix(other); + //clm_freeMatrix(other); return mat; } @@ -114,7 +86,7 @@ clm_Matrix clm_matrixTranspose(clm_Matrix mat) { } } - clm_freeMatrix(mat); + //clm_freeMatrix(mat); return tr; } @@ -136,12 +108,25 @@ clm_Matrix clm_matrixMultiplyMatrix(clm_Matrix a, clm_Matrix b) { } } - clm_freeMatrix(a); - clm_freeMatrix(b); + //clm_freeMatrix(a); + //clm_freeMatrix(b); return r; } +clm_Matrix clm_matrixMultiplyMatrixElements(clm_Matrix mat, clm_Matrix other) { + if(mat.rows != other.rows || mat.cols != other.cols) { + printf("Cannot multiply matrices element-wise\n"); + return INVALID_MATRIX; + } + + for(unsigned int i = 0; i < mat.cols * mat.rows; i++) { + mat.values[i] *= other.values[i]; + } + + return mat; +} + clm_Matrix clm_matrixMultiplyScalar(clm_Matrix mat, float scalar) { for(unsigned int i = 0; i < mat.cols * mat.rows; i++) { mat.values[i] *= scalar; @@ -153,7 +138,7 @@ clm_Matrix clm_matrixMultiplyScalar(clm_Matrix mat, float scalar) { clm_Matrix clm_matrixSigmoid(clm_Matrix mat) { for(unsigned int i = 0; i < mat.rows; i++) { for(unsigned int j = 0; j < mat.cols; j++) { - mat.data[i * mat.cols + j] = 1 / (1 + exp(-mat.values[i * mat.cols + j])); + mat.values[i * mat.cols + j] = 1 / (1 + exp(-mat.values[i * mat.cols + j])); } } @@ -163,14 +148,51 @@ clm_Matrix clm_matrixSigmoid(clm_Matrix mat) { clm_Matrix clm_matrixDSigmoid(clm_Matrix mat) { for(unsigned int i = 0; i < mat.rows; i++) { for(unsigned int j = 0; j < mat.cols; j++) { - float v = mat.data[i * mat.cols + j]; - mat.data[i * mat.cols + j] = v * (1 - v); + float v = mat.values[i * mat.cols + j]; + mat.values[i * mat.cols + j] = v * (1 - v); } } return mat; } +clm_Matrix clm_matrixFromArray(float *array, unsigned int length) { + clm_Matrix matrix = clm_createMatrix(length, 1); + memcpy(matrix.values, array, length * sizeof(float)); + return matrix; +} + +bool clm_matrixIsInvalid(clm_Matrix mat) { + return mat.values == NULL; +} + +clm_Vector clm_vectorCreate(unsigned int length) { + clm_Vector vector; + vector.length = length; + vector.values = calloc(length, sizeof(float)); + return vector; +} + +bool clm_vectorIsInvalid(clm_Vector vec) { + return vec.values != NULL; +} + +clm_Linear clm_linearCreateRandom(unsigned int inputs, unsigned int outputs) { + clm_Linear linear; + linear.weights = clm_createMatrixRandom(outputs, inputs); + linear.bias = clm_createMatrixRandom(outputs, 1); + return linear; +} + +void clm_freeVector(clm_Vector vector) { + free(vector.values); +} + +void clm_freeLinear(clm_Linear linear) { + clm_freeMatrix(linear.weights); + clm_freeMatrix(linear.bias); +} + void clm_matrixPrint(clm_Matrix mat) { printf("[\n"); for(unsigned int i = 0; i < mat.rows; i++) { @@ -181,3 +203,7 @@ void clm_matrixPrint(clm_Matrix mat) { } printf("]\n"); } + +void clm_matrixPrintShape(clm_Matrix mat) { + printf("(%dx%d)\n", mat.rows, mat.cols); +} diff --git a/src/clm.h b/src/clm.h index 5167e76..9c71566 100644 --- a/src/clm.h +++ b/src/clm.h @@ -1,6 +1,8 @@ #ifndef _CLM_H_ #define _CLM_H_ +#include + typedef struct { float *values; unsigned int rows; @@ -9,22 +11,22 @@ typedef struct { typedef struct { float *values; - int length; + unsigned int length; } clm_Vector; -clm_Vector clm_createVector(int length); -void clm_freeVector(clm_Vector vector); +typedef struct { + clm_Matrix weights; + clm_Matrix bias; +} clm_Linear; typedef struct { - clm_Matrix weights, bias; -} clm_Layer; - -typedef struct { - clm_Layer *layers; + clm_Linear *layers; unsigned int numLayers; + float learnRate; } clm_NN; -clm_NN clm_createNN(unsigned int numLayers, unsigned int *layerSizes); +extern const clm_Matrix INVALID_MATRIX; +extern const clm_Vector INVALID_VECTOR; clm_Matrix clm_createMatrixRandom(unsigned int rows, unsigned int cols); clm_Matrix clm_matrixAddScalar(clm_Matrix mat, float scalar); @@ -32,8 +34,26 @@ clm_Matrix clm_matrixAddMatrix(clm_Matrix mat, clm_Matrix other); clm_Matrix clm_matrixSubtractMatrix(clm_Matrix mat, clm_Matrix other); clm_Matrix clm_matrixTranspose(clm_Matrix mat); clm_Matrix clm_matrixMultiplyMatrix(clm_Matrix a, clm_Matrix b); +clm_Matrix clm_matrixMultiplyMatrixElements(clm_Matrix mat, clm_Matrix other); clm_Matrix clm_matrixMultiplyScalar(clm_Matrix mat, float scalar); +clm_Matrix clm_matrixSigmoid(clm_Matrix mat); +clm_Matrix clm_matrixDSigmoid(clm_Matrix mat); + +clm_Matrix clm_matrixFromArray(float *array, unsigned int length); +clm_Matrix clm_matrixCopy(clm_Matrix matrix); + +bool clm_matrixIsInvalid(clm_Matrix mat); + +clm_Vector clm_vectorCreate(unsigned int length); + +bool clm_vectorIsInvalid(clm_Vector vec); + +clm_Linear clm_linearCreateRandom(unsigned int inputs, unsigned int outputs); void clm_matrixPrint(clm_Matrix mat); +void clm_matrixPrintShape(clm_Matrix mat); +void clm_freeMatrix(clm_Matrix mat); +void clm_freeVector(clm_Vector vector); +void clm_freeLinear(clm_Linear linear); #endif diff --git a/src/cltest.c b/src/cltest.c index c723ecc..0e1ddc6 100644 --- a/src/cltest.c +++ b/src/cltest.c @@ -1,23 +1,283 @@ #include +#include +#include #include "clm.h" -int main() { - clm_Matrix mat = clm_createMatrixRandom(2, 1); - clm_matrixPrint(mat); +float train_data_x[4][2] = { + {0, 0}, + {0, 1}, + {1, 0}, + {1, 1} +}; - mat = clm_matrixTranspose(mat); - clm_matrixPrint(mat); +float train_data_y[4][1] = { + {0}, + {1}, + {1}, + {0} +}; - mat = clm_matrixAddScalar(mat, 1); - clm_matrixPrint(mat); +float *predict(clm_NN nn, float *x, unsigned int length) { + clm_Matrix xM = clm_matrixFromArray(x, length); - clm_Matrix b = clm_createMatrixRandom(2, 2); - clm_matrixPrint(b); + for(unsigned int i = 0; i < nn.numLayers; i++) { + clm_Linear layer = nn.layers[i]; + clm_Matrix newX = clm_matrixMultiplyMatrix(layer.weights, xM); - clm_Matrix mul = clm_matrixMultiplyMatrix(mat, b); - clm_matrixPrint(mul); + if(clm_matrixIsInvalid(newX)) { + printf("Failed to predict\n"); + return NULL; + } - mul = clm_matrixMultiplyScalar(mul, 0.5); - clm_matrixPrint(mul); + clm_matrixAddMatrix(newX, layer.bias); + clm_matrixSigmoid(newX); + clm_freeMatrix(xM); + xM = newX; + } + + return xM.values; +} + +void train(clm_NN nn, float *x, unsigned int xL, float *y, unsigned int yL) { + clm_Matrix xM = clm_matrixFromArray(x, xL); + clm_Matrix yM = clm_matrixFromArray(y, yL); + + // TODO: potential compute/memory tradeoff? (recalculate matrices every time <-> keep everything cached) + + // Forward pass + clm_Matrix *outputs = calloc(nn.numLayers + 1 /* 1 for input */, sizeof(clm_Matrix)); + outputs[0] = xM; + for(unsigned int i = 0; i < nn.numLayers; i++) { + clm_Linear layer = nn.layers[i]; + clm_Matrix newX = clm_matrixMultiplyMatrix(layer.weights, xM); + if(clm_matrixIsInvalid(newX)) { + printf("Forward pass failed\n"); + return; + } + + clm_matrixAddMatrix(newX, layer.bias); + clm_matrixSigmoid(newX); + xM = newX; + outputs[i + 1] = xM; + } + + clm_Matrix dError = clm_matrixSubtractMatrix(yM, outputs[nn.numLayers]); // yhat - y + + clm_Matrix lastGradient = clm_matrixDSigmoid(clm_matrixCopy(outputs[nn.numLayers])); // dsig(yhat) + clm_matrixMultiplyMatrixElements(lastGradient, dError); // (yhat - y) . dsig(yhat) + clm_matrixMultiplyScalar(lastGradient, nn.learnRate); + + clm_Matrix lastInputT = clm_matrixTranspose(outputs[nn.numLayers - 1]); + clm_Matrix lastDeltaW = clm_matrixMultiplyMatrix(lastGradient, lastInputT); + clm_freeMatrix(lastInputT); + + clm_matrixAddMatrix(nn.layers[nn.numLayers - 1].weights, lastDeltaW); + clm_matrixAddMatrix(nn.layers[nn.numLayers - 1].bias, lastGradient); + + clm_freeMatrix(lastDeltaW); + clm_freeMatrix(lastGradient); + + for(int i = nn.numLayers - 2; i >= 0; i--) { + clm_Linear layer = nn.layers[i]; + clm_Matrix inputToThisLayer = outputs[i]; + clm_Matrix outputOfThisLayer = outputs[i + 1]; + + clm_Matrix weightsT = clm_matrixTranspose(nn.layers[i + 1].weights); + clm_Matrix newDError = clm_matrixMultiplyMatrix(weightsT, dError); + clm_freeMatrix(weightsT); + clm_freeMatrix(dError); + dError = newDError; + + clm_Matrix gradient = clm_matrixDSigmoid(clm_matrixCopy(outputOfThisLayer)); + clm_matrixMultiplyMatrixElements(gradient, dError); + clm_matrixMultiplyScalar(gradient, nn.learnRate); + + clm_Matrix inputT = clm_matrixTranspose(inputToThisLayer); + clm_Matrix deltaW = clm_matrixMultiplyMatrix(gradient, inputT); + clm_freeMatrix(inputT); + + clm_matrixAddMatrix(layer.weights, deltaW); + clm_matrixAddMatrix(layer.bias, gradient); + + clm_freeMatrix(deltaW); + clm_freeMatrix(gradient); + } + + clm_freeMatrix(dError); + + for(unsigned int i = 0; i <= nn.numLayers; i++) { + clm_freeMatrix(outputs[i]); + } + + free(outputs); +} + +void loadLabels(clm_Vector **labelsOut, unsigned int *labelsCountOut) { + FILE *file = fopen("data/train-labels.idx1-ubyte", "r"); + if(!file) { + perror("Failed to open labels\n"); + return; + } + + unsigned char magicBytes[4]; + fread(magicBytes, sizeof(magicBytes), 1, file); + + printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]); + + unsigned char lengthBytes[4]; + fread(lengthBytes, sizeof(lengthBytes), 1, file); + + uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) | (lengthBytes[2] << 8) | lengthBytes[3]; + printf("%" PRId32 "\n", length); + + clm_Vector *vectors = calloc(length, sizeof(clm_Vector)); + + for(unsigned int i = 0; i < length; i++) { + unsigned char label; + fread(&label, sizeof(unsigned char), 1, file); + + clm_Vector vector = clm_vectorCreate(10); + for(unsigned int j = 0; j < 10; j++) { + vector.values[j] = label == j ? 1 : 0; + } + vectors[i] = vector; + } + + *labelsOut = vectors; + *labelsCountOut = length; +} + +void loadImages(clm_Vector **imagesOut, unsigned int *imageCountOut) { + FILE *file = fopen("data/train-images.idx3-ubyte", "r"); + if(!file) { + perror("Failed to open images\n"); + return; + } + + unsigned char magicBytes[4]; + fread(magicBytes, sizeof(magicBytes), 1, file); + + printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]); + + unsigned char lengthBytes[4]; + fread(lengthBytes, sizeof(lengthBytes), 1, file); + uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) | (lengthBytes[2] << 8) | lengthBytes[3]; + printf("%" PRId32 "\n", length); + + unsigned char rowsBytes[4]; + fread(rowsBytes, sizeof(rowsBytes), 1, file); + uint32_t rows = (rowsBytes[0] << 24) | (rowsBytes[1] << 16) | (rowsBytes[2] << 8) | rowsBytes[3]; + printf("%" PRId32 "\n", rows); + + unsigned char colsBytes[4]; + fread(colsBytes, sizeof(colsBytes), 1, file); + uint32_t cols = (colsBytes[0] << 24) | (colsBytes[1] << 16) | (colsBytes[2] << 8) | colsBytes[3]; + printf("%" PRId32 "\n", cols); + + clm_Vector *images = calloc(length, sizeof(clm_Vector)); + for(unsigned int i = 0; i < length; i++) { + clm_Vector vec = clm_vectorCreate(cols * rows); + unsigned char image[cols * rows]; + fread(image, sizeof(image), 1, file); + for(unsigned int j = 0; j < cols * rows; j++) { + vec.values[j] = (float) image[j]; + } + images[i] = vec; + } + + *imagesOut = images; + *imageCountOut = length; +} + +int main() { + clm_Vector *labels = NULL; + unsigned int labelCount; + loadLabels(&labels, &labelCount); + printf("LENGTH: %u\n", labelCount); + + clm_Vector *images = NULL; + unsigned int imageCount; + loadImages(&images, &imageCount); + + imageCount = 60000; + + printf("%f\n", images[0].values[0]); + + srand(1); + + unsigned int + i = 784, + h = 30, + o = 10; + + clm_Linear layer1; + layer1.weights = clm_createMatrixRandom(h, i); + layer1.bias = clm_createMatrixRandom(h, 1); + + clm_Linear layer2; + layer2.weights = clm_createMatrixRandom(o, h); + layer2.bias = clm_createMatrixRandom(o, 1); + + clm_Linear layers[] = {layer1, layer2}; + clm_NN nn = { layers, sizeof(layers) / sizeof(clm_Linear), 0.01 }; + + for(unsigned int epoch = 0; epoch < 10; epoch++) { + printf("Epoch %u\n", epoch); + + for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample + if(idx % 1000 == 0) { + printf("%.2f%%\n", idx / (float) imageCount * 100); + } + //printf("%u\n", idx); + //train(nn, train_data_x[idx], 2, train_data_y[idx], 1); + /*for(unsigned int f = 0; f < images[idx].length; f++) { + printf("%.2f ", images[idx].values[f]); + } + printf("\n"); + for(unsigned int f = 0; f < labels[idx].length; f++) { + printf("%.2f ", labels[idx].values[f]); + } + printf("\n");*/ + //printf("%.2f\n", labels.values[idx]); + + train(nn, images[idx].values, images[idx].length, labels[idx].values, labels[idx].length); + //train(nn, test, 784, target, 10); + //predict(nn, test, 784); + } + } + + unsigned int correct = 0; + for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample + //printf("pred(%.2f, %.2f) = %.2f\n", train_data_x[idx][0], train_data_x[idx][1], predict(nn, train_data_x[idx], 2)[0]); + float *pred = predict(nn, images[idx].values, images[idx].length); + unsigned int predDigit = 0; + float max = -1; + for(unsigned int j = 0; j < 10; j++) { + //printf("%.2f ", pred[j]); + if(pred[j] > max || max < 0) { + max = pred[j]; + predDigit = j; + } + } + if(idx < 100) printf("%u (confidence: %.2f)\n", predDigit, max); + + unsigned int actDigit = 0; + float maxA = -1; + for(unsigned int j = 0; j < 10; j++) { + //printf("%.2f ", pred[j]); + if(labels[idx].values[j] > maxA || maxA < 0) { + maxA = labels[idx].values[j]; + actDigit = j; + } + } + if(idx < 100) printf("Actual: %u\n", actDigit); + //printf("\n"); + + if(predDigit == actDigit) correct++; + } + + printf("Correct: %u -> %.2f", correct, (float) correct / imageCount * 100); + + printf("\n"); }