/*
 * nn-testing/src/cltest.c
 * MNIST training/evaluation driver for the clm neural-network library.
 */

#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "clm.h"
#include "clm_gpu.h"
/* XOR truth-table training data. Not referenced by the live code in this
 * file (only by the commented-out debug printf in main); presumably kept
 * from an earlier XOR smoke test — TODO confirm before removing. */
float train_data_x[4][2] = {
{0, 0},
{0, 1},
{1, 0},
{1, 1}};
float train_data_y[4][1] = {
{0},
{1},
{1},
{0}};
/*
 * Run a single forward pass of the network on one input vector.
 * Returns a pointer into the last layer's output buffer (owned by the
 * network, valid until the next forward pass) — caller must not free it.
 */
float *predict(clm_NN nn, clm_Vector input) {
	clm_Matrix current = clm_matrixWrapArray(input.values, input.length);

	for(unsigned int layer = 0; layer < nn.numLayers; layer++) {
		/* Batch size 1: feed the single matrix through, then chain the
		 * layer's output into the next iteration. */
		clm_linearForward(&nn.layers[layer], 1, &current, &nn.layers[layer].output[0]);
		current = nn.layers[layer].output[0];
	}

	return current.values;
}
/*
 * Train the network for one pass over `numElements` samples, in batches of
 * nn.batchSize: forward pass, last-layer error (y - yhat), then backprop
 * through the layers, accumulating weight/bias updates per sample.
 * Inputs/outputs are wrapped, not copied — the vectors must stay alive for
 * the duration of the call.
 */
void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *expectedOutputs) {
	clm_Matrix *batchInputs = calloc(nn.batchSize, sizeof(clm_Matrix));
	clm_Matrix *batchOutputs = calloc(nn.batchSize, sizeof(clm_Matrix));
	if(!batchInputs || !batchOutputs) {
		/* Allocation failed: release whichever succeeded and bail. */
		fprintf(stderr, "train: failed to allocate batch buffers\n");
		free(batchInputs);
		free(batchOutputs);
		return;
	}

	/* Integer ceiling division — avoids recomputing float ceil() in the
	 * loop condition every iteration. */
	unsigned int numBatches = (numElements + nn.batchSize - 1) / nn.batchSize;

	for(unsigned int b = 0; b < numBatches; b++) {
		/* Last batch may be short. */
		unsigned int batchSize = numElements - b * nn.batchSize;
		if(batchSize > nn.batchSize) batchSize = nn.batchSize;

		printf("Batch %u (size %u)\n", b, batchSize); /* %u: args are unsigned */

		/* Wrap this batch's samples as matrices (no copies). */
		for(unsigned int i = 0; i < batchSize; i++) {
			clm_Vector input = inputs[b * nn.batchSize + i];
			clm_Vector output = expectedOutputs[b * nn.batchSize + i];
			batchInputs[i] = clm_matrixWrapArray(input.values, input.length);
			batchOutputs[i] = clm_matrixWrapArray(output.values, output.length);
		}

		/* Forward pass: each layer consumes the previous layer's outputs. */
		clm_Matrix *currentXs = batchInputs;
		for(unsigned int i = 0; i < nn.numLayers; i++) {
			clm_linearForward(&nn.layers[i], batchSize, currentXs, nn.layers[i].output);
			currentXs = nn.layers[i].output;
		}

		/* Error of last layer = y - yhat. (`s` — sample index; renamed from
		 * `b`, which shadowed the batch counter above.) */
		clm_Linear *lastLayer = &nn.layers[nn.numLayers - 1];
		for(unsigned int s = 0; s < batchSize; s++) {
			clm_matrixCopy(batchOutputs[s], lastLayer->error[s]);               // lastLayer.error = y
			clm_matrixSubtractMatrix(lastLayer->error[s], lastLayer->output[s]); // lastLayer.error -= yhat
		}

		/* Backward pass, last layer to first; apply per-sample updates. */
		for(int i = nn.numLayers - 1; i >= 0; i--) {
			clm_Linear *layer = &nn.layers[i];
			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
			clm_Matrix *outputsOfThisLayer = layer->output;
			clm_linearBackprop(layer, nn.learnRate, batchSize, inputsToThisLayer, outputsOfThisLayer, layer->error, i > 0, i == 0 ? NULL : nn.layers[i - 1].error, layer->weightsError, layer->gradient);
			for(unsigned int s = 0; s < batchSize; s++) {
				clm_matrixAddMatrix(layer->weights, layer->weightsError[s]);
				clm_matrixAddMatrix(layer->bias, layer->gradient[s]);
			}
		}
	}

	free(batchInputs);
	free(batchOutputs);
}
/*
 * Load MNIST training labels (IDX1 format, big-endian header) and expand
 * each label into a one-hot vector of length 10.
 * On success: *labelsOut = heap array of `*labelsCountOut` vectors (caller
 * owns the array and each vector). On failure: *labelsOut = NULL,
 * *labelsCountOut = 0.
 */
void loadLabels(clm_Vector **labelsOut, unsigned int *labelsCountOut) {
	*labelsOut = NULL;
	*labelsCountOut = 0;

	/* "rb": the IDX files are binary; text mode would corrupt them on
	 * platforms that translate line endings. */
	FILE *file = fopen("data/train-labels.idx1-ubyte", "rb");
	if(!file) {
		perror("Failed to open labels");
		return;
	}

	unsigned char magicBytes[4];
	unsigned char lengthBytes[4];
	if(fread(magicBytes, sizeof(magicBytes), 1, file) != 1 ||
	   fread(lengthBytes, sizeof(lengthBytes), 1, file) != 1) {
		fprintf(stderr, "Failed to read labels header\n");
		fclose(file);
		return;
	}
	printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]);

	/* Header fields are big-endian; assemble bytes manually for portability. */
	uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) |
					  (lengthBytes[2] << 8) | lengthBytes[3];
	printf("%" PRIu32 "\n", length); /* PRIu32: length is unsigned */

	clm_Vector *vectors = calloc(length, sizeof(clm_Vector));
	if(!vectors) {
		fprintf(stderr, "Failed to allocate label vectors\n");
		fclose(file);
		return;
	}

	for(unsigned int i = 0; i < length; i++) {
		unsigned char label;
		if(fread(&label, sizeof(unsigned char), 1, file) != 1) {
			fprintf(stderr, "Truncated labels file at %u\n", i);
			break;
		}
		/* One-hot encode: 1.0 at the label's index, 0.0 elsewhere. */
		clm_Vector vector = clm_vectorCreate(10);
		for(unsigned int j = 0; j < 10; j++) {
			vector.values[j] = label == j ? 1 : 0;
		}
		vectors[i] = vector;
	}

	fclose(file); /* was leaked in the original */
	*labelsOut = vectors;
	*labelsCountOut = length;
}
/*
 * Load MNIST training images (IDX3 format, big-endian header) as flat
 * float vectors of rows*cols raw pixel values (0..255, not normalized).
 * On success: *imagesOut = heap array of `*imageCountOut` vectors (caller
 * owns the array and each vector). On failure: *imagesOut = NULL,
 * *imageCountOut = 0.
 */
void loadImages(clm_Vector **imagesOut, unsigned int *imageCountOut) {
	*imagesOut = NULL;
	*imageCountOut = 0;

	/* "rb": binary data; text mode would corrupt it on some platforms. */
	FILE *file = fopen("data/train-images.idx3-ubyte", "rb");
	if(!file) {
		perror("Failed to open images");
		return;
	}

	unsigned char magicBytes[4];
	unsigned char lengthBytes[4];
	unsigned char rowsBytes[4];
	unsigned char colsBytes[4];
	if(fread(magicBytes, sizeof(magicBytes), 1, file) != 1 ||
	   fread(lengthBytes, sizeof(lengthBytes), 1, file) != 1 ||
	   fread(rowsBytes, sizeof(rowsBytes), 1, file) != 1 ||
	   fread(colsBytes, sizeof(colsBytes), 1, file) != 1) {
		fprintf(stderr, "Failed to read images header\n");
		fclose(file);
		return;
	}
	printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]);

	/* Header fields are big-endian; assemble bytes manually for portability. */
	uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) | (lengthBytes[2] << 8) | lengthBytes[3];
	printf("%" PRIu32 "\n", length); /* PRIu32: values are unsigned */
	uint32_t rows = (rowsBytes[0] << 24) | (rowsBytes[1] << 16) | (rowsBytes[2] << 8) | rowsBytes[3];
	printf("%" PRIu32 "\n", rows);
	uint32_t cols = (colsBytes[0] << 24) | (colsBytes[1] << 16) | (colsBytes[2] << 8) | colsBytes[3];
	printf("%" PRIu32 "\n", cols);

	clm_Vector *images = calloc(length, sizeof(clm_Vector));
	unsigned char *image = malloc((size_t) cols * rows); /* heap, not a VLA: sizes come from untrusted file data */
	if(!images || !image) {
		fprintf(stderr, "Failed to allocate image buffers\n");
		free(images);
		free(image);
		fclose(file);
		return;
	}

	for(unsigned int i = 0; i < length; i++) {
		if(fread(image, (size_t) cols * rows, 1, file) != 1) {
			fprintf(stderr, "Truncated images file at %u\n", i);
			break;
		}
		clm_Vector vec = clm_vectorCreate(cols * rows);
		for(unsigned int j = 0; j < cols * rows; j++) {
			vec.values[j] = (float) image[j];
		}
		images[i] = vec;
	}

	free(image);
	fclose(file); /* was leaked in the original */
	*imagesOut = images;
	*imageCountOut = length;
}
/*
 * Entry point: initialize the GPU backend, load MNIST labels/images, train
 * a 784-30-10 network for one epoch, then report training-set accuracy.
 */
int main(void) {
	if(clm_gpuInit() != 0) {
		printf("Failed to init GPU\n");
		return 1;
	}

	/* Initialize counts so a loader failure can't leave them indeterminate
	 * (reading an uninitialized local is UB). */
	clm_Vector *labels = NULL;
	unsigned int labelCount = 0;
	loadLabels(&labels, &labelCount);
	printf("LENGTH: %u\n", labelCount);

	clm_Vector *images = NULL;
	unsigned int imageCount = 0;
	loadImages(&images, &imageCount);

	if(!labels || !images || labelCount == 0 || imageCount == 0) {
		printf("Failed to load training data\n");
		clm_gpuDestroy();
		return 1;
	}

	/* NOTE(review): hard-coded to the MNIST train-set size, overriding the
	 * count read from the file — presumably a leftover; confirm before
	 * removing. */
	imageCount = 60000;
	printf("%f\n", images[0].values[0]);

	srand(1); /* fixed seed for reproducible weight initialization */

	unsigned int
		i = 784, /* input:  28x28 pixels */
		h = 30,  /* hidden units */
		o = 10;  /* output: one per digit */
	clm_Linear layers[] = {
		clm_linearCreateRandom(i, h),
		clm_linearCreateRandom(h, o)};
	clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 1000);

	for(unsigned int epoch = 0; epoch < 1; epoch++) {
		printf("Epoch %u\n", epoch);
		train(nn, imageCount, images, labels);
		printf("\n");
	}
	printf("Train done\n");

	/* Evaluate on the training set: argmax of prediction vs argmax of the
	 * one-hot label. */
	unsigned int correct = 0;
	for(unsigned int idx = 0; idx < imageCount; idx++) {
		float *pred = predict(nn, images[idx]);

		unsigned int predDigit = 0;
		float max = -1;
		for(unsigned int j = 0; j < 10; j++) {
			if(pred[j] > max || max < 0) {
				max = pred[j];
				predDigit = j;
			}
		}

		unsigned int actDigit = 0;
		float maxA = -1;
		for(unsigned int j = 0; j < 10; j++) {
			if(labels[idx].values[j] > maxA || maxA < 0) {
				maxA = labels[idx].values[j];
				actDigit = j;
			}
		}

		if(predDigit == actDigit) correct++;
	}
	printf("Correct: %u -> %.2f", correct, (float) correct / imageCount * 100);
	printf("\n");

	clm_gpuDestroy();
	return 0;
}