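/*
 * Trains a small fully connected neural network on the MNIST digit dataset
 * using the clm library (clm.h / clm_gpu.h).
 *
 * The raw IDX data files are expected under data/:
 *   data/train-labels.idx1-ubyte
 *   data/train-images.idx3-ubyte
 */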
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "clm.h"
#include "clm_gpu.h"

// XOR sample data; not used by the MNIST training below.
float train_data_x[4][2] = {
	{0, 0},
	{0, 1},
	{1, 0},
	{1, 1}};

float train_data_y[4][1] = {
	{0},
	{1},
	{1},
	{0}};

// Runs a single input vector through all layers and returns a pointer to the
// final layer's output values.
float *predict(clm_NN nn, clm_Vector input) {
	clm_Matrix xM = clm_matrixWrapArray(input.values, input.length);

	for(unsigned int i = 0; i < nn.numLayers; i++) {
		clm_linearForward(&nn.layers[i], 1, &xM, &nn.layers[i].output[0]);
		xM = nn.layers[i].output[0];
	}

	return xM.values;
}

// Trains the network on numElements samples, processing them in batches of
// nn.batchSize.
void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *expectedOutputs) {
	clm_Matrix *batchInputs = calloc(nn.batchSize, sizeof(clm_Matrix));
	clm_Matrix *batchOutputs = calloc(nn.batchSize, sizeof(clm_Matrix));

	for(unsigned int b = 0; b < ceil((float) numElements / nn.batchSize); b++) {
		// The last batch may be smaller than nn.batchSize
		unsigned int batchSize = numElements - b * nn.batchSize;
		if(batchSize > nn.batchSize) batchSize = nn.batchSize;

		printf("Batch %u (size %u)\n", b, batchSize);

		// Wrap the raw input/label arrays of this batch as matrices
		for(unsigned int i = 0; i < batchSize; i++) {
			clm_Vector input = inputs[b * nn.batchSize + i];
			clm_Vector output = expectedOutputs[b * nn.batchSize + i];
			batchInputs[i] = clm_matrixWrapArray(input.values, input.length);
			batchOutputs[i] = clm_matrixWrapArray(output.values, output.length);
		}

		// Forward pass
		clm_Matrix *currentXs = batchInputs;
		for(unsigned int i = 0; i < nn.numLayers; i++) {
			clm_linearForward(&nn.layers[i], batchSize, currentXs, nn.layers[i].output);
			currentXs = nn.layers[i].output;
		}

		// Error of last layer = y - yhat
		clm_Linear *lastLayer = &nn.layers[nn.numLayers - 1];
		for(unsigned int s = 0; s < batchSize; s++) {
			clm_matrixCopy(batchOutputs[s], lastLayer->error[s]);                // lastLayer.error = y
			clm_matrixSubtractMatrix(lastLayer->error[s], lastLayer->output[s]); // lastLayer.error -= yhat
		}

		// Backward pass: propagate the error from the last layer to the first
		// and apply the per-sample weight and bias updates.
		for(int i = nn.numLayers - 1; i >= 0; i--) {
			clm_Linear *layer = &nn.layers[i];
			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
			clm_Matrix *outputsOfThisLayer = layer->output;
			clm_linearBackprop(layer, nn.learnRate, batchSize, inputsToThisLayer, outputsOfThisLayer, layer->error,
			                   i > 0, i == 0 ? NULL : nn.layers[i - 1].error, layer->weightsError, layer->gradient);

			for(unsigned int s = 0; s < batchSize; s++) {
				clm_matrixAddMatrix(layer->weights, layer->weightsError[s]);
				clm_matrixAddMatrix(layer->bias, layer->gradient[s]);
			}
		}

		/*
		// Earlier single-sample backprop implementation (disabled):
		for(int i = nn.numLayers - 1; i >= 0; i--) {
			clm_Linear layer = nn.layers[i];
			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
			clm_Matrix *outputsOfThisLayer = nn.layers[i].output;
			clm_Matrix prevError = i == nn.numLayers - 1 ? INVALID_MATRIX : nn.layers[i + 1].error;
			clm_Matrix error = layer.error;

			if(i == nn.numLayers - 1) {
				clm_matrixSubtractMatrix(clm_matrixCopy(batchOutputs[0], error), outputsOfThisLayer[0]); // yhat - y
			} else {
				clm_Matrix weightsT = clm_matrixTranspose(nn.layers[i + 1].weights);
				clm_matrixMultiplyMatrix(weightsT, prevError, error);
			}

			clm_Matrix gradient = clm_matrixDSigmoid(outputsOfThisLayer[0]); // dsig(yhat)
			clm_matrixMultiplyMatrixElements(gradient, error);               // (yhat - y) . dsig(yhat)
			clm_matrixMultiplyScalar(gradient, nn.learnRate);

			clm_Matrix inputT = clm_matrixTranspose(inputsToThisLayer[0]);
			clm_matrixMultiplyMatrix(gradient, inputT, layer.weightsError);

			clm_matrixAddMatrix(layer.weights, layer.weightsError);
			clm_matrixAddMatrix(layer.bias, gradient);
		}
		*/
	}

	free(batchInputs);
	free(batchOutputs);
}

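/*
 * The MNIST label file (data/train-labels.idx1-ubyte) is an IDX file:
 * a 4-byte magic number, a big-endian 32-bit label count, then one byte
 * per label. Each label is expanded into a one-hot vector of length 10.
 */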
// Loads the MNIST training labels as one-hot vectors of length 10.
void loadLabels(clm_Vector **labelsOut, unsigned int *labelsCountOut) {
	FILE *file = fopen("data/train-labels.idx1-ubyte", "rb");
	if(!file) {
		perror("Failed to open labels");
		return;
	}

	// Magic number (big-endian)
	unsigned char magicBytes[4];
	fread(magicBytes, sizeof(magicBytes), 1, file);
	printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]);

	// Number of labels (big-endian)
	unsigned char lengthBytes[4];
	fread(lengthBytes, sizeof(lengthBytes), 1, file);
	uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) |
	                  (lengthBytes[2] << 8) | lengthBytes[3];
	printf("%" PRIu32 "\n", length);

	clm_Vector *vectors = calloc(length, sizeof(clm_Vector));

	for(unsigned int i = 0; i < length; i++) {
		unsigned char label;
		fread(&label, sizeof(unsigned char), 1, file);

		// One-hot encode the label
		clm_Vector vector = clm_vectorCreate(10);
		for(unsigned int j = 0; j < 10; j++) {
			vector.values[j] = label == j ? 1 : 0;
		}
		vectors[i] = vector;
	}

	fclose(file);

	*labelsOut = vectors;
	*labelsCountOut = length;
}

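/*
 * The MNIST image file (data/train-images.idx3-ubyte) is an IDX file:
 * a 4-byte magic number, then big-endian 32-bit image count, row count and
 * column count, followed by one unsigned byte per pixel. Pixels are stored
 * here as floats in the range 0-255 without further normalisation.
 */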
// Loads the MNIST training images as flat vectors of rows * cols pixels.
void loadImages(clm_Vector **imagesOut, unsigned int *imageCountOut) {
	FILE *file = fopen("data/train-images.idx3-ubyte", "rb");
	if(!file) {
		perror("Failed to open images");
		return;
	}

	// Magic number (big-endian)
	unsigned char magicBytes[4];
	fread(magicBytes, sizeof(magicBytes), 1, file);
	printf("%d\n", (magicBytes[0] << 24) | (magicBytes[1] << 16) | (magicBytes[2] << 8) | magicBytes[3]);

	// Number of images (big-endian)
	unsigned char lengthBytes[4];
	fread(lengthBytes, sizeof(lengthBytes), 1, file);
	uint32_t length = (lengthBytes[0] << 24) | (lengthBytes[1] << 16) | (lengthBytes[2] << 8) | lengthBytes[3];
	printf("%" PRIu32 "\n", length);

	// Image dimensions (big-endian)
	unsigned char rowsBytes[4];
	fread(rowsBytes, sizeof(rowsBytes), 1, file);
	uint32_t rows = (rowsBytes[0] << 24) | (rowsBytes[1] << 16) | (rowsBytes[2] << 8) | rowsBytes[3];
	printf("%" PRIu32 "\n", rows);

	unsigned char colsBytes[4];
	fread(colsBytes, sizeof(colsBytes), 1, file);
	uint32_t cols = (colsBytes[0] << 24) | (colsBytes[1] << 16) | (colsBytes[2] << 8) | colsBytes[3];
	printf("%" PRIu32 "\n", cols);

	// Read each image as a flat vector of cols * rows pixel values
	clm_Vector *images = calloc(length, sizeof(clm_Vector));
	for(unsigned int i = 0; i < length; i++) {
		clm_Vector vec = clm_vectorCreate(cols * rows);
		unsigned char image[cols * rows];
		fread(image, sizeof(image), 1, file);
		for(unsigned int j = 0; j < cols * rows; j++) {
			vec.values[j] = (float) image[j];
		}
		images[i] = vec;
	}

	fclose(file);

	*imagesOut = images;
	*imageCountOut = length;
}

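/*
 * Sets up a 784-30-10 network, trains it for one epoch on the MNIST training
 * set, then measures prediction accuracy over the same training images.
 */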
int main() {
	if(clm_gpuInit() != 0) {
		printf("Failed to init GPU\n");
		return 1;
	}

	// Load the MNIST training labels and images
	clm_Vector *labels = NULL;
	unsigned int labelCount;
	loadLabels(&labels, &labelCount);
	printf("LENGTH: %u\n", labelCount);

	clm_Vector *images = NULL;
	unsigned int imageCount;
	loadImages(&images, &imageCount);

	imageCount = 60000; // hard-coded number of training images

	printf("%f\n", images[0].values[0]);

	srand(1);

	// Network dimensions: 784 input pixels, 30 hidden units, 10 output classes
	unsigned int
		i = 784,
		h = 30,
		o = 10;

	clm_Linear layers[] = {
		clm_linearCreateRandom(i, h),
		clm_linearCreateRandom(h, o)};
	clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 1000);

	for(unsigned int epoch = 0; epoch < 1; epoch++) {
		printf("Epoch %u\n", epoch);
		/*for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
			if(idx % 1000 == 0) {
				printf("\r%.2f%%", idx / (float) imageCount * 100);
				fflush(stdout);
			}
		}*/
		train(nn, imageCount, images, labels);
		printf("\n");
	}

	printf("Train done\n");

	// Evaluate: compare the predicted digit (argmax of the network output)
	// with the actual digit (argmax of the one-hot label)
	unsigned int correct = 0;
	for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
		// printf("pred(%.2f, %.2f) = %.2f\n", train_data_x[idx][0],
		// train_data_x[idx][1], predict(nn, train_data_x[idx], 2)[0]);
		float *pred = predict(nn, images[idx]);

		unsigned int predDigit = 0;
		float max = -1;
		for(unsigned int j = 0; j < 10; j++) {
			// printf("%.2f ", pred[j]);
			if(pred[j] > max || max < 0) {
				max = pred[j];
				predDigit = j;
			}
		}
		// if(idx < 100) printf("%u (confidence: %.2f)\n", predDigit, max);

		unsigned int actDigit = 0;
		float maxA = -1;
		for(unsigned int j = 0; j < 10; j++) {
			// printf("%.2f ", pred[j]);
			if(labels[idx].values[j] > maxA || maxA < 0) {
				maxA = labels[idx].values[j];
				actDigit = j;
			}
		}
		// if(idx < 100) printf("Actual: %u\n", actDigit);
		// printf("\n");

		if(predDigit == actDigit) correct++;
	}

	printf("Correct: %u -> %.2f%%", correct, (float) correct / imageCount * 100);

	printf("\n");

	clm_gpuDestroy();

	return 0;
}