diff --git a/src/clm.c b/src/clm.c
index 732313f..9baceda 100644
--- a/src/clm.c
+++ b/src/clm.c
@@ -206,8 +206,8 @@ bool clm_vectorIsInvalid(clm_Vector vec) {
 
 clm_Linear clm_linearCreateRandom(unsigned int inputs, unsigned int outputs) {
 	clm_Linear linear = {0};
-	linear.weights = clm_createMatrixRandom(outputs, inputs);
-	linear.bias = clm_createMatrixRandom(outputs, 1);
+	linear.weights.matrix = clm_createMatrixRandom(outputs, inputs);
+	linear.bias.matrix = clm_createMatrixRandom(outputs, 1);
 
 	// linear.output = clm_createMatrix(outputs, 1);
 	// linear.error = clm_createMatrix(outputs, 1);
@@ -215,33 +215,42 @@ clm_Linear clm_linearCreateRandom(unsigned int inputs, unsigned int outputs) {
 	return linear;
 }
 
+static clm_NativeMatrixArray clm_createMatrixArray(unsigned int length, unsigned int rows, unsigned int cols) {
+	clm_NativeMatrixArray array = {0};
+	array.length = length;
+	array.matrixes = calloc(length, sizeof(clm_Matrix));
+
+	for(unsigned int i = 0; i < length; i++) {
+		array.matrixes[i] = clm_createMatrix(rows, cols);
+	}
+
+	return array;
+}
+
 clm_NN clm_nnCreate(unsigned int numLayers, clm_Linear *layers, float learnRate, unsigned int batchSize) {
 	clm_NN nn = {.numLayers = numLayers, .layers = layers, .learnRate = learnRate, .batchSize = batchSize};
 
 	for(unsigned int i = 0; i < numLayers; i++) {
-		layers[i].output = calloc(batchSize, sizeof(clm_Matrix));
-		layers[i].error = calloc(batchSize, sizeof(clm_Matrix));
-		layers[i].weightsError = calloc(batchSize, sizeof(clm_Matrix));
-		layers[i].gradient = calloc(batchSize, sizeof(clm_Matrix));
+		clm_Matrix weights = layers[i].weights.matrix;
 
-		for(unsigned int j = 0; j < batchSize; j++) {
-			layers[i].output[j] = clm_createMatrix(layers[i].weights.rows, 1);
-			layers[i].error[j] = clm_createMatrix(layers[i].weights.rows, 1);
-			layers[i].weightsError[j] = clm_createMatrix(layers[i].weights.rows, layers[i].weights.cols);
-			layers[i].gradient[j] = clm_createMatrix(layers[i].weights.rows, 1);
-		}
+		layers[i].output = clm_createMatrixArray(batchSize, weights.rows, 1);
+		layers[i].error = clm_createMatrixArray(batchSize, weights.rows, 1);
+		layers[i].weightsError = clm_createMatrixArray(batchSize, weights.rows, weights.cols);
+		layers[i].gradient = clm_createMatrixArray(batchSize, weights.rows, 1);
 	}
 
 	return nn;
 }
 
+// TODO: clm_freeNN
+
 void clm_freeVector(clm_Vector vector) {
 	free(vector.values);
 }
 
 void clm_freeLinear(clm_Linear linear) {
-	clm_freeMatrix(linear.weights);
-	clm_freeMatrix(linear.bias);
+	clm_freeMatrix(linear.weights.matrix);
+	clm_freeMatrix(linear.bias.matrix);
 }
 
 void clm_matrixPrint(clm_Matrix mat) {
diff --git a/src/clm.h b/src/clm.h
index d409ff1..53bdbe9 100644
--- a/src/clm.h
+++ b/src/clm.h
@@ -22,21 +22,26 @@
 } clm_Vector;
 
 typedef struct {
-	clm_Matrix weights;
-	clm_Matrix bias;
-	clm_Matrix *output;
-	clm_Matrix *error;
-	clm_Matrix *weightsError;
-	clm_Matrix *gradient;
+	clm_Matrix matrix;
+	clm_NativeBuf *native;
+} clm_NativeMatrix;
+
+typedef struct {
+	clm_Matrix *matrixes;
+	clm_NativeBuf *native;
+	unsigned int length;
+} clm_NativeMatrixArray;
+
+typedef struct {
+	clm_NativeMatrix weights;
+	clm_NativeMatrix bias;
+	clm_NativeMatrixArray output;
+	clm_NativeMatrixArray error;
+	clm_NativeMatrixArray weightsError;
+	clm_NativeMatrixArray gradient;
 
-	clm_NativeBuf *nativeWeights;
-	clm_NativeBuf *nativeBias;
 	clm_NativeBuf *nativeInput;
-	clm_NativeBuf *nativeOutput;
-	clm_NativeBuf *nativeInputError;
-	clm_NativeBuf *nativeOutputError;
-	clm_NativeBuf *nativeWeightsError;
-	clm_NativeBuf *nativeGradient;
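+	// Scratch buffer for the error propagated back to the previous layer
+	// when updateErrors is set; all other device memory now lives in the
+	// clm_NativeMatrix/clm_NativeMatrixArray members above.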
+	clm_NativeBuf *nativeOutputErrors;
 } clm_Linear;
 
 typedef struct {
diff --git a/src/clm_cpu.c b/src/clm_cpu.c
index c5f9037..b137e89 100644
--- a/src/clm_cpu.c
+++ b/src/clm_cpu.c
@@ -13,21 +13,27 @@ void clm_gpuDestroy() {}
 
 void clm_linearInit(clm_Linear *linear) {}
 
-void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs) {
+void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs) {
+	clm_Matrix *outputs = linear->output.matrixes;
 	for(unsigned int b = 0; b < batchSize; b++) {
-		clm_matrixMultiplyMatrix(linear->weights, inputs[b], outputs[b]);
+		clm_matrixMultiplyMatrix(linear->weights.matrix, inputs[b], outputs[b]);
 		if(clm_matrixIsInvalid(outputs[b])) {
 			printf("Forward pass failed\n");
 			return;
 		}
-		clm_matrixAddMatrix(outputs[b], linear->bias);
+		clm_matrixAddMatrix(outputs[b], linear->bias.matrix);
 		clm_matrixSigmoid(outputs[b]);
 	}
 }
 
-void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients) {
+void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, bool updateErrors, clm_Matrix *outputErrors) {
+	clm_Matrix *outputs = linear->output.matrixes;
+	clm_Matrix *outputGradients = linear->gradient.matrixes;
+	clm_Matrix *outputWeightsErrors = linear->weightsError.matrixes;
+	clm_Matrix *inputErrors = linear->error.matrixes;
+
 	for(unsigned int b = 0; b < batchSize; b++) {
 		// clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
 		// clm_Matrix *outputsOfThisLayer = nn.layers[i].output;
@@ -48,7 +54,7 @@ void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchS
 
 		if(updateErrors) {
 			// GPU Step 2 extended (opt)
-			clm_Matrix weightsT = clm_matrixTranspose(linear->weights);
+			clm_Matrix weightsT = clm_matrixTranspose(linear->weights.matrix);
 			clm_matrixMultiplyMatrix(weightsT, inputErrors[b], outputErrors[b]);
 		}
 	}
diff --git a/src/clm_funcs.c b/src/clm_funcs.c
index 00d5c2a..7ffa961 100644
--- a/src/clm_funcs.c
+++ b/src/clm_funcs.c
@@ -6,8 +6,8 @@ typedef int (*clm_gpuInitFunc)(unsigned int);
 typedef void (*clm_gpuDestroyFunc)();
 
 typedef void (*clm_linearInitFunc)(clm_Linear *linear);
-typedef void (*clm_linearForwardFunc)(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs);
-typedef void (*clm_linearBackpropFunc)(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients);
+typedef void (*clm_linearForwardFunc)(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs);
+typedef void (*clm_linearBackpropFunc)(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, bool updateErrors, clm_Matrix *outputErrors);
 
 static void *lib;
 static clm_gpuInitFunc initFunc;
@@ -56,10 +56,10 @@ void clm_linearInit(clm_Linear *linear) {
 	linearInitFunc(linear);
 }
 
-void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs) {
-	linearForwardFunc(linear, batchSize, inputs, outputs);
+void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs) {
+	linearForwardFunc(linear, batchSize, inputs);
 }
 
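+// The backprop wrapper shrinks the same way: outputs, per-layer errors,
+// weight errors and gradients are now taken from the clm_Linear itself.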
-void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients) {
-	linearBackpropFunc(linear, learnRate, batchSize, inputs, outputs, inputErrors, updateErrors, outputErrors, outputWeightsErrors, outputGradients);
+void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, bool updateErrors, clm_Matrix *outputErrors) {
+	linearBackpropFunc(linear, learnRate, batchSize, inputs, updateErrors, outputErrors);
 }
diff --git a/src/clm_gpu.h b/src/clm_gpu.h
index 0f384dc..dcf2984 100644
--- a/src/clm_gpu.h
+++ b/src/clm_gpu.h
@@ -10,7 +10,7 @@ int clm_gpuInit(unsigned int mode);
 void clm_gpuDestroy();
 
 void clm_linearInit(clm_Linear *linear);
-void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs);
-void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients);
+void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs);
+void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, bool updateErrors, clm_Matrix *outputErrors);
 
 #endif
diff --git a/src/clm_opencl.c b/src/clm_opencl.c
index 42790b9..e5310b4 100644
--- a/src/clm_opencl.c
+++ b/src/clm_opencl.c
@@ -32,7 +32,7 @@ typedef struct __attribute__((packed)) {
 } cl_GPUMat;
 
 #define gpuMat(mat) \
-	{ .rows = mat.rows, .cols = mat.cols, .transposed = mat.transposed }
+	((cl_GPUMat){.rows = mat.rows, .cols = mat.cols, .transposed = mat.transposed})
 
 int clm_gpuInit(unsigned int mode) {
 	// Connect to a compute device
@@ -142,6 +142,33 @@ static cl_mem writeGPUMats(cl_GPUMat gpuMat, unsigned int numMats, clm_Matrix *m
 	return mem;
 }
 
+static cl_mem writeNativeMatrix(clm_NativeMatrix matrix) {
+	cl_mem mem = matrix.native->mem;
+	cl_int err = clEnqueueWriteBuffer(queue, mem, CL_TRUE, 0, sizeof(float) * matrix.matrix.rows * matrix.matrix.cols, matrix.matrix.values, 0, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to enqueue write: %s\n", clm_clErrorToString(err));
+		return NULL;
+	}
+
+	return mem;
+}
+
+static cl_mem writeNativeMatrixArray(clm_NativeMatrixArray array) {
+	clm_Matrix mat = array.matrixes[0];
+	cl_mem mem = array.native->mem;
+
+	size_t matLength = sizeof(float) * mat.rows * mat.cols;
+	for(unsigned int i = 0; i < array.length; i++) {
+		cl_int err = clEnqueueWriteBuffer(queue, mem, CL_TRUE, i * matLength, matLength, array.matrixes[i].values, 0, NULL, NULL);
+		if(err != CL_SUCCESS) {
+			printf("Failed to enqueue write: %s\n", clm_clErrorToString(err));
+			return NULL;
+		}
+	}
+
+	return mem;
+}
+
 static void readGPUMats(cl_GPUMat mat, unsigned int numMats, clm_Matrix *mats, clm_NativeBuf *nativeBuf) {
 	cl_int err;
 	cl_mem mem = nativeBuf->mem;
@@ -155,25 +182,51 @@ static void readGPUMats(cl_GPUMat mat, unsigned int numMats, clm_Matrix *mats, c
 	}
 }
 
+static void readNativeMatrix(clm_NativeMatrix matrix) {
+	cl_int err = clEnqueueReadBuffer(queue, matrix.native->mem, CL_TRUE, 0, sizeof(float) * matrix.matrix.rows * matrix.matrix.cols, matrix.matrix.values, 0, NULL, NULL);
+	if(err != CL_SUCCESS) {
+		printf("Failed to enqueue read: %s\n", clm_clErrorToString(err));
+		return;
+	}
+}
+
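+// Reads every matrix of the array back from the single packed device buffer,
+// mirroring the per-matrix offsets used by writeNativeMatrixArray.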
+static void readNativeMatrixArray(clm_NativeMatrixArray array) {
+	clm_Matrix mat = array.matrixes[0];
+	size_t matLength = sizeof(float) * mat.rows * mat.cols;
+	for(unsigned int i = 0; i < array.length; i++) {
+		// Read each matrix back from its slice of the packed buffer
+		cl_int err = clEnqueueReadBuffer(queue, array.native->mem, CL_TRUE, i * matLength, matLength, array.matrixes[i].values, 0, NULL, NULL);
+		if(err != CL_SUCCESS) {
+			printf("Failed to enqueue read: %s\n", clm_clErrorToString(err));
+			return;
+		}
+	}
+}
+
+static void clm_nativeAllocMatrix(clm_NativeMatrix *matrix, cl_mem_flags flags) {
+	matrix->native = calloc(1, sizeof(clm_NativeBuf));
+	allocGPUMat(gpuMat(matrix->matrix), 1, flags, matrix->native);
+}
+
+static void clm_nativeAllocMatrixArray(clm_NativeMatrixArray *matrixArray, cl_mem_flags flags) {
+	matrixArray->native = calloc(1, sizeof(clm_NativeBuf));
+	allocGPUMat(gpuMat(matrixArray->matrixes[0]), matrixArray->length, flags, matrixArray->native);
+}
+
 void clm_linearInit(clm_Linear *linear) {
-	linear->nativeWeights = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeBias = calloc(1, sizeof(clm_NativeBuf));
+	clm_nativeAllocMatrix(&linear->weights, CL_MEM_READ_ONLY);
+	clm_nativeAllocMatrix(&linear->bias, CL_MEM_READ_ONLY);
+	clm_nativeAllocMatrixArray(&linear->output, CL_MEM_READ_WRITE);
+	clm_nativeAllocMatrixArray(&linear->error, CL_MEM_READ_WRITE);
+	clm_nativeAllocMatrixArray(&linear->weightsError, CL_MEM_READ_WRITE);
+	clm_nativeAllocMatrixArray(&linear->gradient, CL_MEM_READ_WRITE);
+
+	// TODO: alloc mem
 	linear->nativeInput = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeOutput = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeInputError = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeOutputError = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeWeightsError = calloc(1, sizeof(clm_NativeBuf));
-	linear->nativeGradient = calloc(1, sizeof(clm_NativeBuf));
+	linear->nativeOutputErrors = calloc(1, sizeof(clm_NativeBuf));
 }
 
 // TODO: allow writing multiple inputs at once to improve throughput (no need to rewrite weights/bias each time)
-void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs) {
+void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *inputs) {
 	if(batchSize == 0) return;
 
 	cl_GPUMat matInput = gpuMat(inputs[0]);
-	cl_GPUMat matWeights = gpuMat(linear->weights);
-	cl_GPUMat matBias = gpuMat(linear->bias);
-	cl_GPUMat matOut = gpuMat(outputs[0]);
+	cl_GPUMat matWeights = gpuMat(linear->weights.matrix);
+	cl_GPUMat matBias = gpuMat(linear->bias.matrix);
+	cl_GPUMat matOut = gpuMat(linear->output.matrixes[0]);
 
 	size_t workSize = matOut.rows * matOut.cols;
 
@@ -181,18 +234,14 @@ void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *i
 	// TODO: make sure to always alloc nn.batchSize, not batchSize
 	cl_mem matInput_values = writeGPUMats(matInput, batchSize, inputs, CL_MEM_READ_ONLY, linear->nativeInput);
-	cl_mem matWeights_values = writeGPUMat(matWeights, linear->weights, CL_MEM_READ_ONLY, linear->nativeWeights);
-	cl_mem matBias_values = writeGPUMat(matBias, linear->bias, CL_MEM_READ_ONLY, linear->nativeBias);
+	cl_mem matWeights_values = writeNativeMatrix(linear->weights);
+	cl_mem matBias_values = writeNativeMatrix(linear->bias);
 	if(!matInput_values || !matWeights_values || !matBias_values) {
 		// linear->output = INVALID_MATRIX;
 		return;
 	}
 
-	cl_mem matOut_values = allocGPUMat(matOut, batchSize, CL_MEM_READ_WRITE, linear->nativeOutput);
-	if(!matOut_values) {
-		// linear->output = INVALID_MATRIX;
-		return;
-	}
+	cl_mem matOut_values = linear->output.native->mem;
 
 	err = 0;
 	err |= clSetKernelArg(kernelLinearForward, 0, sizeof(cl_uint), &batchSize);
@@ -229,38 +278,35 @@ void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *i
 		return;
 	}*/
 
-	readGPUMats(matOut, batchSize, outputs, linear->nativeOutput);
-
-	clFlush(queue);
-	clFinish(queue);
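+	// The blocking reads (CL_TRUE) in readNativeMatrixArray already wait for
+	// the enqueued kernel on this in-order queue, so the explicit
+	// clFlush/clFinish pair is presumably no longer needed here.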
+	readNativeMatrixArray(linear->output);
 }
 
-void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients) {
+void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, bool updateErrors, clm_Matrix *outputErrors) {
 	if(batchSize == 0) return;
 
 	cl_GPUMat matInput = gpuMat(inputs[0]);
-	cl_GPUMat matWeights = gpuMat(linear->weights);
-	cl_GPUMat matOutput = gpuMat(outputs[0]);
-	cl_GPUMat matInputErrors = gpuMat(inputErrors[0]);
+	cl_GPUMat matWeights = gpuMat(linear->weights.matrix);
+	cl_GPUMat matOutput = gpuMat(linear->output.matrixes[0]);
+	cl_GPUMat matInputErrors = gpuMat(linear->error.matrixes[0]);
 	cl_GPUMat matOutputErrors = !updateErrors ? (cl_GPUMat){0} : (cl_GPUMat) gpuMat(outputErrors[0]);
-	cl_GPUMat matOutputWeightsErrors = gpuMat(outputWeightsErrors[0]);
-	cl_GPUMat matOutputGradients = gpuMat(outputGradients[0]);
+	cl_GPUMat matOutputWeightsErrors = gpuMat(linear->weightsError.matrixes[0]);
+	cl_GPUMat matOutputGradients = gpuMat(linear->gradient.matrixes[0]);
 
 	cl_int err;
 
 	cl_mem matInput_values = writeGPUMats(matInput, batchSize, inputs, CL_MEM_READ_ONLY, linear->nativeInput);
-	cl_mem matWeights_values = writeGPUMat(matWeights, linear->weights, CL_MEM_READ_ONLY, linear->nativeWeights);
-	cl_mem matInputErrors_values = writeGPUMats(matInputErrors, batchSize, inputErrors, CL_MEM_READ_ONLY, linear->nativeInputError);
-	cl_mem matOutput_values = writeGPUMats(matOutput, batchSize, outputs, CL_MEM_READ_WRITE, linear->nativeOutput);
+	cl_mem matWeights_values = writeNativeMatrix(linear->weights);
+	cl_mem matInputErrors_values = writeNativeMatrixArray(linear->error);
+	cl_mem matOutput_values = writeNativeMatrixArray(linear->output);
 	if(!matInput_values || !matWeights_values || !matInputErrors_values || !matOutput_values) {
 		printf("Failed to write GPU mats\n");
 		return;
 	}
 
 	// cl_mem matOutput_values = allocGPUMat(matOutput, batchSize, CL_MEM_READ_ONLY, linear->nativeOutput);
-	cl_mem matOutputErrors_values = !updateErrors ? NULL : allocGPUMat(matOutputErrors, batchSize, CL_MEM_READ_WRITE, linear->nativeOutputError);
-	cl_mem matOutputWeightsErrors_values = allocGPUMat(matOutputWeightsErrors, batchSize, CL_MEM_READ_WRITE, linear->nativeWeightsError);
-	cl_mem matOutputGradients_values = allocGPUMat(matOutputGradients, batchSize, CL_MEM_READ_WRITE, linear->nativeGradient);
+	cl_mem matOutputErrors_values = !updateErrors ? NULL : allocGPUMat(matOutputErrors, batchSize, CL_MEM_READ_WRITE, linear->nativeOutputErrors);
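+	// weightsError and gradient are only written by the kernel, so the
+	// device buffers preallocated in clm_linearInit are used directly.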
+	cl_mem matOutputWeightsErrors_values = linear->weightsError.native->mem;
+	cl_mem matOutputGradients_values = linear->gradient.native->mem;
 	if(!matOutputWeightsErrors_values || !matOutputGradients_values) {
 		printf("Failed to alloc GPU mats\n");
 		return;
 	}
@@ -320,10 +366,8 @@ void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchS
 
 	clFlush(queue);
 	clFinish(queue);
 
-	readGPUMats(matOutputGradients, batchSize, outputGradients, linear->nativeGradient);
-	readGPUMats(matOutputWeightsErrors, batchSize, outputWeightsErrors, linear->nativeWeightsError);
-	if(updateErrors) readGPUMats(matOutputErrors, batchSize, outputErrors, linear->nativeOutputError);
+	readNativeMatrixArray(linear->weightsError);
+	readNativeMatrixArray(linear->gradient);
 
-	clFlush(queue);
-	clFinish(queue);
+	if(updateErrors) readGPUMats(matOutputErrors, batchSize, outputErrors, linear->nativeOutputErrors);
 }
diff --git a/src/cltest.c b/src/cltest.c
index 5bf5527..bc97c78 100644
--- a/src/cltest.c
+++ b/src/cltest.c
@@ -24,8 +24,8 @@ float *predict(clm_NN nn, clm_Vector input) {
 	clm_Matrix xM = clm_matrixWrapArray(input.values, input.length);
 
 	for(unsigned int i = 0; i < nn.numLayers; i++) {
-		clm_linearForward(&nn.layers[i], 1, &xM, &nn.layers[i].output[0]);
-		xM = nn.layers[i].output[0];
+		clm_linearForward(&nn.layers[i], 1, &xM);
+		xM = nn.layers[i].output.matrixes[0];
 	}
 
 	return xM.values;
@@ -84,28 +84,29 @@ void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *
 		}
 
 		// Forward pass
+		printf("Forward pass\n");
 		clm_Matrix *currentXs = batchInputs;
 		for(unsigned int i = 0; i < nn.numLayers; i++) {
-			clm_linearForward(&nn.layers[i], batchSize, currentXs, nn.layers[i].output);
-			currentXs = nn.layers[i].output;
+			clm_linearForward(&nn.layers[i], batchSize, currentXs);
+			currentXs = nn.layers[i].output.matrixes;
 		}
 
 		clm_Linear *lastLayer = &nn.layers[nn.numLayers - 1];
 		for(unsigned int b = 0; b < batchSize; b++) {
 			// Error of last layer = y - yhat
-			clm_matrixCopy(batchOutputs[b], lastLayer->error[b]); // lastLayer.error = y
-			clm_matrixSubtractMatrix(lastLayer->error[b], lastLayer->output[b]); // lastLayer.error -= yhat
+			clm_matrixCopy(batchOutputs[b], lastLayer->error.matrixes[b]); // lastLayer.error = y
+			clm_matrixSubtractMatrix(lastLayer->error.matrixes[b], lastLayer->output.matrixes[b]); // lastLayer.error -= yhat
 		}
 
+		printf("Backprop\n");
 		for(int i = nn.numLayers - 1; i >= 0; i--) {
 			clm_Linear *layer = &nn.layers[i];
-			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
-			clm_Matrix *outputsOfThisLayer = layer->output;
-			clm_linearBackprop(layer, nn.learnRate, batchSize, inputsToThisLayer, outputsOfThisLayer, layer->error, i > 0, i == 0 ? NULL : nn.layers[i - 1].error, layer->weightsError, layer->gradient);
+			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output.matrixes;
+			clm_linearBackprop(layer, nn.learnRate, batchSize, inputsToThisLayer, i > 0, i == 0 ? NULL : nn.layers[i - 1].error.matrixes);
 
			for(unsigned int b = 0; b < batchSize; b++) {
-				clm_matrixAddMatrix(layer->weights, layer->weightsError[b]);
-				clm_matrixAddMatrix(layer->bias, layer->gradient[b]);
+				clm_matrixAddMatrix(layer->weights.matrix, layer->weightsError.matrixes[b]);
+				clm_matrixAddMatrix(layer->bias.matrix, layer->gradient.matrixes[b]);
 			}
 		}
 	}