More OpenCL debugging
parent 707a9bf754
commit d55f7c69ba
@@ -230,6 +230,9 @@ void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *i
 	}*/

 	readGPUMats(matOut, batchSize, outputs, linear->nativeOutput);
+
+	clFlush(queue);
+	clFinish(queue);
 }

 void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients) {
@@ -320,4 +323,7 @@ void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchS
 	readGPUMats(matOutputGradients, batchSize, outputGradients, linear->nativeGradient);
 	readGPUMats(matOutputWeightsErrors, batchSize, outputWeightsErrors, linear->nativeWeightsError);
 	if(updateErrors) readGPUMats(matOutputErrors, batchSize, outputErrors, linear->nativeOutputError);
+
+	clFlush(queue);
+	clFinish(queue);
 }
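
Both functions now end with the same clFlush/clFinish pair: the queue is flushed and then drained before returning, so a failure in an enqueued kernel or read surfaces at a known call site instead of at some later, unrelated API call. A minimal sketch of the pattern (the error checking shown here is an addition for illustration, not part of the commit):

    #include <CL/cl.h>
    #include <stdio.h>

    /* clFlush submits all queued commands to the device; clFinish then
     * blocks until they have completed, so GPU errors appear here. */
    static void debugSync(cl_command_queue queue) {
        cl_int err = clFlush(queue);
        if(err != CL_SUCCESS) fprintf(stderr, "clFlush failed: %d\n", err);
        err = clFinish(queue);
        if(err != CL_SUCCESS) fprintf(stderr, "clFinish failed: %d\n", err);
    }

clFinish alone already implies a flush; calling clFlush first only makes the submission step explicit.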
src/cltest.c (104 changed lines)
@@ -31,6 +31,41 @@ float *predict(clm_NN nn, clm_Vector input) {
 	return xM.values;
 }

+float eval(clm_NN nn, unsigned int count, clm_Vector *images, clm_Vector *labels) {
+	unsigned int correct = 0;
+	for(unsigned int idx = 0; idx < count; idx++) { // Each train sample
+		// printf("pred(%.2f, %.2f) = %.2f\n", train_data_x[idx][0],
+		// train_data_x[idx][1], predict(nn, train_data_x[idx], 2)[0]);
+		float *pred = predict(nn, images[idx]);
+		unsigned int predDigit = 0;
+		float max = -1;
+		for(unsigned int j = 0; j < 10; j++) {
+			// printf("%.2f ", pred[j]);
+			if(pred[j] > max || max < 0) {
+				max = pred[j];
+				predDigit = j;
+			}
+		}
+		// if(idx < 100) printf("%u (confidence: %.2f)\n", predDigit, max);
+
+		unsigned int actDigit = 0;
+		float maxA = -1;
+		for(unsigned int j = 0; j < 10; j++) {
+			// printf("%.2f ", pred[j]);
+			if(labels[idx].values[j] > maxA || maxA < 0) {
+				maxA = labels[idx].values[j];
+				actDigit = j;
+			}
+		}
+		// if(idx < 100) printf("Actual: %u\n", actDigit);
+		// printf("\n");
+
+		if(predDigit == actDigit) correct++;
+	}
+
+	return (float) correct / count * 100;
+}
+
 void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *expectedOutputs) {
 	clm_Matrix *batchInputs = calloc(nn.batchSize, sizeof(clm_Matrix));
 	clm_Matrix *batchOutputs = calloc(nn.batchSize, sizeof(clm_Matrix));
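
The new eval() scores the network by comparing the argmax of the 10-way prediction against the argmax of the one-hot label. Its two inner loops are the same scan; as a standalone helper it would look like this (hypothetical argmax, not part of the commit — the `max < 0` seed in eval() plays the role of starting from the first element):

    static unsigned int argmax(const float *values, unsigned int n) {
        unsigned int best = 0;
        for(unsigned int j = 1; j < n; j++) {
            if(values[j] > values[best]) best = j;
        }
        return best;
    }

With it, the loop body reduces to `if(argmax(pred, 10) == argmax(labels[idx].values, 10)) correct++;`.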
@@ -73,31 +108,6 @@ void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *
 				clm_matrixAddMatrix(layer->bias, layer->gradient[b]);
 			}
 		}
-
-		/*for(int i = nn.numLayers - 1; i >= 0; i--) {
-			clm_Linear layer = nn.layers[i];
-			clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
-			clm_Matrix *outputsOfThisLayer = nn.layers[i].output;
-			clm_Matrix prevError = i == nn.numLayers - 1 ? INVALID_MATRIX : nn.layers[i + 1].error;
-			clm_Matrix error = layer.error;
-
-			if(i == nn.numLayers - 1) {
-				clm_matrixSubtractMatrix(clm_matrixCopy(batchOutputs[0], error), outputsOfThisLayer[0]); // yhat - y
-			} else {
-				clm_Matrix weightsT = clm_matrixTranspose(nn.layers[i + 1].weights);
-				clm_matrixMultiplyMatrix(weightsT, prevError, error);
-			}
-
-			clm_Matrix gradient = clm_matrixDSigmoid(outputsOfThisLayer[0]); // dsig(yhat)
-			clm_matrixMultiplyMatrixElements(gradient, error); // (yhat - y) . dsig(yhat)
-			clm_matrixMultiplyScalar(gradient, nn.learnRate);
-
-			clm_Matrix inputT = clm_matrixTranspose(inputsToThisLayer[0]);
-			clm_matrixMultiplyMatrix(gradient, inputT, layer.weightsError);
-
-			clm_matrixAddMatrix(layer.weights, layer.weightsError);
-			clm_matrixAddMatrix(layer.bias, gradient);
-		}*/
 	}

 	free(batchInputs);
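
The deleted block was the old commented-out CPU backprop kept for reference. What it computed per layer (assuming batchOutputs holds the targets y, so the copy-then-subtract yields y − ŷ despite the inline comment reading yhat − y) is the standard delta rule for sigmoid activations:

    \delta = \eta \,(y - \hat{y}) \odot \sigma'(\hat{y}), \qquad
    W \leftarrow W + \delta\, x^{\top}, \qquad
    b \leftarrow b + \delta

with the error term for hidden layers taken as W_{i+1}^T δ_{i+1} instead of y − ŷ. This is the recurrence the linear_backprop kernels in mat.cl now implement on the GPU.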
@@ -216,7 +226,7 @@ int main(int argc, const char *argv[]) {
 	unsigned int imageCount;
 	loadImages(&images, &imageCount);

-	imageCount = 60000;
+	imageCount = 600;

 	printf("%f\n", images[0].values[0]);
@@ -230,13 +240,13 @@ int main(int argc, const char *argv[]) {
 	clm_Linear layers[] = {
 		clm_linearCreateRandom(i, h),
 		clm_linearCreateRandom(h, o)};
-	clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 500);
+	clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 10000);

 	for(unsigned int i = 0; i < sizeof(layers) / sizeof(clm_Linear); i++) {
 		clm_linearInit(&nn.layers[i]);
 	}

-	for(unsigned int epoch = 0; epoch < 1; epoch++) {
+	for(unsigned int epoch = 0; epoch < 10; epoch++) {
 		printf("Epoch %u\n", epoch);
 		/*for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
 			if(idx % 1000 == 0) {
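
Judging from this call site, the last two arguments to clm_nnCreate are the learning rate and the batch size; the declaration itself is outside this diff, so the prototype below is only inferred. With imageCount forced to 600 above, a batch size of 10000 means train() sees the whole truncated set as one partial batch, which fits a debugging run:

    /* Inferred from the call site; the real declaration is not in this diff. */
    clm_NN clm_nnCreate(unsigned int numLayers, clm_Linear *layers,
                        float learnRate, unsigned int batchSize);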
@@ -245,45 +255,15 @@ int main(int argc, const char *argv[]) {
 			}
 		}*/
 		train(nn, imageCount, images, labels);

+		printf("Score: %.2f\n", eval(nn, imageCount, images, labels));
 		printf("\n");
 	}

 	printf("Train done\n");

-	unsigned int correct = 0;
-	for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
-		// printf("pred(%.2f, %.2f) = %.2f\n", train_data_x[idx][0],
-		// train_data_x[idx][1], predict(nn, train_data_x[idx], 2)[0]);
-		float *pred = predict(nn, images[idx]);
-		unsigned int predDigit = 0;
-		float max = -1;
-		for(unsigned int j = 0; j < 10; j++) {
-			// printf("%.2f ", pred[j]);
-			if(pred[j] > max || max < 0) {
-				max = pred[j];
-				predDigit = j;
-			}
-		}
-		// if(idx < 100) printf("%u (confidence: %.2f)\n", predDigit, max);
-
-		unsigned int actDigit = 0;
-		float maxA = -1;
-		for(unsigned int j = 0; j < 10; j++) {
-			// printf("%.2f ", pred[j]);
-			if(labels[idx].values[j] > maxA || maxA < 0) {
-				maxA = labels[idx].values[j];
-				actDigit = j;
-			}
-		}
-		// if(idx < 100) printf("Actual: %u\n", actDigit);
-		// printf("\n");
-
-		if(predDigit == actDigit) correct++;
-	}
-
-	printf("Correct: %u -> %.2f", correct, (float) correct / imageCount * 100);
-
-	printf("\n");
+	float score = eval(nn, imageCount, images, labels);
+	printf("Correct: %.2f\n", score);

 	clm_gpuDestroy();
 }
src/mat.cl (21 changed lines)
@@ -21,8 +21,9 @@ void mat_multiply(cl_GPUMat matA, __global float *matA_values, cl_GPUMat matB, _
 	uint idx = get_global_id(0);
 	if(idx >= matOut.rows * matOut.cols) return;

-	uint i, j;
-	matrixGetIJ(matOut, idx, i, j);
+	// TODO: might not work with transposed matOut
+	uint i = idx / matOut.cols;
+	uint j = idx % matOut.cols;

 	float sum = 0;
 	for(unsigned int k = 0; k < matA.cols; k++) {
@@ -75,6 +76,14 @@ void mat_multiply_scalar(cl_GPUMat mat, __global float *mat_values, float scalar
 	mat_values[idx] *= scalar;
 }

+cl_GPUMat clm_matrixTranspose(cl_GPUMat mat) {
+	cl_GPUMat tr = {0};
+	tr.cols = mat.rows;
+	tr.rows = mat.cols;
+	tr.transposed = !mat.transposed;
+	return tr;
+}
+
 __kernel void linear_forward(unsigned int batchSize,
                              cl_GPUMat input, __global float *input_values,
                              cl_GPUMat weights, __global float *weights_values,
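
clm_matrixTranspose makes transposition lazy: it swaps the logical dimensions and toggles a flag, leaving the data in place. That is also the concern behind the new TODO in mat_multiply, since deriving i and j as idx / matOut.cols and idx % matOut.cols only matches a non-transposed output layout. A sketch of an element accessor that honors the flag (hypothetical matrixGet, assuming row-major backing storage; the repo's kernels inline their own indexing):

    float matrixGet(cl_GPUMat mat, __global float *values, uint i, uint j) {
        // A lazily transposed matrix keeps the original row-major storage,
        // so logical (i, j) maps to physical (j, i). After the dimension
        // swap, mat.rows holds the original column count, i.e. the row stride.
        if(mat.transposed) return values[j * mat.rows + i];
        return values[i * mat.cols + j];
    }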
@@ -123,14 +132,12 @@ __kernel void linear_backprop_2(unsigned int batchSize,
 		__global float *batchOutWeightsErrors_values = outputWeightsErrors_values + b * outputWeightsErrors.rows * outputWeightsErrors.cols;
 		__global float *batchOutGradients_values = outputGradients_values + b * outputGradients.rows * outputGradients.cols;

-		cl_GPUMat inputsT = inputs;
-		inputsT.transposed = true;
+		cl_GPUMat inputsT = clm_matrixTranspose(inputs);
 		mat_multiply(outputGradients, batchOutGradients_values, inputsT, batchInput_values, outputWeightsErrors, batchOutWeightsErrors_values);

 		if(updateErrors) {
-			cl_GPUMat weightsT = weights;
-			weightsT.transposed = true;
-			mat_multiply(weightsT, weights_values, inputErrors, batchInErrors_values, outputErrors, batchOutErrors_values);
+			cl_GPUMat weightsT = clm_matrixTranspose(weightsT);
+			// mat_multiply(weightsT, weights_values, inputErrors, batchInErrors_values, outputErrors, batchOutErrors_values);
 		}
 	}
 }
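
Two details worth noting in this hunk. First, replacing the manual flag set with clm_matrixTranspose also swaps rows and cols, which the by-hand version left untouched. Second, `cl_GPUMat weightsT = clm_matrixTranspose(weightsT);` reads weightsT before it is initialized; the mat_multiply that would consume it is commented out in the same change, so the value is unused for now, but re-enabling that call presumably first needs:

    cl_GPUMat weightsT = clm_matrixTranspose(weights);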