More OpenCL debugging
This commit is contained in:
parent
707a9bf754
commit
d55f7c69ba
@ -230,6 +230,9 @@ void clm_linearForward(clm_Linear *linear, unsigned int batchSize, clm_Matrix *i
|
||||
}*/
|
||||
|
||||
readGPUMats(matOut, batchSize, outputs, linear->nativeOutput);
|
||||
|
||||
clFlush(queue);
|
||||
clFinish(queue);
|
||||
}
|
||||
|
||||
void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchSize, clm_Matrix *inputs, clm_Matrix *outputs, clm_Matrix *inputErrors, bool updateErrors, clm_Matrix *outputErrors, clm_Matrix *outputWeightsErrors, clm_Matrix *outputGradients) {
|
||||
@ -320,4 +323,7 @@ void clm_linearBackprop(clm_Linear *linear, float learnRate, unsigned int batchS
|
||||
readGPUMats(matOutputGradients, batchSize, outputGradients, linear->nativeGradient);
|
||||
readGPUMats(matOutputWeightsErrors, batchSize, outputWeightsErrors, linear->nativeWeightsError);
|
||||
if(updateErrors) readGPUMats(matOutputErrors, batchSize, outputErrors, linear->nativeOutputError);
|
||||
|
||||
clFlush(queue);
|
||||
clFinish(queue);
|
||||
}
|
||||
|
104
src/cltest.c
104
src/cltest.c
@ -31,6 +31,41 @@ float *predict(clm_NN nn, clm_Vector input) {
|
||||
return xM.values;
|
||||
}
|
||||
|
||||
// Returns the index of the largest of the first 10 entries of vals.
// The `max < 0` guard makes the first element win its comparison even
// when every value is negative, so `best` is always a valid index.
// Ties keep the earliest index. Assumes vals has at least 10 elements
// (network output / one-hot label width) — TODO confirm with callers.
static unsigned int argmax10(const float *vals) {
	unsigned int best = 0;
	float max = -1;
	for(unsigned int j = 0; j < 10; j++) {
		if(vals[j] > max || max < 0) {
			max = vals[j];
			best = j;
		}
	}
	return best;
}

// Evaluates classification accuracy of the network over `count` samples.
// For each sample, the predicted digit is the argmax of the network
// output and the actual digit is the argmax of the (one-hot) label
// vector; a sample counts as correct when they match.
// Returns the accuracy as a percentage in [0, 100].
// Returns 0 for count == 0 (avoids a 0/0 NaN).
float eval(clm_NN nn, unsigned int count, clm_Vector *images, clm_Vector *labels) {
	if(count == 0) return 0;

	unsigned int correct = 0;
	for(unsigned int idx = 0; idx < count; idx++) { // Each train sample
		float *pred = predict(nn, images[idx]);
		unsigned int predDigit = argmax10(pred);
		unsigned int actDigit = argmax10(labels[idx].values);
		if(predDigit == actDigit) correct++;
	}

	return (float) correct / count * 100;
}
|
||||
|
||||
void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *expectedOutputs) {
|
||||
clm_Matrix *batchInputs = calloc(nn.batchSize, sizeof(clm_Matrix));
|
||||
clm_Matrix *batchOutputs = calloc(nn.batchSize, sizeof(clm_Matrix));
|
||||
@ -73,31 +108,6 @@ void train(clm_NN nn, unsigned int numElements, clm_Vector *inputs, clm_Vector *
|
||||
clm_matrixAddMatrix(layer->bias, layer->gradient[b]);
|
||||
}
|
||||
}
|
||||
|
||||
/*for(int i = nn.numLayers - 1; i >= 0; i--) {
|
||||
clm_Linear layer = nn.layers[i];
|
||||
clm_Matrix *inputsToThisLayer = i == 0 ? batchInputs : nn.layers[i - 1].output;
|
||||
clm_Matrix *outputsOfThisLayer = nn.layers[i].output;
|
||||
clm_Matrix prevError = i == nn.numLayers - 1 ? INVALID_MATRIX : nn.layers[i + 1].error;
|
||||
clm_Matrix error = layer.error;
|
||||
|
||||
if(i == nn.numLayers - 1) {
|
||||
clm_matrixSubtractMatrix(clm_matrixCopy(batchOutputs[0], error), outputsOfThisLayer[0]); // yhat - y
|
||||
} else {
|
||||
clm_Matrix weightsT = clm_matrixTranspose(nn.layers[i + 1].weights);
|
||||
clm_matrixMultiplyMatrix(weightsT, prevError, error);
|
||||
}
|
||||
|
||||
clm_Matrix gradient = clm_matrixDSigmoid(outputsOfThisLayer[0]); // dsig(yhat)
|
||||
clm_matrixMultiplyMatrixElements(gradient, error); // (yhat - y) . dsig(yhat)
|
||||
clm_matrixMultiplyScalar(gradient, nn.learnRate);
|
||||
|
||||
clm_Matrix inputT = clm_matrixTranspose(inputsToThisLayer[0]);
|
||||
clm_matrixMultiplyMatrix(gradient, inputT, layer.weightsError);
|
||||
|
||||
clm_matrixAddMatrix(layer.weights, layer.weightsError);
|
||||
clm_matrixAddMatrix(layer.bias, gradient);
|
||||
}*/
|
||||
}
|
||||
|
||||
free(batchInputs);
|
||||
@ -216,7 +226,7 @@ int main(int argc, const char *argv[]) {
|
||||
unsigned int imageCount;
|
||||
loadImages(&images, &imageCount);
|
||||
|
||||
imageCount = 60000;
|
||||
imageCount = 600;
|
||||
|
||||
printf("%f\n", images[0].values[0]);
|
||||
|
||||
@ -230,13 +240,13 @@ int main(int argc, const char *argv[]) {
|
||||
clm_Linear layers[] = {
|
||||
clm_linearCreateRandom(i, h),
|
||||
clm_linearCreateRandom(h, o)};
|
||||
clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 500);
|
||||
clm_NN nn = clm_nnCreate(sizeof(layers) / sizeof(clm_Linear), layers, 0.01, 10000);
|
||||
|
||||
for(unsigned int i = 0; i < sizeof(layers) / sizeof(clm_Linear); i++) {
|
||||
clm_linearInit(&nn.layers[i]);
|
||||
}
|
||||
|
||||
for(unsigned int epoch = 0; epoch < 1; epoch++) {
|
||||
for(unsigned int epoch = 0; epoch < 10; epoch++) {
|
||||
printf("Epoch %u\n", epoch);
|
||||
/*for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
|
||||
if(idx % 1000 == 0) {
|
||||
@ -245,45 +255,15 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
}*/
|
||||
train(nn, imageCount, images, labels);
|
||||
|
||||
printf("Score: %.2f\n", eval(nn, imageCount, images, labels));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Train done\n");
|
||||
|
||||
unsigned int correct = 0;
|
||||
for(unsigned int idx = 0; idx < imageCount; idx++) { // Each train sample
|
||||
// printf("pred(%.2f, %.2f) = %.2f\n", train_data_x[idx][0],
|
||||
// train_data_x[idx][1], predict(nn, train_data_x[idx], 2)[0]);
|
||||
float *pred = predict(nn, images[idx]);
|
||||
unsigned int predDigit = 0;
|
||||
float max = -1;
|
||||
for(unsigned int j = 0; j < 10; j++) {
|
||||
// printf("%.2f ", pred[j]);
|
||||
if(pred[j] > max || max < 0) {
|
||||
max = pred[j];
|
||||
predDigit = j;
|
||||
}
|
||||
}
|
||||
// if(idx < 100) printf("%u (confidence: %.2f)\n", predDigit, max);
|
||||
|
||||
unsigned int actDigit = 0;
|
||||
float maxA = -1;
|
||||
for(unsigned int j = 0; j < 10; j++) {
|
||||
// printf("%.2f ", pred[j]);
|
||||
if(labels[idx].values[j] > maxA || maxA < 0) {
|
||||
maxA = labels[idx].values[j];
|
||||
actDigit = j;
|
||||
}
|
||||
}
|
||||
// if(idx < 100) printf("Actual: %u\n", actDigit);
|
||||
// printf("\n");
|
||||
|
||||
if(predDigit == actDigit) correct++;
|
||||
}
|
||||
|
||||
printf("Correct: %u -> %.2f", correct, (float) correct / imageCount * 100);
|
||||
|
||||
printf("\n");
|
||||
float score = eval(nn, imageCount, images, labels);
|
||||
printf("Correct: %.2f\n", score);
|
||||
|
||||
clm_gpuDestroy();
|
||||
}
|
||||
|
21
src/mat.cl
21
src/mat.cl
@ -21,8 +21,9 @@ void mat_multiply(cl_GPUMat matA, __global float *matA_values, cl_GPUMat matB, _
|
||||
uint idx = get_global_id(0);
|
||||
if(idx >= matOut.rows * matOut.cols) return;
|
||||
|
||||
uint i, j;
|
||||
matrixGetIJ(matOut, idx, i, j);
|
||||
// TODO: might not work with transposed matOut
|
||||
uint i = idx / matOut.cols;
|
||||
uint j = idx % matOut.cols;
|
||||
|
||||
float sum = 0;
|
||||
for(unsigned int k = 0; k < matA.cols; k++) {
|
||||
@ -75,6 +76,14 @@ void mat_multiply_scalar(cl_GPUMat mat, __global float *mat_values, float scalar
|
||||
mat_values[idx] *= scalar;
|
||||
}
|
||||
|
||||
// Builds the transposed view of `mat`: rows and cols are swapped and the
// `transposed` flag is toggled, so transposing twice yields the original
// shape. Only the descriptor changes — no element data is moved.
cl_GPUMat clm_matrixTranspose(cl_GPUMat mat) {
	// Designated initializer zero-fills any remaining fields, matching
	// the previous `{0}` + assignments form exactly.
	cl_GPUMat view = {
		.rows = mat.cols,
		.cols = mat.rows,
		.transposed = !mat.transposed,
	};
	return view;
}
|
||||
|
||||
__kernel void linear_forward(unsigned int batchSize,
|
||||
cl_GPUMat input, __global float *input_values,
|
||||
cl_GPUMat weights, __global float *weights_values,
|
||||
@ -123,14 +132,12 @@ __kernel void linear_backprop_2(unsigned int batchSize,
|
||||
__global float *batchOutWeightsErrors_values = outputWeightsErrors_values + b * outputWeightsErrors.rows * outputWeightsErrors.cols;
|
||||
__global float *batchOutGradients_values = outputGradients_values + b * outputGradients.rows * outputGradients.cols;
|
||||
|
||||
cl_GPUMat inputsT = inputs;
|
||||
inputsT.transposed = true;
|
||||
cl_GPUMat inputsT = clm_matrixTranspose(inputs);
|
||||
mat_multiply(outputGradients, batchOutGradients_values, inputsT, batchInput_values, outputWeightsErrors, batchOutWeightsErrors_values);
|
||||
|
||||
if(updateErrors) {
|
||||
cl_GPUMat weightsT = weights;
|
||||
weightsT.transposed = true;
|
||||
mat_multiply(weightsT, weights_values, inputErrors, batchInErrors_values, outputErrors, batchOutErrors_values);
|
||||
cl_GPUMat weightsT = clm_matrixTranspose(weightsT);
|
||||
// mat_multiply(weightsT, weights_values, inputErrors, batchInErrors_values, outputErrors, batchOutErrors_values);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user