2023-10-29 21:34:23 +01:00
|
|
|
/*
 * Matrix descriptor handed to the device functions by value.
 *
 * Only the shape and layout flag live here; the element data is passed
 * alongside as a separate float buffer. The struct is packed, so no padding
 * is inserted between or after the fields.
 * NOTE(review): presumably mirrors a host-side struct of identical layout --
 * confirm before changing field order or types.
 */
typedef struct __attribute__((packed)) {
    unsigned int rows;  /* logical row count */
    unsigned int cols;  /* logical column count */
    char transposed;    /* zero: index as r * cols + c; non-zero: c * rows + r (see matrixAt) */
} cl_GPUMat;
|
|
|
|
|
2023-10-29 23:01:46 +01:00
|
|
|
/*
 * Element (r, c) of a matrix whose values live in mat_values, as an lvalue.
 *
 * When mat.transposed is zero the storage index is r * cols + c, otherwise
 * it is c * rows + r. Every argument and the whole expansion are
 * parenthesized so that expression arguments such as `i + 1` index the
 * intended element. `mat` is expanded more than once, so do not pass an
 * expression with side effects for it.
 */
#define matrixAt(mat, mat_values, r, c) ((mat_values)[(!(mat).transposed ? (r) * (mat).cols + (c) : (c) * (mat).rows + (r))])
|
2023-10-29 21:34:23 +01:00
|
|
|
|
2023-10-29 23:01:46 +01:00
|
|
|
/*
 * Computes one element of matOut = matA * matB for the calling work-item.
 *
 * The work-item's global id in dimension 0 is treated as a storage index
 * into matOut_values; ids >= matOut.rows * matOut.cols do nothing.
 *
 * The logical (row, column) handled by this work-item is derived from that
 * storage index according to matOut.transposed, so the result is always
 * written to matOut_values[idx]. This keeps every work-item on the same
 * slot that mat_add and mat_sigmoid touch, which matters because those
 * helpers index by idx and the caller has no barrier between the stages.
 *
 * The inner dimension is taken from matA.cols. No shape compatibility
 * checks are performed; the caller must supply matching matrices.
 */
void mat_multiply(cl_GPUMat matA, __global float *matA_values, cl_GPUMat matB, __global float *matB_values, cl_GPUMat matOut, __global float *matOut_values) {
    uint idx = get_global_id(0);
    if(idx >= matOut.rows * matOut.cols) return;

    // Invert matrixAt's index mapping for matOut so that (i, j) is exactly
    // the element stored at idx. rows/cols are non-zero here, otherwise the
    // bounds check above would already have returned.
    uint i;
    uint j;
    if(matOut.transposed) {
        // storage index = j * rows + i
        i = idx % matOut.rows;
        j = idx / matOut.rows;
    } else {
        // storage index = i * cols + j
        i = idx / matOut.cols;
        j = idx % matOut.cols;
    }

    float sum = 0.0f;
    for(unsigned int k = 0; k < matA.cols; k++) {
        sum += matrixAt(matA, matA_values, i, k) * matrixAt(matB, matB_values, k, j);
    }

    // By construction matrixAt(matOut, matOut_values, i, j) is slot idx.
    matOut_values[idx] = sum;
}
|
|
|
|
|
|
|
|
/*
 * In-place addition for the calling work-item: adds the value at this
 * work-item's storage index in matB_values onto the same index of
 * matA_values.
 *
 * Work-items whose global id (dimension 0) is not below
 * matA.rows * matA.cols do nothing. The addition is done on raw storage
 * indices: neither matrix's transposed flag nor matB's dimensions are
 * consulted.
 */
void mat_add(cl_GPUMat matA, __global float *matA_values, cl_GPUMat matB, __global float *matB_values) {
    uint gid = get_global_id(0);
    uint elementCount = matA.rows * matA.cols;

    if(gid < elementCount) {
        matA_values[gid] = matA_values[gid] + matB_values[gid];
    }
}
|
|
|
|
|
|
|
|
/*
 * Applies the logistic function 1 / (1 + e^-x) in place to the element at
 * the calling work-item's storage index.
 *
 * Work-items whose global id (dimension 0) is not below
 * mat.rows * mat.cols do nothing. The transposed flag is not consulted;
 * the operation is purely element-wise on storage.
 */
void mat_sigmoid(cl_GPUMat mat, __global float *mat_values) {
    uint gid = get_global_id(0);
    uint elementCount = mat.rows * mat.cols;

    if(gid < elementCount) {
        float x = mat_values[gid];
        mat_values[gid] = 1.0f / (1.0f + exp(-x));
    }
}
|
|
|
|
|
2023-10-31 14:30:09 +01:00
|
|
|
/*
 * Kernel: forward pass of a linear layer followed by a sigmoid, for every
 * sample of a batch.
 *
 * For each batch index b, the work-item runs three stages on its own
 * element: out_b = weights * input_b, then out_b += bias, then
 * out_b = sigmoid(out_b). Sample b of the input starts at offset
 * b * input.rows * input.cols in input_values, and sample b of the output
 * at offset b * out.rows * out.cols in out_values. The same weights and
 * bias buffers are used for every sample.
 */
__kernel void linear_forward(unsigned int batchSize, cl_GPUMat input, __global float *input_values, cl_GPUMat weights, __global float *weights_values, cl_GPUMat bias, __global float *bias_values, cl_GPUMat out, __global float *out_values) {
    // FIXME the stages below run without a barrier, and mat_add / mat_sigmoid
    // work on storage index idx. This is only correct if mat_multiply also
    // writes storage index idx, including when out is transposed -- verify
    // that mat_multiply guarantees this.
    unsigned int b = 0;
    while(b < batchSize) {
        // Buffers for sample b.
        __global float *batchOut_values = out_values + b * out.rows * out.cols;
        __global float *batchInput_values = input_values + b * input.rows * input.cols;

        mat_multiply(weights, weights_values, input, batchInput_values, out, batchOut_values);
        mat_add(out, batchOut_values, bias, bias_values);
        mat_sigmoid(out, batchOut_values);

        b++;
    }
}
|