Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2018-11-22 22:06:34 +0300
committerJean-Marc Valin <jmvalin@jmvalin.ca>2018-11-22 22:06:34 +0300
commit9791b22b2c83980f6b4386c870cad58557c78007 (patch)
treebbf1b402274cb2bc99fc4f8b5db72f042f5dc571 /src
parent054acff3c142c147e3b5801a13347f04c56f0eb5 (diff)
Refactoring: Isolating the matrix-vector product in gemm_accum()
Diffstat (limited to 'src')
-rw-r--r--src/mlp.c75
1 files changed, 39 insertions, 36 deletions
diff --git a/src/mlp.c b/src/mlp.c
index f43a704e..964c6a98 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -69,22 +69,29 @@ static OPUS_INLINE float sigmoid_approx(float x)
return .5f + .5f*tansig_approx(.5f*x);
}
-void compute_dense(const DenseLayer *layer, float *output, const float *input)
+static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x)
{
int i, j;
+ for (i=0;i<rows;i++)
+ {
+ for (j=0;j<cols;j++)
+ out[i] += weights[j*col_stride + i]*x[j];
+ }
+}
+
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
+{
+ int i;
int N, M;
int stride;
M = layer->nb_inputs;
N = layer->nb_neurons;
stride = N;
for (i=0;i<N;i++)
- {
- /* Compute update gate. */
- float sum = layer->bias[i];
- for (j=0;j<M;j++)
- sum += layer->input_weights[j*stride + i]*input[j];
- output[i] = WEIGHTS_SCALE*sum;
- }
+ output[i] = layer->bias[i];
+ gemm_accum(output, layer->input_weights, N, M, stride, input);
+ for (i=0;i<N;i++)
+ output[i] *= WEIGHTS_SCALE;
if (layer->sigmoid) {
for (i=0;i<N;i++)
output[i] = sigmoid_approx(output[i]);
@@ -96,45 +103,41 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input)
void compute_gru(const GRULayer *gru, float *state, const float *input)
{
- int i, j;
+ int i;
int N, M;
int stride;
+ float tmp[MAX_NEURONS];
float z[MAX_NEURONS];
float r[MAX_NEURONS];
float h[MAX_NEURONS];
M = gru->nb_inputs;
N = gru->nb_neurons;
stride = 3*N;
+ /* Compute update gate. */
for (i=0;i<N;i++)
- {
- /* Compute update gate. */
- float sum = gru->bias[i];
- for (j=0;j<M;j++)
- sum += gru->input_weights[j*stride + i]*input[j];
- for (j=0;j<N;j++)
- sum += gru->recurrent_weights[j*stride + i]*state[j];
- z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
- }
+ z[i] = gru->bias[i];
+ gemm_accum(z, gru->input_weights, N, M, stride, input);
+ gemm_accum(z, gru->recurrent_weights, N, N, stride, state);
for (i=0;i<N;i++)
- {
- /* Compute reset gate. */
- float sum = gru->bias[N + i];
- for (j=0;j<M;j++)
- sum += gru->input_weights[N + j*stride + i]*input[j];
- for (j=0;j<N;j++)
- sum += gru->recurrent_weights[N + j*stride + i]*state[j];
- r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
- }
+ z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]);
+
+ /* Compute reset gate. */
for (i=0;i<N;i++)
- {
- /* Compute output. */
- float sum = gru->bias[2*N + i];
- for (j=0;j<M;j++)
- sum += gru->input_weights[2*N + j*stride + i]*input[j];
- for (j=0;j<N;j++)
- sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j];
- h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum);
- }
+ r[i] = gru->bias[N + i];
+ gemm_accum(r, &gru->input_weights[N], N, M, stride, input);
+ gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state);
+ for (i=0;i<N;i++)
+ r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]);
+
+ /* Compute output. */
+ for (i=0;i<N;i++)
+ h[i] = gru->bias[2*N + i];
+ for (i=0;i<N;i++)
+ tmp[i] = state[i] * r[i];
+ gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input);
+ gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp);
+ for (i=0;i<N;i++)
+ h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]);
for (i=0;i<N;i++)
state[i] = h[i];
}