diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-04 00:28:24 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-06 11:12:00 +0300 |
commit | 98b8be09d56d03d220fff3536842c0703bae865c (patch) | |
tree | 02841a9024863cc37403b5158d2dd87f5861629a | |
parent | fa5e960cb1d968955a4158aa908faa7ee3940aed (diff) |
Vectorize DRED quantization
-rw-r--r-- | silk/dred_encoder.c | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index 9b005a63..6b585416 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -31,7 +31,7 @@ #include <string.h> -#if 1 +#if 0 #include <stdio.h> #include <math.h> #endif @@ -43,7 +43,6 @@ #include "dred_decoder.h" #include "float_cast.h" #include "os_support.h" -#include "vec.h" #include "celt/laplace.h" @@ -220,18 +219,27 @@ void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int ex static void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint16 *scale, const opus_uint16 *dzone, const opus_uint16 *r, const opus_uint16 *p0, int dim) { int i; + int q[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)]; + float xq[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)]; + float delta[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)]; + float deadzone[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)]; float eps = .1f; + /* This is split into multiple loops (with temporary arrays) so that the compiler + can vectorize all of it, and so we can call the vector tanh(). */ + for (i=0;i<dim;i++) { + delta[i] = dzone[i]*(1.f/1024.f); + xq[i] = x[i]*scale[i]*(1.f/256.f); + deadzone[i] = xq[i]/(delta[i]+eps); + } + compute_activation(deadzone, deadzone, dim, ACTIVATION_TANH); + for (i=0;i<dim;i++) { + xq[i] = xq[i] - delta[i]*deadzone[i]; + q[i] = (int)floor(.5f+xq[i]); + } for (i=0;i<dim;i++) { - float delta; - float xq; - int q; - delta = dzone[i]*(1.f/1024.f); - xq = x[i]*scale[i]*(1.f/256.f); - xq = xq - delta*tanh_approx(xq/(delta+eps)); - q = (int)floor(.5f+xq); /* Make the impossible actually impossible. */ - if (r[i] == 0 || p0[i] >= 32767) q = 0; - ec_laplace_encode_p0(enc, q, p0[i], r[i]); + if (r[i] == 0 || p0[i] >= 32767) q[i] = 0; + ec_laplace_encode_p0(enc, q[i], p0[i], r[i]); } } |