diff options
author | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2008-02-14 16:34:01 +0300 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2008-02-14 16:34:01 +0300 |
commit | 883bd8e55646d07ddd20e1f14a4a0aad6c324b0f (patch) | |
tree | 60cfba43661a5b3f0d8dce17fba61360fdebb654 | |
parent | 0d587d89c107e8d615579e1c63471690666f31d0 (diff) |
Optional use of SSE intrinsics (still trying to convince gcc not to move the
input data across all registers)
-rw-r--r-- | libcelt/vq.c | 29 |
1 files changed, 24 insertions, 5 deletions
diff --git a/libcelt/vq.c b/libcelt/vq.c index db965eb..7403077 100644 --- a/libcelt/vq.c +++ b/libcelt/vq.c @@ -34,6 +34,25 @@ #include "cwrs.h" #include "vq.h" +/* Enable this or define your own implementation if you want to speed up the + VQ search (used in inner loop only) */ +#if 0 +#include <xmmintrin.h> +static inline float approx_sqrt(float x) +{ + _mm_store_ss(&x, _mm_sqrt_ss(_mm_set_ss(x))); + return x; +} +static inline float approx_inv(float x) +{ + _mm_store_ss(&x, _mm_rcp_ss(_mm_set_ss(x))); + return x; +} +#else +#define approx_sqrt(x) (sqrt(x)) +#define approx_inv(x) (1.f/(x)) +#endif + struct NBest { float score; float gain; @@ -115,7 +134,7 @@ void alg_quant(float *x, float *W, int N, int K, float *p, float alpha, ec_enc * } for (m=0;m<Lupdate;m++) - nbest[m]->score = -1e10; + nbest[m]->score = -1e10f; for (m=0;m<L2;m++) { @@ -136,14 +155,14 @@ void alg_quant(float *x, float *W, int N, int K, float *p, float alpha, ec_enc * /* Updating the sums of the new pulse(s) */ tmp_xy = xy[m] + s*x[j] - alpha*s*p[j]*Rxp; - tmp_yy = yy[m] + 2*s*y[m][j] + s*s +s*s*alpha*alpha*p[j]*p[j]*Rpp - 2*alpha*s*p[j]*yp[m] - 2*s*s*alpha*p[j]*p[j]; - tmp_yp = yp[m] + s*p[j] *(1-alpha*Rpp); + tmp_yy = yy[m] + 2.f*s*y[m][j] + s*s +s*s*alpha*alpha*p[j]*p[j]*Rpp - 2.f*alpha*s*p[j]*yp[m] - 2.f*s*s*alpha*p[j]*p[j]; + tmp_yp = yp[m] + s*p[j] *(1.f-alpha*Rpp); /* Compute the gain such that ||p + g*y|| = 1 */ - g = (sqrt(tmp_yp*tmp_yp + tmp_yy - tmp_yy*Rpp) - tmp_yp)/tmp_yy; + g = (approx_sqrt(tmp_yp*tmp_yp + tmp_yy - tmp_yy*Rpp) - tmp_yp)*approx_inv(tmp_yy); /* Knowing that gain, what the error: (x-g*y)^2 (result is negated and we discard x^2 because it's constant) */ - score = 2*g*tmp_xy - g*g*tmp_yy; + score = 2.f*g*tmp_xy - g*g*tmp_yy; if (score>nbest[Lupdate-1]->score) { |