diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2016-08-10 06:22:27 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2016-08-10 06:22:27 +0300 |
commit | 76674feae22db03848a40446beb2fcec70d2180d (patch) | |
tree | 4c6425459ecfed95ba9a3a48ba9aa5ffeac45107 /celt/vq.c | |
parent | e806d6a74129c70d2849cf621968f905149c21e1 (diff) |
SSE2 implementation of the PVQ search
We used the SSE reciprocal square root instruction to vectorize the search rather
than compare one at a time with multiplies. Speeds up the entire encoder by 8-10%.
Diffstat (limited to 'celt/vq.c')
-rw-r--r-- | celt/vq.c | 34 |
1 files changed, 24 insertions, 10 deletions
@@ -158,29 +158,21 @@ static unsigned extract_collapse_mask(int *iy, int N, int B) return collapse_mask; } -unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc, - opus_val16 gain, int resynth) +opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch) { VARDECL(celt_norm, y); - VARDECL(int, iy); VARDECL(int, signx); int i, j; int pulsesLeft; opus_val32 sum; opus_val32 xy; opus_val16 yy; - unsigned collapse_mask; SAVE_STACK; - celt_assert2(K>0, "alg_quant() needs at least one pulse"); - celt_assert2(N>1, "alg_quant() needs at least two dimensions"); - + (void)arch; ALLOC(y, N, celt_norm); - ALLOC(iy, N, int); ALLOC(signx, N, int); - exp_rotation(X, N, 1, B, K, spread); - /* Get rid of the sign */ sum = 0; j=0; do { @@ -322,6 +314,28 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc, but has the same performance otherwise. */ iy[j] = (iy[j]^-signx[j]) + signx[j]; } while (++j<N); + RESTORE_STACK; + return yy; +} + +unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc, + opus_val16 gain, int resynth, int arch) +{ + VARDECL(int, iy); + opus_val16 yy; + unsigned collapse_mask; + SAVE_STACK; + + celt_assert2(K>0, "alg_quant() needs at least one pulse"); + celt_assert2(N>1, "alg_quant() needs at least two dimensions"); + + /* Covers vectorization by up to 4. */ + ALLOC(iy, N+3, int); + + exp_rotation(X, N, 1, B, K, spread); + + yy = op_pvq_search(X, iy, K, N, arch); + encode_pulses(iy, N, K, enc); if (resynth) |