Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/celt/vq.c
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2016-08-10 06:22:27 +0300
committerJean-Marc Valin <jmvalin@jmvalin.ca>2016-08-10 06:22:27 +0300
commit76674feae22db03848a40446beb2fcec70d2180d (patch)
tree4c6425459ecfed95ba9a3a48ba9aa5ffeac45107 /celt/vq.c
parente806d6a74129c70d2849cf621968f905149c21e1 (diff)
SSE2 implementation of the PVQ search
We used the SSE reciprocal square root instruction to vectorize the search rather than comparing one at a time with multiplies. Speeds up the entire encoder by 8-10%.
Diffstat (limited to 'celt/vq.c')
-rw-r--r--celt/vq.c34
1 files changed, 24 insertions, 10 deletions
diff --git a/celt/vq.c b/celt/vq.c
index e6895952..8d49d804 100644
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -158,29 +158,21 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
return collapse_mask;
}
-unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
- opus_val16 gain, int resynth)
+opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
VARDECL(celt_norm, y);
- VARDECL(int, iy);
VARDECL(int, signx);
int i, j;
int pulsesLeft;
opus_val32 sum;
opus_val32 xy;
opus_val16 yy;
- unsigned collapse_mask;
SAVE_STACK;
- celt_assert2(K>0, "alg_quant() needs at least one pulse");
- celt_assert2(N>1, "alg_quant() needs at least two dimensions");
-
+ (void)arch;
ALLOC(y, N, celt_norm);
- ALLOC(iy, N, int);
ALLOC(signx, N, int);
- exp_rotation(X, N, 1, B, K, spread);
-
/* Get rid of the sign */
sum = 0;
j=0; do {
@@ -322,6 +314,28 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
but has the same performance otherwise. */
iy[j] = (iy[j]^-signx[j]) + signx[j];
} while (++j<N);
+ RESTORE_STACK;
+ return yy;
+}
+
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
+ opus_val16 gain, int resynth, int arch)
+{
+ VARDECL(int, iy);
+ opus_val16 yy;
+ unsigned collapse_mask;
+ SAVE_STACK;
+
+ celt_assert2(K>0, "alg_quant() needs at least one pulse");
+ celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+ /* Covers vectorization by up to 4. */
+ ALLOC(iy, N+3, int);
+
+ exp_rotation(X, N, 1, B, K, spread);
+
+ yy = op_pvq_search(X, iy, K, N, arch);
+
encode_pulses(iy, N, K, enc);
if (resynth)