x86-64: Add msac_decode_symbol_adapt SSE2 asm

Also make various minor optimizations/style fixes to the MSAC C functions.
author: Henrik Gramner <gramner@twoorioles.com> 2019-04-12 00:20:18 +0300
committer: Henrik Gramner <henrik@gramner.com> 2019-04-15 22:09:27 +0300
commit: fa1b265142e1409a986f01bd7abe115b308c1028 (patch)
tree: 81befc8a99d235c98f9dce2a63cee0dee676591f /src/msac.c
parent: 44d0de41d478b6b41a1ebbf1de012caa8d75cca0 (diff)
1 files changed, 41 insertions, 49 deletions
diff --git a/src/msac.c b/src/msac.c
index 9e6d32b..104d2d6 100644
--- a/src/msac.c
+++ b/src/msac.c
@@ -58,8 +58,8 @@ static inline void ctx_refill(MsacContext *s) {
  * necessary), and stores them back in the decoder context.
  * dif: The new value of dif.
  * rng: The new value of the range. */
-static inline void ctx_norm(MsacContext *s, ec_win dif, uint32_t rng) {
-    const uint16_t d = 15 - (31 ^ clz(rng));
+static inline void ctx_norm(MsacContext *s, ec_win dif, unsigned rng) {
+    const int d = 15 ^ (31 ^ clz(rng));
     assert(rng <= 65535U);
     s->cnt -= d;
     s->dif = ((dif + 1) << d) - 1; /* Shift in 1s in the LSBs */
@@ -69,18 +69,17 @@ static inline void ctx_norm(MsacContext *s, ec_win dif, uint32_t rng) {
 }
 
 unsigned dav1d_msac_decode_bool_equi(MsacContext *const s) {
-    ec_win v, vw, dif = s->dif;
-    uint16_t r = s->rng;
-    unsigned ret;
+    ec_win vw, dif = s->dif;
+    unsigned ret, v, r = s->rng;
     assert((dif >> (EC_WIN_SIZE - 16)) < r);
     // When the probability is 1/2, f = 16384 >> EC_PROB_SHIFT = 256 and we can
     // replace the multiply with a simple shift.
     v = ((r >> 8) << 7) + EC_MIN_PROB;
-    vw   = v << (EC_WIN_SIZE - 16);
+    vw   = (ec_win)v << (EC_WIN_SIZE - 16);
     ret  = dif >= vw;
     dif -= ret*vw;
     v   += ret*(r - 2*v);
-    ctx_norm(s, dif, (unsigned) v);
+    ctx_norm(s, dif, v);
     return !ret;
 }
 
@@ -88,59 +87,57 @@ unsigned dav1d_msac_decode_bool_equi(MsacContext *const s) {
  * f: The probability that the bit is one
  * Return: The value decoded (0 or 1). */
 unsigned dav1d_msac_decode_bool(MsacContext *const s, const unsigned f) {
-    ec_win v, vw, dif = s->dif;
-    uint16_t r = s->rng;
-    unsigned ret;
+    ec_win vw, dif = s->dif;
+    unsigned ret, v, r = s->rng;
     assert((dif >> (EC_WIN_SIZE - 16)) < r);
     v = ((r >> 8) * (f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT)) + EC_MIN_PROB;
-    vw   = v << (EC_WIN_SIZE - 16);
+    vw   = (ec_win)v << (EC_WIN_SIZE - 16);
     ret  = dif >= vw;
     dif -= ret*vw;
     v   += ret*(r - 2*v);
-    ctx_norm(s, dif, (unsigned) v);
+    ctx_norm(s, dif, v);
     return !ret;
 }
 
-unsigned dav1d_msac_decode_bools(MsacContext *const c, const unsigned l) {
-    int v = 0;
-    for (int n = (int) l - 1; n >= 0; n--)
-        v = (v << 1) | dav1d_msac_decode_bool_equi(c);
+unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
+    unsigned v = 0;
+    while (n--)
+        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
     return v;
 }
 
-int dav1d_msac_decode_subexp(MsacContext *const c, const int ref,
+int dav1d_msac_decode_subexp(MsacContext *const s, const int ref,
                              const int n, const unsigned k)
 {
     int i = 0;
     int a = 0;
     int b = k;
     while ((2 << b) < n) {
-        if (!dav1d_msac_decode_bool_equi(c)) break;
+        if (!dav1d_msac_decode_bool_equi(s)) break;
         b = k + i++;
         a = (1 << b);
     }
-    const unsigned v = dav1d_msac_decode_bools(c, b) + a;
+    const unsigned v = dav1d_msac_decode_bools(s, b) + a;
     return ref * 2 <= n ? inv_recenter(ref, v) :
                           n - 1 - inv_recenter(n - 1 - ref, v);
 }
 
-int dav1d_msac_decode_uniform(MsacContext *const c, const unsigned n) {
+int dav1d_msac_decode_uniform(MsacContext *const s, const unsigned n) {
     assert(n > 0);
     const int l = ulog2(n) + 1;
     assert(l > 1);
     const unsigned m = (1 << l) - n;
-    const unsigned v = dav1d_msac_decode_bools(c, l - 1);
-    return v < m ? v : (v << 1) - m + dav1d_msac_decode_bool_equi(c);
+    const unsigned v = dav1d_msac_decode_bools(s, l - 1);
+    return v < m ? v : (v << 1) - m + dav1d_msac_decode_bool_equi(s);
 }
 
 /* Decodes a symbol given an inverse cumulative distribution function (CDF)
  * table in Q15. */
 static unsigned decode_symbol(MsacContext *const s, const uint16_t *const cdf,
-                              const unsigned n_symbols)
+                              const size_t n_symbols)
 {
-    ec_win u, v = s->rng, r = s->rng >> 8;
-    const ec_win c = s->dif >> (EC_WIN_SIZE - 16);
-    unsigned ret = 0;
+    const unsigned c = s->dif >> (EC_WIN_SIZE - 16);
+    unsigned u, v = s->rng, r = s->rng >> 8, ret = 0;
 
     assert(!cdf[n_symbols - 1]);
 
@@ -153,39 +150,34 @@ static unsigned decode_symbol(MsacContext *const s, const uint16_t *const cdf,
 
     assert(u <= s->rng);
 
-    ctx_norm(s, s->dif - (v << (EC_WIN_SIZE - 16)), (unsigned) (u - v));
+    ctx_norm(s, s->dif - ((ec_win)v << (EC_WIN_SIZE - 16)), u - v);
     return ret - 1;
 }
 
-static void update_cdf(uint16_t *const cdf, const unsigned val,
-                       const unsigned n_symbols)
+unsigned dav1d_msac_decode_symbol_adapt_c(MsacContext *const s,
+                                          uint16_t *const cdf,
+                                          const size_t n_symbols)
 {
-    const unsigned count = cdf[n_symbols];
-    const int rate = ((count >> 4) | 4) + (n_symbols > 3);
-    unsigned i;
-    for (i = 0; i < val; i++)
-        cdf[i] += (32768 - cdf[i]) >> rate;
-    for (; i < n_symbols - 1; i++)
-        cdf[i] -= cdf[i] >> rate;
-    cdf[n_symbols] = count + (count < 32);
-}
-
-unsigned dav1d_msac_decode_symbol_adapt(MsacContext *const c,
-                                        uint16_t *const cdf,
-                                        const unsigned n_symbols)
-{
-    const unsigned val = decode_symbol(c, cdf, n_symbols);
-    if(c->allow_update_cdf)
-        update_cdf(cdf, val, n_symbols);
+    const unsigned val = decode_symbol(s, cdf, n_symbols);
+    if (s->allow_update_cdf) {
+        const unsigned count = cdf[n_symbols];
+        const int rate = ((count >> 4) | 4) + (n_symbols > 3);
+        unsigned i;
+        for (i = 0; i < val; i++)
+            cdf[i] += (32768 - cdf[i]) >> rate;
+        for (; i < n_symbols - 1; i++)
+            cdf[i] -= cdf[i] >> rate;
+        cdf[n_symbols] = count + (count < 32);
+    }
     return val;
 }
 
-unsigned dav1d_msac_decode_bool_adapt(MsacContext *const c,
+unsigned dav1d_msac_decode_bool_adapt(MsacContext *const s,
                                       uint16_t *const cdf)
 {
-    const unsigned bit = dav1d_msac_decode_bool(c, *cdf);
+    const unsigned bit = dav1d_msac_decode_bool(s, *cdf);
 
-    if(c->allow_update_cdf){
+    if (s->allow_update_cdf) {
         // update_cdf() specialized for boolean CDFs
         const unsigned count = cdf[1];
         const int rate = (count >> 4) | 4;
author	Henrik Gramner <gramner@twoorioles.com>	2019-04-12 00:20:18 +0300
committer	Henrik Gramner <henrik@gramner.com>	2019-04-15 22:09:27 +0300
commit	fa1b265142e1409a986f01bd7abe115b308c1028 (patch)
tree	81befc8a99d235c98f9dce2a63cee0dee676591f /src/msac.c
parent	44d0de41d478b6b41a1ebbf1de012caa8d75cca0 (diff)