diff options
author | Timothy B. Terriberry <tterribe@xiph.org> | 2009-10-20 10:39:45 +0400 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2009-10-20 15:14:24 +0400 |
commit | 4a275d4d8f77afa8d308c2e9998bb5843944ac8c (patch) | |
tree | efef1da465128d62805df36197fd8bf7fd926d87 | |
parent | a3bba38b49a1d12d22f7949786a266e410aa884e (diff) |
Accuracy improvements to the fixed-point celt_rsqrt().
-rw-r--r-- | libcelt/mathops.h | 27 |
1 files changed, 22 insertions, 5 deletions
diff --git a/libcelt/mathops.h b/libcelt/mathops.h index 62bdc7c..a79cf47 100644 --- a/libcelt/mathops.h +++ b/libcelt/mathops.h @@ -190,15 +190,32 @@ static inline celt_word32 celt_rsqrt(celt_word32 x) { int k; celt_word16 n; + celt_word16 r; + celt_word16 r2; + celt_word16 y; celt_word32 rt; - const celt_word16 C[5] = {23126, -11496, 9812, -9097, 4100}; k = celt_ilog2(x)>>1; x = VSHR32(x, (k-7)<<1); - /* Range of n is [-16384,32767] */ + /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ n = x-32768; - rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], - MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); - rt = VSHR32(rt,k); + /* Get a rough initial guess for the root. + The optimal minimax quadratic approximation is + r = 1.4288615575712422-n*(0.8452316405039975+n*0.4519141640876117). + Coefficients here, and the final result r, are Q14.*/ + r = ADD16(23410, MULT16_16_Q15(n, ADD16(-13848, MULT16_16_Q15(n, 7405)))); + /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. + We can compute the result from n and r using Q15 multiplies with some + adjustment, carefully done to avoid overflow. + Range of y is [-2014,2362]. */ + r2 = MULT16_16_Q15(r, r); + y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); + /* Apply a 2nd-order Householder iteration: r' = r*(1+y*(-0.5+y*0.375)). */ + rt = ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, + ADD16(-16384, MULT16_16_Q15(y, 12288))))); + /* rt is now the Q14 reciprocal square root of the Q16 x, with a maximum + error of 2.70970/16384 and a MSE of 0.587003/16384^2. */ + /* Most of the error in this approximation comes from the following shift: */ + rt = PSHR32(rt,k); return rt; } |