Enhancements the fixed-point approximations of non-linear functions.

Accuracy for rsqrt, rcp, cos, and log2 is now at the level of truncation error for the current output resolution of these functions. sqrt and exp2 still have non-trivial algebraic error, but this cannot be reduced much further using the current method without additional computation. Also updates the fast float approximations for log2 and exp2 with coefficients that give slightly lower maximum relative error. Patch modified by Jean-Marc Valin to leave the cos approximation as is and leave the check for x<-15 in exp2 as is.
author: Timothy B. Terriberry <tterribe@xiph.org> 2009-10-21 08:18:41 +0400
committer: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> 2009-10-22 04:30:46 +0400
commit: a9ffc14ab7f00496b41f84a89c395b839e8452c4 (patch)
tree: 5af0e0279031eb7dec432fc2765a2d874bd9b5e4
parent: ab4dcc5c90140f7a6fefdef2e2acfdf7ecc152c9 (diff)
2 files changed, 94 insertions, 33 deletions
diff --git a/libcelt/mathops.h b/libcelt/mathops.h
index a79cf47..2ec3a12 100644
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -130,9 +130,9 @@ static inline float celt_log2(float x)
    in.f = x;
    integer = (in.i>>23)-127;
    in.i -= integer<<23;
-   frac = in.f - 1.5;
-   /* -0.41446   0.96093  -0.33981   0.15600 */
-   frac = -0.41446 + frac*(0.96093 + frac*(-0.33981 + frac*0.15600));
+   frac = in.f - 1.5f;
+   frac = -0.41445418f + frac*(0.95909232f
+          + frac*(-0.33951290f + frac*0.16541097f));
    return 1+integer+frac;
 }
 
@@ -150,7 +150,8 @@ static inline float celt_exp2(float x)
       return 0;
    frac = x-integer;
    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
-   res.f = 1.f + frac * (0.696147f + frac * (0.224411f + 0.079442f*frac));
+   res.f = 0.99992522f + frac * (0.69583354f
+           + frac * (0.22606716f + 0.078024523f*frac));
    res.i = (res.i + (integer<<23)) & 0x7fffffff;
    return res.f;
 }
@@ -199,22 +200,23 @@ static inline celt_word32 celt_rsqrt(celt_word32 x)
    /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
    n = x-32768;
    /* Get a rough initial guess for the root.
-      The optimal minimax quadratic approximation is
-       r = 1.4288615575712422-n*(0.8452316405039975+n*0.4519141640876117).
+      The optimal minimax quadratic approximation (using relative error) is
+       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
       Coefficients here, and the final result r, are Q14.*/
-   r = ADD16(23410, MULT16_16_Q15(n, ADD16(-13848, MULT16_16_Q15(n, 7405))));
+   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
    /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
       We can compute the result from n and r using Q15 multiplies with some
        adjustment, carefully done to avoid overflow.
-      Range of y is [-2014,2362]. */
+      Range of y is [-1564,1594]. */
    r2 = MULT16_16_Q15(r, r);
    y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
-   /* Apply a 2nd-order Householder iteration: r' = r*(1+y*(-0.5+y*0.375)). */
+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). */
    rt = ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
-              ADD16(-16384, MULT16_16_Q15(y, 12288)))));
+              SUB16(MULT16_16_Q15(y, 12288), 16384))));
    /* rt is now the Q14 reciprocal square root of the Q16 x, with a maximum
-       error of 2.70970/16384 and a MSE of 0.587003/16384^2. */
-   /* Most of the error in this approximation comes from the following shift: */
+       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+       peak absolute error of 2.26591/16384. */
+   /* Most of the error in this function comes from the following shift: */
    rt = PSHR32(rt,k);
    return rt;
 }
@@ -225,7 +227,7 @@ static inline celt_word32 celt_sqrt(celt_word32 x)
    int k;
    celt_word16 n;
    celt_word32 rt;
-   const celt_word16 C[5] = {23174, 11584, -3011, 1570, -557};
+   const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
    if (x==0)
       return 0;
    k = (celt_ilog2(x)>>1)-7;
@@ -244,7 +246,7 @@ static inline celt_word32 celt_psqrt(celt_word32 x)
    int k;
    celt_word16 n;
    celt_word32 rt;
-   const celt_word16 C[5] = {23174, 11584, -3011, 1570, -557};
+   const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
    k = (celt_ilog2(x)>>1)-7;
    x = VSHR32(x, (k<<1));
    n = x-32768;
@@ -301,12 +303,14 @@ static inline celt_word16 celt_log2(celt_word32 x)
    int i;
    celt_word16 n, frac;
    /*-0.41446   0.96093  -0.33981   0.15600 */
-   const celt_word16 C[4] = {-6791, 7872, -1392, 319};
+   /* -0.4144541824871411+32/16384, 0.9590923197873218, -0.3395129038105771,
+       0.16541096501128538 */
+   const celt_word16 C[4] = {-6758, 15715, -5563, 2708};
    if (x==0)
       return -32767;
    i = celt_ilog2(x);
    n = VSHR32(x,i-15)-32768-16384;
-   frac = ADD16(C[0], MULT16_16_Q14(n, ADD16(C[1], MULT16_16_Q14(n, ADD16(C[2], MULT16_16_Q14(n, (C[3])))))));
+   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, (C[3])))))));
    return SHL16(i-13,8)+SHR16(frac,14-8);
 }
 
@@ -316,10 +320,10 @@ static inline celt_word16 celt_log2(celt_word32 x)
  K2 = 3-4*log(2)
  K3 = 3*log(2) - 2
 */
-#define D0 16384
-#define D1 11356
-#define D2 3726
-#define D3 1301
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
 /** Base-2 exponential approximation (2^x). (Q11 input, Q16 output) */
 static inline celt_word32 celt_exp2(celt_word16 x)
 {
@@ -331,7 +335,7 @@ static inline celt_word32 celt_exp2(celt_word16 x)
    else if (integer < -15)
       return 0;
    frac = SHL16(x-SHL16(integer,11),3);
-   frac = ADD16(D0, MULT16_16_Q14(frac, ADD16(D1, MULT16_16_Q14(frac, ADD16(D2 , MULT16_16_Q14(D3,frac))))));
+   frac = ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
    return VSHR32(EXTEND32(frac), -integer-2);
 }
 
@@ -339,14 +343,29 @@ static inline celt_word32 celt_exp2(celt_word16 x)
 static inline celt_word32 celt_rcp(celt_word32 x)
 {
    int i;
-   celt_word16 n, frac;
-   const celt_word16 C[5] = {21848, -7251, 2403, -934, 327};
+   celt_word16 n;
+   celt_word16 r;
    celt_assert2(x>0, "celt_rcp() only defined for positive values");
    i = celt_ilog2(x);
-   n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
-   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], 
-                MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
-   return VSHR32(EXTEND32(frac),i-16);
+   /* n is Q15 with range [0,1). */
+   n = VSHR32(x,i-15)-32768;
+   /* Start with a linear approximation:
+      r = 1.8823529411764706-0.9411764705882353*n.
+      The coefficients and the result are Q14 in the range [15420,30840].*/
+   r = ADD16(30840, MULT16_16_Q15(-15420, n));
+   /* Perform two Newton iterations:
+      r -= r*((r*n)-1.Q15)
+         = r*((r*n)+(r-1.Q15)). */
+   r = SUB16(r, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+       neatly compensates for truncation error in the rest of the process. */
+   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+       error of 1.24665/32768. */
+   return VSHR32(EXTEND32(r),i-16);
 }
 
 #define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))
diff --git a/tests/mathops-test.c b/tests/mathops-test.c
index 423d1e9..5f086ae 100644
--- a/tests/mathops-test.c
+++ b/tests/mathops-test.c
@@ -30,7 +30,7 @@ void testdiv(void)
 #else
       prod = val*i;
 #endif
-      if (fabs(prod-1) > .001)
+      if (fabs(prod-1) > .00025)
       {
          fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod);
          ret = 1;
@@ -47,7 +47,7 @@ void testsqrt(void)
       celt_word16 val;
       val = celt_sqrt(i);
       ratio = val/sqrt(i);
-      if (fabs(ratio - 1) > .001 && fabs(val-sqrt(i)) > 2)
+      if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
       {
          fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
          ret = 1;
@@ -81,7 +81,7 @@ void testlog2(void)
    for (x=0.001;x<1677700.0;x+=(x/8.0))
    {
       float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
-      if (error>0.001)
+      if (error>0.0009)
       {
          fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
          ret = 1;    
@@ -95,7 +95,7 @@ void testexp2(void)
    for (x=-11.0;x<24.0;x+=0.0007)
    {
       float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
-      if (error>0.0005)
+      if (error>0.0002)
       {
          fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
          ret = 1;    
@@ -117,6 +117,49 @@ void testexp2log2(void)
    }
 }
 #else
+void testlog2(void)
+{
+   celt_word32 x;
+   for (x=8;x<1073741824;x+=(x>>3))
+   {
+      float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/256.0);
+      if (error>0.003)
+      {
+         fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2(void)
+{
+   celt_word16 x;
+   for (x=-32768;x<-30720;x++)
+   {
+      float error1 = fabs(x/2048.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
+      float error2 = fabs(exp(0.6931471805599453094*x/2048.0)-celt_exp2(x)/65536.0);
+      if (error1>0.0002&&error2>0.00004)
+      {
+         fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2log2(void)
+{
+   celt_word32 x;
+   for (x=8;x<65536;x+=(x>>3))
+   {
+      float error = fabs(x-0.25*celt_exp2(celt_log2(x)<<3))/16384;
+      if (error>0.004)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
 void testilog2(void)
 {
    celt_word32 x;
@@ -137,11 +180,10 @@ int main(void)
    testdiv();
    testsqrt();
    testrsqrt();
-#ifndef FIXED_POINT
    testlog2();
    testexp2();
    testexp2log2();
-#else
+#ifdef FIXED_POINT
    testilog2();
 #endif
    return ret;
author	Timothy B. Terriberry <tterribe@xiph.org>	2009-10-21 08:18:41 +0400
committer	Jean-Marc Valin <jean-marc.valin@usherbrooke.ca>	2009-10-22 04:30:46 +0400
commit	a9ffc14ab7f00496b41f84a89c395b839e8452c4 (patch)
tree	5af0e0279031eb7dec432fc2765a2d874bd9b5e4
parent	ab4dcc5c90140f7a6fefdef2e2acfdf7ecc152c9 (diff)