Fixes a regression in the fixed-point code and adds saturation for fixed-point.

author: Stephane Lesage <stephane.lesage@ateis-international.com> 2009-06-18 15:19:22 +0400
committer: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> 2009-06-18 15:19:22 +0400
commit: 0dd7bfebe55abcac7e9acbca9d2ac2eddeee2b6f (patch)
tree: 8c89c18f796ed2fac28a320bf30891ce42423b72
parent: e760e470625ef9412eacd7cb71537fbffdee815b (diff)
1 files changed, 14 insertions, 8 deletions
diff --git a/libspeex/resample.c b/libspeex/resample.c
index a61cddf..7b5a308 100644
--- a/libspeex/resample.c
+++ b/libspeex/resample.c
@@ -345,20 +345,26 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
       const spx_word16_t *iptr = & in[last_sample];
 
 #ifndef OVERRIDE_INNER_PRODUCT_SINGLE
-      float accum[4] = {0,0,0,0};
+      sum = 0;
+      for(j=0;j<N;j++) sum += MULT16_16(sinc[j], iptr[j]);
 
+/*    This code is slower on most DSPs which have only 2 accumulators.
+      Plus this forces truncation to 32 bits and you lose the HW guard bits.
+      I think we can trust the compiler and let it vectorize and/or unroll itself.
+      spx_word32_t accum[4] = {0,0,0,0};
       for(j=0;j<N;j+=4) {
-        accum[0] += sinc[j]*iptr[j];
-        accum[1] += sinc[j+1]*iptr[j+1];
-        accum[2] += sinc[j+2]*iptr[j+2];
-        accum[3] += sinc[j+3]*iptr[j+3];
+        accum[0] += MULT16_16(sinc[j], iptr[j]);
+        accum[1] += MULT16_16(sinc[j+1], iptr[j+1]);
+        accum[2] += MULT16_16(sinc[j+2], iptr[j+2]);
+        accum[3] += MULT16_16(sinc[j+3], iptr[j+3]);
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
+*/
 #else
       sum = inner_product_single(sinc, iptr, N);
 #endif
 
-      out[out_stride * out_sample++] = PSHR32(sum, 15);
+      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
@@ -463,13 +469,13 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
       }
 
       cubic_coef(frac, interp);
-      sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
+      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
 #else
       cubic_coef(frac, interp);
       sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
       
-      out[out_stride * out_sample++] = PSHR32(sum,15);
+      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
author	Stephane Lesage <stephane.lesage@ateis-international.com>	2009-06-18 15:19:22 +0400
committer	Jean-Marc Valin <jean-marc.valin@usherbrooke.ca>	2009-06-18 15:19:22 +0400
commit	0dd7bfebe55abcac7e9acbca9d2ac2eddeee2b6f (patch)
tree	8c89c18f796ed2fac28a320bf30891ce42423b72
parent	e760e470625ef9412eacd7cb71537fbffdee815b (diff)