diff options
author | Stephane Lesage <stephane.lesage@ateis-international.com> | 2009-06-18 15:19:22 +0400 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2009-06-18 15:19:22 +0400 |
commit | 0dd7bfebe55abcac7e9acbca9d2ac2eddeee2b6f (patch) | |
tree | 8c89c18f796ed2fac28a320bf30891ce42423b72 | |
parent | e760e470625ef9412eacd7cb71537fbffdee815b (diff) |
Fixes a regression in the fixed-point code and adds saturation for fixed-point.
-rw-r--r-- | libspeex/resample.c | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/libspeex/resample.c b/libspeex/resample.c index a61cddf..7b5a308 100644 --- a/libspeex/resample.c +++ b/libspeex/resample.c @@ -345,20 +345,26 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c const spx_word16_t *iptr = & in[last_sample]; #ifndef OVERRIDE_INNER_PRODUCT_SINGLE - float accum[4] = {0,0,0,0}; + sum = 0; + for(j=0;j<N;j++) sum += MULT16_16(sinc[j], iptr[j]); +/* This code is slower on most DSPs which have only 2 accumulators. + Plus this forces truncation to 32 bits and you lose the HW guard bits. + I think we can trust the compiler and let it vectorize and/or unroll itself. + spx_word32_t accum[4] = {0,0,0,0}; for(j=0;j<N;j+=4) { - accum[0] += sinc[j]*iptr[j]; - accum[1] += sinc[j+1]*iptr[j+1]; - accum[2] += sinc[j+2]*iptr[j+2]; - accum[3] += sinc[j+3]*iptr[j+3]; + accum[0] += MULT16_16(sinc[j], iptr[j]); + accum[1] += MULT16_16(sinc[j+1], iptr[j+1]); + accum[2] += MULT16_16(sinc[j+2], iptr[j+2]); + accum[3] += MULT16_16(sinc[j+3], iptr[j+3]); } sum = accum[0] + accum[1] + accum[2] + accum[3]; +*/ #else sum = inner_product_single(sinc, iptr, N); #endif - out[out_stride * out_sample++] = PSHR32(sum, 15); + out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767); last_sample += int_advance; samp_frac_num += frac_advance; if (samp_frac_num >= den_rate) @@ -463,13 +469,13 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3 } cubic_coef(frac, interp); - sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); + sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); #else cubic_coef(frac, interp); sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); #endif - out[out_stride * 
out_sample++] = PSHR32(sum,15); + out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767); last_sample += int_advance; samp_frac_num += frac_advance; if (samp_frac_num >= den_rate) |