diff options
author | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2008-04-21 18:10:52 +0400 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2008-04-21 18:10:52 +0400 |
commit | 30e1b4d36c432b5211e7be7d0066e91a846ce04d (patch) | |
tree | fb5680e5230a100247a24c36acf701b645d1745a | |
parent | ba11d78372ef22f240aea5235af3036760abe1fa (diff) |
Moved the windowing operation from compute_mdcts() to mdct_forward() in an
attempt to reduce copying.
-rw-r--r-- | libcelt/celt.c | 44 | ||||
-rw-r--r-- | libcelt/mdct.c | 50 | ||||
-rw-r--r-- | libcelt/mdct.h | 2 | ||||
-rw-r--r-- | tests/mdct-test.c | 2 |
4 files changed, 62 insertions, 36 deletions
diff --git a/libcelt/celt.c b/libcelt/celt.c index cb68520..8a7f39f 100644 --- a/libcelt/celt.c +++ b/libcelt/celt.c @@ -154,44 +154,34 @@ static inline celt_int16_t SIG2INT16(celt_sig_t x) static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t * restrict in, celt_sig_t * restrict out) { int c, N4; - VARDECL(celt_word32_t, x); - VARDECL(celt_word32_t, tmp); - const int C = CHANNELS(mode); const mdct_lookup *lookup = MDCT(mode); const int N = FRAMESIZE(mode); + const int C = CHANNELS(mode); const int overlap = OVERLAP(mode); - SAVE_STACK; N4 = (N-overlap)>>1; - ALLOC(x, 2*N, celt_word32_t); - ALLOC(tmp, N, celt_word32_t); - for (c=0;c<C;c++) + if (C==1) { - int j; - celt_word32_t * restrict x1, * restrict x2; - for (j=0;j<2*N-2*N4;j++) - x[j+N4] = in[C*j+c]; - x1 = x+N4; - x2 = x+2*N-N4-1; - for (j=0;j<overlap;j++) - { - *x1 = MULT16_32_Q15(window[j],*x1); - *x2 = MULT16_32_Q15(window[j],*x2); - x1++; - x2--; - } - CELT_MEMSET(x, 0, N4); - CELT_MEMSET(x+2*N-N4, 0, N4); - if (C==1) + mdct_forward(lookup, in-N4, out, window, overlap); + } else { + VARDECL(celt_word32_t, x); + VARDECL(celt_word32_t, tmp); + SAVE_STACK; + ALLOC(x, 2*N, celt_word32_t); + ALLOC(tmp, N, celt_word32_t); + for (c=0;c<C;c++) { - mdct_forward(lookup, x, out); - } else { - mdct_forward(lookup, x, tmp); + int j; + for (j=0;j<2*N-2*N4;j++) + x[j+N4] = in[C*j+c]; + CELT_MEMSET(x, 0, N4); + CELT_MEMSET(x+2*N-N4, 0, N4); + mdct_forward(lookup, x, tmp, window, overlap); /* Interleaving the sub-frames */ for (j=0;j<N;j++) out[C*j+c] = tmp[j]; } + RESTORE_STACK; } - RESTORE_STACK; } /** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */ diff --git a/libcelt/mdct.c b/libcelt/mdct.c index c38409e..9db0707 100644 --- a/libcelt/mdct.c +++ b/libcelt/mdct.c @@ -86,7 +86,7 @@ void mdct_clear(mdct_lookup *l) celt_free(l->trig); } -void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out) +void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t *window, int overlap) { int i; int N, N2, N4; @@ -105,12 +105,30 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r const kiss_fft_scalar * restrict xp2 = in+N2+N4-1; kiss_fft_scalar * restrict yp = out; kiss_fft_scalar *t = &l->trig[0]; - for(i=0;i<N/8;i++) + const celt_word16_t * restrict wp1 = window+overlap/2; + const celt_word16_t * restrict wp2 = window+overlap/2-1; + for(i=0;i<overlap/4;i++) { kiss_fft_scalar re, im; /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - re = -HALF32(xp1[N2] + *xp2); - im = -HALF32(*xp1 - xp2[-N2]); + re = -HALF32(MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2)); + im = -HALF32(MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2])); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + /* We could remove the HALF32 above and just use MULT16_32_Q16 below + (MIXED_PRECISION only) */ + *yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]); + *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]); + t++; + } + for(;i<N/8;i++) + { + kiss_fft_scalar re, im; + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + re = -HALF32(*xp2); + im = -HALF32(*xp1); xp1+=2; xp2-=2; /* We could remove the HALF32 above and just use MULT16_32_Q16 below @@ -119,12 +137,14 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]); t++; } - for(;i<N4;i++) + wp1 = window; + wp2 = window+overlap-1; + for(;i<N4-overlap/4;i++) { kiss_fft_scalar re, im; /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - re = HALF32(xp1[-N2] - *xp2); - im = -HALF32(*xp1 + xp2[N2]); + re = HALF32(-*xp2); + im = -HALF32(*xp1); xp1+=2; xp2-=2; /* We could remove the HALF32 above and just use MULT16_32_Q16 below @@ -133,6 +153,22 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]); t++; } + for(;i<N4;i++) + { + kiss_fft_scalar re, im; + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + re = HALF32(MULT16_32_Q15(*wp1, xp1[-N2]) - MULT16_32_Q15(*wp2, *xp2)); + im = -HALF32(MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2])); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + /* We could remove the HALF32 above and just use MULT16_32_Q16 below + (MIXED_PRECISION only) */ + *yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]); + *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]); + t++; + } } /* N/4 complex FFT, which should normally down-scale by 4/N (but doesn't now) */ diff --git a/libcelt/mdct.h b/libcelt/mdct.h index 9411942..46f3c13 100644 --- a/libcelt/mdct.h +++ b/libcelt/mdct.h @@ -57,7 +57,7 @@ void mdct_init(mdct_lookup *l,int N); void mdct_clear(mdct_lookup *l); /** Compute a forward MDCT and scale by 2/N */ -void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out); +void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16_t *window, int overlap); /** Compute a backward MDCT (no scaling) */ void mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out); diff --git a/tests/mdct-test.c b/tests/mdct-test.c index 5cd95ee..e4cc65c 100644 --- a/tests/mdct-test.c +++ b/tests/mdct-test.c @@ -102,7 +102,7 @@ void test1d(int nfft,int isinverse) mdct_backward(&cfg,in,out); check_inv(in,out,nfft,isinverse); } else { - mdct_forward(&cfg,in,out); + mdct_forward(&cfg,in,out,NULL, 0); check(in,out,nfft,isinverse); } /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ |