Moved the windowing operation from compute_mdcts() to mdct_forward() in an

attempt to reduce copying.
author: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> 2008-04-21 18:10:52 +0400
committer: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> 2008-04-21 18:10:52 +0400
commit: 30e1b4d36c432b5211e7be7d0066e91a846ce04d (patch)
tree: fb5680e5230a100247a24c36acf701b645d1745a
parent: ba11d78372ef22f240aea5235af3036760abe1fa (diff)
4 files changed, 62 insertions, 36 deletions
diff --git a/libcelt/celt.c b/libcelt/celt.c
index cb68520..8a7f39f 100644
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -154,44 +154,34 @@ static inline celt_int16_t SIG2INT16(celt_sig_t x)
 static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t * restrict in, celt_sig_t * restrict out)
 {
    int c, N4;
-   VARDECL(celt_word32_t, x);
-   VARDECL(celt_word32_t, tmp);
-   const int C = CHANNELS(mode);
    const mdct_lookup *lookup = MDCT(mode);
    const int N = FRAMESIZE(mode);
+   const int C = CHANNELS(mode);
    const int overlap = OVERLAP(mode);
-   SAVE_STACK;
    N4 = (N-overlap)>>1;
-   ALLOC(x, 2*N, celt_word32_t);
-   ALLOC(tmp, N, celt_word32_t);
-   for (c=0;c<C;c++)
+   if (C==1)
    {
-      int j;
-      celt_word32_t * restrict x1, * restrict x2;
-      for (j=0;j<2*N-2*N4;j++)
-         x[j+N4] = in[C*j+c];
-      x1 = x+N4;
-      x2 = x+2*N-N4-1;
-      for (j=0;j<overlap;j++)
-      {
-         *x1 = MULT16_32_Q15(window[j],*x1);
-         *x2 = MULT16_32_Q15(window[j],*x2);
-         x1++;
-         x2--;
-      }
-      CELT_MEMSET(x, 0, N4);
-      CELT_MEMSET(x+2*N-N4, 0, N4);
-      if (C==1)
+      mdct_forward(lookup, in-N4, out, window, overlap);
+   } else {
+      VARDECL(celt_word32_t, x);
+      VARDECL(celt_word32_t, tmp);
+      SAVE_STACK;
+      ALLOC(x, 2*N, celt_word32_t);
+      ALLOC(tmp, N, celt_word32_t);
+      for (c=0;c<C;c++)
       {
-         mdct_forward(lookup, x, out);
-      } else {
-         mdct_forward(lookup, x, tmp);
+         int j;
+         for (j=0;j<2*N-2*N4;j++)
+            x[j+N4] = in[C*j+c];
+         CELT_MEMSET(x, 0, N4);
+         CELT_MEMSET(x+2*N-N4, 0, N4);
+         mdct_forward(lookup, x, tmp, window, overlap);
          /* Interleaving the sub-frames */
          for (j=0;j<N;j++)
             out[C*j+c] = tmp[j];
       }
+      RESTORE_STACK;
    }
-   RESTORE_STACK;
 }
 
 /** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
diff --git a/libcelt/mdct.c b/libcelt/mdct.c
index c38409e..9db0707 100644
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -86,7 +86,7 @@ void mdct_clear(mdct_lookup *l)
    celt_free(l->trig);
 }
 
-void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out)
+void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t *window, int overlap)
 {
    int i;
    int N, N2, N4;
@@ -105,12 +105,30 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
       const kiss_fft_scalar * restrict xp2 = in+N2+N4-1;
       kiss_fft_scalar * restrict yp = out;
       kiss_fft_scalar *t = &l->trig[0];
-      for(i=0;i<N/8;i++)
+      const celt_word16_t * restrict wp1 = window+overlap/2;
+      const celt_word16_t * restrict wp2 = window+overlap/2-1;
+      for(i=0;i<overlap/4;i++)
       {
          kiss_fft_scalar re, im;
          /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
-         re = -HALF32(xp1[N2] + *xp2);
-         im = -HALF32(*xp1    - xp2[-N2]);
+         re = -HALF32(MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2));
+         im = -HALF32(MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]));
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+         /* We could remove the HALF32 above and just use MULT16_32_Q16 below
+         (MIXED_PRECISION only) */
+         *yp++ = S_MUL(re,t[0])  -  S_MUL(im,t[N4]);
+         *yp++ = S_MUL(im,t[0])  +  S_MUL(re,t[N4]);
+         t++;
+      }
+      for(;i<N/8;i++)
+      {
+         kiss_fft_scalar re, im;
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         re = -HALF32(*xp2);
+         im = -HALF32(*xp1);
          xp1+=2;
          xp2-=2;
          /* We could remove the HALF32 above and just use MULT16_32_Q16 below
@@ -119,12 +137,14 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
          *yp++ = S_MUL(im,t[0])  +  S_MUL(re,t[N4]);
 	 t++;
       }
-      for(;i<N4;i++)
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-overlap/4;i++)
       {
          kiss_fft_scalar re, im;
          /* Real part arranged as a-bR, Imag part arranged as -c-dR */
-         re =  HALF32(xp1[-N2] - *xp2);
-         im = -HALF32(*xp1 + xp2[N2]);
+         re =  HALF32(-*xp2);
+         im = -HALF32(*xp1);
          xp1+=2;
          xp2-=2;
          /* We could remove the HALF32 above and just use MULT16_32_Q16 below
@@ -133,6 +153,22 @@ void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * r
          *yp++ = S_MUL(im,t[0])  +  S_MUL(re,t[N4]);
 	 t++;
       }
+      for(;i<N4;i++)
+      {
+         kiss_fft_scalar re, im;
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         re =  HALF32(MULT16_32_Q15(*wp1, xp1[-N2]) - MULT16_32_Q15(*wp2, *xp2));
+         im = -HALF32(MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]));
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+         /* We could remove the HALF32 above and just use MULT16_32_Q16 below
+         (MIXED_PRECISION only) */
+         *yp++ = S_MUL(re,t[0])  -  S_MUL(im,t[N4]);
+         *yp++ = S_MUL(im,t[0])  +  S_MUL(re,t[N4]);
+         t++;
+      }
    }
 
    /* N/4 complex FFT, which should normally down-scale by 4/N (but doesn't now) */
diff --git a/libcelt/mdct.h b/libcelt/mdct.h
index 9411942..46f3c13 100644
--- a/libcelt/mdct.h
+++ b/libcelt/mdct.h
@@ -57,7 +57,7 @@ void mdct_init(mdct_lookup *l,int N);
 void mdct_clear(mdct_lookup *l);
 
 /** Compute a forward MDCT and scale by 2/N */
-void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out);
+void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16_t *window, int overlap);
 
 /** Compute a backward MDCT (no scaling) */
 void mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out);
diff --git a/tests/mdct-test.c b/tests/mdct-test.c
index 5cd95ee..e4cc65c 100644
--- a/tests/mdct-test.c
+++ b/tests/mdct-test.c
@@ -102,7 +102,7 @@ void test1d(int nfft,int isinverse)
        mdct_backward(&cfg,in,out);
        check_inv(in,out,nfft,isinverse);
     } else {
-       mdct_forward(&cfg,in,out);
+       mdct_forward(&cfg,in,out,NULL, 0);
        check(in,out,nfft,isinverse);
     }
     /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
author	Jean-Marc Valin <jean-marc.valin@usherbrooke.ca>	2008-04-21 18:10:52 +0400
committer	Jean-Marc Valin <jean-marc.valin@usherbrooke.ca>	2008-04-21 18:10:52 +0400
commit	30e1b4d36c432b5211e7be7d0066e91a846ce04d (patch)
tree	fb5680e5230a100247a24c36acf701b645d1745a
parent	ba11d78372ef22f240aea5235af3036760abe1fa (diff)