diff options
author | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2008-06-02 07:14:56 +0400 |
---|---|---|
committer | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2008-06-02 07:14:56 +0400 |
commit | 372ce2e6ca636276179446ca818c1a72cfcc718f (patch) | |
tree | e43dd5812792a2208c21e7f93a932423b581b9eb /libspeex | |
parent | 2fee1222850076810d0ea575a66a9596a4b807a7 (diff) | |
parent | d794a6ab626ab6d3bbe6689185b22a1df993b8e0 (diff) |
Merge branch 'stereo'
Diffstat (limited to 'libspeex')
-rw-r--r-- | libspeex/mdf.c | 454 |
1 files changed, 268 insertions, 186 deletions
diff --git a/libspeex/mdf.c b/libspeex/mdf.c index 93adbf4..d8bacaf 100644 --- a/libspeex/mdf.c +++ b/libspeex/mdf.c @@ -137,6 +137,8 @@ struct SpeexEchoState_ { int adapted; int saturated; int screwed_up; + int C; /** Number of input channels (microphones) */ + int K; /** Number of output channels (loudspeakers) */ spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; @@ -177,10 +179,10 @@ struct SpeexEchoState_ { spx_word16_t *window; spx_word16_t *prop; void *fft_table; - spx_word16_t memX, memD, memE; + spx_word16_t *memX, *memD, *memE; spx_word16_t preemph; spx_word16_t notch_radius; - spx_mem_t notch_mem[2]; + spx_mem_t *notch_mem; /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; @@ -188,7 +190,7 @@ struct SpeexEchoState_ { int play_buf_started; }; -static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) { int i; spx_word16_t den2; @@ -200,7 +202,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;i<len;i++) { - spx_word16_t vin = in[i]; + spx_word16_t vin = in[i*stride]; spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); #ifdef FIXED_POINT mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); @@ -240,6 +242,18 @@ static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N ps[j]=MULT16_16(X[i],X[i]); } +/** Compute power spectrum of a half-complex (packed) vector and accumulate */ +static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) +{ + int i, j; + ps[0]+=MULT16_16(X[0],X[0]); + for (i=1,j=1;i<N-1;i+=2,j++) + { + ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); + } + ps[j]+=MULT16_16(X[i],X[i]); +} + /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ #ifdef FIXED_POINT static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) @@ -336,16 +350,17 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_fl prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); } -static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) +static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) { - int i, j; + int i, j, p; spx_word16_t max_sum = 1; spx_word32_t prop_sum = 1; for (i=0;i<M;i++) { spx_word32_t tmp = 1; - for (j=0;j<N;j++) - tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); + for (p=0;p<P;p++) + for (j=0;j<N;j++) + tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); #ifdef FIXED_POINT /* Just a security in case an overflow were to occur */ tmp = MIN32(ABS32(tmp), 536870912); @@ -386,9 +401,18 @@ static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const sp /** Creates a new echo canceller state */ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) { - int i,N,M; + return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); +} + +EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) +{ + int i,N,M, C, K; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); + st->K = nb_speakers; + st->C = nb_mic; + C=st->C; + K=st->K; #ifdef DUMP_ECHO_CANCEL_DATA if (rFile || pFile || oFile) speex_fatal("Opening dump files twice"); @@ -419,23 +443,23 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->fft_table = spx_fft_init(N); - st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); - st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); + st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); - st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); + st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); + st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); #ifdef TWO_PATH - st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); + st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); #endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); @@ -456,7 +480,7 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) #endif for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_ONE; - for (i=0;i<N*M;i++) + for (i=0;i<N*M*K*C;i++) st->W[i] = 0; { spx_word32_t sum = 0; @@ -471,11 +495,13 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) } for (i=M-1;i>=0;i--) { - st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); + st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); } } - st->memX=st->memD=st->memE=0; + st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); + st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); + st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); @@ -484,7 +510,7 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) else st->notch_radius = QCONST16(.992, 15); - st->notch_mem[0] = st->notch_mem[1] = 0; + st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; @@ -493,7 +519,7 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->Dvar1 = st->Dvar2 = FLOAT_ZERO; #endif - st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; st->play_buf_started = 0; @@ -503,11 +529,13 @@ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) /** Resets echo canceller state */ EXPORT void speex_echo_state_reset(SpeexEchoState *st) { - int i, M, N; + int i, M, N, C, K; st->cancel_count=0; st->screwed_up = 0; N = st->window_size; M = st->M; + C=st->C; + K=st->K; for (i=0;i<N*M;i++) st->W[i] = 0; #ifdef TWO_PATH @@ -527,13 +555,20 @@ EXPORT void speex_echo_state_reset(SpeexEchoState *st) { st->last_y[i] = 0; } - for (i=0;i<N;i++) + for (i=0;i<N*C;i++) { st->E[i] = 0; + } + for (i=0;i<N*K;i++) + { st->x[i] = 0; } - st->notch_mem[0] = st->notch_mem[1] = 0; - st->memX=st->memD=st->memE=0; + for (i=0;i<2*C;i++) + st->notch_mem[i] = 0; + for (i=0;i<C;i++) + st->memD[i]=st->memE[i]=0; + for (i=0;i<K;i++) + st->memX[i]=0; st->saturated = 0; st->adapted = 0; @@ -651,8 +686,8 @@ EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const s /** Performs echo cancellation on a frame */ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { - int i,j; - int N,M; + int i,j, chan, speak; + int N,M, C, K; spx_word32_t Syy,See,Sxx,Sdd, Sff; #ifdef TWO_PATH spx_word32_t Dbf; @@ -667,6 +702,9 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c N = st->window_size; M = st->M; + C = st->C; + K = st->K; + st->cancel_count++; #ifdef FIXED_POINT ss=DIV32_16(11469,M); @@ -676,137 +714,178 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c ss_1 = 1-ss; #endif - /* Apply a notch filter to make sure DC doesn't end up causing problems */ - filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); - /* Copy input data to buffer and apply pre-emphasis */ - for (i=0;i<st->frame_size;i++) + for (chan = 0; chan < C; chan++) { - spx_word32_t tmp32; - tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); -#ifdef FIXED_POINT - /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ - if (tmp32 > 32767) + /* Apply a notch filter to make sure DC doesn't end up causing problems */ + filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); + /* Copy input data to buffer and apply pre-emphasis */ + /* Copy input data to buffer */ + for (i=0;i<st->frame_size;i++) { - tmp32 = 32767; - st->saturated = M+1; + spx_word32_t tmp32; + /* FIXME: This core has changed a bit, need to merge properly */ + tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); +#ifdef FIXED_POINT + if (tmp32 > 32767) + { + tmp32 = 32767; + if (st->saturated == 0) + st->saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + if (st->saturated == 0) + st->saturated = 1; + } +#endif + st->memD[chan] = st->input[chan*st->frame_size+i]; + st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); } - if (tmp32 < -32767) + } + + for (speak = 0; speak < K; speak++) + { + for (i=0;i<st->frame_size;i++) { - tmp32 = -32767; - st->saturated = M+1; - } -#endif - st->x[i+st->frame_size] = EXTRACT16(tmp32); - st->memX = far_end[i]; - - tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); + spx_word32_t tmp32; + st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; + tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); #ifdef FIXED_POINT - if (tmp32 > 32767) - { - tmp32 = 32767; - if (st->saturated == 0) - st->saturated = 1; - } - if (tmp32 < -32767) + /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ + if (tmp32 > 32767) + { + tmp32 = 32767; + st->saturated = M+1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + st->saturated = M+1; + } +#endif + st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); + st->memX[speak] = far_end[i*K+speak]; + } + } + + for (speak = 0; speak < K; speak++) + { + /* Shift memory: this could be optimized eventually*/ + for (j=M-1;j>=0;j--) { - tmp32 = -32767; - if (st->saturated == 0) - st->saturated = 1; + for (i=0;i<N;i++) + st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; } -#endif - st->memD = st->input[i]; - st->input[i] = tmp32; + /* Convert x (echo input) to frequency domain */ + spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); } - - /* Shift memory: this could be optimized eventually*/ - for (j=M-1;j>=0;j--) + + Sxx = 0; + for (speak = 0; speak < K; speak++) { - for (i=0;i<N;i++) - st->X[(j+1)*N+i] = st->X[j*N+i]; + Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); + power_spectrum_accum(st->X+speak*N, st->Xf, N); } - - /* Convert x (far end) to frequency domain */ - spx_fft(st->fft_table, st->x, &st->X[0]); - for (i=0;i<N;i++) - st->last_y[i] = st->x[i]; - Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); - for (i=0;i<st->frame_size;i++) - st->x[i] = st->x[i+st->frame_size]; - /* From here on, the top part of x is used as scratch space */ + Sff = 0; + for (chan = 0; chan < C; chan++) + { #ifdef TWO_PATH - /* Compute foreground filter */ - spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->e); - for (i=0;i<st->frame_size;i++) - st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); - Sff = mdf_inner_prod(st->e, st->e, st->frame_size); + /* Compute foreground filter */ + spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); + spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); + Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); #endif + } /* Adjust proportional adaption rate */ - mdf_adjust_prop (st->W, N, M, st->prop); + /* FIXME: Adjust that for C, K*/ + if (st->adapted) + mdf_adjust_prop (st->W, N, M, C*K, st->prop); /* Compute weight gradient */ if (st->saturated == 0) { - for (j=M-1;j>=0;j--) + for (chan = 0; chan < C; chan++) { - weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); - for (i=0;i<N;i++) - st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); - + for (speak = 0; speak < K; speak++) + { + for (j=M-1;j>=0;j--) + { + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); + for (i=0;i<N;i++) + st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; + } + } } } else { st->saturated--; } + /* FIXME: MC conversion required */ /* Update weight to prevent circular convolution (MDF / AUMDF) */ - for (j=0;j<M;j++) + for (chan = 0; chan < C; chan++) { - /* This is a variant of the Alternatively Updated MDF (AUMDF) */ - /* Remove the "if" to make this an MDF filter */ - if (j==0 || st->cancel_count%(M-1) == j-1) + for (speak = 0; speak < K; speak++) { -#ifdef FIXED_POINT - for (i=0;i<N;i++) - st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); - for (i=0;i<st->frame_size;i++) - { - st->wtmp[i]=0; - } - for (i=st->frame_size;i<N;i++) + for (j=0;j<M;j++) { - st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); - } - spx_fft(st->fft_table, st->wtmp, st->wtmp2); - /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ - for (i=0;i<N;i++) - st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); + /* This is a variant of the Alternatively Updated MDF (AUMDF) */ + /* Remove the "if" to make this an MDF filter */ + if (j==0 || st->cancel_count%(M-1) == j-1) + { +#ifdef FIXED_POINT + for (i=0;i<N;i++) + st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); + spx_ifft(st->fft_table, st->wtmp2, st->wtmp); + for (i=0;i<st->frame_size;i++) + { + st->wtmp[i]=0; + } + for (i=st->frame_size;i<N;i++) + { + st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); + } + spx_fft(st->fft_table, st->wtmp, st->wtmp2); + /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ + for (i=0;i<N;i++) + st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); #else - spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); - for (i=st->frame_size;i<N;i++) - { - st->wtmp[i]=0; - } - spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); + spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); + for (i=st->frame_size;i<N;i++) + { + st->wtmp[i]=0; + } + spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); #endif + } + } } } - - /* Compute filter response Y */ - spectral_mul_accum(st->X, st->W, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->y); - + + /* So we can use power_spectrum_accum */ + for (i=0;i<=st->frame_size;i++) + st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; + + Dbf = 0; + See = 0; #ifdef TWO_PATH /* Difference in response, this is used to estimate the variance of our residual power estimate */ - for (i=0;i<st->frame_size;i++) - st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); - Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); + for (chan = 0; chan < C; chan++) + { + spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); + spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); + Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); + See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + } #endif - for (i=0;i<st->frame_size;i++) - st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); - See = mdf_inner_prod(st->e, st->e, st->frame_size); #ifndef TWO_PATH Sff = See; #endif @@ -843,11 +922,12 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; /* Copy background filter to foreground filter */ - for (i=0;i<N*M;i++) + for (i=0;i<N*M*C*K;i++) st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); /* Apply a smooth transition so as to not introduce blocking artifacts */ - for (i=0;i<st->frame_size;i++) - st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); + for (chan = 0; chan < C; chan++) + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); } else { int reset_background=0; /* Otherwise, check if the background filter is significantly worse */ @@ -860,13 +940,16 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c if (reset_background) { /* Copy foreground filter to background filter */ - for (i=0;i<N*M;i++) + for (i=0;i<N*M*C*K;i++) st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); /* We also need to copy the output so as to get correct adaptation */ - for (i=0;i<st->frame_size;i++) - st->y[i+st->frame_size] = st->e[i+st->frame_size]; - for (i=0;i<st->frame_size;i++) - st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); + for (chan = 0; chan < C; chan++) + { + for (i=0;i<st->frame_size;i++) + st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); + } See = Sff; st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; @@ -874,41 +957,57 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c } #endif - /* Compute error signal (for the output with de-emphasis) */ - for (i=0;i<st->frame_size;i++) - { - spx_word32_t tmp_out; + Sey = Syy = Sdd = 0; + for (chan = 0; chan < C; chan++) + { + /* Compute error signal (for the output with de-emphasis) */ + for (i=0;i<st->frame_size;i++) + { + spx_word32_t tmp_out; #ifdef TWO_PATH - tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); #else - tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); #endif - tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); + tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); /* This is an arbitrary test for saturation in the microphone signal */ - if (in[i] <= -32000 || in[i] >= 32000) - { + if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) + { if (st->saturated == 0) st->saturated = 1; + } + out[i*C+chan] = WORD2INT(tmp_out); + st->memE[chan] = tmp_out; } - out[i] = WORD2INT(tmp_out); - st->memE = tmp_out; - } - + #ifdef DUMP_ECHO_CANCEL_DATA - dump_audio(in, far_end, out, st->frame_size); + dump_audio(in, far_end, out, st->frame_size); #endif - /* Compute error signal (filter update version) */ - for (i=0;i<st->frame_size;i++) - { - st->e[i+st->frame_size] = st->e[i]; - st->e[i] = 0; + /* Compute error signal (filter update version) */ + for (i=0;i<st->frame_size;i++) + { + st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; + st->e[chan*N+i] = 0; + } + + /* Compute a bunch of correlations */ + /* FIXME: bad merge */ + Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); + Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); + Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); + + /* Convert error to frequency domain */ + spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); + for (i=0;i<st->frame_size;i++) + st->y[i+chan*N] = 0; + spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); + + /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ + power_spectrum_accum(st->E+chan*N, st->Rf, N); + power_spectrum_accum(st->Y+chan*N, st->Yf, N); + } - - /* Compute a bunch of correlations */ - Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); - Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); - Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ @@ -921,7 +1020,7 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c { /* Things have gone really bad */ st->screwed_up += 50; - for (i=0;i<st->frame_size;i++) + for (i=0;i<st->frame_size*C;i++) out[i] = 0; } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) { @@ -940,36 +1039,17 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c /* Add a small noise floor to make sure not to have problems when dividing */ See = MAX32(See, SHR32(MULT16_16(N, 100),6)); + + for (speak = 0; speak < K; speak++) + { + Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); + power_spectrum_accum(st->X+speak*N, st->Xf, N); + } - /* Convert error to frequency domain */ - spx_fft(st->fft_table, st->e, st->E); - for (i=0;i<st->frame_size;i++) - st->y[i] = 0; - spx_fft(st->fft_table, st->y, st->Y); - - /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ - power_spectrum(st->E, st->Rf, N); - power_spectrum(st->Y, st->Yf, N); - power_spectrum(st->X, st->Xf, N); /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); - - /* Enable this to compute the power based only on the tail (would need to compute more - efficiently to make this really useful */ - if (0) - { - float scale2 = .5f/M; - for (j=0;j<=st->frame_size;j++) - st->power[j] = 100; - for (i=0;i<M;i++) - { - power_spectrum(&st->X[i*N], st->Xf, N); - for (j=0;j<=st->frame_size;j++) - st->power[j] += scale2*st->Xf[j]; - } - } /* Compute filtered spectra and (cross-)correlations */ for (j=st->frame_size;j>=0;j--) @@ -1091,13 +1171,13 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } - /* Save residual echo so it can be used by the nonlinear processor */ + /* FIXME: MC conversion required */ + for (i=0;i<st->frame_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; if (st->adapted) { /* If the filter is adapted, take the filtered echo */ for (i=0;i<st->frame_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; - for (i=0;i<st->frame_size;i++) st->last_y[st->frame_size+i] = in[i]-out[i]; } else { /* If filter isn't adapted yet, all we can do is take the far end signal directly */ @@ -1170,6 +1250,7 @@ EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) (*(int*)ptr) = st->sampling_rate; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: + /*FIXME: Implement this for multiple channels */ *((spx_int32_t *)ptr) = st->M * st->frame_size; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE: @@ -1178,6 +1259,7 @@ EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) spx_int32_t *filt = (spx_int32_t *) ptr; for(j=0;j<M;j++) { + /*FIXME: Implement this for multiple channels */ #ifdef FIXED_POINT for (i=0;i<N;i++) st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); |