diff options
author | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2006-08-17 10:27:15 +0400 |
---|---|---|
committer | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2008-05-19 08:26:01 +0400 |
commit | dd007a35405e4908d456782260a3c5583fd5d8b9 (patch) | |
tree | 172d4dd519afb48bdfd8e7f5dd6dd43aa930bb3c | |
parent | b731c7769d7fc8758cd3fc21a4e87768e0684705 (diff) |
some multi-channel conversion (lots more to do)
-rw-r--r-- | include/speex/speex_echo.h | 14 | ||||
-rw-r--r-- | libspeex/mdf.c | 216 |
2 files changed, 132 insertions, 98 deletions
diff --git a/include/speex/speex_echo.h b/include/speex/speex_echo.h index 4813b5a..2bf8e3f 100644 --- a/include/speex/speex_echo.h +++ b/include/speex/speex_echo.h @@ -55,22 +55,22 @@ struct SpeexEchoState_; typedef struct SpeexEchoState_ SpeexEchoState; /** Creates a new echo canceller state */ -SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length); +SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length, int nb_mic, int nb_speakers); /** Destroys an echo canceller state */ -void speex_echo_state_destroy(SpeexEchoState *st); +void mc_echo_state_destroy(SpeexEchoState *st); /** Performs echo cancellation on a frame */ -void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out, spx_int32_t *Yout); +void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out, spx_int32_t *Yout); /** Perform echo cancellation using internal playback buffer */ -void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout); +void mc_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout); /** Let the echo canceller know that a frame was just played */ -void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play); +void mc_echo_playback(SpeexEchoState *st, const spx_int16_t *play); /** Reset the echo canceller state */ -void speex_echo_state_reset(SpeexEchoState *st); +void mc_echo_state_reset(SpeexEchoState *st); /** Used like the ioctl function to control the echo canceller parameters * @@ -79,7 +79,7 @@ void speex_echo_state_reset(SpeexEchoState *st); * @param ptr Data exchanged to-from function * @return 0 if no error, -1 if request is unknown */ -int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr); +int mc_echo_ctl(SpeexEchoState *st, int request, void *ptr); #ifdef __cplusplus } diff --git a/libspeex/mdf.c b/libspeex/mdf.c index afadb7b..cf9316f 100644 
--- a/libspeex/mdf.c +++ b/libspeex/mdf.c @@ -111,6 +111,8 @@ struct SpeexEchoState_ { int cancel_count; int adapted; int saturated; + int C; /** Number of input channels (microphones) */ + int K; /** Number of output channels (loudspeakers) */ spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; @@ -143,7 +145,7 @@ struct SpeexEchoState_ { spx_word16_t *window; spx_word16_t *prop; void *fft_table; - spx_word16_t memX, memD, memE; + spx_word16_t *memX, *memD, *memE; spx_word16_t preemph; spx_word16_t notch_radius; spx_mem_t notch_mem[2]; @@ -153,7 +155,7 @@ struct SpeexEchoState_ { int play_buf_pos; }; -static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) { int i; spx_word16_t den2; @@ -165,7 +167,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;i<len;i++) { - spx_word16_t vin = in[i]; + spx_word16_t vin = in[i*stride]; spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); #ifdef FIXED_POINT mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); @@ -269,11 +271,15 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_wo /** Creates a new echo canceller state */ -SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) +SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic, int nb_speakers) { - int i,N,M; + int i,N,M, C, K; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); + st->K = nb_speakers; + st->C = nb_mic; + C=st->C; + K=st->K; st->frame_size = frame_size; st->window_size = 2*frame_size; N = st->window_size; @@ -294,25 +300,25 @@ SpeexEchoState *speex_echo_state_init(int frame_size, 
int filter_length) st->fft_table = spx_fft_init(N); - st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); st->d = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->Yps = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); - st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - - st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); - st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); + st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->Yps = (spx_word32_t*)speex_alloc(C*N*sizeof(spx_word32_t)); + st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->Yf = (spx_word32_t*)speex_alloc(C*(st->frame_size+1)*sizeof(spx_word32_t)); + st->Rf = (spx_word32_t*)speex_alloc(C*(st->frame_size+1)*sizeof(spx_word32_t)); + st->Xf = (spx_word32_t*)speex_alloc(K*(st->frame_size+1)*sizeof(spx_word32_t)); + st->Yh = (spx_word32_t*)speex_alloc(C*(st->frame_size+1)*sizeof(spx_word32_t)); + st->Eh = (spx_word32_t*)speex_alloc(C*(st->frame_size+1)*sizeof(spx_word32_t)); + + st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); + st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->E = 
(spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); - st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); - st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); + st->power = (spx_word32_t*)speex_alloc(K*(frame_size+1)*sizeof(spx_word32_t)); + st->power_1 = (spx_float_t*)speex_alloc(K*(frame_size+1)*sizeof(spx_float_t)); st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); @@ -350,7 +356,9 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) } } - st->memX=st->memD=st->memE=0; + st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); + st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); + st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); @@ -363,7 +371,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t)); + st->play_buf = (spx_int16_t*)speex_alloc(K*2*st->frame_size*sizeof(spx_int16_t)); st->play_buf_pos = 0; return st; @@ -395,7 +403,7 @@ void speex_echo_state_reset(SpeexEchoState *st) } /** Destroys an echo canceller state */ -void speex_echo_state_destroy(SpeexEchoState *st) +void mc_echo_state_destroy(SpeexEchoState *st) { spx_fft_destroy(st->fft_table); @@ -428,12 +436,12 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st); } -void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) +void mc_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) { 
int i; if (st->play_buf_pos>=st->frame_size) { - speex_echo_cancel(st, rec, st->play_buf, out, Yout); + mc_echo_cancel(st, rec, st->play_buf, out, Yout); st->play_buf_pos -= st->frame_size; for (i=0;i<st->frame_size;i++) st->play_buf[i] = st->play_buf[i+st->frame_size]; @@ -449,7 +457,7 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t } } -void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) +void mc_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { if (st->play_buf_pos<=st->frame_size) { @@ -463,10 +471,10 @@ void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) } /** Performs echo cancellation on a frame */ -void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) +void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) { - int i,j; - int N,M; + int i,j, chan, speak; + int N,M, C, K; spx_word32_t Syy,See,Sxx; spx_word16_t leak_estimate; spx_word16_t ss, ss_1; @@ -477,6 +485,8 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int N = st->window_size; M = st->M; + C = st->C; + K = st->K; st->cancel_count++; #ifdef FIXED_POINT ss=DIV32_16(11469,M); @@ -486,73 +496,94 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int ss_1 = 1-ss; #endif - filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); - /* Copy input data to buffer */ - for (i=0;i<st->frame_size;i++) + for (chan = 0; chan < C; chan++) { - spx_word16_t tmp; - spx_word32_t tmp32; - st->x[i] = st->x[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); -#ifdef FIXED_POINT - /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ - if (tmp32 > 32767) - { - tmp32 = 32767; - st->saturated = 1; - } - if (tmp32 < -32767) + 
filter_dc_notch16(ref+chan, st->notch_radius, st->d, st->frame_size, st->notch_mem, C); + /* Copy input data to buffer */ + for (i=0;i<st->frame_size;i++) { - tmp32 = -32767; - st->saturated = 1; - } -#endif - st->x[i+st->frame_size] = EXTRACT16(tmp32); - st->memX = echo[i]; - - tmp = st->d[i]; - st->d[i] = st->d[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); + spx_word16_t tmp; + spx_word32_t tmp32; + tmp = st->d[chan*N+i]; + st->d[chan*N+i] = st->d[chan*N+i+st->frame_size]; + tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); #ifdef FIXED_POINT - if (tmp32 > 32767) - { - tmp32 = 32767; - st->saturated = 1; - } - if (tmp32 < -32767) - { - tmp32 = -32767; - st->saturated = 1; - } + if (tmp32 > 32767) + { + tmp32 = 32767; + st->saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + st->saturated = 1; + } #endif - st->d[i+st->frame_size] = tmp32; - st->memD = tmp; + st->d[chan*N+i+st->frame_size] = tmp32; + st->memD[chan] = tmp; + } } - /* Shift memory: this could be optimized eventually*/ - for (j=M-1;j>=0;j--) + for (speak = 0; speak < K; speak++) { - for (i=0;i<N;i++) - st->X[(j+1)*N+i] = st->X[j*N+i]; + for (i=0;i<st->frame_size;i++) + { + spx_word16_t tmp; + spx_word32_t tmp32; + st->x[speak+N+i] = st->x[speak+N+i+st->frame_size]; + tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); +#ifdef FIXED_POINT + /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ + if (tmp32 > 32767) + { + tmp32 = 32767; + st->saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + st->saturated = 1; + } +#endif + st->x[speak+N+i+st->frame_size] = EXTRACT16(tmp32); + st->memX[speak] = echo[i]; + } + } + + for (speak = 0; speak < K; speak++) + { + /* Shift memory: this could be optimized eventually*/ + for (j=M-1;j>=0;j--) + { + for (i=0;i<N;i++) + st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; + } 
+ /* Convert x (echo input) to frequency domain */ + spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); } - - /* Convert x (echo input) to frequency domain */ - spx_fft(st->fft_table, st->x, &st->X[0]); + for (chan = 0; chan < C; chan++) + { #ifdef SMOOTH_BLOCKS - spectral_mul_accum(st->X, st->W, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->e); + spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); + spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); #endif - + } + /* Compute weight gradient */ if (!st->saturated) { - for (j=M-1;j>=0;j--) + for (chan = 0; chan < C; chan++) { - weighted_spectral_mul_conj(st->power_1, &st->X[(j+1)*N], st->E, st->PHI, N); - for (i=0;i<N;i++) - st->W[j*N+i] += MULT16_32_Q15(st->prop[j], st->PHI[i]); - + for (speak = 0; speak < K; speak++) + { + for (j=M-1;j>=0;j--) + { + weighted_spectral_mul_conj(st->power_1+K*N, &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); + for (i=0;i<N;i++) + st->W[chan*N*K*M + j*N*K + speak*N +i] += MULT16_32_Q15(st->prop[j], st->PHI[i]); + } + } } } @@ -592,9 +623,12 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int } } - /* Compute filter response Y */ - spectral_mul_accum(st->X, st->W, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->y); + for (chan = 0; chan < C; chan++) + { + /* Compute filter response Y */ + spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); + spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); + } /* Compute error signal (for the output with de-emphasis) */ @@ -776,17 +810,17 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int if (Yout) { spx_word16_t leak2; + for (i=0;i<st->frame_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; if (st->adapted) { /* If the filter is adapted, take the filtered echo */ for (i=0;i<st->frame_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; - for (i=0;i<st->frame_size;i++) st->last_y[st->frame_size+i] = ref[i]-out[i]; } else 
{ /* If filter isn't adapted yet, all we can do is take the echo signal directly */ - for (i=0;i<N;i++) - st->last_y[i] = st->x[i]; + for (i=0;i<st->frame_size;i++) + st->last_y[st->frame_size+i] = echo[i]; } /* Apply hanning window (should pre-compute it)*/ @@ -815,7 +849,7 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int } -int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) +int mc_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) { |