diff options
author | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2007-05-04 09:11:18 +0400 |
---|---|---|
committer | Jean-Marc Valin <Jean-Marc.Valin@csiro.au> | 2008-05-19 08:53:14 +0400 |
commit | 3d7a6f0bd0a60145d8ac3a2f4037da623f407fba (patch) | |
tree | f90d32540ec3269ef8a405a2e97daaf2f83ffcab /libspeex | |
parent | 6bd022014a21ecca9c27d6041397009a5933ac39 (diff) | |
parent | d2cddf7e2f3c1a75265c43cabaa391037c830745 (diff) |
Big update in the multi-channel AEC to bring it up-to-date with the single
channel AEC. Mainly this means:
1) dual-path adaptive filter
2) Adaptive (pseudo-proportional) learning rate for different taps
3) API change
4) Other minor details
Merge commit 'd2cddf7e2f3c1a75265c43cabaa391037c830745' into stereo
Conflicts:
include/speex/speex_echo.h
libspeex/mdf.c
libspeex/testecho.c
Diffstat (limited to 'libspeex')
52 files changed, 4726 insertions, 3019 deletions
diff --git a/libspeex/Makefile.am b/libspeex/Makefile.am index a784002..ff6d4bc 100644 --- a/libspeex/Makefile.am +++ b/libspeex/Makefile.am @@ -2,7 +2,7 @@ #AUTOMAKE_OPTIONS = no-dependencies -EXTRA_DIST=testenc.c testenc_wb.c testenc_uwb.c testdenoise.c testecho.c +EXTRA_DIST=echo_diagnostic.m INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_builddir) @OGG_CFLAGS@ @@ -16,7 +16,7 @@ libspeex_la_SOURCES = nb_celp.c sb_celp.c lpc.c ltp.c lsp.c quant_lsp.c \ exc_10_16_table.c exc_20_32_table.c hexc_10_32_table.c misc.c speex_header.c \ speex_callbacks.c math_approx.c stereo.c preprocess.c smallft.c lbr_48k_tables.c \ jitter.c mdf.c vorbis_psy.c fftwrap.c kiss_fft.c _kiss_fft_guts.h kiss_fft.h \ - kiss_fftr.c kiss_fftr.h window.c + kiss_fftr.c kiss_fftr.h window.c filterbank.c resample.c noinst_HEADERS = lsp.h nb_celp.h lpc.h lpc_bfin.h ltp.h quant_lsp.h \ cb_search.h filters.h stack_alloc.h vq.h vq_sse.h vq_arm4.h vq_bfin.h \ @@ -24,19 +24,19 @@ noinst_HEADERS = lsp.h nb_celp.h lpc.h lpc_bfin.h ltp.h quant_lsp.h \ ltp_bfin.h filters_sse.h filters_arm4.h filters_bfin.h math_approx.h \ smallft.h arch.h fixed_arm4.h fixed_arm5e.h fixed_bfin.h fixed_debug.h \ fixed_generic.h cb_search_sse.h cb_search_arm4.h cb_search_bfin.h vorbis_psy.h \ - fftwrap.h pseudofloat.h lsp_bfin.h quant_lsp_bfin.h + fftwrap.h pseudofloat.h lsp_bfin.h quant_lsp_bfin.h filterbank.h libspeex_la_LDFLAGS = -no-undefined -version-info @SPEEX_LT_CURRENT@:@SPEEX_LT_REVISION@:@SPEEX_LT_AGE@ noinst_PROGRAMS = testenc testenc_wb testenc_uwb testdenoise testecho testenc_SOURCES = testenc.c -testenc_LDADD = $(top_builddir)/libspeex/libspeex.la +testenc_LDADD = libspeex.la testenc_wb_SOURCES = testenc_wb.c -testenc_wb_LDADD = $(top_builddir)/libspeex/libspeex.la +testenc_wb_LDADD = libspeex.la testenc_uwb_SOURCES = testenc_uwb.c -testenc_uwb_LDADD = $(top_builddir)/libspeex/libspeex.la +testenc_uwb_LDADD = libspeex.la testdenoise_SOURCES = testdenoise.c -testdenoise_LDADD = 
$(top_builddir)/libspeex/libspeex.la +testdenoise_LDADD = libspeex.la testecho_SOURCES = testecho.c -testecho_LDADD = $(top_builddir)/libspeex/libspeex.la +testecho_LDADD = libspeex.la diff --git a/libspeex/_kiss_fft_guts.h b/libspeex/_kiss_fft_guts.h index 72acee1..526a73b 100644 --- a/libspeex/_kiss_fft_guts.h +++ b/libspeex/_kiss_fft_guts.h @@ -20,6 +20,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND and defines typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ #include "kiss_fft.h" +#include "math_approx.h" #define MAXFACTORS 32 /* e.g. an fft of length 128 has 4 factors @@ -67,6 +68,10 @@ struct kiss_fft_state{ do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) +# define C_MUL4(m,a,b) \ + do{ (m).r = PSHR32( smul((a).r,(b).r) - smul((a).i,(b).i),17 ); \ + (m).i = PSHR32( smul((a).r,(b).i) + smul((a).i,(b).r),17 ); }while(0) + # define DIVSCALAR(x,k) \ (x) = sround( smul( x, SAMP_MAX/k ) ) @@ -84,6 +89,9 @@ struct kiss_fft_state{ #define C_MUL(m,a,b) \ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) + +#define C_MUL4(m,a,b) C_MUL(m,a,b) + # define C_FIXDIV(c,div) /* NOOP */ # define C_MULBYSCALAR( c, s ) \ do{ (c).r *= (s);\ @@ -140,6 +148,11 @@ struct kiss_fft_state{ (x)->r = KISS_FFT_COS(phase);\ (x)->i = KISS_FFT_SIN(phase);\ }while(0) +#define kf_cexp2(x,phase) \ + do{ \ + (x)->r = spx_cos_norm((phase));\ + (x)->i = spx_cos_norm((phase)-32768);\ +}while(0) /* a debugging function */ diff --git a/libspeex/arch.h b/libspeex/arch.h index 0500437..e2d731a 100644 --- a/libspeex/arch.h +++ b/libspeex/arch.h @@ -35,12 +35,16 @@ #ifndef ARCH_H #define ARCH_H +#ifndef OUTSIDE_SPEEX #include "speex/speex_types.h" +#endif #define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. 
*/ +#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #ifdef FIXED_POINT @@ -68,6 +72,7 @@ typedef spx_word32_t spx_sig_t; #define VERY_SMALL 0 #define VERY_LARGE32 ((spx_word32_t)2147483647) #define VERY_LARGE16 ((spx_word16_t)32767) +#define Q15_ONE ((spx_word16_t)32767) #ifdef FIXED_DEBUG @@ -113,6 +118,7 @@ typedef float spx_word32_t; #define VERY_SMALL 1e-15f #define VERY_LARGE32 1e15f #define VERY_LARGE16 1e15f +#define Q15_ONE ((spx_word16_t)1.f) #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -127,6 +133,7 @@ typedef float spx_word32_t; #define SHL32(a,shift) (a) #define PSHR16(a,shift) (a) #define PSHR32(a,shift) (a) +#define VSHR32(a,shift) (a) #define SATURATE16(x,a) (x) #define SATURATE32(x,a) (x) @@ -147,6 +154,7 @@ typedef float spx_word32_t; #define MULT16_32_Q13(a,b) ((a)*(b)) #define MULT16_32_Q14(a,b) ((a)*(b)) #define MULT16_32_Q15(a,b) ((a)*(b)) +#define MULT16_32_P15(a,b) ((a)*(b)) #define MAC16_32_Q11(c,a,b) ((c)+(a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) diff --git a/libspeex/bits.c b/libspeex/bits.c index 376e804..5c4cb0e 100644 --- a/libspeex/bits.c +++ b/libspeex/bits.c @@ -76,6 +76,7 @@ void speex_bits_destroy(SpeexBits *bits) void speex_bits_reset(SpeexBits *bits) { + /* We only need to clear the first byte now */ bits->chars[0]=0; bits->nbBits=0; bits->charPtr=0; @@ -96,7 +97,7 @@ void speex_bits_read_from(SpeexBits *bits, char *chars, int len) int nchars = len / BYTES_PER_CHAR; if (nchars > bits->buf_size) { - speex_warning_int("Packet is larger than allocated buffer: ", len); + speex_notify("Packet is larger than allocated buffer"); if (bits->owner) { char *tmp = (char*)speex_realloc(bits->chars, 
nchars); @@ -109,7 +110,7 @@ void speex_bits_read_from(SpeexBits *bits, char *chars, int len) speex_warning("Could not resize input buffer: truncating input"); } } else { - speex_warning("Do not own input buffer: truncating input"); + speex_warning("Do not own input buffer: truncating oversize input"); nchars=bits->buf_size; } } @@ -158,10 +159,10 @@ void speex_bits_read_whole_bytes(SpeexBits *bits, char *chars, int nbytes) bits->chars=tmp; } else { nchars=bits->buf_size-(bits->nbBits>>LOG2_BITS_PER_CHAR)-1; - speex_warning("Could not resize input buffer: truncating input"); + speex_warning("Could not resize input buffer: truncating oversize input"); } } else { - speex_warning("Do not own input buffer: truncating input"); + speex_warning("Do not own input buffer: truncating oversize input"); nchars=bits->buf_size; } } @@ -222,14 +223,13 @@ void speex_bits_pack(SpeexBits *bits, int data, int nbBits) if (bits->charPtr+((nbBits+bits->bitPtr)>>LOG2_BITS_PER_CHAR) >= bits->buf_size) { - speex_warning("Buffer too small to pack bits"); + speex_notify("Buffer too small to pack bits"); if (bits->owner) { - int new_nchars = ((bits->buf_size+5)*3)>>1; + int new_nchars = ((bits->buf_size+5)*3)>>1; char *tmp = (char*)speex_realloc(bits->chars, new_nchars); if (tmp) { - speex_memset_bytes(tmp, 0, new_nchars); bits->buf_size=new_nchars; bits->chars=tmp; } else { diff --git a/libspeex/cb_search.c b/libspeex/cb_search.c index 5c68826..cab2b71 100644 --- a/libspeex/cb_search.c +++ b/libspeex/cb_search.c @@ -181,7 +181,7 @@ int update_target t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); #ifdef FIXED_POINT - if (sign) + if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); @@ -226,11 +226,13 @@ int update_target /* Update target: only update target if necessary */ if (update_target) { - VARDECL(spx_sig_t *r2); - ALLOC(r2, nsf, spx_sig_t); - syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); + 
VARDECL(spx_word16_t *r2); + ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) - target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8))); + r2[j] = EXTRACT16(PSHR32(e[j] ,6)); + syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); + for (j=0;j<nsf;j++) + target[j]=SUB16(target[j],PSHR16(r2[j],2)); } } @@ -263,7 +265,6 @@ int update_target #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); - VARDECL(spx_sig_t *r2); VARDECL(spx_word16_t *tmp); VARDECL(spx_word32_t *ndist); VARDECL(spx_word32_t *odist); @@ -316,7 +317,6 @@ int update_target #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); - ALLOC(r2, nsf, spx_sig_t); ALLOC(ind, nb_subvect, int); ALLOC(tmp, 2*N*nsf, spx_word16_t); @@ -495,9 +495,13 @@ int update_target /* Update target: only update target if necessary */ if (update_target) { - syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); + VARDECL(spx_word16_t *r2); + ALLOC(r2, nsf, spx_word16_t); + for (j=0;j<nsf;j++) + r2[j] = EXTRACT16(PSHR32(e[j] ,6)); + syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) - target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8))); + target[j]=SUB16(target[j],PSHR16(r2[j],2)); } } @@ -577,14 +581,12 @@ int update_target ) { int i; - VARDECL(spx_sig_t *tmp); - ALLOC(tmp, nsf, spx_sig_t); - for (i=0;i<nsf;i++) - tmp[i]=PSHR32(EXTEND32(target[i]),SIG_SHIFT); - residue_percep_zero(tmp, ak, awk1, awk2, tmp, nsf, p, stack); + VARDECL(spx_word16_t *tmp); + ALLOC(tmp, nsf, spx_word16_t); + residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack); for (i=0;i<nsf;i++) - exc[i]+=tmp[i]; + exc[i]+=SHL32(EXTEND32(tmp[i]),8); for (i=0;i<nsf;i++) target[i]=0; } diff --git a/libspeex/cb_search_bfin.h b/libspeex/cb_search_bfin.h index 52cc4b3..ae9cf83 100644 --- a/libspeex/cb_search_bfin.h +++ b/libspeex/cb_search_bfin.h @@ -73,7 +73,10 @@ void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t * : : "m" (subvect_size), "m" (shape_cb), "m" (r), "m" (resp), "m" (E) : 
"A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0", - "L1", "A0", "A1", "memory", "LC0", "LC1" + "L1", "A0", "A1", "memory" +#if !(__GNUC__ == 3) + , "LC0", "LC1" /* gcc 3.4 doesn't know about LC registers */ +#endif ); shape_cb += subvect_size; resp += subvect_size; diff --git a/libspeex/echo_diagnostic.m b/libspeex/echo_diagnostic.m new file mode 100644 index 0000000..aebf390 --- /dev/null +++ b/libspeex/echo_diagnostic.m @@ -0,0 +1,72 @@ +% Attempts to diagnose AEC problems from recorded samples +% +% out = echo_diagnostic(rec_file, play_file, out_file, tail_length) +% +% Computes the full matrix inversion to cancel echo from the +% recording 'rec_file' using the far end signal 'play_file' using +% a filter length of 'tail_length'. The output is saved to 'out_file'. +function out = echo_diagnostic(rec_file, play_file, out_file, tail_length) + +F=fopen(rec_file,'rb'); +rec=fread(F,Inf,'short'); +fclose (F); +F=fopen(play_file,'rb'); +play=fread(F,Inf,'short'); +fclose (F); + +rec = [rec; zeros(1024,1)]; +play = [play; zeros(1024,1)]; + +N = length(rec); +corr = real(ifft(fft(rec).*conj(fft(play)))); +acorr = real(ifft(fft(play).*conj(fft(play)))); + +[a,b] = max(corr); + +if b > N/2 + b = b-N; +end +printf ("Far end to near end delay is %d samples\n", b); +if (b > .3*tail_length) + printf ('This is too much delay, try delaying the far-end signal a bit\n'); +else if (b < 0) + printf ('You have a negative delay, the echo canceller has no chance to cancel anything!\n'); + else + printf ('Delay looks OK.\n'); + end + end +end +N2 = round(N/2); +corr1 = real(ifft(fft(rec(1:N2)).*conj(fft(play(1:N2))))); +corr2 = real(ifft(fft(rec(N2+1:end)).*conj(fft(play(N2+1:end))))); + +[a,b1] = max(corr1); +if b1 > N2/2 + b1 = b1-N2; +end +[a,b2] = max(corr2); +if b2 > N2/2 + b2 = b2-N2; +end +drift = (b1-b2)/N2; +printf ('Drift estimate is %f%% (%d samples)\n', 100*drift, b1-b2); +if abs(b1-b2) < 10 + printf ('A drift of a few (+-10) samples is normal.\n'); 
+else + if abs(b1-b2) < 30 + printf ('There may be (not sure) excessive clock drift. Is the capture and playback done on the same soundcard?\n'); + else + printf ('Your clock is drifting! No way the AEC will be able to do anything with that. Most likely, you''re doing capture and playback from two different cards.\n'); + end + end +end +acorr(1) = .001+1.00001*acorr(1); +AtA = toeplitz(acorr(1:tail_length)); +bb = corr(1:tail_length); +h = AtA\bb; + +out = (rec - filter(h, 1, play)); + +F=fopen(out_file,'w'); +fwrite(F,out,'short'); +fclose (F); diff --git a/libspeex/fftwrap.c b/libspeex/fftwrap.c index 43a9b18..35e2d05 100644 --- a/libspeex/fftwrap.c +++ b/libspeex/fftwrap.c @@ -64,7 +64,7 @@ static int maximize_range(spx_word16_t *in, spx_word16_t *out, spx_word16_t boun } for (i=0;i<len;i++) { - out[i] = in[i] << shift; + out[i] = SHL16(in[i], shift); } return shift; } @@ -74,7 +74,7 @@ static void renorm_range(spx_word16_t *in, spx_word16_t *out, int shift, int len int i; for (i=0;i<len;i++) { - out[i] = (in[i] + (1<<(shift-1))) >> shift; + out[i] = PSHR16(in[i], shift); } } #endif @@ -103,8 +103,8 @@ void spx_fft(void *table, float *in, float *out) if (in==out) { int i; - speex_warning("FFT should not be done in-place"); float scale = 1./((struct drft_lookup *)table)->n; + speex_warning("FFT should not be done in-place"); for (i=0;i<((struct drft_lookup *)table)->n;i++) out[i] = scale*in[i]; } else { @@ -120,7 +120,6 @@ void spx_ifft(void *table, float *in, float *out) { if (in==out) { - int i; speex_warning("FFT should not be done in-place"); } else { int i; @@ -138,7 +137,6 @@ void spx_ifft(void *table, float *in, float *out) struct kiss_config { kiss_fftr_cfg forward; kiss_fftr_cfg backward; - kiss_fft_cpx *freq_data; int N; }; @@ -146,7 +144,6 @@ void *spx_fft_init(int size) { struct kiss_config *table; table = (struct kiss_config*)speex_alloc(sizeof(struct kiss_config)); - table->freq_data = (kiss_fft_cpx*)speex_alloc(sizeof(kiss_fft_cpx)*((size>>1)+1)); 
table->forward = kiss_fftr_alloc(size,0,NULL,NULL); table->backward = kiss_fftr_alloc(size,1,NULL,NULL); table->N = size; @@ -158,7 +155,6 @@ void spx_fft_destroy(void *table) struct kiss_config *t = (struct kiss_config *)table; kiss_fftr_free(t->forward); kiss_fftr_free(t->backward); - speex_free(t->freq_data); speex_free(table); } @@ -166,18 +162,10 @@ void spx_fft_destroy(void *table) void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out) { - int i; int shift; struct kiss_config *t = (struct kiss_config *)table; shift = maximize_range(in, in, 32000, t->N); - kiss_fftr(t->forward, in, t->freq_data); - out[0] = t->freq_data[0].r; - for (i=1;i<t->N>>1;i++) - { - out[(i<<1)-1] = t->freq_data[i].r; - out[(i<<1)] = t->freq_data[i].i; - } - out[(i<<1)-1] = t->freq_data[i].r; + kiss_fftr2(t->forward, in, out); renorm_range(in, in, shift, t->N); renorm_range(out, out, shift, t->N); } @@ -190,32 +178,16 @@ void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out) float scale; struct kiss_config *t = (struct kiss_config *)table; scale = 1./t->N; - kiss_fftr(t->forward, in, t->freq_data); - out[0] = scale*t->freq_data[0].r; - for (i=1;i<t->N>>1;i++) - { - out[(i<<1)-1] = scale*t->freq_data[i].r; - out[(i<<1)] = scale*t->freq_data[i].i; - } - out[(i<<1)-1] = scale*t->freq_data[i].r; + kiss_fftr2(t->forward, in, out); + for (i=0;i<t->N;i++) + out[i] *= scale; } #endif void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out) { - int i; struct kiss_config *t = (struct kiss_config *)table; - t->freq_data[0].r = in[0]; - t->freq_data[0].i = 0; - for (i=1;i<t->N>>1;i++) - { - t->freq_data[i].r = in[(i<<1)-1]; - t->freq_data[i].i = in[(i<<1)]; - } - t->freq_data[i].r = in[(i<<1)-1]; - t->freq_data[i].i = 0; - - kiss_fftri(t->backward, t->freq_data, out); + kiss_fftri2(t->backward, in, out); } diff --git a/libspeex/filterbank.c b/libspeex/filterbank.c new file mode 100644 index 0000000..187d5ee --- /dev/null +++ b/libspeex/filterbank.c @@ -0,0 +1,226 @@ +/* 
Copyright (C) 2006 Jean-Marc Valin */ +/** + @file filterbank.c + @brief Converting between psd and filterbank + */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "filterbank.h" +#include "misc.h" +#include <math.h> +#include "math_approx.h" + +#ifdef FIXED_POINT + +#define toBARK(n) (MULT16_16(26829,spx_atan(SHR32(MULT16_16(97,n),2))) + MULT16_16(4588,spx_atan(MULT16_32_Q15(20,MULT16_16(n,n)))) + MULT16_16(3355,n)) + +#else +#define toBARK(n) (13.1f*atan(.00074f*(n))+2.24f*atan((n)*(n)*1.85e-8f)+1e-4f*(n)) +#endif + +#define toMEL(n) (2595.f*log10(1.f+(n)/700.f)) + +FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type) +{ + FilterBank *bank; + spx_word32_t df; + spx_word32_t max_mel, mel_interval; + int i; + int id1; + int id2; + df = DIV32(SHL32(sampling,15),MULT16_16(2,len)); + max_mel = toBARK(EXTRACT16(MULT16_16_Q15(QCONST16(.5f,15),sampling))); + mel_interval = PDIV32(max_mel,banks-1); + + bank = (FilterBank*)speex_alloc(sizeof(FilterBank)); + bank->nb_banks = banks; + bank->len = len; + bank->bank_left = (int*)speex_alloc(len*sizeof(int)); + bank->bank_right = (int*)speex_alloc(len*sizeof(int)); + bank->filter_left = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t)); + bank->filter_right = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t)); + /* Think I can safely disable normalisation that for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + bank->scaling = (float*)speex_alloc(banks*sizeof(float)); +#endif + for (i=0;i<len;i++) + { + spx_word16_t curr_freq; + spx_word32_t mel; + spx_word16_t val; + curr_freq = EXTRACT16(MULT16_32_P15(i,df)); + mel = toBARK(curr_freq); + if (mel > max_mel) + break; +#ifdef FIXED_POINT + id1 = DIV32(mel,mel_interval); +#else + id1 = (int)(floor(mel/mel_interval)); +#endif + if (id1>banks-2) + { + id1 = banks-2; + val = Q15_ONE; + } else { + val = DIV32_16(mel - id1*mel_interval,EXTRACT16(PSHR32(mel_interval,15))); + } + id2 = id1+1; + bank->bank_left[i] = id1; + bank->filter_left[i] = SUB16(Q15_ONE,val); + bank->bank_right[i] = id2; + bank->filter_right[i] = val; + } + 
+ /* Think I can safely disable normalisation for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + for (i=0;i<bank->nb_banks;i++) + bank->scaling[i] = 0; + for (i=0;i<bank->len;i++) + { + int id = bank->bank_left[i]; + bank->scaling[id] += bank->filter_left[i]; + id = bank->bank_right[i]; + bank->scaling[id] += bank->filter_right[i]; + } + for (i=0;i<bank->nb_banks;i++) + bank->scaling[i] = Q15_ONE/(bank->scaling[i]); +#endif + return bank; +} + +void filterbank_destroy(FilterBank *bank) +{ + speex_free(bank->bank_left); + speex_free(bank->bank_right); + speex_free(bank->filter_left); + speex_free(bank->filter_right); +#ifndef FIXED_POINT + speex_free(bank->scaling); +#endif + speex_free(bank); +} + +void filterbank_compute_bank32(FilterBank *bank, spx_word32_t *ps, spx_word32_t *mel) +{ + int i; + for (i=0;i<bank->nb_banks;i++) + mel[i] = 0; + + for (i=0;i<bank->len;i++) + { + int id; + id = bank->bank_left[i]; + mel[id] += MULT16_32_P15(bank->filter_left[i],ps[i]); + id = bank->bank_right[i]; + mel[id] += MULT16_32_P15(bank->filter_right[i],ps[i]); + } + /* Think I can safely disable normalisation that for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + /*for (i=0;i<bank->nb_banks;i++) + mel[i] = MULT16_32_P15(Q15(bank->scaling[i]),mel[i]); + */ +#endif +} + +void filterbank_compute_psd16(FilterBank *bank, spx_word16_t *mel, spx_word16_t *ps) +{ + int i; + for (i=0;i<bank->len;i++) + { + spx_word32_t tmp; + int id1, id2; + id1 = bank->bank_left[i]; + id2 = bank->bank_right[i]; + tmp = MULT16_16(mel[id1],bank->filter_left[i]); + tmp += MULT16_16(mel[id2],bank->filter_right[i]); + ps[i] = EXTRACT16(PSHR32(tmp,15)); + } +} + + +#ifndef FIXED_POINT +void filterbank_compute_bank(FilterBank *bank, float *ps, float *mel) +{ + int i; + for (i=0;i<bank->nb_banks;i++) + mel[i] = 0; + + for (i=0;i<bank->len;i++) + { + int id = bank->bank_left[i]; + mel[id] += bank->filter_left[i]*ps[i]; + id = bank->bank_right[i]; + mel[id] += 
bank->filter_right[i]*ps[i]; + } + for (i=0;i<bank->nb_banks;i++) + mel[i] *= bank->scaling[i]; +} + +void filterbank_compute_psd(FilterBank *bank, float *mel, float *ps) +{ + int i; + for (i=0;i<bank->len;i++) + { + int id = bank->bank_left[i]; + ps[i] = mel[id]*bank->filter_left[i]; + id = bank->bank_right[i]; + ps[i] += mel[id]*bank->filter_right[i]; + } +} + +void filterbank_psy_smooth(FilterBank *bank, float *ps, float *mask) +{ + /* Low freq slope: 14 dB/Bark*/ + /* High freq slope: 9 dB/Bark*/ + /* Noise vs tone: 5 dB difference */ + /* FIXME: Temporary kludge */ + float bark[100]; + int i; + /* Assumes 1/3 Bark resolution */ + float decay_low = 0.34145f; + float decay_high = 0.50119f; + filterbank_compute_bank(bank, ps, bark); + for (i=1;i<bank->nb_banks;i++) + { + /*float decay_high = 13-1.6*log10(bark[i-1]); + decay_high = pow(10,(-decay_high/30.f));*/ + bark[i] = bark[i] + decay_high*bark[i-1]; + } + for (i=bank->nb_banks-2;i>=0;i--) + { + bark[i] = bark[i] + decay_low*bark[i+1]; + } + filterbank_compute_psd(bank, bark, mask); +} + +#endif diff --git a/libspeex/filterbank.h b/libspeex/filterbank.h new file mode 100644 index 0000000..5ded6b9 --- /dev/null +++ b/libspeex/filterbank.h @@ -0,0 +1,66 @@ +/* Copyright (C) 2006 Jean-Marc Valin */ +/** + @file filterbank.h + @brief Converting between psd and filterbank + */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FILTERBANK_H +#define FILTERBANK_H + +#include "misc.h" + +typedef struct { + int *bank_left; + int *bank_right; + spx_word16_t *filter_left; + spx_word16_t *filter_right; +#ifndef FIXED_POINT + float *scaling; +#endif + int nb_banks; + int len; +} FilterBank; + + +FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type); + +void filterbank_destroy(FilterBank *bank); + +void filterbank_compute_bank32(FilterBank *bank, spx_word32_t *ps, spx_word32_t *mel); + +void filterbank_compute_psd16(FilterBank *bank, spx_word16_t *mel, spx_word16_t *psd); + +#ifndef FIXED_POINT +void filterbank_compute_bank(FilterBank *bank, float *psd, float *mel); +void filterbank_compute_psd(FilterBank *bank, float *mel, float *psd); +#endif + + +#endif diff --git a/libspeex/filters.c b/libspeex/filters.c index a1111ee..48b4753 100644 --- a/libspeex/filters.c +++ b/libspeex/filters.c @@ -62,6 +62,24 @@ void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, i } } +void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len) +{ + int i; + for (i=0;i<len;i++) + { + /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */ + if 
(!(vec[i]>=min_val && vec[i] <= max_val)) + { + if (vec[i] < min_val) + vec[i] = min_val; + else if (vec[i] > max_val) + vec[i] = max_val; + else /* Has to be NaN */ + vec[i] = 0; + } + } +} + void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem) { int i; @@ -83,8 +101,8 @@ void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_m spx_word16_t yi; spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]); yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767)); - mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), MULT16_32_Q14(-den[1],vout)); - mem[1] = ADD32(MULT16_16(num[2],x[i]), MULT16_32_Q14(-den[2],vout)); + mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1)); + mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1)); y[i] = yi; } } @@ -218,10 +236,10 @@ spx_word16_t compute_rms16(const spx_word16_t *x, int len) for (i=0;i<len;i+=4) { spx_word32_t sum2=0; - sum2 = MAC16_16(sum2,PSHR16(x[i],1),PSHR16(x[i],1)); - sum2 = MAC16_16(sum2,PSHR16(x[i+1],1),PSHR16(x[i+1],1)); - sum2 = MAC16_16(sum2,PSHR16(x[i+2],1),PSHR16(x[i+2],1)); - sum2 = MAC16_16(sum2,PSHR16(x[i+3],1),PSHR16(x[i+3],1)); + sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1)); + sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1)); + sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1)); + sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1)); sum = ADD32(sum,SHR32(sum2,6)); } return SHL16(spx_sqrt(DIV32(sum,len)),4); @@ -297,53 +315,6 @@ spx_word16_t compute_rms16(const spx_word16_t *x, int len) -#ifndef OVERRIDE_FILTER_MEM2 -#ifdef PRECISION16 -void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_word16_t xi,yi,nyi; - - for (i=0;i<N;i++) - { - xi= EXTRACT16(PSHR32(SATURATE(x[i],536870911),SIG_SHIFT)); - yi = EXTRACT16(PSHR32(SATURATE(ADD32(x[i], SHL32(mem[0],1)),536870911),SIG_SHIFT)); - nyi = NEG16(yi); - 
for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi); - } - mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi)); - y[i] = SHL32(EXTEND32(yi),SIG_SHIFT); - } -} -#else -void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_sig_t xi,yi,nyi; - - for (i=0;i<ord;i++) - mem[i] = SHR32(mem[i],1); - for (i=0;i<N;i++) - { - xi=SATURATE(x[i],805306368); - yi = SATURATE(ADD32(xi, SHL32(mem[0],2)),805306368); - nyi = NEG32(yi); - for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_32_Q15(MAC16_32_Q15(mem[j+1], num[j],xi), den[j],nyi); - } - mem[ord-1] = SUB32(MULT16_32_Q15(num[ord-1],xi), MULT16_32_Q15(den[ord-1],yi)); - y[i] = yi; - } - for (i=0;i<ord;i++) - mem[i] = SHL32(mem[i],1); -} -#endif -#endif - -#ifdef FIXED_POINT #ifndef OVERRIDE_FILTER_MEM16 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) { @@ -363,60 +334,7 @@ void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t } } #endif -#else -void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) -{ - filter_mem2(x, num, den, y, N, ord, mem); -} -#endif - - -#ifndef OVERRIDE_IIR_MEM2 -#ifdef PRECISION16 -void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_word16_t yi,nyi; - - for (i=0;i<N;i++) - { - yi = EXTRACT16(PSHR32(SATURATE(x[i] + SHL32(mem[0],1),536870911),SIG_SHIFT)); - nyi = NEG16(yi); - for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_16(mem[j+1],den[j],nyi); - } - mem[ord-1] = MULT16_16(den[ord-1],nyi); - y[i] = SHL32(EXTEND32(yi),SIG_SHIFT); - } -} -#else -void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_word32_t 
xi,yi,nyi; - - for (i=0;i<ord;i++) - mem[i] = SHR32(mem[i],1); - for (i=0;i<N;i++) - { - xi=SATURATE(x[i],805306368); - yi = SATURATE(xi + SHL32(mem[0],2),805306368); - nyi = NEG32(yi); - for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_32_Q15(mem[j+1],den[j],nyi); - } - mem[ord-1] = MULT16_32_Q15(den[ord-1],nyi); - y[i] = yi; - } - for (i=0;i<ord;i++) - mem[i] = SHL32(mem[i],1); -} -#endif -#endif -#ifdef FIXED_POINT #ifndef OVERRIDE_IIR_MEM16 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) { @@ -436,59 +354,7 @@ void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, in } } #endif -#else -void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) -{ - iir_mem2(x, den, y, N, ord, mem); -} -#endif - - -#ifndef OVERRIDE_FIR_MEM2 -#ifdef PRECISION16 -void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_word16_t xi,yi; - - for (i=0;i<N;i++) - { - xi= EXTRACT16(PSHR32(SATURATE(x[i],536870911),SIG_SHIFT)); - yi = EXTRACT16(PSHR32(SATURATE(x[i] + SHL32(mem[0],1),536870911),SIG_SHIFT)); - for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_16(mem[j+1], num[j],xi); - } - mem[ord-1] = MULT16_16(num[ord-1],xi); - y[i] = SHL32(EXTEND32(yi),SIG_SHIFT); - } -} -#else -void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_word32_t xi,yi; - - for (i=0;i<ord;i++) - mem[i] = SHR32(mem[i],1); - for (i=0;i<N;i++) - { - xi=SATURATE(x[i],805306368); - yi = xi + SHL32(mem[0],2); - for (j=0;j<ord-1;j++) - { - mem[j] = MAC16_32_Q15(mem[j+1], num[j],xi); - } - mem[ord-1] = MULT16_32_Q15(num[ord-1],xi); - y[i] = SATURATE(yi,805306368); - } - for (i=0;i<ord;i++) - mem[i] = SHL32(mem[i],1); -} -#endif -#endif -#ifdef FIXED_POINT #ifndef OVERRIDE_FIR_MEM16 void fir_mem16(const spx_word16_t *x, const spx_coef_t 
*num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) { @@ -508,44 +374,34 @@ void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, in } } #endif -#else -void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) -{ - fir_mem2(x, num, y, N, ord, mem); -} -#endif - - - - -void syn_percep_zero(const spx_sig_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack) +void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack) { int i; VARDECL(spx_mem_t *mem); ALLOC(mem, ord, spx_mem_t); for (i=0;i<ord;i++) - mem[i]=0; - iir_mem2(xx, ak, y, N, ord, mem); + mem[i]=0; + iir_mem16(xx, ak, y, N, ord, mem, stack); for (i=0;i<ord;i++) mem[i]=0; - filter_mem2(y, awk1, awk2, y, N, ord, mem); + filter_mem16(y, awk1, awk2, y, N, ord, mem, stack); } - -void residue_percep_zero(const spx_sig_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack) +void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack) { int i; VARDECL(spx_mem_t *mem); ALLOC(mem, ord, spx_mem_t); for (i=0;i<ord;i++) mem[i]=0; - filter_mem2(xx, ak, awk1, y, N, ord, mem); + filter_mem16(xx, ak, awk1, y, N, ord, mem, stack); for (i=0;i<ord;i++) - mem[i]=0; - fir_mem2(y, awk2, y, N, ord, mem); + mem[i]=0; + fir_mem16(y, awk2, y, N, ord, mem, stack); } + #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack) { @@ -581,7 +437,8 @@ void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, cons } #endif -void qmf_decomp(const spx_word16_t 
*xx, const spx_word16_t *aa, spx_sig_t *y1, spx_sig_t *y2, int N, int M, spx_word16_t *mem, char *stack) +/* Decomposes a signal into low-band and high-band using a QMF */ +void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack) { int i,j,k,M2; VARDECL(spx_word16_t *a); @@ -594,105 +451,139 @@ void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_sig_t *y1, s M2=M>>1; for (i=0;i<M;i++) a[M-i-1]= aa[i]; - for (i=0;i<M-1;i++) x[i]=mem[M-i-2]; for (i=0;i<N;i++) - x[i+M-1]=SATURATE(PSHR(xx[i],1),16383); + x[i+M-1]=SHR16(xx[i],1); + for (i=0;i<M-1;i++) + mem[i]=SHR16(xx[N-i-1],1); for (i=0,k=0;i<N;i+=2,k++) { - y1[k]=0; - y2[k]=0; + spx_word32_t y1k=0, y2k=0; for (j=0;j<M2;j++) { - y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j]))); - y2[k]=SUB32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j]))); + y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j]))); + y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j]))); j++; - y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j]))); - y2[k]=ADD32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j]))); + y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j]))); + y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j]))); } - y1[k] = SHR32(y1[k],1); - y2[k] = SHR32(y2[k],1); + y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767)); + y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767)); } - for (i=0;i<M-1;i++) - mem[i]=SATURATE(PSHR(xx[N-i-1],1),16383); } - -/* By segher */ -void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N, int M, spx_word32_t *mem, char *stack) +/* Re-synthesised a signal from the QMF low-band and high-band signals */ +void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) /* assumptions: all odd x[i] are zero -- well, actually they are left out of the array now N and M are multiples of 4 */ { int 
i, j; - VARDECL(spx_word16_t *xx); + int M2, N2; + VARDECL(spx_word16_t *xx1); + VARDECL(spx_word16_t *xx2); - ALLOC(xx, M+N-1, spx_word16_t); - - for (i = 0; i < N/2; i++) - xx[2*i] = PSHR32(x[N/2-1-i],SIG_SHIFT); - for (i = 0; i < M - 1; i += 2) - xx[N+i] = mem[i+1]; - - for (i = 0; i < N; i += 4) { + M2 = M>>1; + N2 = N>>1; + ALLOC(xx1, M2+N2, spx_word16_t); + ALLOC(xx2, M2+N2, spx_word16_t); + + for (i = 0; i < N2; i++) + xx1[i] = x1[N2-1-i]; + for (i = 0; i < M2; i++) + xx1[N2+i] = mem1[2*i+1]; + for (i = 0; i < N2; i++) + xx2[i] = x2[N2-1-i]; + for (i = 0; i < M2; i++) + xx2[N2+i] = mem2[2*i+1]; + + for (i = 0; i < N2; i += 2) { spx_sig_t y0, y1, y2, y3; - spx_word16_t x0; + spx_word16_t x10, x20; y0 = y1 = y2 = y3 = 0; - x0 = xx[N-4-i]; + x10 = xx1[N2-2-i]; + x20 = xx2[N2-2-i]; - for (j = 0; j < M; j += 4) { - spx_word16_t x1; + for (j = 0; j < M2; j += 2) { + spx_word16_t x11, x21; spx_word16_t a0, a1; - a0 = a[j]; - a1 = a[j+1]; - x1 = xx[N-2+j-i]; - - y0 = ADD32(y0,SHR(MULT16_16(a0, x1),2)); - y1 = ADD32(y1,SHR(MULT16_16(a1, x1),2)); - y2 = ADD32(y2,SHR(MULT16_16(a0, x0),2)); - y3 = ADD32(y3,SHR(MULT16_16(a1, x0),2)); + a0 = a[2*j]; + a1 = a[2*j+1]; + x11 = xx1[N2-1+j-i]; + x21 = xx2[N2-1+j-i]; - a0 = a[j+2]; - a1 = a[j+3]; - x0 = xx[N+j-i]; +#ifdef FIXED_POINT + /* We multiply twice by the same coef to avoid overflows */ + y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21); + y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21); + y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20); + y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20); +#else + y0 = ADD32(y0,MULT16_16(a0, x11-x21)); + y1 = ADD32(y1,MULT16_16(a1, x11+x21)); + y2 = ADD32(y2,MULT16_16(a0, x10-x20)); + y3 = ADD32(y3,MULT16_16(a1, x10+x20)); +#endif + a0 = a[2*j+2]; + a1 = a[2*j+3]; + x10 = xx1[N2+j-i]; + x20 = xx2[N2+j-i]; - y0 = ADD32(y0,SHR(MULT16_16(a0, x0),2)); - y1 = ADD32(y1,SHR(MULT16_16(a1, x0),2)); - y2 = ADD32(y2,SHR(MULT16_16(a0, x1),2)); - y3 = ADD32(y3,SHR(MULT16_16(a1, x1),2)); +#ifdef 
FIXED_POINT + /* We multiply twice by the same coef to avoid overflows */ + y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20); + y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20); + y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21); + y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21); +#else + y0 = ADD32(y0,MULT16_16(a0, x10-x20)); + y1 = ADD32(y1,MULT16_16(a1, x10+x20)); + y2 = ADD32(y2,MULT16_16(a0, x11-x21)); + y3 = ADD32(y3,MULT16_16(a1, x11+x21)); +#endif } - y[i] = y0; - y[i+1] = y1; - y[i+2] = y2; - y[i+3] = y3; +#ifdef FIXED_POINT + y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767)); + y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767)); + y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767)); + y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767)); +#else + /* Normalize up explicitly if we're in float */ + y[2*i] = 2.f*y0; + y[2*i+1] = 2.f*y1; + y[2*i+2] = 2.f*y2; + y[2*i+3] = 2.f*y3; +#endif } - for (i = 0; i < M - 1; i += 2) - mem[i+1] = xx[i]; + for (i = 0; i < M2; i++) + mem1[2*i+1] = xx1[i]; + for (i = 0; i < M2; i++) + mem2[2*i+1] = xx2[i]; } #ifdef FIXED_POINT #if 0 -spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043}, +const spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043}, {-98, 1133, -4425, 29179, 8895, -2328, 444}, {444, -2328, 8895, 29179, -4425, 1133, -98}}; #else -spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540}, +const spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540}, {-1064, 2817, -6694, 31589, 6837, -990, -209}, {-209, -990, 6837, 31589, -6694, 2817, -1064}}; #endif #else #if 0 -float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02}, +const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02}, {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403}, {0.0135403, -0.0710304, 
0.2714479, 0.8904793, -0.1350474, 0.0345613, -0.0029937}}; #else -float shift_filt[3][7] = {{-0.011915, 0.046995, -0.152373, 0.614108, 0.614108, -0.152373, 0.046995}, - {-0.0324855, 0.0859768, -0.2042986, 0.9640297, 0.2086420, -0.0302054, -0.0063646}, - {-0.0063646, -0.0302054, 0.2086420, 0.9640297, -0.2042986, 0.0859768, -0.0324855}}; +const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f}, + {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f}, + {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}}; #endif #endif @@ -784,7 +675,9 @@ char *stack spx_word16_t g1, g2; spx_word16_t ngain; spx_word16_t gg1, gg2; - +#ifdef FIXED_POINT + int scaledown=0; +#endif #if 0 /* Set to 1 to enable full pitch search */ int nol_pitch[6]; spx_word16_t nol_pitch_coef[6]; @@ -819,6 +712,23 @@ char *stack else interp_pitch(exc, iexc+nsf, -corr_pitch, 80); +#ifdef FIXED_POINT + for (i=0;i<nsf;i++) + { + if (ABS16(exc[i])>16383) + { + scaledown = 1; + break; + } + } + if (scaledown) + { + for (i=0;i<nsf;i++) + exc[i] = SHR16(exc[i],1); + for (i=0;i<2*nsf;i++) + iexc[i] = SHR16(iexc[i],1); + } +#endif /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/ /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/ @@ -898,5 +808,14 @@ char *stack for (i=0;i<nsf;i++) new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]); +#ifdef FIXED_POINT + if (scaledown) + { + for (i=0;i<nsf;i++) + exc[i] = SHL16(exc[i],1); + for (i=0;i<nsf;i++) + new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1); + } +#endif } diff --git a/libspeex/filters.h b/libspeex/filters.h index b29aa21..b363a9a 100644 --- a/libspeex/filters.h +++ b/libspeex/filters.h @@ -58,13 +58,8 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem); -void qmf_decomp(const spx_word16_t *xx, const 
spx_word16_t *aa, spx_sig_t *, spx_sig_t *y2, int N, int M, spx_word16_t *mem, char *stack); -void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N, int M, spx_word32_t *mem, char *stack); - - -void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem); -void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem); -void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem); +void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack); +void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack); void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack); void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack); @@ -72,12 +67,11 @@ void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, in /* Apply bandwidth expansion on LPC coef */ void bw_lpc(spx_word16_t , const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order); +void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len); - -void syn_percep_zero(const spx_sig_t *x, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack); - -void residue_percep_zero(const spx_sig_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack); +void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack); +void residue_percep_zero16(const 
spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack); void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack); diff --git a/libspeex/filters_arm4.h b/libspeex/filters_arm4.h index ac4d7a9..9138610 100644 --- a/libspeex/filters_arm4.h +++ b/libspeex/filters_arm4.h @@ -95,295 +95,3 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len) return sig_shift; } -#define OVERRIDE_FILTER_MEM2 -void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_sig_t xi,yi,nyi; - - for (i=0;i<ord;i++) - mem[i] = SHR32(mem[i],1); - for (i=0;i<N;i++) - { - int deadm, deadn, deadd, deadidx, x1, y1, dead1, dead2, dead3, dead4, dead5, dead6; - xi=SATURATE(x[i],805306368); - yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368); - nyi = -yi; - y[i] = yi; - __asm__ __volatile__ ( - "\tldrsh %6, [%1], #2\n" - "\tsmull %8, %9, %4, %6\n" -#ifdef SHORTCUTS - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd 
%10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tadd %10, %10, %11, lsl #17\n" - 
"\tstr %10, [%0], #4 \n" - - -#else - ".filterloop%=: \n" - "\tldrsh %6, [%2], #2\n" - "\tldr %10, [%0, #4]\n" - "\tmov %8, %8, lsr #15\n" - "\tsmull %7, %11, %5, %6\n" - "\tadd %8, %8, %9, lsl #17\n" - "\tldrsh %6, [%1], #2\n" - "\tadd %10, %10, %8\n" - "\tsmull %8, %9, %4, %6\n" - "\tadd %10, %10, %7, lsr #15\n" - "\tsubs %3, %3, #1\n" - "\tadd %10, %10, %11, lsl #17\n" - "\tstr %10, [%0], #4 \n" - "\t bne .filterloop%=\n" -#endif - "\tmov %8, %8, lsr #15\n" - "\tadd %10, %8, %9, lsl #17\n" - "\tldrsh %6, [%2], #2\n" - "\tsmull %8, %9, %5, %6\n" - "\tadd %10, %10, %8, lsr #15\n" - "\tadd %10, %10, %9, lsl #17\n" - "\tstr %10, [%0], #4 \n" - - : "=r" (deadm), "=r" (deadn), "=r" (deadd), "=r" (deadidx), - "=r" (xi), "=r" (nyi), "=r" (dead1), "=r" (dead2), - "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6) - : "0" (mem), "1" (num), "2" (den), "3" (ord-1), "4" (xi), "5" (nyi) - : "cc", "memory"); - - } - for (i=0;i<ord;i++) - mem[i] = SHL32(mem[i],1); -} - -#define OVERRIDE_IIR_MEM2 -void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i,j; - spx_sig_t xi,yi,nyi; - - for (i=0;i<ord;i++) - mem[i] = SHR32(mem[i],1); - - for (i=0;i<N;i++) - { - int deadm, deadd, deadidx, dead1, dead2, dead3, dead4, dead5, dead6; - xi=SATURATE(x[i],805306368); - yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368); - nyi = -yi; - y[i] = yi; - __asm__ __volatile__ ( - "\tldrsh %4, [%1], #2\n" - "\tsmull %5, %6, %3, %4\n" - -#ifdef SHORTCUTS - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %7, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %9, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %9, %9, %8\n" - "\tstr %9, [%0], #4 \n" - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %7, [%0, #4]\n" - "\tadd %8, %5, %6, lsl 
#17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %9, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %9, %9, %8\n" - "\tstr %9, [%0], #4 \n" - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %7, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %9, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %9, %9, %8\n" - "\tstr %9, [%0], #4 \n" - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %7, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %9, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %9, %9, %8\n" - "\tstr %9, [%0], #4 \n" - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tldr %7, [%0, #4]\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - - - -#else - ".iirloop%=: \n" - "\tldr %7, [%0, #4]\n" - - "\tldrsh %4, [%1], #2\n" - "\tmov %5, %5, lsr #15\n" - "\tadd %8, %5, %6, lsl #17\n" - "\tsmull %5, %6, %3, %4\n" - "\tadd %7, %7, %8\n" - "\tstr %7, [%0], #4 \n" - "\tsubs %2, %2, #1\n" - "\t bne .iirloop%=\n" - -#endif - "\tmov %5, %5, lsr #15\n" - "\tadd %7, %5, %6, lsl #17\n" - "\tstr %7, [%0], #4 \n" - - : "=r" (deadm), "=r" (deadd), "=r" (deadidx), "=r" (nyi), - "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), - "=r" (dead5), "=r" (dead6) - : "0" (mem), "1" (den), "2" (ord-1), "3" (nyi) - : "cc", "memory"); - - } - for (i=0;i<ord;i++) - mem[i] = SHL32(mem[i],1); - -} diff --git a/libspeex/filters_bfin.h b/libspeex/filters_bfin.h index 2180ed4..1e433ee 100644 --- 
a/libspeex/filters_bfin.h +++ b/libspeex/filters_bfin.h @@ -79,143 +79,6 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le return sig_shift; } -#define OVERRIDE_FILTER_MEM2 -void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem) -{ - spx_word32_t xy2[N+1]; - spx_word32_t *xy = xy2+1; - spx_word32_t numden_a[2*ord+2]; - spx_word16_t *numden = (spx_word16_t*) numden_a; - int i; - for (i=0;i<ord;i++) - { - numden[2*i] = num[i]; - numden[2*i+1] = den[i]; - } - __asm__ __volatile__ - ( - /* Register setup */ - "R0 = %5;\n\t" /*ord */ - - "P0 = %3;\n\t" - "I0 = P0;\n\t" - "B0 = P0;\n\t" /* numden */ - "L0 = 0;\n\t" - - "P2 = %0;\n\t" /* Fused xy */ - "I2 = P2;\n\t" - "L2 = 0;\n\t" - - "P4 = %6;\n\t" /* mem */ - "P0 = %1;\n\t" /* _x */ - "P1 = %2;\n\t" /* _y */ - - /* First sample */ - "R1 = [P4++];\n\t" - "R1 <<= 1;\n\t" /* shift mem */ - "R2 = [P0++];\n\t" /* load x[0] */ - "R1 = R1 + R2;\n\t" - "[P1++] = R1;\n\t" /* store y[0] */ - "R1 <<= 2;\n\t" - "R2 <<= 2;\n\t" - "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */ - "[P2] = R2;\n\t" - - /* Samples 1 to ord-1 (using memory) */ - "R0 += -1;\n\t" - "R3 = 0;\n\t" - "LC0 = R0;\n\t" - "LOOP filter_start%= LC0;\n\t" - "LOOP_BEGIN filter_start%=;\n\t" - "R3 += 1;\n\t" - "LC1 = R3;\n\t" - - "R1 = [P4++];\n\t" - "A1 = R1;\n\t" - "A0 = 0;\n\t" - "I0 = B0;\n\t" - "I2 = P2;\n\t" - "P2 += 4;\n\t" - "R4 = [I0++] || R5 = [I2--];\n\t" - "LOOP filter_start_inner%= LC1;\n\t" - "LOOP_BEGIN filter_start_inner%=;\n\t" - "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" - "LOOP_END filter_start_inner%=;\n\t" - "A0 += A1;\n\t" - "R4 = A0;\n\t" - "R4 <<= 1;\n\t" /* shift mem */ - "R2 = [P0++];\n\t" /* load x */ - "R4 = R4 + R2;\n\t" - "[P1++] = R4;\n\t" /* store y */ - "R4 <<= 2;\n\t" - "R2 <<= 2;\n\t" - "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */ - "[P2] = R2;\n\t" - - "LOOP_END 
filter_start%=;\n\t" - - /* Samples ord to N*/ - "R0 = %5;\n\t" - "R0 <<= 1;\n\t" - "I0 = B0;\n\t" /* numden */ - "R0 <<= 1;\n\t" - "L0 = R0;\n\t" - - "R0 = %5;\n\t" /* org */ - "R2 = %4;\n\t" /* N */ - "R2 = R2 - R0;\n\t" - "R4 = [I0++];\n\t" /* numden */ - "LC0 = R2;\n\t" - "P3 = R0;\n\t" - "R0 <<= 2;\n\t" - "R0 += 8;\n\t" - "I2 = P2;\n\t" - "M0 = R0;\n\t" - "A1 = A0 = 0;\n\t" - "R5 = [I2--];\n\t" /* load xy */ - "LOOP filter_mid%= LC0;\n\t" - "LOOP_BEGIN filter_mid%=;\n\t" - "LOOP filter_mid_inner%= LC1=P3;\n\t" - "LOOP_BEGIN filter_mid_inner%=;\n\t" - "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" - "LOOP_END filter_mid_inner%=;\n\t" - "R0 = (A0 += A1) || I2 += M0;\n\t" - "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */ - "R0 = R0 + R5;\n\t" - "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */ - "R5 = R5 << 2;\n\t" - "R5 = PACK(R0.H, R5.H);\n\t" - "A1 = A0 = 0 || [I2--] = R5\n\t" - "LOOP_END filter_mid%=;\n\t" - "I2 += 4;\n\t" - "P2 = I2;\n\t" - /* Update memory */ - "P4 = %6;\n\t" - "R0 = %5;\n\t" - "LC0 = R0;\n\t" - "P0 = B0;\n\t" - "A1 = A0 = 0;\n\t" - "LOOP mem_update%= LC0;\n\t" - "LOOP_BEGIN mem_update%=;\n\t" - "I2 = P2;\n\t" - "I0 = P0;\n\t" - "P0 += 4;\n\t" - "R0 = LC0;\n\t" - "LC1 = R0;\n\t" - "R5 = [I2--] || R4 = [I0++];\n\t" - "LOOP mem_accum%= LC1;\n\t" - "LOOP_BEGIN mem_accum%=;\n\t" - "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" - "LOOP_END mem_accum%=;\n\t" - "R0 = (A0 += A1);\n\t" - "A1 = A0 = 0 || [P4++] = R0;\n\t" - "LOOP_END mem_update%=;\n\t" - "L0 = 0;\n\t" - : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem) - : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory" - ); - -} #define OVERRIDE_FILTER_MEM16 @@ -363,130 +226,6 @@ void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_ - -#define OVERRIDE_IIR_MEM2 -void iir_mem2(const spx_sig_t *_x, const 
spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem) -{ - spx_word16_t y[N+2]; - spx_word16_t *yy; - yy = y+2; - __asm__ __volatile__ - ( - /* Register setup */ - "R0 = %5;\n\t" /*ord */ - - "P1 = %3;\n\t" - "I1 = P1;\n\t" - "B1 = P1;\n\t" - "L1 = 0;\n\t" - - "P3 = %0;\n\t" - "I3 = P3;\n\t" - "L3 = 0;\n\t" - - "P4 = %6;\n\t" - "P0 = %1;\n\t" - "P1 = %2;\n\t" - - /* First sample */ - "R1 = [P4++];\n\t" - "R1 <<= 1;\n\t" - "R2 = [P0++];\n\t" - "R1 = R1 + R2;\n\t" - "[P1++] = R1;\n\t" - "R1 <<= 2;\n\t" - "W[P3] = R1.H;\n\t" - "R2 <<= 2;\n\t" - - /* Samples 1 to ord-1 (using memory) */ - "R0 += -1;\n\t" - "R3 = 0;\n\t" - "LC0 = R0;\n\t" - "LOOP filter_start%= LC0;\n\t" - "LOOP_BEGIN filter_start%=;\n\t" - "R3 += 1;\n\t" - "LC1 = R3;\n\t" - - "R1 = [P4++];\n\t" - "A1 = R1;\n\t" - "I1 = B1;\n\t" - "I3 = P3;\n\t" - "P3 += 2;\n\t" - "LOOP filter_start_inner%= LC1;\n\t" - "LOOP_BEGIN filter_start_inner%=;\n\t" - "R4.L = W[I1++];\n\t" - "R5.L = W[I3--];\n\t" - "A1 -= R4.L*R5.L (IS);\n\t" - "LOOP_END filter_start_inner%=;\n\t" - - "R1 = A1;\n\t" - "R1 <<= 1;\n\t" - "R2 = [P0++];\n\t" - "R1 = R1 + R2;\n\t" - "[P1++] = R1;\n\t" - "R1 <<= 2;\n\t" - "W[P3] = R1.H;\n\t" - "R2 <<= 2;\n\t" - "LOOP_END filter_start%=;\n\t" - - /* Samples ord to N*/ - "R0 = %5;\n\t" - "R0 <<= 1;\n\t" - "I1 = B1;\n\t" - "L1 = R0;\n\t" - - "R0 = %5;\n\t" - "R2 = %4;\n\t" - "R2 = R2 - R0;\n\t" - "R4.L = W[I1++];\n\t" - "LC0 = R2;\n\t" - "LOOP filter_mid%= LC0;\n\t" - "LOOP_BEGIN filter_mid%=;\n\t" - "LC1 = R0;\n\t" - "A1 = 0;\n\t" - "I3 = P3;\n\t" - "P3 += 2;\n\t" - "R5.L = W[I3--];\n\t" - "LOOP filter_mid_inner%= LC1;\n\t" - "LOOP_BEGIN filter_mid_inner%=;\n\t" - "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" - "LOOP_END filter_mid_inner%=;\n\t" - "R1 = A1;\n\t" - "R1 = R1 << 1 || R2 = [P0++];\n\t" - "R1 = R1 + R2;\n\t" - "R1 = R1 << 2 || [P1++] = R1;\n\t" - "W[P3] = R1.H;\n\t" - "LOOP_END filter_mid%=;\n\t" - - /* Update memory */ - "P4 = %6;\n\t" - "R0 = %5;\n\t" - "LC0 = 
R0;\n\t" - "P1 = B1;\n\t" - "LOOP mem_update%= LC0;\n\t" - "LOOP_BEGIN mem_update%=;\n\t" - "A0 = 0;\n\t" - "I3 = P3;\n\t" - "I1 = P1;\n\t" - "P1 += 2;\n\t" - "R0 = LC0;\n\t" - "LC1=R0;\n\t" - "R5.L = W[I3--] || R4.L = W[I1++];\n\t" - "LOOP mem_accum%= LC1;\n\t" - "LOOP_BEGIN mem_accum%=;\n\t" - "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" - "LOOP_END mem_accum%=;\n\t" - "R0 = A0;\n\t" - "[P4++] = R0;\n\t" - "LOOP_END mem_update%=;\n\t" - "L1 = 0;\n\t" - : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem) - : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory" - ); - -} - - #define OVERRIDE_IIR_MEM16 void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) { @@ -612,18 +351,6 @@ void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, } -#define OVERRIDE_FIR_MEM2 -void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem) -{ - int i; - spx_coef_t den2[12]; - spx_coef_t *den; - den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc); - for (i=0;i<10;i++) - den[i] = 0; - filter_mem2(x, num, den, y, N, ord, mem); -} - #define OVERRIDE_FIR_MEM16 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) { diff --git a/libspeex/filters_sse.h b/libspeex/filters_sse.h index 2f03747..4bb333d 100644 --- a/libspeex/filters_sse.h +++ b/libspeex/filters_sse.h @@ -34,7 +34,7 @@ #include <xmmintrin.h> -void filter_mem2_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +void filter_mem16_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) { __m128 num[3], den[3], mem[3]; @@ -87,7 +87,7 @@ void filter_mem2_10(const float *x, const float *_num, const float *_den, float _mm_store_ss(_mem+9, mem[2]); } 
-void filter_mem2_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +void filter_mem16_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) { __m128 num[2], den[2], mem[2]; @@ -130,18 +130,18 @@ void filter_mem2_8(const float *x, const float *_num, const float *_den, float * } -#define OVERRIDE_FILTER_MEM2 -void filter_mem2(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +#define OVERRIDE_FILTER_MEM16 +void filter_mem16(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - filter_mem2_10(x, _num, _den, y, N, ord, _mem); + filter_mem16_10(x, _num, _den, y, N, ord, _mem); else if (ord==8) - filter_mem2_8(x, _num, _den, y, N, ord, _mem); + filter_mem16_8(x, _num, _den, y, N, ord, _mem); } -void iir_mem2_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +void iir_mem16_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem) { __m128 den[3], mem[3]; @@ -190,7 +190,7 @@ void iir_mem2_10(const float *x, const float *_den, float *y, int N, int ord, fl } -void iir_mem2_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +void iir_mem16_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem) { __m128 den[2], mem[2]; @@ -229,17 +229,17 @@ void iir_mem2_8(const float *x, const float *_den, float *y, int N, int ord, flo _mm_storeu_ps(_mem+4, mem[1]); } -#define OVERRIDE_IIR_MEM2 -void iir_mem2(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +#define OVERRIDE_IIR_MEM16 +void iir_mem16(const float *x, const float *_den, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - iir_mem2_10(x, _den, y, N, ord, _mem); + iir_mem16_10(x, _den, y, N, ord, _mem); else if (ord==8) - iir_mem2_8(x, _den, y, N, ord, _mem); + iir_mem16_8(x, _den, y, N, ord, _mem); } 
-void fir_mem2_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +void fir_mem16_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem) { __m128 num[3], mem[3]; @@ -287,7 +287,7 @@ void fir_mem2_10(const float *x, const float *_num, float *y, int N, int ord, fl _mm_store_ss(_mem+9, mem[2]); } -void fir_mem2_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +void fir_mem16_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem) { __m128 num[2], mem[2]; @@ -326,11 +326,11 @@ void fir_mem2_8(const float *x, const float *_num, float *y, int N, int ord, flo _mm_storeu_ps(_mem+4, mem[1]); } -#define OVERRIDE_FIR_MEM2 -void fir_mem2(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +#define OVERRIDE_FIR_MEM16 +void fir_mem16(const float *x, const float *_num, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - fir_mem2_10(x, _num, y, N, ord, _mem); + fir_mem16_10(x, _num, y, N, ord, _mem); else if (ord==8) - fir_mem2_8(x, _num, y, N, ord, _mem); + fir_mem16_8(x, _num, y, N, ord, _mem); } diff --git a/libspeex/fixed_debug.h b/libspeex/fixed_debug.h index 65c5712..d5c449f 100644 --- a/libspeex/fixed_debug.h +++ b/libspeex/fixed_debug.h @@ -74,53 +74,57 @@ static inline int NEG32(long long x) return res; } -static inline short EXTRACT16(int x) +#define EXTRACT16(x) _EXTRACT16(x, __FILE__, __LINE__) +static inline short _EXTRACT16(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { - fprintf (stderr, "EXTRACT16: input is not short: %d\n", x); + fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); } res = x; spx_mips++; return res; } -static inline int EXTEND32(int x) +#define EXTEND32(x) _EXTEND32(x, __FILE__, __LINE__) +static inline int _EXTEND32(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { - fprintf (stderr, "EXTRACT16: input is not short: %d\n", x); + fprintf (stderr, 
"EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); } res = x; spx_mips++; return res; } -static inline short SHR16(int a, int shift) +#define SHR16(a, shift) _SHR16(a, shift, __FILE__, __LINE__) +static inline short _SHR16(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR16: inputs are not short: %d %d\n", a, shift); + fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); } res = a>>shift; if (!VERIFY_SHORT(res)) - fprintf (stderr, "SHR16: output is not short: %d\n", res); + fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); spx_mips++; return res; } -static inline short SHL16(int a, int shift) +#define SHL16(a, shift) _SHL16(a, shift, __FILE__, __LINE__) +static inline short _SHL16(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR16: inputs are not short: %d %d\n", a, shift); + fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); } res = a<<shift; if (!VERIFY_SHORT(res)) - fprintf (stderr, "SHR16: output is not short: %d\n", res); + fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line); spx_mips++; return res; } @@ -134,7 +138,9 @@ static inline int SHR32(long long a, int shift) } res = a>>shift; if (!VERIFY_INT(res)) + { fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); + } spx_mips++; return res; } @@ -143,62 +149,71 @@ static inline int SHL32(long long a, int shift) long long res; if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); + fprintf (stderr, "SHL32: inputs are not int: %d %d\n", (int)a, shift); } res = a<<shift; if (!VERIFY_INT(res)) - fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); + { + fprintf (stderr, "SHL32: output is not int: %d\n", (int)res); + 
} spx_mips++; return res; } +#define PSHR16(a,shift) (SHR16(ADD16((a),((1<<((shift))>>1))),shift)) +#define PSHR32(a,shift) (SHR32(ADD32((a),((1<<((shift))>>1))),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) -#define PSHR16(a,shift) (SHR16(ADD16(a,(1<<((shift)-1))),shift)) -#define PSHR32(a,shift) (SHR32(ADD32(a,(1<<((shift)-1))),shift)) #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) -#define SHR(a,shift) ((a) >> (shift)) -#define SHL(a,shift) ((a) << (shift)) +//#define SHR(a,shift) ((a) >> (shift)) +//#define SHL(a,shift) ((a) << (shift)) -static inline short ADD16(int a, int b) +#define ADD16(a, b) _ADD16(a, b, __FILE__, __LINE__) +static inline short _ADD16(int a, int b, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "ADD16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); } res = a+b; if (!VERIFY_SHORT(res)) - fprintf (stderr, "ADD16: output is not short: %d+%d=%d\n", a,b,res); + { + fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); + } spx_mips++; return res; } -static inline short SUB16(int a, int b) + +#define SUB16(a, b) _SUB16(a, b, __FILE__, __LINE__) +static inline short _SUB16(int a, int b, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "SUB16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); } res = a-b; if (!VERIFY_SHORT(res)) - fprintf (stderr, "SUB16: output is not short: %d\n", res); + fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); spx_mips++; return res; } -static inline int ADD32(long long a, long long b) +#define ADD32(a, b) _ADD32(a, b, __FILE__, __LINE__) +static inline int 
_ADD32(long long a, long long b, char *file, int line) { long long res; if (!VERIFY_INT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "ADD32: inputs are not int: %d %d\n", (int)a, (int)b); + fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a+b; if (!VERIFY_INT(res)) { - fprintf (stderr, "ADD32: output is not int: %d\n", (int)res); + fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); } spx_mips++; return res; @@ -220,8 +235,6 @@ static inline int SUB32(long long a, long long b) #define ADD64(a,b) (MIPS_INC(a)+(b)) -#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift)) - /* result fits in 16 bits */ static inline short MULT16_16_16(int a, int b) { @@ -237,36 +250,56 @@ static inline short MULT16_16_16(int a, int b) return res; } -static inline int MULT16_16(int a, int b) +#define MULT16_16(a, b) _MULT16_16(a, b, __FILE__, __LINE__) +static inline int _MULT16_16(int a, int b, char *file, int line) { long long res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "MULT16_16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); } res = ((long long)a)*b; if (!VERIFY_INT(res)) - fprintf (stderr, "MULT16_16: output is not int: %d\n", (int)res); + fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line); spx_mips++; return res; } #define MAC16_16(c,a,b) (spx_mips--,ADD32((c),MULT16_16((a),(b)))) -#define MAC16_16_Q11(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11)))) -#define MAC16_16_Q13(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13)))) -#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13))) +#define MAC16_16_Q11(c,a,b) (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11))))) +#define MAC16_16_Q13(c,a,b) (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13))))) +#define MAC16_16_P13(c,a,b) 
(EXTRACT16(ADD32((c),SHR32(ADD32(4096,MULT16_16((a),(b))),13)))) -static inline int MULT16_32_QX(int a, long long b, int Q) +#define MULT16_32_QX(a, b, Q) _MULT16_32_QX(a, b, Q, __FILE__, __LINE__) +static inline int _MULT16_32_QX(int a, long long b, int Q, char *file, int line) { long long res; if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d\n", Q, (int)a, (int)b); + fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); } + if (ABS32(b)>=(1<<(15+Q))) + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); res = (((long long)a)*(long long)b) >> Q; if (!VERIFY_INT(res)) - fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d\n", Q, (int)a, (int)b,(int)res); + fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); + spx_mips+=5; + return res; +} + +static inline int MULT16_32_PX(int a, long long b, int Q) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d\n", Q, (int)a, (int)b); + } + if (ABS32(b)>=(1<<(15+Q))) + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d\n", Q, (int)a, (int)b); + res = ((((long long)a)*(long long)b) + ((1<<Q)>>1))>> Q; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d\n", Q, (int)a, (int)b,(int)res); spx_mips+=5; return res; } @@ -278,6 +311,7 @@ static inline int MULT16_32_QX(int a, long long b, int Q) #define MULT16_32_Q13(a,b) MULT16_32_QX(a,b,13) #define MULT16_32_Q14(a,b) MULT16_32_QX(a,b,14) #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) +#define MULT16_32_P15(a,b) MULT16_32_PX(a,b,15) #define MAC16_32_Q15(c,a,b) ADD32((c),MULT16_32_Q15((a),(b))) static inline int SATURATE(int a, int b) @@ -341,7 +375,9 @@ static inline short MULT16_16_Q15(int a, int 
b) res = ((long long)a)*b; res >>= 15; if (!VERIFY_SHORT(res)) + { fprintf (stderr, "MULT16_16_Q15: output is not short: %d\n", (int)res); + } spx_mips+=3; return res; } @@ -398,23 +434,24 @@ static inline short MULT16_16_P15(int a, int b) return res; } +#define DIV32_16(a, b) _DIV32_16(a, b, __FILE__, __LINE__) -static inline int DIV32_16(long long a, long long b) +static inline int _DIV32_16(long long a, long long b, char *file, int line) { long long res; if (b==0) { - fprintf(stderr, "DIV32_16: divide by zero: %d/%d\n", (int)a, (int)b); + fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); return 0; } if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d\n", (int)a, (int)b); + fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a/b; if (!VERIFY_SHORT(res)) { - fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d\n", (int)a,(int)b,(int)res); + fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); if (res>32767) res = 32767; if (res<-32768) @@ -423,22 +460,24 @@ static inline int DIV32_16(long long a, long long b) spx_mips+=20; return res; } -static inline int DIV32(long long a, long long b) + +#define DIV32(a, b) _DIV32(a, b, __FILE__, __LINE__) +static inline int _DIV32(long long a, long long b, char *file, int line) { long long res; if (b==0) { - fprintf(stderr, "DIV32: divide by zero: %d/%d\n", (int)a, (int)b); + fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); return 0; } if (!VERIFY_INT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "DIV32: inputs are not int/short: %d %d\n", (int)a, (int)b); + fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a/b; if (!VERIFY_INT(res)) - fprintf (stderr, "DIV32: output is not int: %d\n", 
(int)res); + fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); spx_mips+=36; return res; } diff --git a/libspeex/fixed_generic.h b/libspeex/fixed_generic.h index 375050c..2948177 100644 --- a/libspeex/fixed_generic.h +++ b/libspeex/fixed_generic.h @@ -46,14 +46,15 @@ #define SHL16(a,shift) ((a) << (shift)) #define SHR32(a,shift) ((a) >> (shift)) #define SHL32(a,shift) ((a) << (shift)) -#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift)) -#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift)) +#define PSHR16(a,shift) (SHR16((a)+((1<<((shift))>>1)),shift)) +#define PSHR32(a,shift) (SHR32((a)+((1<<((shift))>>1)),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SHR(a,shift) ((a) >> (shift)) #define SHL(a,shift) ((spx_word32_t)(a) << (shift)) -#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift)) +#define PSHR(a,shift) (SHR((a)+((1<<((shift))>>1)),shift)) #define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? 
-(a) : (x))) @@ -77,6 +78,7 @@ #define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)) #define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))) +#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15)) #define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)) #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) diff --git a/libspeex/jitter.c b/libspeex/jitter.c index 6d5f2ad..2b64453 100644 --- a/libspeex/jitter.c +++ b/libspeex/jitter.c @@ -41,9 +41,12 @@ #include <speex/speex.h> #include <speex/speex_bits.h> #include <speex/speex_jitter.h> -#include <stdio.h> -#define LATE_BINS 10 +#ifndef NULL +#define NULL 0 +#endif + +#define LATE_BINS 15 #define MAX_MARGIN 30 /**< Number of bins in margin histogram */ #define SPEEX_JITTER_MAX_BUFFER_SIZE 200 /**< Maximum number of packets in jitter buffer */ @@ -68,7 +71,9 @@ struct JitterBuffer_ { int tick_size; /**< Output granularity */ int reset_state; /**< True if state was just reset */ int buffer_margin; /**< How many frames we want to keep in the buffer (lower bound) */ - + int late_cutoff; /**< How late must a packet be for it not to be considered at all */ + int interp_requested; /**< An interpolation is requested by speex_jitter_update_delay() */ + int lost_count; /**< Number of consecutive lost packets */ float shortterm_margin[MAX_MARGIN]; /**< Short term margin histogram */ float longterm_margin[MAX_MARGIN]; /**< Long term margin histogram */ @@ -86,6 +91,7 @@ JitterBuffer *jitter_buffer_init(int tick) jitter->buf[i]=NULL; jitter->tick_size = tick; jitter->buffer_margin = 1; + jitter->late_cutoff = 50; jitter_buffer_reset(jitter); } return jitter; @@ -141,6 +147,7 @@ void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet) /* Cleanup buffer 
(remove old packets that weren't played) */ for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++) { + /* Make sure we don't discard a "just-late" packet in case we want to play it next (if we interpolate). */ if (jitter->buf[i] && LE32(jitter->timestamp[i] + jitter->span[i], jitter->pointer_timestamp)) { /*fprintf (stderr, "cleaned (not played)\n");*/ @@ -187,27 +194,33 @@ void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet) jitter->span[i]=packet->span; jitter->len[i]=packet->len; - /* Adjust the buffer size depending on network conditions */ - arrival_margin = (packet->timestamp - jitter->current_timestamp) - jitter->buffer_margin*jitter->tick_size; + /* Adjust the buffer size depending on network conditions. + The arrival margin is how much in advance (or late) the packet it */ + arrival_margin = (((spx_int32_t)packet->timestamp) - ((spx_int32_t)jitter->current_timestamp))/jitter->tick_size - jitter->buffer_margin; - if (arrival_margin >= -LATE_BINS*jitter->tick_size) + if (arrival_margin >= -jitter->late_cutoff) { + /* Here we compute the histogram based on the time of arrival of the packet. + This is based on a (first-order) recursive average. 
We keep both a short-term + histogram and a long-term histogram */ spx_int32_t int_margin; + /* First, apply the "damping" of the recursive average to all bins */ for (i=0;i<MAX_MARGIN;i++) { jitter->shortterm_margin[i] *= .98; jitter->longterm_margin[i] *= .995; } - int_margin = LATE_BINS + arrival_margin/jitter->tick_size; + /* What histogram bin the packet should be counted in */ + int_margin = LATE_BINS + arrival_margin; if (int_margin>MAX_MARGIN-1) int_margin = MAX_MARGIN-1; - if (int_margin>=0) - { - jitter->shortterm_margin[int_margin] += .02; - jitter->longterm_margin[int_margin] += .005; - } + if (int_margin<0) + int_margin = 0; + /* Add the packet to the right bin */ + jitter->shortterm_margin[int_margin] += .02; + jitter->longterm_margin[int_margin] += .005; } else { - + /* Packet has arrived *way* too late, we pretty much consider it lost and not take it into account in the histogram */ /*fprintf (stderr, "way too late = %d\n", arrival_margin);*/ if (jitter->lost_count>20) { @@ -229,9 +242,10 @@ void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet) } /** Get one packet from the jitter buffer */ -int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint32_t *start_offset) +int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_int32_t *start_offset) { - int i, j; + int i; + unsigned int j; float late_ratio_short; float late_ratio_long; float ontime_ratio_short; @@ -241,6 +255,17 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint int chunk_size; int incomplete = 0; + if (jitter->interp_requested) + { + jitter->interp_requested = 0; + if (start_offset) + *start_offset = 0; + packet->timestamp = jitter->pointer_timestamp; + packet->span = jitter->tick_size; + jitter->pointer_timestamp += jitter->tick_size; + packet->len = 0; + return JITTER_BUFFER_MISSING; + } if (LT32(jitter->current_timestamp+jitter->tick_size, jitter->pointer_timestamp)) { 
jitter->current_timestamp = jitter->pointer_timestamp; @@ -255,14 +280,17 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint late_ratio_short = 0; late_ratio_long = 0; + /* Count the proportion of packets that are late */ for (i=0;i<LATE_BINS;i++) { late_ratio_short += jitter->shortterm_margin[i]; late_ratio_long += jitter->longterm_margin[i]; } + /* Count the proportion of packets that are just on time */ ontime_ratio_short = jitter->shortterm_margin[LATE_BINS]; ontime_ratio_long = jitter->longterm_margin[LATE_BINS]; early_ratio_short = early_ratio_long = 0; + /* Count the proportion of packets that are early */ for (i=LATE_BINS+1;i<MAX_MARGIN;i++) { early_ratio_short += jitter->shortterm_margin[i]; @@ -274,42 +302,6 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint /*fprintf (stderr, "%f %f\n", early_ratio_short + ontime_ratio_short + late_ratio_short, early_ratio_long + ontime_ratio_long + late_ratio_long);*/ } - /* Adjusting the buffering */ - - if (late_ratio_short > .1 || late_ratio_long > .03) - { - /* If too many packets are arriving late */ - jitter->shortterm_margin[MAX_MARGIN-1] += jitter->shortterm_margin[MAX_MARGIN-2]; - jitter->longterm_margin[MAX_MARGIN-1] += jitter->longterm_margin[MAX_MARGIN-2]; - for (i=MAX_MARGIN-3;i>=0;i--) - { - jitter->shortterm_margin[i+1] = jitter->shortterm_margin[i]; - jitter->longterm_margin[i+1] = jitter->longterm_margin[i]; - } - jitter->shortterm_margin[0] = 0; - jitter->longterm_margin[0] = 0; - jitter->pointer_timestamp -= jitter->tick_size; - jitter->current_timestamp -= jitter->tick_size; - /*fprintf (stderr, "i");*/ - /*fprintf (stderr, "interpolate (getting some slack)\n");*/ - } else if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8) - { - /* Many frames arriving early */ - jitter->shortterm_margin[0] += jitter->shortterm_margin[1]; - jitter->longterm_margin[0] += 
jitter->longterm_margin[1]; - for (i=1;i<MAX_MARGIN-1;i++) - { - jitter->shortterm_margin[i] = jitter->shortterm_margin[i+1]; - jitter->longterm_margin[i] = jitter->longterm_margin[i+1]; - } - jitter->shortterm_margin[MAX_MARGIN-1] = 0; - jitter->longterm_margin[MAX_MARGIN-1] = 0; - /*fprintf (stderr, "drop frame\n");*/ - /*fprintf (stderr, "d");*/ - jitter->pointer_timestamp += jitter->tick_size; - jitter->current_timestamp += jitter->tick_size; - /*fprintf (stderr, "dropping packet (getting more aggressive)\n");*/ - } /* Searching for the packet that fits best */ @@ -325,7 +317,7 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint { for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++) { - if (jitter->buf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size)) + if (jitter->buf[i] && LE32(jitter->timestamp[i], jitter->pointer_timestamp) && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size)) break; } } @@ -335,7 +327,7 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint { for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++) { - if (jitter->buf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GT32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp)) + if (jitter->buf[i] && LE32(jitter->timestamp[i], jitter->pointer_timestamp) && GT32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp)) break; } } @@ -385,7 +377,7 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint jitter->buf[i] = NULL; /* Set timestamp and span (if requested) */ if (start_offset) - *start_offset = jitter->timestamp[i]-jitter->pointer_timestamp; + *start_offset = (spx_int32_t)jitter->timestamp[i]-(spx_int32_t)jitter->pointer_timestamp; packet->timestamp = jitter->timestamp[i]; packet->span = jitter->span[i]; /* Point at the end of the current packet */ @@ -409,6 +401,26 @@ int 
jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint packet->span = jitter->tick_size; jitter->pointer_timestamp += chunk_size; packet->len = 0; + + /* Adjusting the buffering bssed on the amount of packets that are early/on time/late */ + if (late_ratio_short > .1 || late_ratio_long > .03) + { + /* If too many packets are arriving late */ + jitter->shortterm_margin[MAX_MARGIN-1] += jitter->shortterm_margin[MAX_MARGIN-2]; + jitter->longterm_margin[MAX_MARGIN-1] += jitter->longterm_margin[MAX_MARGIN-2]; + for (i=MAX_MARGIN-3;i>=0;i--) + { + jitter->shortterm_margin[i+1] = jitter->shortterm_margin[i]; + jitter->longterm_margin[i+1] = jitter->longterm_margin[i]; + } + jitter->shortterm_margin[0] = 0; + jitter->longterm_margin[0] = 0; + jitter->pointer_timestamp -= jitter->tick_size; + jitter->current_timestamp -= jitter->tick_size; + /*fprintf (stderr, "i");*/ + /*fprintf (stderr, "interpolate (getting some slack)\n");*/ + } + return JITTER_BUFFER_MISSING; } @@ -424,7 +436,113 @@ void jitter_buffer_tick(JitterBuffer *jitter) jitter->current_timestamp += jitter->tick_size; } +/* Let the jitter buffer know it's the right time to adjust the buffering delay to the network conditions */ +int jitter_buffer_update_delay(JitterBuffer *jitter, JitterBufferPacket *packet, spx_int32_t *start_offset) +{ + int i; + float late_ratio_short; + float late_ratio_long; + float ontime_ratio_short; + float ontime_ratio_long; + float early_ratio_short; + float early_ratio_long; + + if (LT32(jitter->current_timestamp+jitter->tick_size, jitter->pointer_timestamp)) + { + jitter->current_timestamp = jitter->pointer_timestamp; + speex_warning("did you forget to call jitter_buffer_tick() by any chance?"); + } + /*fprintf (stderr, "get packet %d %d\n", jitter->pointer_timestamp, jitter->current_timestamp);*/ + + /* FIXME: This should be only what remaining of the current tick */ + late_ratio_short = 0; + late_ratio_long = 0; + /* Count the proportion of packets that are 
late */ + for (i=0;i<LATE_BINS;i++) + { + late_ratio_short += jitter->shortterm_margin[i]; + late_ratio_long += jitter->longterm_margin[i]; + } + /* Count the proportion of packets that are just on time */ + ontime_ratio_short = jitter->shortterm_margin[LATE_BINS]; + ontime_ratio_long = jitter->longterm_margin[LATE_BINS]; + early_ratio_short = early_ratio_long = 0; + /* Count the proportion of packets that are early */ + for (i=LATE_BINS+1;i<MAX_MARGIN;i++) + { + early_ratio_short += jitter->shortterm_margin[i]; + early_ratio_long += jitter->longterm_margin[i]; + } + + /* Adjusting the buffering bssed on the amount of packets that are early/on time/late */ + if (late_ratio_short > .1 || late_ratio_long > .03) + { + /* If too many packets are arriving late */ + jitter->shortterm_margin[MAX_MARGIN-1] += jitter->shortterm_margin[MAX_MARGIN-2]; + jitter->longterm_margin[MAX_MARGIN-1] += jitter->longterm_margin[MAX_MARGIN-2]; + for (i=MAX_MARGIN-3;i>=0;i--) + { + jitter->shortterm_margin[i+1] = jitter->shortterm_margin[i]; + jitter->longterm_margin[i+1] = jitter->longterm_margin[i]; + } + jitter->shortterm_margin[0] = 0; + jitter->longterm_margin[0] = 0; + jitter->pointer_timestamp -= jitter->tick_size; + jitter->current_timestamp -= jitter->tick_size; + jitter->interp_requested = 1; + return JITTER_BUFFER_ADJUST_INTERPOLATE; + + } else if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8) + { + /* Many frames arriving early */ + jitter->shortterm_margin[0] += jitter->shortterm_margin[1]; + jitter->longterm_margin[0] += jitter->longterm_margin[1]; + for (i=1;i<MAX_MARGIN-1;i++) + { + jitter->shortterm_margin[i] = jitter->shortterm_margin[i+1]; + jitter->longterm_margin[i] = jitter->longterm_margin[i+1]; + } + jitter->shortterm_margin[MAX_MARGIN-1] = 0; + jitter->longterm_margin[MAX_MARGIN-1] = 0; + /*fprintf (stderr, "drop frame\n");*/ + /*fprintf (stderr, "d");*/ + jitter->pointer_timestamp += 
jitter->tick_size; + jitter->current_timestamp += jitter->tick_size; + return JITTER_BUFFER_ADJUST_DROP; + } + + return JITTER_BUFFER_ADJUST_OK; +} +/* Used like the ioctl function to control the jitter buffer parameters */ +int jitter_buffer_ctl(JitterBuffer *jitter, int request, void *ptr) +{ + int count, i; + switch(request) + { + case JITTER_BUFFER_SET_MARGIN: + jitter->buffer_margin = *(spx_int32_t*)ptr; + break; + case JITTER_BUFFER_GET_MARGIN: + *(spx_int32_t*)ptr = jitter->buffer_margin; + break; + case JITTER_BUFFER_GET_AVALIABLE_COUNT: + count = 0; + for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++) + { + if (jitter->buf[i] && LE32(jitter->pointer_timestamp, jitter->timestamp[i])) + { + count++; + } + } + *(spx_int32_t*)ptr = count; + break; + default: + speex_warning_int("Unknown jitter_buffer_ctl request: ", request); + return -1; + } + return 0; +} @@ -499,6 +617,7 @@ void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) out[i]=0; } } + jitter_buffer_update_delay(jitter->packets, &packet, NULL); jitter_buffer_tick(jitter->packets); } diff --git a/libspeex/kiss_fft.c b/libspeex/kiss_fft.c index a0b3724..775a257 100644 --- a/libspeex/kiss_fft.c +++ b/libspeex/kiss_fft.c @@ -1,5 +1,6 @@ /* Copyright (c) 2003-2004, Mark Borgerding +Copyright (c) 2005-2007, Jean-Marc Valin All rights reserved. @@ -24,121 +25,142 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND fixed or floating point complex numbers. It also delares the kf_ internal functions. 
*/ -static kiss_fft_cpx *scratchbuf=NULL; -static size_t nscratchbuf=0; -static kiss_fft_cpx *tmpbuf=NULL; -static size_t ntmpbuf=0; - -#define CHECKBUF(buf,nbuf,n) \ - do { \ - if ( nbuf < (size_t)(n) ) {\ - speex_free(buf); \ - buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \ - nbuf = (size_t)(n); \ - } \ - }while(0) - static void kf_bfly2( kiss_fft_cpx * Fout, const size_t fstride, const kiss_fft_cfg st, - int m + int m, + int N, + int mm ) { kiss_fft_cpx * Fout2; - kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx * tw1; kiss_fft_cpx t; - Fout2 = Fout + m; if (!st->inverse) { - int i; - kiss_fft_cpx *x=Fout; - for (i=0;i<2*m;i++) + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + /* Almost the same as the code path below, except that we divide the input by two + (while keeping the best accuracy possible) */ + spx_word32_t tr, ti; + tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); + ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); + tw1 += fstride; + Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); + Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); + Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); + Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); + ++Fout2; + ++Fout; + } + } + } else { + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) { - x[i].r = SHR(x[i].r,1); - x[i].i = SHR(x[i].i,1); + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } } } - - do{ - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; - }while (--m); } static void kf_bfly4( kiss_fft_cpx * Fout, const 
size_t fstride, const kiss_fft_cfg st, - const size_t m + const size_t m, + int N, + int mm ) { kiss_fft_cpx *tw1,*tw2,*tw3; kiss_fft_cpx scratch[6]; - size_t k=m; const size_t m2=2*m; const size_t m3=3*m; + int i, j; - tw3 = tw2 = tw1 = st->twiddles; - - if (!st->inverse) { - int i; - kiss_fft_cpx *x=Fout; - for (i=0;i<4*m;i++) - { - x[i].r = PSHR16(x[i].r,2); - x[i].i = PSHR16(x[i].i,2); - } - } if (st->inverse) { - do { - C_MUL(scratch[0],Fout[m] , *tw1 ); - C_MUL(scratch[1],Fout[m2] , *tw2 ); - C_MUL(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r - scratch[4].i; - Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - ++Fout; - } while(--k); + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + ++Fout; + } + } } else { - do { - C_MUL(scratch[0],Fout[m] , *tw1 ); - C_MUL(scratch[1],Fout[m2] , *tw2 ); - C_MUL(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, 
scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; - }while(--k); + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MUL4(scratch[0],Fout[m] , *tw1 ); + C_MUL4(scratch[1],Fout[m2] , *tw2 ); + C_MUL4(scratch[2],Fout[m3] , *tw3 ); + + Fout->r = PSHR16(Fout->r, 2); + Fout->i = PSHR16(Fout->i, 2); + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + Fout[m2].r = PSHR16(Fout[m2].r, 2); + Fout[m2].i = PSHR16(Fout[m2].i, 2); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } + } } } @@ -263,10 +285,13 @@ static void kf_bfly_generic( int u,k,q1,q; kiss_fft_cpx * twiddles = st->twiddles; kiss_fft_cpx t; + kiss_fft_cpx scratchbuf[17]; int Norig = st->nfft; - CHECKBUF(scratchbuf,nscratchbuf,p); - + /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ + if (p>17) + speex_error("KissFFT: max radix supported is 17"); + for ( u=0; u<m; ++u ) { k=u; for ( q1=0 ; q1<p ; ++q1 ) { @@ -291,6 +316,39 @@ static void kf_bfly_generic( } } } + +static +void kf_shuffle( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + const int p=*factors++; /* the radix */ + const int 
m=*factors++; /* stage's fft length/p */ + + /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ + if (m==1) + { + int j; + for (j=0;j<p;j++) + { + Fout[j] = *f; + f += fstride*in_stride; + } + } else { + int j; + for (j=0;j<p;j++) + { + kf_shuffle( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + Fout += m; + } + } +} static void kf_work( @@ -299,24 +357,34 @@ void kf_work( const size_t fstride, int in_stride, int * factors, - const kiss_fft_cfg st + const kiss_fft_cfg st, + int N, + int s2, + int m2 ) { + int i; kiss_fft_cpx * Fout_beg=Fout; const int p=*factors++; /* the radix */ const int m=*factors++; /* stage's fft length/p */ - const kiss_fft_cpx * Fout_end = Fout + p*m; - - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ - do{ - kf_work( Fout , f, fstride*p, in_stride, factors,st); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); +#if 0 + /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ + if (m==1) + { + /* int j; + for (j=0;j<p;j++) + { + Fout[j] = *f; + f += fstride*in_stride; + }*/ + } else { + int j; + for (j=0;j<p;j++) + { + kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); + f += fstride*in_stride; + Fout += m; + } } Fout=Fout_beg; @@ -328,6 +396,36 @@ void kf_work( case 5: kf_bfly5(Fout,fstride,st,m); break; default: kf_bfly_generic(Fout,fstride,st,m,p); break; } +#else + /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/ + if (m==1) + { + /*for (i=0;i<N;i++) + { + int j; + Fout = Fout_beg+i*m2; + const kiss_fft_cpx * f2 = f+i*s2; + for (j=0;j<p;j++) + { + *Fout++ = *f2; + f2 += fstride*in_stride; + } + }*/ + }else{ + kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); + } + + + + + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break; + case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; 
kf_bfly3(Fout,fstride,st,m);} break; + case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break; + case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break; + default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break; + } +#endif } /* facbuf is populated by p1,m1,p2,m2, ... @@ -338,8 +436,6 @@ static void kf_factor(int n,int * facbuf) { int p=4; - double floor_sqrt; - floor_sqrt = floor( sqrt((double)n) ); /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -349,7 +445,7 @@ void kf_factor(int n,int * facbuf) case 2: p = 3; break; default: p += 2; break; } - if (p > floor_sqrt) + if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n) p = n; /* no more factors, skip to end */ } n /= p; @@ -357,7 +453,6 @@ void kf_factor(int n,int * facbuf) *facbuf++ = n; } while (n > 1); } - /* * * User-callable function to allocate all necessary storage space for the fft. @@ -382,15 +477,22 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem int i; st->nfft=nfft; st->inverse = inverse_fft; - +#ifdef FIXED_POINT for (i=0;i<nfft;++i) { - const double pi=3.14159265358979323846264338327; - double phase = ( -2*pi /nfft ) * i; - if (st->inverse) - phase *= -1; - kf_cexp(st->twiddles+i, phase ); + spx_word32_t phase = i; + if (!st->inverse) + phase = -phase; + kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft)); } - +#else + for (i=0;i<nfft;++i) { + const double pi=3.14159265358979323846264338327; + double phase = ( -2*pi /nfft ) * i; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } +#endif kf_factor(nfft,st->factors); } return st; @@ -401,12 +503,15 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) { - if (fin == fout) { - CHECKBUF(tmpbuf,ntmpbuf,st->nfft); - kf_work(tmpbuf,fin,1,in_stride, st->factors,st); - 
speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); - }else{ - kf_work( fout, fin, 1,in_stride, st->factors,st ); + if (fin == fout) + { + speex_error("In-place FFT not supported"); + /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft); + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);*/ + } else { + kf_shuffle( fout, fin, 1,in_stride, st->factors,st); + kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1); } } @@ -415,16 +520,3 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) kiss_fft_stride(cfg,fin,fout,1); } - -/* not really necessary to call, but if someone is doing in-place ffts, they may want to free the - buffers from CHECKBUF - */ -void kiss_fft_cleanup(void) -{ - speex_free(scratchbuf); - scratchbuf = NULL; - nscratchbuf=0; - speex_free(tmpbuf); - tmpbuf=NULL; - ntmpbuf=0; -} diff --git a/libspeex/kiss_fftr.c b/libspeex/kiss_fftr.c index b90b725..392945c 100644 --- a/libspeex/kiss_fftr.c +++ b/libspeex/kiss_fftr.c @@ -58,13 +58,22 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme st->super_twiddles = st->tmpbuf + nfft; kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); - for (i = 0; i < nfft; ++i) { - double phase = - -3.14159265358979323846264338327 * ((double) i / nfft + .5); - if (inverse_fft) - phase *= -1; - kf_cexp (st->super_twiddles+i,phase); +#ifdef FIXED_POINT + for (i=0;i<nfft;++i) { + spx_word32_t phase = i+(nfft>>1); + if (!inverse_fft) + phase = -phase; + kf_cexp2(st->super_twiddles+i, DIV32(SHL32(phase,16),nfft)); } +#else + for (i=0;i<nfft;++i) { + const double pi=3.14159265358979323846264338327; + double phase = pi*(((double)i) /nfft + .5); + if (!inverse_fft) + phase = -phase; + kf_cexp(st->super_twiddles+i, phase ); + } +#endif return st; } @@ -75,8 +84,7 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; if ( st->substate->inverse) { - 
speex_warning("kiss fft usage error: improper alloc\n"); - exit(1); + speex_error("kiss fft usage error: improper alloc\n"); } ncfft = st->substate->nfft; @@ -124,14 +132,13 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr } } -void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata) { /* input buffer timedata is stored row-wise */ int k, ncfft; if (st->substate->inverse == 0) { - speex_warning ("kiss fft usage error: improper alloc\n"); - exit (1); + speex_error ("kiss fft usage error: improper alloc\n"); } ncfft = st->substate->nfft; @@ -161,3 +168,129 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t } kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); } + +void kiss_fftr2(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,ncfft; + kiss_fft_cpx f2k,tdc; + spx_word32_t f1kr, f1ki, twr, twi; + + if ( st->substate->inverse) { + speex_error("kiss fft usage error: improper alloc\n"); + } + + ncfft = st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. + * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 
+ * yielding Nyquist bin of input time sequence + */ + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[0] = tdc.r + tdc.i; + freqdata[2*ncfft-1] = tdc.r - tdc.i; + + for ( k=1;k <= ncfft/2 ; ++k ) + { + /*fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + + C_MUL( tw , f2k , st->super_twiddles[k]); + + freqdata[2*k-1] = HALF_OF(f1k.r + tw.r); + freqdata[2*k] = HALF_OF(f1k.i + tw.i); + freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r); + freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i); + */ + + /*f1k.r = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1); + f1k.i = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1); + f2k.r = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1); + f2k.i = SHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1); + + C_MUL( tw , f2k , st->super_twiddles[k]); + + freqdata[2*k-1] = HALF_OF(f1k.r + tw.r); + freqdata[2*k] = HALF_OF(f1k.i + tw.i); + freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r); + freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i); + */ + f2k.r = SHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1); + f2k.i = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1); + + f1kr = SHL32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),13); + f1ki = SHL32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),13); + + twr = SHR32(SUB32(MULT16_16(f2k.r,st->super_twiddles[k].r),MULT16_16(f2k.i,st->super_twiddles[k].i)), 1); + twi = SHR32(ADD32(MULT16_16(f2k.i,st->super_twiddles[k].r),MULT16_16(f2k.r,st->super_twiddles[k].i)), 1); + +#ifdef FIXED_POINT + freqdata[2*k-1] = PSHR32(f1kr + twr, 15); + freqdata[2*k] = PSHR32(f1ki + twi, 
15); + freqdata[2*(ncfft-k)-1] = PSHR32(f1kr - twr, 15); + freqdata[2*(ncfft-k)] = PSHR32(twi - f1ki, 15); +#else + freqdata[2*k-1] = .5f*(f1kr + twr); + freqdata[2*k] = .5f*(f1ki + twi); + freqdata[2*(ncfft-k)-1] = .5f*(f1kr - twr); + freqdata[2*(ncfft-k)] = .5f*(twi - f1ki); + +#endif + } +} + +void kiss_fftri2(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, ncfft; + + if (st->substate->inverse == 0) { + speex_error ("kiss fft usage error: improper alloc\n"); + } + + ncfft = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0] + freqdata[2*ncfft-1]; + st->tmpbuf[0].i = freqdata[0] - freqdata[2*ncfft-1]; + /*C_FIXDIV(st->tmpbuf[0],2);*/ + + for (k = 1; k <= ncfft / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmp; + fk.r = freqdata[2*k-1]; + fk.i = freqdata[2*k]; + fnkc.r = freqdata[2*(ncfft - k)-1]; + fnkc.i = -freqdata[2*(ncfft - k)]; + /*C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 );*/ + + C_ADD (fek, fk, fnkc); + C_SUB (tmp, fk, fnkc); + C_MUL (fok, tmp, st->super_twiddles[k]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[ncfft - k], fek, fok); +#ifdef USE_SIMD + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#else + st->tmpbuf[ncfft - k].i *= -1; +#endif + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/libspeex/kiss_fftr.h b/libspeex/kiss_fftr.h index 2e8351a..7bfb423 100644 --- a/libspeex/kiss_fftr.h +++ b/libspeex/kiss_fftr.h @@ -32,7 +32,12 @@ void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *f output freqdata has nfft/2+1 complex points */ +void kiss_fftr2(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata); + void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); + +void kiss_fftri2(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata, kiss_fft_scalar *timedata); + /* input freqdata has nfft/2+1 complex points output timedata has nfft 
scalar points diff --git a/libspeex/lbr_48k_tables.c b/libspeex/lbr_48k_tables.c index 2e6db3f..d4d80dc 100644 --- a/libspeex/lbr_48k_tables.c +++ b/libspeex/lbr_48k_tables.c @@ -34,74 +34,74 @@ #endif -int dummy_epic_48k_variable=0; +const int dummy_epic_48k_variable=0; #ifdef EPIC_48K -const signed char gain_cdbk_ulbr[192] = { --31, -48, -30, --19, -10, -18, --33, -22, -45, --5, -56, -43, --30, -56, -3, --59, -17, -52, --41, -60, -58, --64, -47, -22, --30, -31, -31, --29, -14, -31, --22, -37, -58, --31, -44, 13, --37, 0, 1, --46, -55, -35, --56, -14, -53, --8, 1, -36, --29, -15, -27, --29, -39, -28, --43, -5, 3, --51, -27, -54, -10, -46, -36, -3, -3, -42, --27, 16, -22, --34, -52, 13, --31, -21, -28, --34, -45, -40, --20, -48, 4, --40, -27, 16, --6, 11, -44, --35, 12, -5, -19, -33, -37, --29, 18, -32, --29, -23, -19, -16, -47, -28, --34, -30, 17, --20, 2, -26, --38, -40, -36, -15, -14, -40, --39, 14, -9, --15, 25, -39, --26, 19, -32, --39, 17, -14, -10, -36, -26, -14, -13, -40, --29, -21, -12, --8, 19, -39, --36, -18, 15, --32, -38, -38, --19, 4, -23, --38, -7, 11, -9, -10, -39, --37, 24, -19, --34, -5, -8, --20, 23, -41, --4, 17, -31, --17, -26, -26, --24, 28, -36, --7, 15, -39, --42, 16, -11, --29, 14, -6, --36, 28, -27, --21, 5, -26, -11, -9, -39, --38, -7, 13, +const signed char gain_cdbk_ulbr[256] = { +-31, -48, -30, 10, +-19, -10, -18, 25, +-33, -22, -45, 12, +-5, -56, -43, 31, +-30, -56, -3, 28, +-59, -17, -52, 31, +-41, -60, -58, 32, +-64, -47, -22, 29, +-30, -31, -31, 2, +-29, -14, -31, 11, +-22, -37, -58, 21, +-31, -44, 13, 29, +-37, 0, 1, 35, +-46, -55, -35, 20, +-56, -14, -53, 32, +-8, 1, -36, 31, +-29, -15, -27, 13, +-29, -39, -28, 7, +-43, -5, 3, 37, +-51, -27, -54, 23, +10, -46, -36, 30, +3, -3, -42, 37, +-27, 16, -22, 32, +-34, -52, 13, 34, +-31, -21, -28, 8, +-34, -45, -40, 12, +-20, -48, 4, 32, +-40, -27, 16, 31, +-6, 11, -44, 41, +-35, 12, -5, 37, +19, -33, -37, 29, +-29, 18, -32, 27, +-29, -23, -19, 13, +16, -47, -28, 34, +-34, -30, 17, 27, 
+-20, 2, -26, 26, +-38, -40, -36, 9, +15, -14, -40, 37, +-39, 14, -9, 38, +-15, 25, -39, 41, +-26, 19, -32, 29, +-39, 17, -14, 37, +10, -36, -26, 26, +14, -13, -40, 37, +-29, -21, -12, 17, +-8, 19, -39, 41, +-36, -18, 15, 33, +-32, -38, -38, 6, +-19, 4, -23, 29, +-38, -7, 11, 37, +9, -10, -39, 35, +-37, 24, -19, 37, +-34, -5, -8, 27, +-20, 23, -41, 38, +-4, 17, -31, 39, +-17, -26, -26, 14, +-24, 28, -36, 36, +-7, 15, -39, 40, +-42, 16, -11, 40, +-29, 14, -6, 38, +-36, 28, -27, 35, +-21, 5, -26, 27, +11, -9, -39, 37, +-38, -7, 13, 38 }; diff --git a/libspeex/lsp.c b/libspeex/lsp.c index 3fdc08a..a73d883 100644 --- a/libspeex/lsp.c +++ b/libspeex/lsp.c @@ -509,7 +509,7 @@ void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* hard limit ak's to +/- 32767 */ - if (a < -32767) a = 32767; + if (a < -32767) a = -32767; if (a > 32767) a = 32767; ak[j-1] = (short)a; diff --git a/libspeex/ltp.c b/libspeex/ltp.c index 27e4f4d..fa77da2 100644 --- a/libspeex/ltp.c +++ b/libspeex/ltp.c @@ -176,20 +176,56 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p VARDECL(spx_word32_t *best_ener); spx_word32_t e0; VARDECL(spx_word32_t *corr); +#ifdef FIXED_POINT + /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16) + arrays for (normalized) 16-bit values */ + VARDECL(spx_word16_t *corr16); + VARDECL(spx_word16_t *ener16); + spx_word32_t *energy; + int cshift=0, eshift=0; + int scaledown = 0; + ALLOC(corr16, end-start+1, spx_word16_t); + ALLOC(ener16, end-start+1, spx_word16_t); + ALLOC(corr, end-start+1, spx_word32_t); + energy = corr; +#else + /* In floating-point, we need to float arrays and no normalized copies */ VARDECL(spx_word32_t *energy); - + spx_word16_t *corr16; + spx_word16_t *ener16; + ALLOC(energy, end-start+2, spx_word32_t); + ALLOC(corr, end-start+1, spx_word32_t); + corr16 = corr; + ener16 = energy; +#endif + ALLOC(best_score, N, spx_word32_t); ALLOC(best_ener, N, 
spx_word32_t); - ALLOC(corr, end-start+1, spx_word32_t); - ALLOC(energy, end-start+2, spx_word32_t); - for (i=0;i<N;i++) { best_score[i]=-1; best_ener[i]=0; pitch[i]=start; } - + +#ifdef FIXED_POINT + for (i=-end;i<len;i++) + { + if (ABS16(sw[i])>16383) + { + scaledown=1; + break; + } + } + /* If the weighted input is close to saturation, then we scale it down */ + if (scaledown) + { + for (i=-end;i<len;i++) + { + sw[i]=SHR16(sw[i],1); + } + } +#endif energy[0]=inner_prod(sw-start, sw-start, len); e0=inner_prod(sw, sw, len); for (i=start;i<end;i++) @@ -199,59 +235,42 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p if (energy[i-start+1] < 0) energy[i-start+1] = 0; } - + +#ifdef FIXED_POINT + eshift = normalize16(energy, ener16, 32766, end-start+1); +#endif + + /* In fixed-point, this actually overrites the energy array (aliased to corr) */ pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack); - - /* FIXME: Fixed-point and floating-point code should be merged */ + #ifdef FIXED_POINT + /* Normalize to 180 so we can square it and it still fits in 16 bits */ + cshift = normalize16(corr, corr16, 180, end-start+1); + /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) 
*/ + if (scaledown) { - VARDECL(spx_word16_t *corr16); - VARDECL(spx_word16_t *ener16); - ALLOC(corr16, end-start+1, spx_word16_t); - ALLOC(ener16, end-start+1, spx_word16_t); - /* Normalize to 180 so we can square it and it still fits in 16 bits */ - normalize16(corr, corr16, 180, end-start+1); - normalize16(energy, ener16, 180, end-start+1); - - for (i=start;i<=end;i++) + for (i=-end;i<len;i++) { - spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); - /* Instead of dividing the tmp by the energy, we multiply on the other side */ - if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) - { - /* We can safely put it last and then check */ - best_score[N-1]=tmp; - best_ener[N-1]=ener16[i-start]+1; - pitch[N-1]=i; - /* Check if it comes in front of others */ - for (j=0;j<N-1;j++) - { - if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start]))) - { - for (k=N-1;k>j;k--) - { - best_score[k]=best_score[k-1]; - best_ener[k]=best_ener[k-1]; - pitch[k]=pitch[k-1]; - } - best_score[j]=tmp; - best_ener[j]=ener16[i-start]+1; - pitch[j]=i; - break; - } - } - } + sw[i]=SHL16(sw[i],1); } - } -#else + } +#endif + + /* Search for the best pitch prediction gain */ for (i=start;i<=end;i++) { - float tmp = corr[i-start]*corr[i-start]; - if (tmp*best_ener[N-1]>best_score[N-1]*(1+energy[i-start])) + spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); + /* Instead of dividing the tmp by the energy, we multiply on the other side */ + if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) { - for (j=0;j<N;j++) + /* We can safely put it last and then check */ + best_score[N-1]=tmp; + best_ener[N-1]=ener16[i-start]+1; + pitch[N-1]=i; + /* Check if it comes in front of others */ + for (j=0;j<N-1;j++) { - if (tmp*best_ener[j]>best_score[j]*(1+energy[i-start])) + if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start]))) { for (k=N-1;k>j;k--) { @@ -260,29 
+279,30 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p pitch[k]=pitch[k-1]; } best_score[j]=tmp; - best_ener[j]=energy[i-start]+1; + best_ener[j]=ener16[i-start]+1; pitch[j]=i; break; } } } } -#endif - - /* Compute open-loop gain */ + + /* Compute open-loop gain if necessary */ if (gain) { - for (j=0;j<N;j++) - { - spx_word16_t g; - i=pitch[j]; - g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6)); - /* FIXME: g = max(g,corr/energy) */ - if (g<0) - g = 0; - gain[j]=g; - } + for (j=0;j<N;j++) + { + spx_word16_t g; + i=pitch[j]; + g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6)); + /* FIXME: g = max(g,corr/energy) */ + if (g<0) + g = 0; + gain[j]=g; + } } + + } #endif @@ -342,7 +362,8 @@ const spx_word16_t *r, spx_word16_t *new_target, int *cdbk_index, int plc_tuning, -spx_word32_t cumul_gain +spx_word32_t cumul_gain, +int scaledown ) { int i,j; @@ -366,6 +387,9 @@ spx_word32_t cumul_gain x[1]=tmp1+nsf; x[2]=tmp1+2*nsf; + for (j=0;j<nsf;j++) + new_target[j] = target[j]; + { VARDECL(spx_mem_t *mm); int pp=pitch-1; @@ -379,6 +403,16 @@ spx_word32_t cumul_gain else e[j]=0; } +#ifdef FIXED_POINT + /* Scale target and excitation down if needed (avoiding overflow) */ + if (scaledown) + { + for (j=0;j<nsf;j++) + e[j] = SHR16(e[j],1); + for (j=0;j<nsf;j++) + new_target[j] = SHR16(new_target[j],1); + } +#endif for (j=0;j<p;j++) mm[j] = 0; iir_mem16(e, ak, e, nsf, p, mm, stack); @@ -391,13 +425,18 @@ spx_word32_t cumul_gain for (i=1;i>=0;i--) { spx_word16_t e0=exc2[-pitch-1+i]; +#ifdef FIXED_POINT + /* Scale excitation down if needed (avoiding overflow) */ + if (scaledown) + e0 = SHR16(e0,1); +#endif x[i][0]=MULT16_16_Q14(r[0], e0); for (j=0;j<nsf-1;j++) x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0)); } for (i=0;i<3;i++) - corr[i]=inner_prod(x[i],target,nsf); + corr[i]=inner_prod(x[i],new_target,nsf); for 
(i=0;i<3;i++) for (j=0;j<=i;j++) A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf); @@ -478,7 +517,7 @@ spx_word32_t cumul_gain { spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])), MULT16_16(gain[2],x[0][i])); - new_target[i] = SUB16(target[i], EXTRACT16(PSHR32(tmp,6))); + new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp,6))); } err = inner_prod(new_target, new_target, nsf); @@ -520,7 +559,8 @@ spx_word32_t *cumul_gain const ltp_params *params; const signed char *gain_cdbk; int gain_cdbk_size; - + int scaledown=0; + VARDECL(int *nbest); params = (const ltp_params*) par; @@ -545,6 +585,25 @@ spx_word32_t *cumul_gain return start; } +#ifdef FIXED_POINT + /* Check if we need to scale everything down in the pitch search to avoid overflows */ + for (i=0;i<nsf;i++) + { + if (ABS16(target[i])>16383) + { + scaledown=1; + break; + } + } + for (i=-end;i<nsf;i++) + { + if (ABS16(exc2[i])>16383) + { + scaledown=1; + break; + } + } +#endif if (N>end-start+1) N=end-start+1; if (end != start) @@ -562,7 +621,7 @@ spx_word32_t *cumul_gain for (j=0;j<nsf;j++) exc[j]=0; err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf, - bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain); + bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown); if (err<best_err || best_err<0) { for (j=0;j<nsf;j++) @@ -588,7 +647,14 @@ spx_word32_t *cumul_gain exc[i]=best_exc[i]; for (i=0;i<nsf;i++) target[i]=best_target[i]; - +#ifdef FIXED_POINT + /* Scale target back up if needed */ + if (scaledown) + { + for (i=0;i<nsf;i++) + target[i]=SHL16(target[i],1); + } +#endif return pitch; } @@ -717,8 +783,8 @@ spx_word32_t *cumul_gain ) { int i; - VARDECL(spx_sig_t *res); - ALLOC(res, nsf, spx_sig_t); + VARDECL(spx_word16_t *res); + ALLOC(res, nsf, spx_word16_t); #ifdef FIXED_POINT if (pitch_coef>63) pitch_coef=63; @@ -734,9 +800,11 @@ spx_word32_t *cumul_gain { 
exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]); } - syn_percep_zero(exc, ak, awk1, awk2, res, nsf, p, stack); for (i=0;i<nsf;i++) - target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),PSHR32(res[i],SIG_SHIFT-1)),32700)); + res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT-1)); + syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack); + for (i=0;i<nsf;i++) + target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),EXTEND32(res[i])),32700)); return start; } @@ -770,7 +838,7 @@ int cdbk_offset for (i=0;i<nsf;i++) { exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7)); - exc[i] = PSHR(exc_out[i],13); + exc[i] = EXTRACT16(PSHR32(exc_out[i],13)); } *pitch_val = start; gain_val[0]=gain_val[2]=0; diff --git a/libspeex/ltp_arm4.h b/libspeex/ltp_arm4.h index 7479e8b..cdb94e6 100644 --- a/libspeex/ltp_arm4.h +++ b/libspeex/ltp_arm4.h @@ -75,9 +75,10 @@ spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) "\tadd %2, %2, %7, asr #5\n" "\tadd %3, %3, %10, asr #5\n" "\tbne .inner_prod_loop%=\n" - : "=r" (deadx), "=r" (deady), "=r" (sum1), "=r" (sum2), "=r" (deadlen), - "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6) - : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3) + : "=r" (deadx), "=r" (deady), "+r" (sum1), "+r" (sum2), + "=r" (deadlen), "=r" (dead1), "=r" (dead2), "=r" (dead3), + "=r" (dead4), "=r" (dead5), "=r" (dead6) + : "0" (x), "1" (y), "4" (len>>3) : "cc" ); return (sum1+sum2)>>1; @@ -169,13 +170,11 @@ void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *c "\tstr %6, %13 \n" "\tstr %7, %14 \n" - : "=r" (y0), "=r" (y1), "=r" (y2), "=r" (y3), + : "+r" (y0), "+r" (y1), "+r" (y2), "+r" (y3), "=r" (part1), "=r" (part2), "=r" (part3), "=r" (part4), - "=r" (x), "=r" (y), "=r" (x0), - "=m" (sum1), "=m" (sum2), "=m" (sum3), "=m" (sum4), "=r" (dead1) - : "0" (y0), "1" (y1), "2" (y2), "3" (y3), - "8" (x), "9" (y), - "11" (sum1), "12" (sum2), "13" (sum3), "14" (sum4) + "+r" (x), 
"+r" (y), "=r" (x0), "+m" (sum1), + "+m" (sum2), "+m" (sum3), "+m" (sum4), "=r" (dead1) + : : "cc", "memory" ); } diff --git a/libspeex/ltp_bfin.h b/libspeex/ltp_bfin.h index c466902..b530f85 100644 --- a/libspeex/ltp_bfin.h +++ b/libspeex/ltp_bfin.h @@ -330,7 +330,6 @@ static int pitch_gain_search_3tap_vq( " %0 = 0;\n\t" /* %0: best_sum */ " %1 = 0;\n\t" /* %1: best_cbdk */ " P1 = 0;\n\t" /* P1: loop counter */ -" R5 = 64;\n\t" /* R5: pitch_control */ " LSETUP (pgs1, pgs2) LC1 = %4;\n\t" "pgs1: R2 = B [P0++] (X);\n\t" /* R2: g[0] */ @@ -339,6 +338,7 @@ static int pitch_gain_search_3tap_vq( " R2 += 32;\n\t" " R3 += 32;\n\t" " R4 += 32;\n\t" +" R4.H = 64;\n\t" /* R4.H: pitch_control */ " R0 = B [P0++] (X);\n\t" " B0 = R0;\n\t" /* BO: gain_sum */ @@ -349,13 +349,13 @@ static int pitch_gain_search_3tap_vq( " A0 = 0;\n\t" " R0.L = W[I1++];\n\t" -" R1.L = R2.L*R5.L (IS);\n\t" +" R1.L = R2.L*R4.H (IS);\n\t" " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" -" R1.L = R3.L*R5.L (IS);\n\t" +" R1.L = R3.L*R4.H (IS);\n\t" " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" -" R1.L = R4.L*R5.L (IS);\n\t" +" R1.L = R4.L*R4.H (IS);\n\t" " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" " R1.L = R2.L*R3.L (IS);\n\t" @@ -406,7 +406,7 @@ static int pitch_gain_search_3tap_vq( : "=&d" (best_sum), "=&d" (best_cdbk) : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain), "b" (-VERY_LARGE32) - : "R0", "R1", "R2", "R3", "R4", "R5", "P0", + : "R0", "R1", "R2", "R3", "R4", "P0", "P1", "I1", "L1", "A0", "B0" #if (__GNUC__ == 4) , "LC1" diff --git a/libspeex/math_approx.c b/libspeex/math_approx.c index d98e05b..21af766 100644 --- a/libspeex/math_approx.c +++ b/libspeex/math_approx.c @@ -37,67 +37,83 @@ #include "math_approx.h" #include "misc.h" -#ifdef FIXED_POINT - -/* sqrt(x) ~= 0.22178 + 1.29227*x - 0.77070*x^2 + 0.25723*x^3 (for .25 < x < 1) */ -#define C0 3634 -#define C1 21173 -#define C2 -12627 -#define C3 4215 - -spx_word16_t spx_sqrt(spx_word32_t x) +spx_int16_t 
spx_ilog2(spx_uint32_t x) { - int k=0; - spx_word32_t rt; - - if (x<=0) - return 0; -#if 1 - if (x>=16777216) + int r=0; + if (x>=(spx_int32_t)65536) { - x>>=10; - k+=5; + x >>= 16; + r += 16; } - if (x>=1048576) + if (x>=256) { - x>>=6; - k+=3; + x >>= 8; + r += 8; } - if (x>=262144) + if (x>=16) { - x>>=4; - k+=2; + x >>= 4; + r += 4; } - if (x>=32768) + if (x>=4) { - x>>=2; - k+=1; + x >>= 2; + r += 2; } - if (x>=16384) + if (x>=2) { - x>>=2; - k+=1; + r += 1; } -#else - while (x>=16384) + return r; +} + +spx_int16_t spx_ilog4(spx_uint32_t x) +{ + int r=0; + if (x>=(spx_int32_t)65536) { - x>>=2; - k++; - } -#endif - while (x<4096) + x >>= 16; + r += 8; + } + if (x>=256) { - x<<=2; - k--; + x >>= 8; + r += 4; } + if (x>=16) + { + x >>= 4; + r += 2; + } + if (x>=4) + { + r += 1; + } + return r; +} + +#ifdef FIXED_POINT + +/* sqrt(x) ~= 0.22178 + 1.29227*x - 0.77070*x^2 + 0.25723*x^3 (for .25 < x < 1) */ +/*#define C0 3634 +#define C1 21173 +#define C2 -12627 +#define C3 4215*/ + +/* sqrt(x) ~= 0.22178 + 1.29227*x - 0.77070*x^2 + 0.25659*x^3 (for .25 < x < 1) */ +#define C0 3634 +#define C1 21173 +#define C2 -12627 +#define C3 4204 + +spx_word16_t spx_sqrt(spx_word32_t x) +{ + int k; + spx_word32_t rt; + k = spx_ilog4(x)-6; + x = VSHR32(x, (k<<1)); rt = ADD16(C0, MULT16_16_Q14(x, ADD16(C1, MULT16_16_Q14(x, ADD16(C2, MULT16_16_Q14(x, (C3))))))); - if (rt > 16383) - rt = 16383; - if (k>0) - rt <<= k; - else - rt >>= -k; - rt >>=7; + rt = VSHR32(rt,7-k); return rt; } @@ -151,6 +167,101 @@ spx_word16_t spx_cos(spx_word16_t x) } } +#define L1 32767 +#define L2 -7651 +#define L3 8277 +#define L4 -626 + +static inline spx_word16_t _spx_cos_pi_2(spx_word16_t x) +{ + spx_word16_t x2; + + x2 = MULT16_16_P15(x,x); + return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2)))))))); +} + +spx_word16_t spx_cos_norm(spx_word32_t x) +{ + x = x&0x0001ffff; + if (x>SHL32(EXTEND32(1), 16)) + x = 
SUB32(SHL32(EXTEND32(1), 17),x); + if (x&0x00007fff) + { + if (x<SHL32(EXTEND32(1), 15)) + { + return _spx_cos_pi_2(EXTRACT16(x)); + } else { + return NEG32(_spx_cos_pi_2(EXTRACT16(65536-x))); + } + } else { + if (x&0x0000ffff) + return 0; + else if (x&0x0001ffff) + return -32767; + else + return 32767; + } +} + +/* + K0 = 1 + K1 = log(2) + K2 = 3-4*log(2) + K3 = 3*log(2) - 2 +*/ +#define D0 16384 +#define D1 11356 +#define D2 3726 +#define D3 1301 +/* Input in Q11 format, output in Q16 */ +static spx_word32_t spx_exp2(spx_word16_t x) +{ + int integer; + spx_word16_t frac; + integer = SHR16(x,11); + if (integer>14) + return 0x7fffffff; + else if (integer < -15) + return 0; + frac = SHL16(x-SHL16(integer,11),3); + frac = ADD16(D0, MULT16_16_Q14(frac, ADD16(D1, MULT16_16_Q14(frac, ADD16(D2 , MULT16_16_Q14(D3,frac)))))); + return VSHR32(EXTEND32(frac), -integer-2); +} + +/* Input in Q11 format, output in Q16 */ +spx_word32_t spx_exp(spx_word16_t x) +{ + if (x>21290) + return 0x7fffffff; + else if (x<-21290) + return 0; + else + return spx_exp2(MULT16_16_P14(23637,x)); +} +#define M1 32767 +#define M2 -21 +#define M3 -11943 +#define M4 4936 + +static inline spx_word16_t spx_atan01(spx_word16_t x) +{ + return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); +} + +/* Input in Q15, output in Q14 */ +spx_word16_t spx_atan(spx_word32_t x) +{ + if (x <= 32767) + { + return SHR16(spx_atan01(x),1); + } else { + int e = spx_ilog2(x); + if (e>=29) + return 25736; + x = DIV32_16(SHL32(EXTEND32(32767),29-e), EXTRACT16(SHR32(x, e-14))); + return SUB16(25736, SHR16(spx_atan01(x),1)); + } +} #else #ifndef M_PI @@ -177,5 +288,4 @@ spx_word16_t spx_cos(spx_word16_t x) } } - #endif diff --git a/libspeex/math_approx.h b/libspeex/math_approx.h index 377bf1a..49cfda6 100644 --- a/libspeex/math_approx.h +++ b/libspeex/math_approx.h @@ -38,13 +38,25 @@ #include "misc.h" spx_word16_t spx_cos(spx_word16_t x); - +spx_int16_t 
spx_ilog2(spx_uint32_t x); +spx_int16_t spx_ilog4(spx_uint32_t x); #ifdef FIXED_POINT spx_word16_t spx_sqrt(spx_word32_t x); spx_word16_t spx_acos(spx_word16_t x); +spx_word32_t spx_exp(spx_word16_t x); +spx_word16_t spx_cos_norm(spx_word32_t x); + +/* Input in Q15, output in Q14 */ +spx_word16_t spx_atan(spx_word32_t x); + #else + #define spx_sqrt sqrt #define spx_acos acos +#define spx_exp exp +#define spx_cos_norm(x) (cos((.5f*M_PI)*(x))) +#define spx_atan atan + #endif #endif diff --git a/libspeex/mdf.c b/libspeex/mdf.c index 3d79383..014ea25 100644 --- a/libspeex/mdf.c +++ b/libspeex/mdf.c @@ -41,8 +41,8 @@ double-talk is achieved using a variable learning rate as described in: Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo - Cancellation With Double-Talk. To appear in IEEE Transactions on Audio, - Speech and Language Processing, 2006. + Cancellation With Double-Talk. IEEE Transactions on Audio, + Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. http://people.xiph.org/~jm/papers/valin_taslp2006.pdf There is no explicit double-talk detection, but a continuous variation @@ -79,9 +79,6 @@ #define M_PI 3.14159265358979323846 #endif -#define min(a,b) ((a)<(b) ? (a) : (b)) -#define max(a,b) ((a)>(b) ? (a) : (b)) - #ifdef FIXED_POINT #define WEIGHT_SHIFT 11 #define NORMALIZE_SCALEDOWN 5 @@ -93,16 +90,40 @@ /* If enabled, the transition between blocks is smooth, so there isn't any blocking aftifact when adapting. The cost is an extra FFT and a matrix-vector multiply */ #define SMOOTH_BLOCKS +/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk + and difficult signals in general. 
The cost is an extra FFT and a matrix-vector multiply */ +#define TWO_PATH #ifdef FIXED_POINT -static const spx_float_t MIN_LEAK = {16777, -19}; +static const spx_float_t MIN_LEAK = {20972, -22}; + +/* Constants for the two-path filter */ +static const spx_float_t VAR1_SMOOTH = {23593, -16}; +static const spx_float_t VAR2_SMOOTH = {23675, -15}; +static const spx_float_t VAR1_UPDATE = {16384, -15}; +static const spx_float_t VAR2_UPDATE = {16384, -16}; +static const spx_float_t VAR_BACKTRACK = {16384, -12}; #define TOP16(x) ((x)>>16) + #else -static const spx_float_t MIN_LEAK = .032f; + +static const spx_float_t MIN_LEAK = .0032f; + +/* Constants for the two-path filter */ +static const spx_float_t VAR1_SMOOTH = .36f; +static const spx_float_t VAR2_SMOOTH = .7225f; +static const spx_float_t VAR1_UPDATE = .5f; +static const spx_float_t VAR2_UPDATE = .25f; +static const spx_float_t VAR_BACKTRACK = 4.f; #define TOP16(x) (x) #endif +#define PLAYBACK_DELAY 2 + +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); + + /** Speex echo cancellation state. 
*/ struct SpeexEchoState_ { int frame_size; /**< Number of samples processed each time */ @@ -111,6 +132,7 @@ struct SpeexEchoState_ { int cancel_count; int adapted; int saturated; + int screwed_up; int C; /** Number of input channels (microphones) */ int K; /** Number of output channels (loudspeakers) */ spx_int32_t sampling_rate; @@ -118,30 +140,38 @@ struct SpeexEchoState_ { spx_word16_t beta0; spx_word16_t beta_max; spx_word32_t sum_adapt; - spx_word16_t *e; - spx_word16_t *x; - spx_word16_t *X; - spx_word16_t *d; - spx_word16_t *y; + spx_word16_t leak_estimate; + + spx_word16_t *e; /* scratch */ + spx_word16_t *x; /* Far-end input buffer (2N) */ + spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ + spx_word16_t *input; /* scratch */ + spx_word16_t *y; /* scratch */ spx_word16_t *last_y; - spx_word32_t *Yps; - spx_word16_t *Y; + spx_word16_t *Y; /* scratch */ spx_word16_t *E; - spx_word32_t *PHI; - spx_word32_t *W; - spx_word32_t *power; - spx_float_t *power_1; - spx_word16_t *wtmp; + spx_word32_t *PHI; /* scratch */ + spx_word32_t *W; /* (Background) filter weights */ +#ifdef TWO_PATH + spx_word16_t *foreground; /* Foreground filter weights */ + spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ + spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ + spx_float_t Dvar1; /* Estimated variance of 1st estimator */ + spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ +#endif + spx_word32_t *power; /* Power of the far-end signal */ + spx_float_t *power_1;/* Inverse power of far-end */ + spx_word16_t *wtmp; /* scratch */ #ifdef FIXED_POINT - spx_word16_t *wtmp2; + spx_word16_t *wtmp2; /* scratch */ #endif - spx_word32_t *Rf; - spx_word32_t *Yf; - spx_word32_t *Xf; + spx_word32_t *Rf; /* scratch */ + spx_word32_t *Yf; /* scratch */ + spx_word32_t *Xf; /* scratch */ spx_word32_t *Eh; spx_word32_t *Yh; - spx_float_t Pey; - spx_float_t Pyy; + spx_float_t Pey; + spx_float_t Pyy; 
spx_word16_t *window; spx_word16_t *prop; void *fft_table; @@ -153,6 +183,7 @@ struct SpeexEchoState_ { /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; int play_buf_pos; + int play_buf_started; }; static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) @@ -179,6 +210,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, } } +/* This inner product is slightly different from the codec version because of fixed-point */ static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum=0; @@ -247,6 +279,34 @@ static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t } acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); } +static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M) +{ + int i,j; + spx_word32_t tmp1=0,tmp2=0; + for (j=0;j<M;j++) + { + tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]); + } + acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); + for (i=1;i<N-1;i+=2) + { + tmp1 = tmp2 = 0; + for (j=0;j<M;j++) + { + tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1])); + tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]); + } + acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); + acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); + } + tmp1 = tmp2 = 0; + for (j=0;j<M;j++) + { + tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]); + } + acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); +} + #else static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) { @@ -266,21 +326,73 @@ static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t Y += N; } } +#define spectral_mul_accum16 spectral_mul_accum #endif /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ -static inline 
void weighted_spectral_mul_conj(const spx_float_t *w, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) +static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) { int i, j; - prod[0] = FLOAT_MUL32(w[0],MULT16_16(X[0],Y[0])); + spx_float_t W; + W = FLOAT_AMULT(p, w[0]); + prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); for (i=1,j=1;i<N-1;i+=2,j++) { - prod[i] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); - prod[i+1] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); + W = FLOAT_AMULT(p, w[j]); + prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); + prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); + } + W = FLOAT_AMULT(p, w[j]); + prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); +} + +static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) +{ + int i, j, p; + spx_word16_t max_sum = 1; + spx_word32_t prop_sum = 1; + for (i=0;i<M;i++) + { + spx_word32_t tmp = 1; + for (p=0;p<P;p++) + for (j=0;j<N;j++) + tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); +#ifdef FIXED_POINT + /* Just a security in case an overflow were to occur */ + tmp = MIN32(ABS32(tmp), 536870912); +#endif + prop[i] = spx_sqrt(tmp); + if (prop[i] > max_sum) + max_sum = prop[i]; + } + for (i=0;i<M;i++) + { + prop[i] += MULT16_16_Q15(QCONST16(.03f,15),max_sum); + prop_sum += EXTEND32(prop[i]); + } + for (i=0;i<M;i++) + { + prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); + /*printf ("%f ", prop[i]);*/ } - prod[i] = FLOAT_MUL32(w[j],MULT16_16(X[i],Y[i])); + /*printf ("\n");*/ } +#ifdef DUMP_ECHO_CANCEL_DATA +#include <stdio.h> +static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL; + +static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len) +{ + if 
(!(rFile && pFile && oFile)) + { + speex_error("Dump files not open"); + } + fwrite(rec, sizeof(spx_int16_t), len, rFile); + fwrite(play, sizeof(spx_int16_t), len, pFile); + fwrite(out, sizeof(spx_int16_t), len, oFile); +} +#endif /** Creates a new echo canceller state */ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic, int nb_speakers) @@ -292,6 +404,14 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->C = nb_mic; C=st->C; K=st->K; +#ifdef DUMP_ECHO_CANCEL_DATA + if (rFile || pFile || oFile) + speex_error("Opening dump files twice"); + rFile = fopen("aec_rec.sw", "w"); + pFile = fopen("aec_play.sw", "w"); + oFile = fopen("aec_out.sw", "w"); +#endif + st->frame_size = frame_size; st->window_size = 2*frame_size; N = st->window_size; @@ -299,7 +419,8 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->cancel_count=0; st->sum_adapt = 0; st->saturated = 0; - /* FIXME: Make that an init option (new API call?) 
*/ + st->screwed_up = 0; + /* This is the default sampling rate */ st->sampling_rate = 8000; st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT @@ -309,14 +430,14 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif + st->leak_estimate = 0; st->fft_table = spx_fft_init(N); st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); - st->d = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->Yps = (spx_word32_t*)speex_alloc(C*N*sizeof(spx_word32_t)); st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); @@ -328,6 +449,9 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); +#ifdef TWO_PATH + st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); +#endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); @@ -349,12 +473,10 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->power_1[i] = FLOAT_ONE; for (i=0;i<N*M*K*C;i++) st->W[i] = 0; - for (i=0;i<N;i++) - st->PHI[i] = 0; { spx_word32_t sum = 0; /* Ratio of ~10 between adaptation rate of first and last block */ 
- spx_word16_t decay = QCONST16(exp(-2.4/M),15); + spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); st->prop[0] = QCONST16(.7, 15); sum = EXTEND32(st->prop[0]); for (i=1;i<M;i++) @@ -364,7 +486,7 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic } for (i=M-1;i>=0;i--) { - st->prop[i] = DIV32(SHL32(EXTEND32(st->prop[i]),15),sum); + st->prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), st->prop[i]),sum); } } @@ -383,9 +505,15 @@ SpeexEchoState *mc_echo_state_init(int frame_size, int filter_length, int nb_mic st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf = (spx_int16_t*)speex_alloc(K*2*st->frame_size*sizeof(spx_int16_t)); - st->play_buf_pos = 0; - +#ifdef TWO_PATH + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; +#endif + + st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; + return st; } @@ -394,26 +522,57 @@ void speex_echo_state_reset(SpeexEchoState *st) { int i, M, N, C, K; st->cancel_count=0; + st->screwed_up = 0; N = st->window_size; M = st->M; C=st->C; K=st->K; for (i=0;i<N*M;i++) st->W[i] = 0; +#ifdef TWO_PATH + for (i=0;i<N*M;i++) + st->foreground[i] = 0; +#endif for (i=0;i<N*(M+1);i++) st->X[i] = 0; for (i=0;i<=st->frame_size;i++) + { st->power[i] = 0; - for (i=0;i<N;i++) + st->power_1[i] = FLOAT_ONE; + st->Eh[i] = 0; + st->Yh[i] = 0; + } + for (i=0;i<st->frame_size;i++) + { + st->last_y[i] = 0; + } + for (i=0;i<N*C;i++) + { st->E[i] = 0; + } + for (i=0;i<N*K;i++) + { + st->x[i] = 0; + } for (i=0;i<2*C;i++) st->notch_mem[i] = 0; - + for (i=0;i<C;i++) + st->memD[i]=st->memE[i]=0; + for (i=0;i<K;i++) + st->memX[i]=0; + st->saturated = 0; st->adapted = 0; st->sum_adapt = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf_pos = 0; +#ifdef TWO_PATH + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; +#endif + for 
(i=0;i<3*st->frame_size;i++) + st->play_buf[i] = 0; + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; } @@ -424,10 +583,9 @@ void mc_echo_state_destroy(SpeexEchoState *st) speex_free(st->e); speex_free(st->x); - speex_free(st->d); + speex_free(st->input); speex_free(st->y); speex_free(st->last_y); - speex_free(st->Yps); speex_free(st->Yf); speex_free(st->Rf); speex_free(st->Xf); @@ -438,6 +596,9 @@ void mc_echo_state_destroy(SpeexEchoState *st) speex_free(st->Y); speex_free(st->E); speex_free(st->W); +#ifdef TWO_PATH + speex_free(st->foreground); +#endif speex_free(st->PHI); speex_free(st->power); speex_free(st->power_1); @@ -449,19 +610,29 @@ void mc_echo_state_destroy(SpeexEchoState *st) #endif speex_free(st->play_buf); speex_free(st); + +#ifdef DUMP_ECHO_CANCEL_DATA + fclose(rFile); + fclose(pFile); + fclose(oFile); + rFile = pFile = oFile = NULL; +#endif } -void mc_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) + +void mc_echo_capture2(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) { int i; + /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ + st->play_buf_started = 1; if (st->play_buf_pos>=st->frame_size) { - mc_echo_cancel(st, rec, st->play_buf, out, Yout); + mc_echo_cancellation(st, rec, st->play_buf, out); st->play_buf_pos -= st->frame_size; - for (i=0;i<st->frame_size;i++) + for (i=0;i<st->play_buf_pos;i++) st->play_buf[i] = st->play_buf[i+st->frame_size]; } else { - speex_warning("no playback frame available"); + speex_warning("No playback frame available (your application is buggy and/or got xruns)"); if (st->play_buf_pos!=0) { speex_warning("internal playback buffer corruption?"); @@ -474,23 +645,47 @@ void mc_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *ou void mc_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { - if (st->play_buf_pos<=st->frame_size) + /*speex_warning_int("playback with fill 
level ", st->play_buf_pos/st->frame_size);*/ + if (!st->play_buf_started) + { + speex_warning("discarded first playback frame"); + return; + } + if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) { int i; for (i=0;i<st->frame_size;i++) st->play_buf[st->play_buf_pos+i] = play[i]; st->play_buf_pos += st->frame_size; + if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) + { + speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); + for (i=0;i<st->frame_size;i++) + st->play_buf[st->play_buf_pos+i] = play[i]; + st->play_buf_pos += st->frame_size; + } } else { - speex_warning("had to discard a playback frame"); + speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); } } /** Performs echo cancellation on a frame */ -void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) +void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) +{ + mc_echo_cancellation(st, in, far_end, out); +} + +/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ +void mc_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { int i,j, chan, speak; int N,M, C, K; - spx_word16_t leak_estimate; + spx_word32_t Syy,See,Sxx,Sdd, Sff; +#ifdef TWO_PATH + spx_word32_t Dbf; + int update_foreground; +#endif + spx_word32_t Sey; spx_word16_t ss, ss_1; spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; spx_float_t alpha, alpha_1; @@ -501,7 +696,6 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ M = st->M; C = st->C; K = st->K; - spx_word32_t Syy=0,See=0,Sxx=0; st->cancel_count++; #ifdef FIXED_POINT @@ -514,29 +708,31 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ for (chan = 0; chan < C; chan++) { - filter_dc_notch16(ref+chan, st->notch_radius, 
st->d+chan*N, st->frame_size, st->notch_mem+2*chan, C); + /* Apply a notch filter to make sure DC doesn't end up causing problems */ + filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); + /* Copy input data to buffer and apply pre-emphasis */ /* Copy input data to buffer */ for (i=0;i<st->frame_size;i++) { - spx_word16_t tmp; spx_word32_t tmp32; - tmp = st->d[chan*N+i]; - st->d[chan*N+i] = st->d[chan*N+i+st->frame_size]; - tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); + /* FIXME: This core has changed a bit, need to merge properly */ + tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); #ifdef FIXED_POINT if (tmp32 > 32767) { tmp32 = 32767; - st->saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } if (tmp32 < -32767) { tmp32 = -32767; - st->saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } #endif - st->d[chan*N+i+st->frame_size] = tmp32; - st->memD[chan] = tmp; + st->memD[chan] = st->input[chan*st->frame_size+i]; + st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); } } @@ -544,25 +740,24 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ { for (i=0;i<st->frame_size;i++) { - spx_word16_t tmp; spx_word32_t tmp32; st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; - tmp32 = SUB32(EXTEND32(echo[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); + tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); #ifdef FIXED_POINT /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ if (tmp32 > 32767) { tmp32 = 32767; - st->saturated = 1; + st->saturated = M+1; } if (tmp32 < -32767) { tmp32 = -32767; - st->saturated = 1; + st->saturated = M+1; } #endif st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); - st->memX[speak] = echo[i*K+speak]; + 
st->memX[speak] = far_end[i*K+speak]; } } @@ -578,16 +773,32 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); } + Sxx = 0; + for (speak = 0; speak < K; speak++) + { + Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); + power_spectrum_accum(st->X+speak*N, st->Xf, N); + } + + Sff = 0; for (chan = 0; chan < C; chan++) { -#ifdef SMOOTH_BLOCKS - spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); +#ifdef TWO_PATH + /* Compute foreground filter */ + spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); + Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); #endif } + /* Adjust proportional adaption rate */ + /* FIXME: Adjust that for C, K*/ + if (st->adapted) + mdf_adjust_prop (st->W, N, M, C*K, st->prop); /* Compute weight gradient */ - if (!st->saturated) + if (st->saturated == 0) { for (chan = 0; chan < C; chan++) { @@ -595,16 +806,16 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ { for (j=M-1;j>=0;j--) { - weighted_spectral_mul_conj(st->power_1, &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); for (i=0;i<N;i++) - st->W[chan*N*K*M + j*N*K + speak*N + i] += MULT16_32_Q15(st->prop[j], st->PHI[i]); + st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; } } - } + } + } else { + st->saturated--; } - st->saturated = 0; - /* FIXME: MC conversion required */ /* Update weight to prevent circular convolution (MDF / AUMDF) */ for (chan = 0; chan < C; chan++) @@ -649,52 +860,141 @@ void mc_echo_cancel(SpeexEchoState *st, const 
spx_int16_t *ref, const spx_int16_ /* So we can use power_spectrum_accum */ for (i=0;i<=st->frame_size;i++) st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; - + + Dbf = 0; + See = 0; +#ifdef TWO_PATH + /* Difference in response, this is used to estimate the variance of our residual power estimate */ for (chan = 0; chan < C; chan++) { - /* Compute filter response Y */ spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); + Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); + See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + } +#endif + +#ifndef TWO_PATH + Sff = See; +#endif +#ifdef TWO_PATH + /* Logic for updating the foreground filter */ + + /* For two time windows, compute the mean of the energy difference, as well as the variance */ + st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); + st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); + st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); + st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); + + /* Equivalent float code: + st->Davg1 = .6*st->Davg1 + .4*(Sff-See); + st->Davg2 = .85*st->Davg2 + .15*(Sff-See); + st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; + st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; + */ + + update_foreground = 0; + /* Check if we have a statistically significant reduction in the residual echo */ + /* Note that this is *not* Gaussian, so we need to be 
careful about the longer tail */ + if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) + update_foreground = 1; + else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) + update_foreground = 1; + else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) + update_foreground = 1; + /* Do we update? */ + if (update_foreground) + { + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; + /* Copy background filter to foreground filter */ + for (i=0;i<N*M*C*K;i++) + st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); + /* Apply a smooth transition so as to not introduce blocking artifacts */ + for (chan = 0; chan < C; chan++) + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); + } else { + int reset_background=0; + /* Otherwise, check if the background filter is significantly worse */ + if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) + reset_background = 1; + if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) + reset_background = 1; + if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) + reset_background = 1; + if (reset_background) + { + /* Copy foreground filter to background filter */ + for (i=0;i<N*M*C*K;i++) + st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); + /* We also need to copy the output so as to get correct adaptation */ + for (chan = 0; chan < C; chan++) + { + for (i=0;i<st->frame_size;i++) + st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; + for (i=0;i<st->frame_size;i++) + st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); + } + See = Sff; + 
st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; + } + } +#endif + + Sey = Syy = Sdd = 0; + for (chan = 0; chan < C; chan++) + { /* Compute error signal (for the output with de-emphasis) */ for (i=0;i<st->frame_size;i++) { spx_word32_t tmp_out; -#ifdef SMOOTH_BLOCKS - spx_word16_t y = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); - tmp_out = SUB32(EXTEND32(st->d[chan*N+i+st->frame_size]), EXTEND32(y)); +#ifdef TWO_PATH + tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); #else - tmp_out = SUB32(EXTEND32(st->d[chan*N+i+st->frame_size]), EXTEND32(st->y[chan*N+i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); #endif - /* Saturation */ if (tmp_out>32767) tmp_out = 32767; else if (tmp_out<-32768) tmp_out = -32768; tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); - /* This is an arbitrary test for saturation */ - if (ref[i*C+chan] <= -32000 || ref[i*C+chan] >= 32000) + /* This is an arbitrary test for saturation in the microphone signal */ + if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) { tmp_out = 0; + if (st->saturated == 0) st->saturated = 1; } out[i*C+chan] = (spx_int16_t)tmp_out; st->memE[chan] = tmp_out; } - + +#ifdef DUMP_ECHO_CANCEL_DATA + dump_audio(in, far_end, out, st->frame_size); +#endif + /* Compute error signal (filter update version) */ for (i=0;i<st->frame_size;i++) { + st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; st->e[chan*N+i] = 0; - st->e[chan*N+i+st->frame_size] = st->d[chan*N+i+st->frame_size] - st->y[chan*N+i+st->frame_size]; } /* Compute a bunch of correlations */ - See += mdf_inner_prod(st->e+chan*N+st->frame_size, st->e+chan*N+st->frame_size, st->frame_size); + /* FIXME: bad merge */ + Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, 
st->frame_size); Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); - + Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); + /* Convert error to frequency domain */ spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); for (i=0;i<st->frame_size;i++) @@ -704,16 +1004,48 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ power_spectrum_accum(st->E+chan*N, st->Rf, N); power_spectrum_accum(st->Y+chan*N, st->Yf, N); + } - See = ADD32(See, SHR32(EXTEND32(10000),6)); + /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ + + /* Do some sanity check */ + if (!(Syy>=0 && Sxx>=0 && See >= 0) +#ifndef FIXED_POINT + || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) +#endif + ) + { + /* Things have gone really bad */ + st->screwed_up += 50; + for (i=0;i<st->frame_size*C;i++) + out[i] = 0; + } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) + { + /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ + st->screwed_up++; + } else { + /* Everything's fine */ + st->screwed_up=0; + } + if (st->screwed_up>=50) + { + speex_warning("The echo canceller started acting funny and got slapped (reset). 
It swears it will behave now."); + speex_echo_state_reset(st); + return; + } + + /* Add a small noise floor to make sure not to have problems when dividing */ + See = MAX32(See, SHR32(MULT16_16(N, 100),6)); + for (speak = 0; speak < K; speak++) { Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); power_spectrum_accum(st->X+speak*N, st->Xf, N); } + - /* Smooth echo energy estimate over time */ + /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); @@ -754,40 +1086,53 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy; /* leak_estimate is the linear regression result */ - leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); + st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ - if (leak_estimate > 16383) - leak_estimate = 32767; + if (st->leak_estimate > 16383) + st->leak_estimate = 32767; else - leak_estimate = SHL16(leak_estimate,1); - /*printf ("%f\n", leak_estimate);*/ + st->leak_estimate = SHL16(st->leak_estimate,1); + /*printf ("%f\n", st->leak_estimate);*/ /* Compute Residual to Error Ratio */ #ifdef FIXED_POINT - tmp32 = MULT16_32_Q15(leak_estimate,Syy); - tmp32 = ADD32(tmp32, SHL32(tmp32,1)); + tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); + tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); + /* Check for y in e (lower bound on RER) */ + { + spx_float_t bound = PSEUDOFLOAT(Sey); + bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); + if (FLOAT_GT(bound, PSEUDOFLOAT(See))) + tmp32 = See; + else if (tmp32 < FLOAT_EXTRACT32(bound)) + tmp32 = FLOAT_EXTRACT32(bound); + } if (tmp32 > SHR32(See,1)) tmp32 = SHR32(See,1); RER = 
FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); -#else - RER = (.0001*Sxx + 3.*MULT16_32_Q15(leak_estimate,Syy)) / See; +#else + RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; + /* Check for y in e (lower bound on RER) */ + if (RER < Sey*Sey/(1+See*Syy)) + RER = Sey*Sey/(1+See*Syy); if (RER > .5) RER = .5; #endif /* We consider that the filter has had minimal adaptation if the following is true*/ - if (!st->adapted && st->sum_adapt > QCONST32(1,15)) + if (!st->adapted && st->sum_adapt > QCONST32(M,15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) { st->adapted = 1; } if (st->adapted) { + /* Normal learning rate calculation once we're past the minimal adaptation phase */ for (i=0;i<=st->frame_size;i++) { spx_word32_t r, e; /* Compute frequency-domain adaptation mask */ - r = MULT16_32_Q15(leak_estimate,SHL32(st->Yf[i],3)); + r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); e = SHL32(st->Rf[i],3)+1; #ifdef FIXED_POINT if (r>SHR32(e,1)) @@ -804,16 +1149,18 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ /* Temporary adaption rate if filter is not yet adapted enough */ spx_word16_t adapt_rate=0; - tmp32 = MULT16_32_Q15(QCONST16(.15f, 15), Sxx); + if (Sxx > SHR32(MULT16_16(N, 1000),6)) + { + tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); #ifdef FIXED_POINT - if (Sxx > SHR32(See,2)) - Sxx = SHR32(See,2); + if (tmp32 > SHR32(See,2)) + tmp32 = SHR32(See,2); #else - if (Sxx > .25*See) - Sxx = .25*See; + if (tmp32 > .25*See) + tmp32 = .25*See; #endif - adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(Sxx, See),15)); - + adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); + } for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); @@ -823,49 +1170,55 @@ void mc_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_ } /* FIXME: MC conversion required */ - /* 
Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/ - if (Yout) - { - spx_word16_t leak2; for (i=0;i<st->frame_size;i++) st->last_y[i] = st->last_y[st->frame_size+i]; - if (st->adapted) - { - /* If the filter is adapted, take the filtered echo */ - for (i=0;i<st->frame_size;i++) - st->last_y[st->frame_size+i] = ref[i]-out[i]; - } else { - /* If filter isn't adapted yet, all we can do is take the echo signal directly */ - for (i=0;i<st->frame_size;i++) - st->last_y[st->frame_size+i] = echo[i]; - } - - /* Apply hanning window (should pre-compute it)*/ - for (i=0;i<N;i++) - st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); + if (st->adapted) + { + /* If the filter is adapted, take the filtered echo */ + for (i=0;i<st->frame_size;i++) + st->last_y[st->frame_size+i] = in[i]-out[i]; + } else { + /* If filter isn't adapted yet, all we can do is take the far end signal directly */ + /* moved earlier: for (i=0;i<N;i++) + st->last_y[i] = st->x[i];*/ + } + +} + +/* Compute spectrum of estimated echo for use in an echo post-filter */ +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) +{ + int i; + spx_word16_t leak2; + int N; + + N = st->window_size; + + /* Apply hanning window (should pre-compute it)*/ + for (i=0;i<N;i++) + st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); - /* Compute power spectrum of the echo */ - spx_fft(st->fft_table, st->y, st->Y); - power_spectrum(st->Y, st->Yps, N); + /* Compute power spectrum of the echo */ + spx_fft(st->fft_table, st->y, st->Y); + power_spectrum(st->Y, residual_echo, N); #ifdef FIXED_POINT - if (leak_estimate > 16383) - leak2 = 32767; - else - leak2 = SHL16(leak_estimate, 1); + if (st->leak_estimate > 16383) + leak2 = 32767; + else + leak2 = SHL16(st->leak_estimate, 1); #else - if (leak_estimate>.5) - leak2 = 1; - else - leak2 = 2*leak_estimate; + if (st->leak_estimate>.5) + leak2 = 1; + else + leak2 = 2*st->leak_estimate; #endif - /* Estimate residual 
echo */ - for (i=0;i<=st->frame_size;i++) - Yout[i] = (spx_int32_t)MULT16_32_Q15(leak2,st->Yps[i]); - } + /* Estimate residual echo */ + for (i=0;i<=st->frame_size;i++) + residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); + } - int mc_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) diff --git a/libspeex/misc.c b/libspeex/misc.c index 53bdd0b..df44d86 100644 --- a/libspeex/misc.c +++ b/libspeex/misc.c @@ -63,74 +63,6 @@ long long spx_mips=0; #endif -spx_uint32_t be_int(spx_uint32_t i) -{ - spx_uint32_t ret=i; -#ifndef WORDS_BIGENDIAN - ret = i>>24; - ret += (i>>8)&0x0000ff00; - ret += (i<<8)&0x00ff0000; - ret += (i<<24); -#endif - return ret; -} - -spx_uint32_t le_int(spx_uint32_t i) -{ - spx_uint32_t ret=i; -#ifdef WORDS_BIGENDIAN - ret = i>>24; - ret += (i>>8)&0x0000ff00; - ret += (i<<8)&0x00ff0000; - ret += (i<<24); -#endif - return ret; -} - -#if BYTES_PER_CHAR == 2 -void speex_memcpy_bytes(char *dst, char *src, int nbytes) -{ - int i; - int nchars = nbytes/BYTES_PER_CHAR; - for (i=0;i<nchars;i++) - dst[i]=src[i]; - if (nbytes & 1) { - /* copy in the last byte */ - int last_i = nchars; - char last_dst_char = dst[last_i]; - char last_src_char = src[last_i]; - last_dst_char &= 0xff00; - last_dst_char |= (last_src_char & 0x00ff); - dst[last_i] = last_dst_char; - } -} -void speex_memset_bytes(char *dst, char c, int nbytes) -{ - int i; - spx_int16_t cc = ((c << 8) | c); - int nchars = nbytes/BYTES_PER_CHAR; - for (i=0;i<nchars;i++) - dst[i]=cc; - if (nbytes & 1) { - /* copy in the last byte */ - int last_i = nchars; - char last_dst_char = dst[last_i]; - last_dst_char &= 0xff00; - last_dst_char |= (c & 0x00ff); - dst[last_i] = last_dst_char; - } -} -#else -void speex_memcpy_bytes(char *dst, char *src, int nbytes) -{ - memcpy(dst, src, nbytes); -} -void speex_memset_bytes(char *dst, char src, int nbytes) -{ - memset(dst, src, nbytes); -} -#endif - #ifndef OVERRIDE_SPEEX_ALLOC void *speex_alloc (int size) { @@ -176,7 +108,7 @@ 
void *speex_move (void *dest, void *src, int n) #ifndef OVERRIDE_SPEEX_ERROR void speex_error(const char *str) { - fprintf (stderr, "Fatal error: %s\n", str); + fprintf (stderr, "Fatal (internal) error: %s\n", str); exit(1); } #endif @@ -184,14 +116,27 @@ void speex_error(const char *str) #ifndef OVERRIDE_SPEEX_WARNING void speex_warning(const char *str) { +#ifndef DISABLE_WARNINGS fprintf (stderr, "warning: %s\n", str); +#endif } #endif #ifndef OVERRIDE_SPEEX_WARNING_INT void speex_warning_int(const char *str, int val) { +#ifndef DISABLE_WARNINGS fprintf (stderr, "warning: %s %d\n", str, val); +#endif +} +#endif + +#ifndef OVERRIDE_SPEEX_NOTIFY +void speex_notify(const char *str) +{ +#ifndef DISABLE_NOTIFICATIONS + fprintf (stderr, "notification: %s\n", str); +#endif } #endif @@ -201,7 +146,7 @@ spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed) spx_word32_t res; *seed = 1664525 * *seed + 1013904223; res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std); - return PSHR32(SUB32(res, SHR(res, 3)),14); + return EXTRACT16(PSHR32(SUB32(res, SHR32(res, 3)),14)); } #else spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed) diff --git a/libspeex/misc.h b/libspeex/misc.h index 4c70980..c6ea9e7 100644 --- a/libspeex/misc.h +++ b/libspeex/misc.h @@ -40,7 +40,7 @@ #define SPEEX_MINOR_VERSION 1 /**< Minor Speex version. */ #define SPEEX_MICRO_VERSION 13 /**< Micro Speex version. */ #define SPEEX_EXTRA_VERSION "" /**< Extra Speex version. */ -#define SPEEX_VERSION "speex-1.2-beta1" /**< Speex version string. */ +#define SPEEX_VERSION "speex-1.2beta1" /**< Speex version string. 
*/ #endif /* A couple test to catch stupid option combinations */ @@ -75,10 +75,21 @@ void print_vec(float *vec, int len, char *name); #endif -/** Convert big endian */ -spx_uint32_t be_int(spx_uint32_t i); /** Convert little endian */ -spx_uint32_t le_int(spx_uint32_t i); +static inline spx_int32_t le_int(spx_int32_t i) +{ +#ifdef WORDS_BIGENDIAN + spx_uint32_t ui, ret; + ui = i; + ret = ui>>24; + ret |= (ui>>8)&0x0000ff00; + ret |= (ui<<8)&0x00ff0000; + ret |= (ui<<24); + return ret; +#else + return i; +#endif +} /** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free */ void *speex_alloc (int size); @@ -98,21 +109,18 @@ void speex_free_scratch (void *ptr); /** Speex wrapper for mem_move */ void *speex_move (void *dest, void *src, int n); -/** Speex wrapper for memcpy */ -void speex_memcpy_bytes(char *dst, char *src, int nbytes); - -/** Speex wrapper for memset */ -void speex_memset_bytes(char *dst, char src, int nbytes); - -/** Print error message to stderr */ +/** Abort with an error message to stderr (internal Speex error) */ void speex_error(const char *str); -/** Print warning message to stderr */ +/** Print warning message to stderr (programming error) */ void speex_warning(const char *str); /** Print warning message with integer argument to stderr */ void speex_warning_int(const char *str, int val); +/** Print notification message to stderr */ +void speex_notify(const char *str); + /** Generate a random number */ spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed); diff --git a/libspeex/modes.c b/libspeex/modes.c index 97e7d1e..9a1fe9c 100644 --- a/libspeex/modes.c +++ b/libspeex/modes.c @@ -495,7 +495,7 @@ static const SpeexSBMode sb_wb_mode = { #endif .012, /*lag_factor*/ QCONST16(.0002,15), /*lpc_floor*/ - 0.9, + QCONST16(0.9f,15), {NULL, &wb_submode1, &wb_submode2, &wb_submode3, &wb_submode4, NULL, NULL, NULL}, 3, {1, 8, 2, 3, 4, 5, 5, 6, 6, 7, 7}, @@ -541,7 
+541,7 @@ static const SpeexSBMode sb_uwb_mode = { #endif .012, /*lag_factor*/ QCONST16(.0002,15), /*lpc_floor*/ - 0.7, + QCONST16(0.7f,15), {NULL, &wb_submode1, NULL, NULL, NULL, NULL, NULL, NULL}, 1, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, @@ -608,11 +608,7 @@ static const SpeexSubmode nb_48k_submode = { split_cb_search_shape_sign, split_cb_shape_sign_unquant, &split_cb_nb_48k, -#ifdef FIXED_POINT - 22938, 16384, 11796, 18022, -#else - 0.7, 0.5, .36, .55, -#endif + QCONST16(.7,15), 144 }; @@ -622,7 +618,6 @@ static const SpeexNBMode nb_48k_mode = { 240, /*frameSize*/ 48, /*subframeSize*/ 10, /*lpcSize*/ - 640, /*bufSize*/ 17, /*pitchStart*/ 144, /*pitchEnd*/ 0.9, /*gamma1*/ @@ -667,7 +662,7 @@ const SpeexMode * speex_lib_get_mode (int mode) if (mode == SPEEX_MODEID_NB_48K) return &speex_nb_48k_mode; #endif - if (mode < 0 || mode > SPEEX_NB_MODES) return NULL; + if (mode < 0 || mode >= SPEEX_NB_MODES) return NULL; return speex_mode_list[mode]; } diff --git a/libspeex/modes.h b/libspeex/modes.h index 6a63240..5bf1971 100644 --- a/libspeex/modes.h +++ b/libspeex/modes.h @@ -46,6 +46,23 @@ #define SB_SUBMODES 8 #define SB_SUBMODE_BITS 3 +/* Used internally, NOT TO BE USED in applications */ +/** Used internally*/ +#define SPEEX_GET_PI_GAIN 100 +/** Used internally*/ +#define SPEEX_GET_EXC 101 +/** Used internally*/ +#define SPEEX_GET_INNOV 102 +/** Used internally*/ +#define SPEEX_GET_DTX_STATUS 103 +/** Used internally*/ +#define SPEEX_SET_INNOVATION_SAVE 104 +/** Used internally*/ +#define SPEEX_SET_WIDEBAND 105 + +/** Used internally*/ +#define SPEEX_GET_STACK 106 + /** Quantizes LSPs */ typedef void (*lsp_quant_func)(spx_lsp_t *, spx_lsp_t *, int, SpeexBits *); @@ -130,7 +147,7 @@ typedef struct SpeexSBMode { spx_word16_t gamma2; /**< Perceptual filter parameter #1 */ float lag_factor; /**< Lag-windowing parameter */ spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */ - float folding_gain; + spx_word16_t folding_gain; const SpeexSubmode 
*submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */ int defaultSubmode; /**< Default sub-mode to use when encoding */ diff --git a/libspeex/nb_celp.c b/libspeex/nb_celp.c index feedf71..1828aed 100644 --- a/libspeex/nb_celp.c +++ b/libspeex/nb_celp.c @@ -87,14 +87,14 @@ const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224}; #else -const float exc_gain_quant_scal3_bound[7]={0.112338, 0.236980, 0.369316, 0.492054, 0.637471, 0.828874, 1.132784}; -const float exc_gain_quant_scal3[8]={0.061130, 0.163546, 0.310413, 0.428220, 0.555887, 0.719055, 0.938694, 1.326874}; -const float exc_gain_quant_scal1_bound[1]={0.87798}; -const float exc_gain_quant_scal1[2]={0.70469, 1.05127}; +const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f}; +const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f}; +const float exc_gain_quant_scal1_bound[1]={0.87798f}; +const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f}; -#define LSP_MARGIN .002 -#define LSP_DELTA1 .2 -#define LSP_DELTA2 .05 +#define LSP_MARGIN .002f +#define LSP_DELTA1 .2f +#define LSP_DELTA2 .05f #endif @@ -187,7 +187,7 @@ void *nb_encoder_init(const SpeexMode *m) st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - st->innov_save = NULL; + st->innov_rms_save = NULL; st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int)); @@ -280,6 +280,8 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) int pitch_half[2]; int ol_pitch_id=0; #endif + spx_word32_t ener=0; + spx_word16_t fine_gain; spx_word16_t *in = (spx_word16_t*)vin; st=(EncState *)state; @@ -432,7 +434,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) ol_gain2=ol2; ol_gain2 = sqrt(2*ol_gain2*(ol1+ol2))*1.3*(1-.5*GAIN_SCALING_1*GAIN_SCALING_1*ol_pitch_coef*ol_pitch_coef); - 
ol_gain=SHR(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT); + ol_gain=SHR32(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT); } else #endif @@ -490,7 +492,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* delta_qual*=.1*(3+st->vbr_quality);*/ if (st->vbr_enabled) { - int mode; + spx_int32_t mode; int choice=0; float min_diff=100; mode = 8; @@ -540,7 +542,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (st->abr_enabled) { - int bitrate; + spx_int32_t bitrate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); st->abr_drift+=(bitrate-st->abr_enabled); st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); @@ -720,7 +722,6 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) int offset; spx_word16_t *sw; spx_word16_t *exc; - spx_sig_t *innov_save = NULL; int pitch; int response_bound = st->subframeSize; #ifdef EPIC_48K @@ -739,9 +740,6 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) exc=st->exc+offset; /* Weighted signal */ sw=st->sw+offset; - /* Pointer for saving innovation */ - if (st->innov_save) - innov_save = st->innov_save+offset; /* LSP interpolation (quantized and unquantized) */ lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); @@ -838,9 +836,9 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;i<st->lpcSize;i++) st->mem_sw[i]=mem[i]; - /* Compute target signal */ + /* Compute target signal (saturation prevents overflows on clipped input speech) */ for (i=0;i<st->subframeSize;i++) - target[i]=SUB16(sw[i],PSHR32(ringing[i],1)); + target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767)); /* Reset excitation */ for (i=0;i<st->subframeSize;i++) @@ -901,75 +899,64 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) } /* Quantization of innovation */ - { - spx_word32_t ener=0; - spx_word16_t fine_gain; - - for (i=0;i<st->subframeSize;i++) - innov[i]=0; - - for (i=0;i<st->subframeSize;i++) - real_exc[i] = SUB16(real_exc[i], 
PSHR32(exc32[i],SIG_SHIFT-1)); - - ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); - - /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ + for (i=0;i<st->subframeSize;i++) + innov[i]=0; + + /* FIXME: Make sure this is save from overflows (so far so good) */ + for (i=0;i<st->subframeSize;i++) + real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1))); + + ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); + + /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ #ifdef FIXED_POINT - { - spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); - if (f<=32767) - fine_gain = f; - else - fine_gain = 32767; - } + { + spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); + if (f<=32767) + fine_gain = f; + else + fine_gain = 32767; + } #else - fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); + fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); #endif - /* Calculate gain correction for the sub-frame (if any) */ - if (SUBMODE(have_subframe_gain)) - { - int qe; - if (SUBMODE(have_subframe_gain)==3) - { - qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8); - speex_bits_pack(bits, qe, 3); - ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain); - } else { - qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2); - speex_bits_pack(bits, qe, 1); - ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain); - } - } else { - ener=ol_gain; - } - - /*printf ("%f %f\n", ener, ol_gain);*/ - - /* Normalize innovation */ - signal_div(target, target, ener, st->subframeSize); - - /* Quantize innovation */ - if (SUBMODE(innovation_quant)) + /* Calculate gain correction for the sub-frame (if any) */ + if (SUBMODE(have_subframe_gain)) + { + int qe; + if (SUBMODE(have_subframe_gain)==3) { - /* Codebook search */ - SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, - SUBMODE(innovation_params), st->lpcSize, st->subframeSize, - innov, syn_resp, bits, 
stack, st->complexity, SUBMODE(double_codebook)); - - /* De-normalize innovation and update excitation */ - signal_mul(innov, innov, ener, st->subframeSize); - - for (i=0;i<st->subframeSize;i++) - exc[i] = EXTRACT16(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT)); + qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8); + speex_bits_pack(bits, qe, 3); + ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain); } else { - speex_error("No fixed codebook"); + qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2); + speex_bits_pack(bits, qe, 1); + ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain); } + } else { + ener=ol_gain; + } + + /*printf ("%f %f\n", ener, ol_gain);*/ + + /* Normalize innovation */ + signal_div(target, target, ener, st->subframeSize); + + /* Quantize innovation */ + if (SUBMODE(innovation_quant)) + { + /* Codebook search */ + SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, + SUBMODE(innovation_params), st->lpcSize, st->subframeSize, + innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); + + /* De-normalize innovation and update excitation */ + signal_mul(innov, innov, ener, st->subframeSize); + + for (i=0;i<st->subframeSize;i++) + exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); - if (innov_save) - { - for (i=0;i<st->subframeSize;i++) - innov_save[i] = innov[i]; - } /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */ if (SUBMODE(double_codebook)) { char *tmp_stack=stack; @@ -978,23 +965,26 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;i<st->subframeSize;i++) innov2[i]=0; for (i=0;i<st->subframeSize;i++) - target[i]=MULT16_16_P13(QCONST16(2.2,13), target[i]); + target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]); SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov2, syn_resp, bits, stack, st->complexity, 0); - 
signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); for (i=0;i<st->subframeSize;i++) - exc[i] = ADD32(exc[i],PSHR32(innov2[i],SIG_SHIFT)); - if (innov_save) - { - for (i=0;i<st->subframeSize;i++) - innov_save[i] = ADD32(innov_save[i],innov2[i]); - } + innov[i] = ADD32(innov[i],innov2[i]); stack = tmp_stack; } - + for (i=0;i<st->subframeSize;i++) + exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); + if (st->innov_rms_save) + { + st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize); + } + } else { + speex_error("No fixed codebook"); } + for (i=0;i<st->subframeSize;i++) sw[i] = exc[i]; /* Final signal synthesis from excitation */ @@ -1145,7 +1135,7 @@ const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.2 static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) { - int i, sub; + int i; int pitch_val; spx_word16_t pitch_gain; spx_word16_t fact; @@ -1166,7 +1156,7 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) pitch_gain = st->last_pitch_gain; if (pitch_gain>54) pitch_gain = 54; - pitch_gain = SHL(pitch_gain, 9); + pitch_gain = SHL16(pitch_gain, 9); #else pitch_gain = GAIN_SCALING_1*st->last_pitch_gain; if (pitch_gain>.85) @@ -1200,7 +1190,7 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) st->first = 0; st->count_lost++; - st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR(pitch_gain,9); + st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9); if (st->pitch_gain_buf_idx > 2) /* rollover */ st->pitch_gain_buf_idx = 0; } @@ -1226,7 +1216,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) VARDECL(spx_lsp_t *qlsp); spx_word16_t pitch_average=0; #ifdef EPIC_48K - int pitch_half[2]; + int pitch_half[2] = {0, 0}; int ol_pitch_id=0; #endif spx_word16_t *out = (spx_word16_t*)vout; 
@@ -1267,7 +1257,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance); if (advance < 0) { - speex_warning ("Invalid wideband mode encountered. Corrupted stream?"); + speex_notify("Invalid mode encountered. The stream is corrupted."); return -2; } advance -= (SB_SUBMODE_BITS+1); @@ -1282,7 +1272,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance); if (advance < 0) { - speex_warning ("Invalid wideband mode encountered: corrupted stream?"); + speex_notify("Invalid mode encountered. The stream is corrupted."); return -2; } advance -= (SB_SUBMODE_BITS+1); @@ -1290,7 +1280,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) wideband = speex_bits_unpack_unsigned(bits, 1); if (wideband) { - speex_warning ("More than two wideband layers found: corrupted stream?"); + speex_notify("More than two wideband layers found. The stream is corrupted."); return -2; } @@ -1315,7 +1305,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) return ret; } else if (m>8) /* Invalid mode */ { - speex_warning("Invalid mode encountered: corrupted stream?"); + speex_notify("Invalid mode encountered. The stream is corrupted."); return -2; } @@ -1338,7 +1328,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) { VARDECL(spx_coef_t *lpc); ALLOC(lpc, st->lpcSize, spx_coef_t); - bw_lpc(GAMMA_SCALING*.93, st->interp_qlpc, lpc, st->lpcSize); + bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize); { float innov_gain=0; float pgain=GAIN_SCALING_1*st->last_pitch_gain; @@ -1426,6 +1416,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) int qe; qe = speex_bits_unpack_unsigned(bits, 5); #ifdef FIXED_POINT + /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? 
*/ ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]); #else ol_gain = SIG_SCALING*exp(qe/3.5); @@ -1458,7 +1449,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) int offset; spx_word16_t *exc; spx_word16_t *sp; - spx_sig_t *innov_save = NULL; + spx_word16_t *innov_save = NULL; spx_word16_t tmp; #ifdef EPIC_48K @@ -1535,7 +1526,11 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) #ifdef EPIC_48K } #endif - + /* Ensuring that things aren't blowing up as would happen if e.g. an encoder is + crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat). + We can probably be even more aggressive and limit to 15000 or so. */ + sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize); + tmp = gain_3tap_to_1tap(pitch_gain); pitch_average += tmp; @@ -1576,16 +1571,38 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) { /*Fixed codebook contribution*/ SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); + /* De-normalize innovation and update excitation */ +#ifdef FIXED_POINT + signal_mul(innov, innov, ener, st->subframeSize); +#else + signal_mul(innov, innov, ener, st->subframeSize); +#endif + /* Decode second codebook (only for some modes) */ + if (SUBMODE(double_codebook)) + { + char *tmp_stack=stack; + VARDECL(spx_sig_t *innov2); + ALLOC(innov2, st->subframeSize, spx_sig_t); + for (i=0;i<st->subframeSize;i++) + innov2[i]=0; + SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); + for (i=0;i<st->subframeSize;i++) + innov[i] = ADD32(innov[i], innov2[i]); + stack = tmp_stack; + } + for (i=0;i<st->subframeSize;i++) + exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); + /*print_vec(exc, 40, "innov");*/ + if (innov_save) + { + for 
(i=0;i<st->subframeSize;i++) + innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT)); + } } else { speex_error("No fixed codebook"); } - /* De-normalize innovation and update excitation */ -#ifdef FIXED_POINT - signal_mul(innov, innov, ener, st->subframeSize); -#else - signal_mul(innov, innov, ener, st->subframeSize); -#endif /*Vocoder mode*/ if (st->submodeID==1) { @@ -1617,35 +1634,8 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) st->voc_mean = .95*st->voc_mean + .05*exc[i]; exc[i]-=st->voc_mean; } - } else { - for (i=0;i<st->subframeSize;i++) - exc[i]=PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT); - /*print_vec(exc, 40, "innov");*/ - } - if (innov_save) - { - for (i=0;i<st->subframeSize;i++) - innov_save[i] = innov[i]; - } - /* Decode second codebook (only for some modes) */ - if (SUBMODE(double_codebook)) - { - char *tmp_stack=stack; - VARDECL(spx_sig_t *innov2); - ALLOC(innov2, st->subframeSize, spx_sig_t); - for (i=0;i<st->subframeSize;i++) - innov2[i]=0; - SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); - signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); - for (i=0;i<st->subframeSize;i++) - exc[i] = ADD16(exc[i],PSHR32(innov2[i],SIG_SHIFT)); - if (innov_save) - { - for (i=0;i<st->subframeSize;i++) - innov_save[i] = ADD32(innov_save[i],innov2[i]); - } - stack = tmp_stack; } + } } @@ -1712,7 +1702,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) for (i=0;i<st->lpcSize;i+=2) { /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ - pi_g = ADD32(pi_g, SUB32(EXTEND32(st->interp_qlpc[i+1]),EXTEND32(st->interp_qlpc[i]))); + pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i]))); } st->pi_gain[sub] = pi_g; } @@ -1759,40 +1749,40 @@ int nb_encoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->frameSize; + (*(spx_int32_t*)ptr) = st->frameSize; break; case 
SPEEX_SET_LOW_MODE: case SPEEX_SET_MODE: - st->submodeSelect = st->submodeID = (*(int*)ptr); + st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_GET_LOW_MODE: case SPEEX_GET_MODE: - (*(int*)ptr) = st->submodeID; + (*(spx_int32_t*)ptr) = st->submodeID; break; case SPEEX_SET_VBR: - st->vbr_enabled = (*(int*)ptr); + st->vbr_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_VBR: - (*(int*)ptr) = st->vbr_enabled; + (*(spx_int32_t*)ptr) = st->vbr_enabled; break; case SPEEX_SET_VAD: - st->vad_enabled = (*(int*)ptr); + st->vad_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_SET_DTX: - st->dtx_enabled = (*(int*)ptr); + st->dtx_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_DTX: - (*(int*)ptr) = st->dtx_enabled; + (*(spx_int32_t*)ptr) = st->dtx_enabled; break; case SPEEX_SET_ABR: st->abr_enabled = (*(spx_int32_t*)ptr); st->vbr_enabled = st->abr_enabled!=0; if (st->vbr_enabled) { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; float vbr_qual; target = (*(spx_int32_t*)ptr); @@ -1825,7 +1815,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_QUALITY: { - int quality = (*(int*)ptr); + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1834,7 +1824,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_COMPLEXITY: - st->complexity = (*(int*)ptr); + st->complexity = (*(spx_int32_t*)ptr); if (st->complexity<0) st->complexity=0; break; @@ -1843,7 +1833,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_BITRATE: { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1884,21 +1874,21 @@ int nb_encoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); + st->encode_submode = 
(*(spx_int32_t*)ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: - (*(int*)ptr)=(st->windowSize-st->frameSize); + (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize); break; case SPEEX_SET_PLC_TUNING: - st->plc_tuning = (*(int*)ptr); + st->plc_tuning = (*(spx_int32_t*)ptr); if (st->plc_tuning>100) st->plc_tuning=100; break; case SPEEX_GET_PLC_TUNING: - (*(int*)ptr)=(st->plc_tuning); + (*(spx_int32_t*)ptr)=(st->plc_tuning); break; case SPEEX_SET_VBR_MAX_BITRATE: st->vbr_max = (*(spx_int32_t*)ptr); @@ -1925,19 +1915,21 @@ int nb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_word16_t *e = (spx_word16_t*)ptr; - for (i=0;i<st->frameSize;i++) - e[i]=st->exc[i]; + for (i=0;i<st->nbSubframes;i++) + ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); } break; case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_rms_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: - st->isWideband = *((int*)ptr); + st->isWideband = *((spx_int32_t*)ptr); + break; + case SPEEX_GET_STACK: + *((char**)ptr) = st->stack; break; default: speex_warning_int("Unknown nb_ctl request: ", request); @@ -1954,20 +1946,20 @@ int nb_decoder_ctl(void *state, int request, void *ptr) { case SPEEX_SET_LOW_MODE: case SPEEX_SET_MODE: - st->submodeID = (*(int*)ptr); + st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_GET_LOW_MODE: case SPEEX_GET_MODE: - (*(int*)ptr) = st->submodeID; + (*(spx_int32_t*)ptr) = st->submodeID; break; case SPEEX_SET_ENH: - st->lpc_enh_enabled = *((int*)ptr); + st->lpc_enh_enabled = *((spx_int32_t*)ptr); break; case SPEEX_GET_ENH: - *((int*)ptr) = st->lpc_enh_enabled; + *((spx_int32_t*)ptr) = st->lpc_enh_enabled; break; case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->frameSize; + 
(*(spx_int32_t*)ptr) = st->frameSize; break; case SPEEX_GET_BITRATE: if (st->submodes[st->submodeID]) @@ -2007,13 +1999,13 @@ int nb_decoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); + st->encode_submode = (*(spx_int32_t*)ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: - (*(int*)ptr)=st->subframeSize; + (*(spx_int32_t*)ptr)=st->subframeSize; break; case SPEEX_SET_HIGHPASS: st->highpass_enabled = (*(spx_int32_t*)ptr); @@ -2033,19 +2025,21 @@ int nb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_word16_t *e = (spx_word16_t*)ptr; - for (i=0;i<st->frameSize;i++) - e[i]=st->exc[i]; + for (i=0;i<st->nbSubframes;i++) + ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); } break; case SPEEX_GET_DTX_STATUS: - *((int*)ptr) = st->dtx_enabled; + *((spx_int32_t*)ptr) = st->dtx_enabled; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: - st->isWideband = *((int*)ptr); + st->isWideband = *((spx_int32_t*)ptr); + break; + case SPEEX_GET_STACK: + *((char**)ptr) = st->stack; break; default: speex_warning_int("Unknown nb_ctl request: ", request); diff --git a/libspeex/nb_celp.h b/libspeex/nb_celp.h index a72c2b1..1ebf717 100644 --- a/libspeex/nb_celp.h +++ b/libspeex/nb_celp.h @@ -96,12 +96,12 @@ typedef struct EncState { spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame) */ spx_mem_t mem_hp[2]; /**< High-pass filter memory */ spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - spx_sig_t *innov_save; /**< If non-NULL, innovation is copied here */ + spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */ VBRState *vbr; /**< State of the VBR data */ float vbr_quality; /**< 
Quality setting for VBR encoding */ float relative_quality; /**< Relative quality that will be needed by VBR */ - int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ + spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */ int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */ @@ -148,7 +148,7 @@ typedef struct DecState { spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */ spx_mem_t mem_hp[2]; /**< High-pass filter memory */ spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ + spx_word16_t *innov_save; /** If non-NULL, innovation is copied here */ /* This is used in packet loss concealment */ int last_pitch; /**< Pitch of last correctly decoded frame */ diff --git a/libspeex/preprocess.c b/libspeex/preprocess.c index 02724f2..b5caf4b 100644 --- a/libspeex/preprocess.c +++ b/libspeex/preprocess.c @@ -1,6 +1,6 @@ -/* Copyright (C) 2003 Epic Games - Written by Jean-Marc Valin - +/* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) + Copyright (C) 2004-2006 Epic Games + File: preprocess.c Preprocessor with denoising based on the algorithm by Ephraim and Malah @@ -31,96 +31,369 @@ POSSIBILITY OF SUCH DAMAGE. */ + +/* + Recommended papers: + + Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error + short-time spectral amplitude estimator". IEEE Transactions on Acoustics, + Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. + + Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error + log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. + + I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". + Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. 
+ + Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic + approach to combined acoustic echo cancellation and noise reduction". IEEE + Transactions on Speech and Audio Processing, 2002. + + J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation + of simultaneous non-stationary sources". In Proceedings IEEE International + Conference on Acoustics, Speech, and Signal Processing, 2004. +*/ + #ifdef HAVE_CONFIG_H #include "config.h" #endif #include <math.h> #include "speex/speex_preprocess.h" +#include "speex/speex_echo.h" #include "misc.h" -#include "smallft.h" - -#define max(a,b) ((a) > (b) ? (a) : (b)) -#define min(a,b) ((a) < (b) ? (a) : (b)) +#include "fftwrap.h" +#include "filterbank.h" +#include "math_approx.h" #ifndef M_PI #define M_PI 3.14159263 #endif -#define SQRT_M_PI_2 0.88623 -#define LOUDNESS_EXP 2.5 +#define LOUDNESS_EXP 5.f +#define AMP_SCALE .001f +#define AMP_SCALE_1 1000.f + +#define NB_BANDS 24 + +#define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) +#define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15) +#define NOISE_SUPPRESS_DEFAULT -15 +#define ECHO_SUPPRESS_DEFAULT -40 +#define ECHO_SUPPRESS_ACTIVE_DEFAULT -15 + +#ifndef NULL +#define NULL 0 +#endif + +#define SQR(x) ((x)*(x)) +#define SQR16(x) (MULT16_16((x),(x))) +#define SQR16_Q15(x) (MULT16_16_Q15((x),(x))) + +#ifdef FIXED_POINT +static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b) +{ + if (SHR32(a,7) >= b) + { + return 32767; + } else { + if (b>=QCONST32(1,23)) + { + a = SHR32(a,8); + b = SHR32(b,8); + } + if (b>=QCONST32(1,19)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + if (b>=QCONST32(1,15)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + a = SHL32(a,8); + return PDIV32_16(a,b); + } + +} +static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) +{ + if (SHR32(a,15) >= b) + { + return 32767; + } else { + if (b>=QCONST32(1,23)) + { + a = SHR32(a,8); + b = SHR32(b,8); + } + if (b>=QCONST32(1,19)) + { 
+ a = SHR32(a,4); + b = SHR32(b,4); + } + if (b>=QCONST32(1,15)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + a = SHL32(a,15)-a; + return DIV32_16(a,b); + } +} +#define SNR_SCALING 256.f +#define SNR_SCALING_1 0.0039062f +#define SNR_SHIFT 8 + +#define FRAC_SCALING 32767.f +#define FRAC_SCALING_1 3.0518e-05 +#define FRAC_SHIFT 1 + +#define EXPIN_SCALING 2048.f +#define EXPIN_SCALING_1 0.00048828f +#define EXPIN_SHIFT 11 +#define EXPOUT_SCALING_1 1.5259e-05 + +#define NOISE_SHIFT 7 -#define NB_BANDS 8 +#else + +#define DIV32_16_Q8(a,b) ((a)/(b)) +#define DIV32_16_Q15(a,b) ((a)/(b)) +#define SNR_SCALING 1.f +#define SNR_SCALING_1 1.f +#define SNR_SHIFT 0 +#define FRAC_SCALING 1.f +#define FRAC_SCALING_1 1.f +#define FRAC_SHIFT 0 +#define NOISE_SHIFT 0 -#define SPEEX_PROB_START_DEFAULT 0.35f -#define SPEEX_PROB_CONTINUE_DEFAULT 0.20f +#define EXPIN_SCALING 1.f +#define EXPIN_SCALING_1 1.f +#define EXPOUT_SCALING_1 1.f -#define ZMIN .1 -#define ZMAX .316 -#define ZMIN_1 10 -#define LOG_MIN_MAX_1 0.86859 +#endif -static void conj_window(float *w, int len) +/** Speex pre-processor state. 
*/ +struct SpeexPreprocessState_ { + /* Basic info */ + int frame_size; /**< Number of samples processed each time */ + int ps_size; /**< Number of points in the power spectrum */ + int sampling_rate; /**< Sampling rate of the input/output */ + int nbands; + FilterBank *bank; + + /* Parameters */ + int denoise_enabled; + int vad_enabled; + int dereverb_enabled; + spx_word16_t reverb_decay; + spx_word16_t reverb_level; + spx_word16_t speech_prob_start; + spx_word16_t speech_prob_continue; + int noise_suppress; + int echo_suppress; + int echo_suppress_active; + SpeexEchoState *echo_state; + + /* DSP-related arrays */ + spx_word16_t *frame; /**< Processing frame (2*ps_size) */ + spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ + spx_word32_t *ps; /**< Current power spectrum */ + spx_word16_t *gain2; /**< Adjusted gains */ + spx_word16_t *gain_floor; /**< Minimum gain allowed */ + spx_word16_t *window; /**< Analysis/Synthesis window */ + spx_word32_t *noise; /**< Noise estimate */ + spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */ + spx_word32_t *old_ps; /**< Power spectrum for last frame */ + spx_word16_t *gain; /**< Ephraim Malah gain */ + spx_word16_t *prior; /**< A-priori SNR */ + spx_word16_t *post; /**< A-posteriori SNR */ + + spx_word32_t *S; /**< Smoothed power spectrum */ + spx_word32_t *Smin; /**< See Cohen paper */ + spx_word32_t *Stmp; /**< See Cohen paper */ + int *update_prob; /**< Probability of speech presence for noise update */ + + spx_word16_t *zeta; /**< Smoothed a priori SNR */ + spx_word32_t *echo_noise; + spx_word32_t *residual_echo; + + /* Misc */ + spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */ + spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */ + + /* AGC stuff, only for floating point for now */ +#ifndef FIXED_POINT + int agc_enabled; + float agc_level; + float loudness_accum; + float *loudness_weight; /**< Perceptual loudness curve */ + float loudness; /**< Loudness
estimate */ + float agc_gain; /**< Current AGC gain */ + int nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */ + float max_gain; /**< Maximum gain allowed */ + float max_increase_step; /**< Maximum increase in gain from one frame to another */ + float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ + float prev_loudness; /**< Loudness of previous frame */ + float init_max; /**< Current gain limit during initialisation */ +#endif + int nb_adapt; /**< Number of frames used for adaptation so far */ + int was_speech; + int min_count; /**< Number of frames processed so far */ + void *fft_lookup; /**< Lookup table for the FFT */ +#ifdef FIXED_POINT + int frame_shift; +#endif +}; + + +static void conj_window(spx_word16_t *w, int len) { int i; for (i=0;i<len;i++) { - float x=4*((float)i)/len; + spx_word16_t tmp; +#ifdef FIXED_POINT + spx_word16_t x = DIV32_16(MULT16_16(32767,i),len); +#else + spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len); +#endif int inv=0; - if (x<1) + if (x<QCONST16(1.f,13)) { - } else if (x<2) + } else if (x<QCONST16(2.f,13)) { - x=2-x; + x=QCONST16(2.f,13)-x; inv=1; - } else if (x<3) + } else if (x<QCONST16(3.f,13)) { - x=x-2; + x=x-QCONST16(2.f,13); inv=1; } else { - x=4-x; + x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */ } - x*=1.9979; - w[i]=(.5-.5*cos(x))*(.5-.5*cos(x)); + x = MULT16_16_Q14(QCONST16(1.271903f,14), x); + tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(QCONST32(x,2)))); if (inv) - w[i]=1-w[i]; - w[i]=sqrt(w[i]); + tmp=SUB16(Q15_ONE,tmp); + w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15)); } } + +#ifdef FIXED_POINT /* This function approximates the gain function y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) which multiplied by xi/(1+xi) is the optimal gain in the loudness domain ( sqrt[amplitude] ) + Input in Q11 format, output in Q15 */ -static inline float hypergeom_gain(float x) +static inline spx_word32_t hypergeom_gain(spx_word32_t xx) 
+{ + int ind; + spx_word16_t frac; + /* Q13 table */ + static const spx_word16_t table[21] = { + 6730, 8357, 9868, 11267, 12563, 13770, 14898, + 15959, 16961, 17911, 18816, 19682, 20512, 21311, + 22082, 22827, 23549, 24250, 24931, 25594, 26241}; + ind = SHR32(xx,10); + if (ind<0) + return Q15_ONE; + if (ind>19) + return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT)))); + frac = SHL32(xx-SHL32(ind,10),5); + return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7); +} + +static inline spx_word16_t qcurve(spx_word16_t x) +{ + x = MAX16(x, 1); + return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x)))); +} + +/* Compute the gain floor based on different floors for the background noise and residual echo */ +static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) +{ + int i; + + if (noise_suppress > effective_echo_suppress) + { + spx_word16_t noise_gain, gain_ratio; + noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1))); + gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1))); + + /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ + for (i=0;i<len;i++) + gain_floor[i] = MULT16_16_Q15(noise_gain, + spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]), + (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); + } else { + spx_word16_t echo_gain, gain_ratio; + echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1))); + gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1))); + + /* 
gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ + for (i=0;i<len;i++) + gain_floor[i] = MULT16_16_Q15(echo_gain, + spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i], + (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); + } +} + +#else +/* This function approximates the gain function + y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) + which multiplied by xi/(1+xi) is the optimal gain + in the loudness domain ( sqrt[amplitude] ) +*/ +static inline spx_word32_t hypergeom_gain(spx_word32_t xx) { int ind; float integer, frac; + float x; static const float table[21] = { 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f, 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f, 2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f}; - - integer = floor(2*x); - ind = (int)integer; - if (ind<0) - return 1; - if (ind>19) - return 1+.1296/x; - frac = 2*x-integer; - return ((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); + x = EXPIN_SCALING_1*xx; + integer = floor(2*x); + ind = (int)integer; + if (ind<0) + return FRAC_SCALING; + if (ind>19) + return FRAC_SCALING*(1+.1296/x); + frac = 2*x-integer; + return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); } -static inline float qcurve(float x) +static inline spx_word16_t qcurve(spx_word16_t x) { - return 1.f/(1.f+.1f/(x*x)); + return 1.f/(1.f+.15f/(SNR_SCALING_1*x)); +} + +static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) +{ + int i; + float echo_floor; + float noise_floor; + + noise_floor = exp(.2302585f*noise_suppress); + echo_floor = exp(.2302585f*effective_echo_suppress); + + /* Compute the gain floor based on different floors for the background noise and residual echo */ + for (i=0;i<len;i++) + gain_floor[i] = 
FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]); } +#endif SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) { int i; - int N, N3, N4; + int N, N3, N4, M; SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState)); st->frame_size = frame_size; @@ -153,49 +426,51 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r st->sampling_rate = sampling_rate; st->denoise_enabled = 1; - st->agc_enabled = 0; - st->agc_level = 8000; st->vad_enabled = 0; st->dereverb_enabled = 0; - st->reverb_decay = .5; - st->reverb_level = .2; - - st->speech_prob_start = SPEEX_PROB_START_DEFAULT; - st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT; - - st->frame = (float*)speex_alloc(2*N*sizeof(float)); - st->ps = (float*)speex_alloc(N*sizeof(float)); - st->gain2 = (float*)speex_alloc(N*sizeof(float)); - st->window = (float*)speex_alloc(2*N*sizeof(float)); - st->noise = (float*)speex_alloc(N*sizeof(float)); - st->reverb_estimate = (float*)speex_alloc(N*sizeof(float)); - st->old_ps = (float*)speex_alloc(N*sizeof(float)); - st->gain = (float*)speex_alloc(N*sizeof(float)); - st->prior = (float*)speex_alloc(N*sizeof(float)); - st->post = (float*)speex_alloc(N*sizeof(float)); - st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); - st->inbuf = (float*)speex_alloc(N3*sizeof(float)); - st->outbuf = (float*)speex_alloc(N3*sizeof(float)); - st->echo_noise = (float*)speex_alloc(N*sizeof(float)); - - st->S = (float*)speex_alloc(N*sizeof(float)); - st->Smin = (float*)speex_alloc(N*sizeof(float)); - st->Stmp = (float*)speex_alloc(N*sizeof(float)); - st->update_prob = (float*)speex_alloc(N*sizeof(float)); + st->reverb_decay = 0; + st->reverb_level = 0; + st->noise_suppress = NOISE_SUPPRESS_DEFAULT; + st->echo_suppress = ECHO_SUPPRESS_DEFAULT; + st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT; - st->zeta = 
(float*)speex_alloc(N*sizeof(float)); - st->Zpeak = 0; - st->Zlast = 0; + st->speech_prob_start = SPEECH_PROB_START_DEFAULT; + st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; - st->noise_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->noise_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->speech_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->speech_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->noise_bandsN = st->speech_bandsN = 1; + st->echo_state = NULL; + + st->nbands = NB_BANDS; + M = st->nbands; + st->bank = filterbank_new(M, sampling_rate, N, 1); + + st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + + st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + + st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->update_prob = (int*)speex_alloc(N*sizeof(int)); + + st->inbuf = 
(spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); + st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); conj_window(st->window, 2*N3); for (i=2*N3;i<2*st->ps_size;i++) - st->window[i]=1; + st->window[i]=Q15_ONE; if (N4>0) { @@ -205,51 +480,62 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r st->window[i+N3]=1; } } - for (i=0;i<N;i++) + for (i=0;i<N+M;i++) { - st->noise[i]=1e4; - st->reverb_estimate[i]=0.; - st->old_ps[i]=1e4; - st->gain[i]=1; - st->post[i]=1; - st->prior[i]=1; + st->noise[i]=QCONST32(1.f,NOISE_SHIFT); + st->reverb_estimate[i]=0; + st->old_ps[i]=1; + st->gain[i]=Q15_ONE; + st->post[i]=SHL16(1, SNR_SHIFT); + st->prior[i]=SHL16(1, SNR_SHIFT); } + for (i=0;i<N;i++) + st->update_prob[i] = 1; for (i=0;i<N3;i++) { st->inbuf[i]=0; st->outbuf[i]=0; } - +#ifndef FIXED_POINT + st->agc_enabled = 0; + st->agc_level = 8000; + st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); for (i=0;i<N;i++) { float ff=((float)i)*.5*sampling_rate/((float)N); + /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/ st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); if (st->loudness_weight[i]<.01f) st->loudness_weight[i]=.01f; st->loudness_weight[i] *= st->loudness_weight[i]; } - - st->speech_prob = 0; - st->last_speech = 1000; - st->loudness = pow(6000,LOUDNESS_EXP); - st->loudness2 = 6000; + /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ + st->loudness = 1e-15; + st->agc_gain = 1; st->nb_loudness_adapt = 0; + st->max_gain = 30; + st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); + st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); + st->prev_loudness = 1; + st->init_max = 1; +#endif + st->was_speech = 0; - st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup)); - spx_drft_init(st->fft_lookup,2*N); + st->fft_lookup = spx_fft_init(2*N); 
st->nb_adapt=0; - st->consec_noise=0; - st->nb_preprocess=0; + st->min_count=0; return st; } void speex_preprocess_state_destroy(SpeexPreprocessState *st) { speex_free(st->frame); + speex_free(st->ft); speex_free(st->ps); speex_free(st->gain2); + speex_free(st->gain_floor); speex_free(st->window); speex_free(st->noise); speex_free(st->reverb_estimate); @@ -257,8 +543,11 @@ void speex_preprocess_state_destroy(SpeexPreprocessState *st) speex_free(st->gain); speex_free(st->prior); speex_free(st->post); +#ifndef FIXED_POINT speex_free(st->loudness_weight); +#endif speex_free(st->echo_noise); + speex_free(st->residual_echo); speex_free(st->S); speex_free(st->Smin); @@ -266,298 +555,65 @@ void speex_preprocess_state_destroy(SpeexPreprocessState *st) speex_free(st->update_prob); speex_free(st->zeta); - speex_free(st->noise_bands); - speex_free(st->noise_bands2); - speex_free(st->speech_bands); - speex_free(st->speech_bands2); - speex_free(st->inbuf); speex_free(st->outbuf); - spx_drft_clear(st->fft_lookup); - speex_free(st->fft_lookup); - + spx_fft_destroy(st->fft_lookup); + filterbank_destroy(st->bank); speex_free(st); } -static void update_noise(SpeexPreprocessState *st, float *ps, spx_int32_t *echo) +/* FIXME: The AGC doesn't work yet with fixed-point*/ +#ifndef FIXED_POINT +static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft) { int i; - float beta; - st->nb_adapt++; - beta=1.0f/st->nb_adapt; - if (beta < .05f) - beta=.05f; - - if (!echo) - { - for (i=0;i<st->ps_size;i++) - st->noise[i] = (1.f-beta)*st->noise[i] + beta*ps[i]; - } else { - for (i=0;i<st->ps_size;i++) - st->noise[i] = (1.f-beta)*st->noise[i] + beta*max(1.f,ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); -#if 0 - for (i=0;i<st->ps_size;i++) - st->noise[i] = 0; -#endif - } -} - -static int speex_compute_vad(SpeexPreprocessState *st, float *ps, float mean_prior, float mean_post) -{ - int i, is_speech=0; int N = st->ps_size; - float scale=.5f/N; - - /* FIXME: Clean 
this up a bit */ - { - float bands[NB_BANDS]; - int j; - float p0, p1; - float tot_loudness=0; - float x = sqrt(mean_post); - - for (i=5;i<N-10;i++) - { - tot_loudness += scale*st->ps[i] * st->loudness_weight[i]; - } - - for (i=0;i<NB_BANDS;i++) - { - bands[i]=1e4f; - for (j=i*N/NB_BANDS;j<(i+1)*N/NB_BANDS;j++) - { - bands[i] += ps[j]; - } - bands[i]=log(bands[i]); - } - - /*p1 = .0005+.6*exp(-.5*(x-.4)*(x-.4)*11)+.1*exp(-1.2*x); - if (x<1.5) - p0=.1*exp(2*(x-1.5)); - else - p0=.02+.1*exp(-.2*(x-1.5)); - */ - - p0=1.f/(1.f+exp(3.f*(1.5f-x))); - p1=1.f-p0; - - /*fprintf (stderr, "%f %f ", p0, p1);*/ - /*p0 *= .99*st->speech_prob + .01*(1-st->speech_prob); - p1 *= .01*st->speech_prob + .99*(1-st->speech_prob); - - st->speech_prob = p0/(p1+p0); - */ - - if (st->noise_bandsN < 50 || st->speech_bandsN < 50) - { - if (mean_post > 5.f) - { - float adapt = 1./st->speech_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;i<NB_BANDS;i++) - { - st->speech_bands[i] = (1.f-adapt)*st->speech_bands[i] + adapt*bands[i]; - /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/ - st->speech_bands2[i] = (1.f-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]); - } - } else { - float adapt = 1./st->noise_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;i<NB_BANDS;i++) - { - st->noise_bands[i] = (1.f-adapt)*st->noise_bands[i] + adapt*bands[i]; - /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/ - st->noise_bands2[i] = (1.f-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]); - } - } - } - p0=p1=1; - for (i=0;i<NB_BANDS;i++) - { - float noise_var, speech_var; - float noise_mean, speech_mean; - float tmp1, tmp2, pr; - - /*noise_var = 1.01*st->noise_bands2[i] - st->noise_bands[i]*st->noise_bands[i]; - speech_var = 1.01*st->speech_bands2[i] - st->speech_bands[i]*st->speech_bands[i];*/ - noise_var = st->noise_bands2[i]; - 
speech_var = st->speech_bands2[i]; - if (noise_var < .1f) - noise_var = .1f; - if (speech_var < .1f) - speech_var = .1f; - - /*speech_var = sqrt(speech_var*noise_var); - noise_var = speech_var;*/ - if (noise_var < .05f*speech_var) - noise_var = .05f*speech_var; - if (speech_var < .05f*noise_var) - speech_var = .05f*noise_var; - - if (bands[i] < st->noise_bands[i]) - speech_var = noise_var; - if (bands[i] > st->speech_bands[i]) - noise_var = speech_var; - - speech_mean = st->speech_bands[i]; - noise_mean = st->noise_bands[i]; - if (noise_mean < speech_mean - 5.f) - noise_mean = speech_mean - 5.f; - - tmp1 = exp(-.5f*(bands[i]-speech_mean)*(bands[i]-speech_mean)/speech_var)/sqrt(2.f*M_PI*speech_var); - tmp2 = exp(-.5f*(bands[i]-noise_mean)*(bands[i]-noise_mean)/noise_var)/sqrt(2.f*M_PI*noise_var); - /*fprintf (stderr, "%f ", (float)(p0/(.01+p0+p1)));*/ - /*fprintf (stderr, "%f ", (float)(bands[i]));*/ - pr = tmp1/(1e-25+tmp1+tmp2); - /*if (bands[i] < st->noise_bands[i]) - pr=.01; - if (bands[i] > st->speech_bands[i] && pr < .995) - pr=.995;*/ - if (pr>.999f) - pr=.999f; - if (pr<.001f) - pr=.001f; - /*fprintf (stderr, "%f ", pr);*/ - p0 *= pr; - p1 *= (1-pr); - } - - p0 = pow(p0,.2); - p1 = pow(p1,.2); - -#if 1 - p0 *= 2.f; - p0=p0/(p1+p0); - if (st->last_speech>20) - { - float tmp = sqrt(tot_loudness)/st->loudness2; - tmp = 1.f-exp(-10.f*tmp); - if (p0>tmp) - p0=tmp; - } - p1=1-p0; -#else - if (sqrt(tot_loudness) < .6f*st->loudness2 && p0>15.f*p1) - p0=15.f*p1; - if (sqrt(tot_loudness) < .45f*st->loudness2 && p0>7.f*p1) - p0=7.f*p1; - if (sqrt(tot_loudness) < .3f*st->loudness2 && p0>3.f*p1) - p0=3.f*p1; - if (sqrt(tot_loudness) < .15f*st->loudness2 && p0>p1) - p0=p1; - /*fprintf (stderr, "%f %f ", (float)(sqrt(tot_loudness) /( .25*st->loudness2)), p0/(p1+p0));*/ -#endif - - p0 *= .99f*st->speech_prob + .01f*(1-st->speech_prob); - p1 *= .01f*st->speech_prob + .99f*(1-st->speech_prob); - - st->speech_prob = p0/(1e-25f+p1+p0); - /*fprintf (stderr, "%f %f %f ", 
tot_loudness, st->loudness2, st->speech_prob);*/ - - if (st->speech_prob > st->speech_prob_start - || (st->last_speech < 20 && st->speech_prob > st->speech_prob_continue)) - { - is_speech = 1; - st->last_speech = 0; - } else { - st->last_speech++; - if (st->last_speech<20) - is_speech = 1; - } - - if (st->noise_bandsN > 50 && st->speech_bandsN > 50) - { - if (mean_post > 5) - { - float adapt = 1./st->speech_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;i<NB_BANDS;i++) - { - st->speech_bands[i] = (1-adapt)*st->speech_bands[i] + adapt*bands[i]; - /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/ - st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]); - } - } else { - float adapt = 1./st->noise_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;i<NB_BANDS;i++) - { - st->noise_bands[i] = (1-adapt)*st->noise_bands[i] + adapt*bands[i]; - /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/ - st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]); - } - } - } - - - } - - return is_speech; -} - -static void speex_compute_agc(SpeexPreprocessState *st, float mean_prior) -{ - int i; - int N = st->ps_size; - float scale=.5f/N; - float agc_gain; - int freq_start, freq_end; - float active_bands = 0; - - freq_start = (int)(300.0f*2*N/st->sampling_rate); - freq_end = (int)(2000.0f*2*N/st->sampling_rate); - for (i=freq_start;i<freq_end;i++) + float target_gain; + float loudness=1.f; + float rate; + + for (i=2;i<N;i++) { - if (st->S[i] > 20.f*st->Smin[i]+1000.f) - active_bands+=1; + loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; } - active_bands /= (freq_end-freq_start+1); - - if (active_bands > .2f) + loudness=sqrt(loudness); + /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && + loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ + if (Pframe>.3f) { - 
float loudness=0.f; - float rate, rate2=.2f; st->nb_loudness_adapt++; - rate=2.0f/(1+st->nb_loudness_adapt); - if (rate < .05f) - rate = .05f; - if (rate < .1f && pow(loudness, LOUDNESS_EXP) > st->loudness) - rate = .1f; - if (rate < .2f && pow(loudness, LOUDNESS_EXP) > 3.f*st->loudness) - rate = .2f; - if (rate < .4f && pow(loudness, LOUDNESS_EXP) > 10.f*st->loudness) - rate = .4f; - - for (i=2;i<N;i++) - { - loudness += scale*st->ps[i] * st->gain2[i] * st->gain2[i] * st->loudness_weight[i]; - } - loudness=sqrt(loudness); - /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && - loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ - st->loudness = (1-rate)*st->loudness + (rate)*pow(loudness, LOUDNESS_EXP); - - st->loudness2 = (1-rate2)*st->loudness2 + rate2*pow(st->loudness, 1.0f/LOUDNESS_EXP); - - loudness = pow(st->loudness, 1.0f/LOUDNESS_EXP); - - /*fprintf (stderr, "%f %f %f\n", loudness, st->loudness2, rate);*/ + /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ + rate = .03*Pframe*Pframe; + st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); + st->loudness_accum = (1-rate)*st->loudness_accum + rate; + if (st->init_max < st->max_gain && st->nb_adapt > 20) + st->init_max *= 1.f + .1f*Pframe*Pframe; } + /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ - agc_gain = st->agc_level/st->loudness2; - /*fprintf (stderr, "%f %f %f %f\n", active_bands, st->loudness, st->loudness2, agc_gain);*/ - if (agc_gain>200) - agc_gain = 200; + target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); - for (i=0;i<N;i++) - st->gain2[i] *= agc_gain; + if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) + { + if (target_gain > st->max_increase_step*st->agc_gain) + target_gain = st->max_increase_step*st->agc_gain; + if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness) + target_gain = 
st->max_decrease_step*st->agc_gain; + if (target_gain > st->max_gain) + target_gain = st->max_gain; + if (target_gain > st->init_max) + target_gain = st->init_max; + st->agc_gain = target_gain; + } + /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ + + for (i=0;i<2*N;i++) + ft[i] *= st->agc_gain; + st->prev_loudness = loudness; } +#endif static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) { @@ -565,7 +621,7 @@ static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; - float *ps=st->ps; + spx_word32_t *ps=st->ps; /* 'Build' input frame */ for (i=0;i<N3;i++) @@ -579,295 +635,333 @@ static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) /* Windowing */ for (i=0;i<2*N;i++) - st->frame[i] *= st->window[i]; + st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); +#ifdef FIXED_POINT + { + spx_word16_t max_val=0; + for (i=0;i<2*N;i++) + max_val = MAX16(max_val, ABS16(st->frame[i])); + st->frame_shift = 14-spx_ilog2(EXTEND32(max_val)); + for (i=0;i<2*N;i++) + st->frame[i] = SHL16(st->frame[i], st->frame_shift); + } +#endif + /* Perform FFT */ - spx_drft_forward(st->fft_lookup, st->frame); - + spx_fft(st->fft_lookup, st->frame, st->ft); + /* Power spectrum */ - ps[0]=1; + ps[0]=MULT16_16(st->ft[0],st->ft[0]); for (i=1;i<N;i++) - ps[i]=1+st->frame[2*i-1]*st->frame[2*i-1] + st->frame[2*i]*st->frame[2*i]; + ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]); + for (i=0;i<N;i++) + st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift); + filterbank_compute_bank32(st->bank, ps, ps+N); } static void update_noise_prob(SpeexPreprocessState *st) { int i; + int min_range; int N = st->ps_size; for (i=1;i<N-1;i++) - st->S[i] = 100.f+ .8f*st->S[i] + .05f*st->ps[i-1]+.1f*st->ps[i]+.05f*st->ps[i+1]; + st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + 
MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) + + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); + st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); + st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); - if (st->nb_preprocess<1) + if (st->nb_adapt==1) { - for (i=1;i<N-1;i++) - st->Smin[i] = st->Stmp[i] = st->S[i]+100.f; + for (i=0;i<N;i++) + st->Smin[i] = st->Stmp[i] = 0; } - if (st->nb_preprocess%200==0) + if (st->nb_adapt < 100) + min_range = 15; + else if (st->nb_adapt < 1000) + min_range = 50; + else if (st->nb_adapt < 10000) + min_range = 150; + else + min_range = 300; + if (st->min_count > min_range) { - for (i=1;i<N-1;i++) + st->min_count = 0; + for (i=0;i<N;i++) { - st->Smin[i] = min(st->Stmp[i], st->S[i]); + st->Smin[i] = MIN32(st->Stmp[i], st->S[i]); st->Stmp[i] = st->S[i]; } } else { - for (i=1;i<N-1;i++) + for (i=0;i<N;i++) { - st->Smin[i] = min(st->Smin[i], st->S[i]); - st->Stmp[i] = min(st->Stmp[i], st->S[i]); + st->Smin[i] = MIN32(st->Smin[i], st->S[i]); + st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); } } - for (i=1;i<N-1;i++) + for (i=0;i<N;i++) { - st->update_prob[i] *= .2f; - if (st->S[i] > 2.5*st->Smin[i]) - st->update_prob[i] += .8f; + if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20))) + st->update_prob[i] = 1; + else + st->update_prob[i] = 0; /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ /*fprintf (stderr, "%f ", st->update_prob[i]);*/ } } -#define NOISE_OVERCOMPENS 1.4 +#define NOISE_OVERCOMPENS 1. 
+ +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) { + return speex_preprocess_run(st, x); +} + +int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) +{ int i; - int is_speech=1; - float mean_post=0; - float mean_prior=0; + int M; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; - float scale=.5f/N; - float *ps=st->ps; - float Zframe=0, Pframe; - + spx_word32_t *ps=st->ps; + spx_word32_t Zframe; + spx_word16_t Pframe; + spx_word16_t beta, beta_1; + spx_word16_t effective_echo_suppress; + + st->nb_adapt++; + st->min_count++; + + beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); + beta_1 = Q15_ONE-beta; + M = st->nbands; + /* Deal with residual echo if provided */ + if (st->echo_state) + { + speex_echo_get_residual(st->echo_state, st->residual_echo, N); +#ifndef FIXED_POINT + /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */ + if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f)) + { + for (i=0;i<N;i++) + st->residual_echo[i] = 0; + } +#endif + for (i=0;i<N;i++) + st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]); + filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N); + } else { + for (i=0;i<N+M;i++) + st->echo_noise[i] = 0; + } preprocess_analysis(st, x); update_noise_prob(st); - st->nb_preprocess++; - - /* Noise estimation always updated for the 20 first times */ - if (st->nb_adapt<10) + /* Noise estimation always updated for the 10 first frames */ + /*if (st->nb_adapt<10) { - update_noise(st, ps, echo); + for (i=1;i<N-1;i++) + st->update_prob[i] = 0; } - - /* Deal with residual echo if provided */ - if (echo) - for (i=1;i<N;i++) - st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*1.0*echo[i]); - - /* Compute a posteriori SNR */ - for 
(i=1;i<N;i++) + */ + + /* Update the noise estimate for the frequencies where it can be */ + for (i=0;i<N;i++) { - float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i]; - st->post[i] = ps[i]/tot_noise - 1.f; - if (st->post[i]>100.f) - st->post[i]=100.f; - /*if (st->post[i]<0) - st->post[i]=0;*/ - mean_post+=st->post[i]; + if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT)) + st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT))); } - mean_post /= N; - if (mean_post<0.f) - mean_post=0.f; + filterbank_compute_bank32(st->bank, st->noise, st->noise+N); /* Special case for first frame */ if (st->nb_adapt==1) - for (i=1;i<N;i++) + for (i=0;i<N+M;i++) st->old_ps[i] = ps[i]; - /* Compute a priori SNR */ - { - /* A priori update rate */ - for (i=1;i<N;i++) - { - float gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i])); - float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i]; - /* A priori SNR update */ - st->prior[i] = gamma*max(0.0f,st->post[i]) + - (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]); - - if (st->prior[i]>100.f) - st->prior[i]=100.f; - - mean_prior+=st->prior[i]; - } - } - mean_prior /= N; - -#if 0 - for (i=0;i<N;i++) - { - fprintf (stderr, "%f ", st->prior[i]); - } - fprintf (stderr, "\n"); -#endif - /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/ - - if (st->nb_preprocess>=20) + /* Compute a posteriori SNR */ + for (i=0;i<N+M;i++) { - int do_update = 0; - float noise_ener=0, sig_ener=0; - /* If SNR is low (both a priori and a posteriori), update the noise estimate*/ - /*if (mean_prior<.23 && mean_post < .5)*/ - if (mean_prior<.23f && mean_post < .5f) - do_update = 1; - for (i=1;i<N;i++) - { - noise_ener += st->noise[i]; - sig_ener += ps[i]; - } - if (noise_ener > 3.f*sig_ener) - do_update = 1; - /*do_update = 0;*/ 
- if (do_update) - { - st->consec_noise++; - } else { - st->consec_noise=0; - } + spx_word16_t gamma; + + /* Total noise estimate including residual echo and reverberation */ + spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); + + /* A posteriori SNR = ps/noise - 1*/ + st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); + st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); + + /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ + gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); + + /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ + st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); + st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); } - if (st->vad_enabled) - is_speech = speex_compute_vad(st, ps, mean_prior, mean_post); - + /*print_vec(st->post, N+M, "");*/ - if (st->consec_noise>=3) + /* Recursive average of the a priori SNR. 
A bit smoothed for the psd components */ + st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15); + for (i=1;i<N-1;i++) + st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])), + MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15); + for (i=N-1;i<N+M;i++) + st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15); + + /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */ + Zframe = 0; + for (i=N;i<N+M;i++) + Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); + Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); + + effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); + + compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); + + /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) + Technically this is actually wrong because the EM gain assumes a slightly different probability + distribution */ + for (i=N;i<N+M;i++) { - update_noise(st, st->old_ps, echo); - } else { - for (i=1;i<N-1;i++) - { - if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/) - { - if (echo) - st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); - else - st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i]; - } - } - } + /* See EM and Cohen papers*/ + spx_word32_t theta; + /* Gain from hypergeometric function */ + spx_word32_t MM; + /* Wiener filter gain */ + spx_word16_t prior_ratio; + /* a priori probability of speech presence based on Bark sub-band alone */ + spx_word16_t P1; + /* Speech absence a priori probability 
(considering sub-band and frame) */ + spx_word16_t q; +#ifdef FIXED_POINT + spx_word16_t tmp; +#endif + + prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); + theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); - for (i=1;i<N;i++) - { - st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i]; + MM = hypergeom_gain(theta); + /* Gain with bound */ + st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); + /* Save old Bark power spectrum */ + st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); + + P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i])); + q = Q15_ONE-MULT16_16_Q15(Pframe,P1); +#ifdef FIXED_POINT + theta = MIN32(theta, EXTEND32(32767)); +/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1)))); + tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/ +/*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8)); + st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp)); +#else + st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta)); +#endif } - + /* Convert the EM gains and speech prob to linear frequency */ + filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); + filterbank_compute_psd16(st->bank,st->gain+N, st->gain); + + /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ + if (1) { - int freq_start = (int)(300.0f*2.f*N/st->sampling_rate); - int freq_end = (int)(2000.0f*2.f*N/st->sampling_rate); - for (i=freq_start;i<freq_end;i++) + filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); + + /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ + for (i=0;i<N;i++) { - Zframe += st->zeta[i]; - } - 
Zframe /= (freq_end-freq_start); - } - st->Zlast = Zframe; - - Pframe = qcurve(Zframe); + spx_word32_t MM; + spx_word32_t theta; + spx_word16_t prior_ratio; + spx_word16_t tmp; + spx_word16_t p; + spx_word16_t g; + + /* Wiener filter gain */ + prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); + theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); + + /* Optimal estimator for loudness domain */ + MM = hypergeom_gain(theta); + /* EM gain with bound */ + g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); + /* Interpolated speech probability of presence */ + p = st->gain2[i]; + + /* Constrain the gain to be close to the Bark scale gain */ + if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) + g = MULT16_16(3,st->gain[i]); + st->gain[i] = g; + + /* Save old power spectrum */ + st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); + + /* Apply gain floor */ + if (st->gain[i] < st->gain_floor[i]) + st->gain[i] = st->gain_floor[i]; - /*fprintf (stderr, "%f\n", Pframe);*/ - /* Compute gain according to the Ephraim-Malah algorithm */ - for (i=1;i<N;i++) - { - float MM; - float theta; - float prior_ratio; - float p, q; - float zeta1; - float P1; - - prior_ratio = st->prior[i]/(1.0001f+st->prior[i]); - theta = (1.f+st->post[i])*prior_ratio; - - if (i==1 || i==N-1) - zeta1 = st->zeta[i]; - else - zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1]; - P1 = qcurve (zeta1); - - /* FIXME: add global prob (P2) */ - q = 1-Pframe*P1; - q = 1-P1; - if (q>.95f) - q=.95f; - p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta)); - /*p=1;*/ - - /* Optimal estimator for loudness domain */ - MM = hypergeom_gain(theta); + /* Exponential decay model for reverberation (unused) */ + /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + 
st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ + + /* Take into account speech probability of presence (loudness domain MMSE estimator) */ + /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */ + tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); + st->gain2[i]=SQR16_Q15(tmp); - st->gain[i] = prior_ratio * MM; - /*Put some (very arbitraty) limit on the gain*/ - if (st->gain[i]>2.f) - { - st->gain[i]=2.f; + /* Use this if you want a log-domain MMSE estimator instead */ + /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/ } - - st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i]; - if (st->denoise_enabled) + } else { + for (i=N;i<N+M;i++) { - /*st->gain2[i] = p*p*st->gain[i];*/ - st->gain2[i]=(p*sqrt(st->gain[i])+.2*(1-p)) * (p*sqrt(st->gain[i])+.2*(1-p)); - /*st->gain2[i] = pow(st->gain[i], p) * pow(.1f,1.f-p);*/ - } else { - st->gain2[i]=1.f; + spx_word16_t tmp; + spx_word16_t p = st->gain2[i]; + st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); + tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); + st->gain2[i]=SQR16_Q15(tmp); } + filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); } - st->gain2[0]=st->gain[0]=0.f; - st->gain2[N-1]=st->gain[N-1]=0.f; - /* - for (i=30;i<N-2;i++) + /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) 
*/ + if (!st->denoise_enabled) { - st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]); - } - for (i=30;i<N-2;i++) - st->gain2[i] = st->gain[i]; - */ - if (st->agc_enabled) - speex_compute_agc(st, mean_prior); - -#if 0 - if (!is_speech) - { - for (i=0;i<N;i++) - st->gain2[i] = 0; + for (i=0;i<N+M;i++) + st->gain2[i]=Q15_ONE; } -#if 0 - else { - for (i=0;i<N;i++) - st->gain2[i] = 1; - } -#endif -#endif - + /* Apply computed gain */ for (i=1;i<N;i++) { - st->frame[2*i-1] *= st->gain2[i]; - st->frame[2*i] *= st->gain2[i]; + st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]); + st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]); } - - /* Get rid of the DC and very low frequencies */ - st->frame[0]=0; - st->frame[1]=0; - st->frame[2]=0; - /* Nyquist frequency is mostly useless too */ - st->frame[2*N-1]=0; + st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); + st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); + + /*FIXME: This *will* not work for fixed-point */ +#ifndef FIXED_POINT + if (st->agc_enabled) + speex_compute_agc(st, Pframe, st->ft); +#endif /* Inverse FFT with 1/N scaling */ - spx_drft_backward(st->fft_lookup, st->frame); - + spx_ifft(st->fft_lookup, st->ft, st->frame); + /* Scale back to original (lower) amplitude */ for (i=0;i<2*N;i++) - st->frame[i] *= scale; + st->frame[i] = PSHR16(st->frame[i], st->frame_shift); + /*FIXME: This *will* not work for fixed-point */ +#ifndef FIXED_POINT + if (st->agc_enabled) { float max_sample=0; for (i=0;i<2*N;i++) @@ -880,9 +974,11 @@ int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo st->frame[i] *= damp; } } - +#endif + + /* Synthesis window (for WOLA) */ for (i=0;i<2*N;i++) - st->frame[i] *= st->window[i]; + st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); /* Perform overlap and add */ for (i=0;i<N3;i++) @@ -894,47 +990,55 @@ int 
speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo for (i=0;i<N3;i++) st->outbuf[i] = st->frame[st->frame_size+i]; - /* Save old power spectrum */ - for (i=1;i<N;i++) - st->old_ps[i] = ps[i]; - - return is_speech; + /* FIXME: This VAD is a kludge */ + if (st->vad_enabled) + { + if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue)) + { + st->was_speech=1; + return 1; + } else + { + st->was_speech=0; + return 0; + } + } else { + return 1; + } } -void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) +void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) { int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; + int M; + spx_word32_t *ps=st->ps; - float *ps=st->ps; - + M = st->nbands; + st->min_count++; + preprocess_analysis(st, x); update_noise_prob(st); - - st->nb_preprocess++; for (i=1;i<N-1;i++) { - if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i]) + if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) { - if (echo) - st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); - else - st->noise[i] = .95f*st->noise[i] + .1f*st->ps[i]; + st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT)); } } for (i=0;i<N3;i++) - st->outbuf[i] = x[st->frame_size-N3+i]*st->window[st->frame_size+i]; + st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]); /* Save old power spectrum */ - for (i=1;i<N;i++) + for (i=0;i<N+M;i++) st->old_ps[i] = ps[i]; - for (i=1;i<N;i++) - st->reverb_estimate[i] *= st->reverb_decay; + for (i=0;i<N;i++) + st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]); } @@ -946,17 +1050,17 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) switch(request) { case SPEEX_PREPROCESS_SET_DENOISE: - 
st->denoise_enabled = (*(int*)ptr); + st->denoise_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_DENOISE: - (*(int*)ptr) = st->denoise_enabled; + (*(spx_int32_t*)ptr) = st->denoise_enabled; break; - +#ifndef FIXED_POINT case SPEEX_PREPROCESS_SET_AGC: - st->agc_enabled = (*(int*)ptr); + st->agc_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_AGC: - (*(int*)ptr) = st->agc_enabled; + (*(spx_int32_t*)ptr) = st->agc_enabled; break; case SPEEX_PREPROCESS_SET_AGC_LEVEL: @@ -969,21 +1073,40 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) case SPEEX_PREPROCESS_GET_AGC_LEVEL: (*(float*)ptr) = st->agc_level; break; - + case SPEEX_PREPROCESS_SET_AGC_INCREMENT: + st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); + break; + case SPEEX_PREPROCESS_GET_AGC_INCREMENT: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size); + break; + case SPEEX_PREPROCESS_SET_AGC_DECREMENT: + st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); + break; + case SPEEX_PREPROCESS_GET_AGC_DECREMENT: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size); + break; + case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN: + st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr)); + break; + case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain)); + break; +#endif case SPEEX_PREPROCESS_SET_VAD: - st->vad_enabled = (*(int*)ptr); + speex_warning("The VAD has been replaced by a hack pending a complete rewrite"); + st->vad_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB: - st->dereverb_enabled = (*(int*)ptr); + st->dereverb_enabled = (*(spx_int32_t*)ptr); for (i=0;i<st->ps_size;i++) 
st->reverb_estimate[i]=0; break; case SPEEX_PREPROCESS_GET_DEREVERB: - (*(int*)ptr) = st->dereverb_enabled; + (*(spx_int32_t*)ptr) = st->dereverb_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: @@ -1001,24 +1124,47 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) break; case SPEEX_PREPROCESS_SET_PROB_START: - st->speech_prob_start = (*(int*)ptr) / 100.0; - if ( st->speech_prob_start > 1 || st->speech_prob_start < 0 ) - st->speech_prob_start = SPEEX_PROB_START_DEFAULT; + *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); + st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_START: - (*(int*)ptr) = st->speech_prob_start * 100; + (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100); break; case SPEEX_PREPROCESS_SET_PROB_CONTINUE: - st->speech_prob_continue = (*(int*)ptr) / 100.0; - if ( st->speech_prob_continue > 1 || st->speech_prob_continue < 0 ) - st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT; + *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); + st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_CONTINUE: - (*(int*)ptr) = st->speech_prob_continue * 100; + (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100); + break; + + case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS: + st->noise_suppress = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS: + (*(spx_int32_t*)ptr) = st->noise_suppress; + break; + case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS: + st->echo_suppress = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS: + (*(spx_int32_t*)ptr) = st->echo_suppress; + break; + case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE: + st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE: + (*(spx_int32_t*)ptr) = st->echo_suppress_active; + break; + 
case SPEEX_PREPROCESS_SET_ECHO_STATE: + st->echo_state = (SpeexEchoState*)ptr; + break; + case SPEEX_PREPROCESS_GET_ECHO_STATE: + ptr = (void*)st->echo_state; break; - default: + default: speex_warning_int("Unknown speex_preprocess_ctl request: ", request); return -1; } diff --git a/libspeex/pseudofloat.h b/libspeex/pseudofloat.h index 9ff1b75..a6c4762 100644 --- a/libspeex/pseudofloat.h +++ b/libspeex/pseudofloat.h @@ -2,6 +2,15 @@ /** @file pseudofloat.h @brief Pseudo-floating point + * This header file provides a lightweight floating point type for + * use on fixed-point platforms when a large dynamic range is + * required. The new type is not compatible with the 32-bit IEEE format, + * it is not even remotely as accurate as 32-bit floats, and is not + * even guaranteed to produce even remotely correct results for code + * other than Speex. It makes all kinds of shortcuts that are acceptable + * for Speex, but may not be acceptable for your application. You're + * quite welcome to reuse this code and improve it, but don't assume + * it works out of the box. Most likely, it doesn't. 
*/ /* Redistribution and use in source and binary forms, with or without @@ -65,18 +74,8 @@ static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) spx_float_t r = {0,0}; return r; } - while (x>32767) - { - x >>= 1; - /*x *= .5;*/ - e++; - } - while (x<16383) - { - x <<= 1; - /*x *= 2;*/ - e--; - } + e = spx_ilog2(ABS32(x))-14; + x = VSHR32(x, e); if (sign) { spx_float_t r; @@ -167,9 +166,9 @@ static inline spx_float_t FLOAT_SUB(spx_float_t a, spx_float_t b) static inline int FLOAT_LT(spx_float_t a, spx_float_t b) { if (a.m==0) - return b.m<0; + return b.m>0; else if (b.m==0) - return a.m>0; + return a.m<0; if ((a).e > (b).e) return ((a).m>>1) < ((b).m>>MIN(15,(a).e-(b).e+1)); else @@ -205,6 +204,14 @@ static inline spx_float_t FLOAT_MULT(spx_float_t a, spx_float_t b) return r; } +static inline spx_float_t FLOAT_AMULT(spx_float_t a, spx_float_t b) +{ + spx_float_t r; + r.m = (spx_int16_t)((spx_int32_t)(a).m*(b).m>>15); + r.e = (a).e+(b).e+15; + return r; +} + static inline spx_float_t FLOAT_SHL(spx_float_t a, int b) { @@ -217,68 +224,53 @@ static inline spx_float_t FLOAT_SHL(spx_float_t a, int b) static inline spx_int16_t FLOAT_EXTRACT16(spx_float_t a) { if (a.e<0) - return EXTRACT16((EXTEND32(a.m)+(1<<(-a.e-1)))>>-a.e); + return EXTRACT16((EXTEND32(a.m)+(EXTEND32(1)<<(-a.e-1)))>>-a.e); else return a.m<<a.e; } -static inline spx_int32_t FLOAT_MUL32(spx_float_t a, spx_word32_t b) +static inline spx_int32_t FLOAT_EXTRACT32(spx_float_t a) { - if (a.e<-15) - return SHR32(MULT16_32_Q15(a.m, b),-a.e-15); + if (a.e<0) + return (EXTEND32(a.m)+(EXTEND32(1)<<(-a.e-1)))>>-a.e; else - return SHL32(MULT16_32_Q15(a.m, b),15+a.e); + return EXTEND32(a.m)<<a.e; +} + +static inline spx_int32_t FLOAT_MUL32(spx_float_t a, spx_word32_t b) +{ + return VSHR32(MULT16_32_Q15(a.m, b),-a.e-15); } static inline spx_float_t FLOAT_MUL32U(spx_word32_t a, spx_word32_t b) { - int e=0; + int e1, e2; spx_float_t r; - /* FIXME: Handle the sign */ - if (a==0) + if (a==0 || b==0) { return FLOAT_ZERO; } - 
while (a>32767) - { - a >>= 1; - e++; - } - while (a<16384) - { - a <<= 1; - e--; - } - while (b>32767) - { - b >>= 1; - e++; - } - while (b<16384) - { - b <<= 1; - e--; - } + e1 = spx_ilog2(ABS32(a)); + a = VSHR32(a, e1-14); + e2 = spx_ilog2(ABS32(b)); + b = VSHR32(b, e2-14); r.m = MULT16_16_Q15(a,b); - r.e = e+15; + r.e = e1+e2-13; return r; } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIV32_FLOAT(spx_word32_t a, spx_float_t b) { int e=0; spx_float_t r; - /* FIXME: Handle the sign */ if (a==0) { return FLOAT_ZERO; } - while (a<SHL32(EXTEND32(b.m),14)) - { - a <<= 1; - e--; - } - while (a>=SHL32(EXTEND32(b.m-1),15)) + e = spx_ilog2(ABS32(a))-spx_ilog2(b.m-1)-15; + a = VSHR32(a, e); + if (ABS32(a)>=SHL32(EXTEND32(b.m-1),15)) { a >>= 1; e++; @@ -289,41 +281,47 @@ static inline spx_float_t FLOAT_DIV32_FLOAT(spx_word32_t a, spx_float_t b) } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIV32(spx_word32_t a, spx_word32_t b) { - int e=0; + int e0=0,e=0; spx_float_t r; - /* FIXME: Handle the sign */ if (a==0) { return FLOAT_ZERO; } - while (b>32767) + if (b>32767) { - b >>= 1; - e--; + e0 = spx_ilog2(b)-14; + b = VSHR32(b, e0); + e0 = -e0; } - while (a<SHL32(b,14)) - { - a <<= 1; - e--; - } - while (a>=SHL32(b-1,15)) + e = spx_ilog2(ABS32(a))-spx_ilog2(b-1)-15; + a = VSHR32(a, e); + if (ABS32(a)>=SHL32(EXTEND32(b-1),15)) { a >>= 1; e++; } + e += e0; r.m = DIV32_16(a,b); r.e = e; return r; } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIVU(spx_float_t a, spx_float_t b) { int e=0; spx_int32_t num; spx_float_t r; + if (b.m<=0) + { + speex_warning_int("Attempted to divide by", b.m); + return FLOAT_ONE; + } num = a.m; + a.m = ABS16(a.m); while (a.m >= b.m) { e++; @@ -339,7 +337,7 @@ static inline spx_float_t FLOAT_SQRT(spx_float_t a) { spx_float_t r; spx_int32_t m; - m = a.m << 14; + m = SHL32(EXTEND32(a.m), 14); r.e = a.e - 14; if (r.e & 1) { @@ -359,9 
+357,11 @@ static inline spx_float_t FLOAT_SQRT(spx_float_t a) #define FLOAT_HALF 0.5f #define PSEUDOFLOAT(x) (x) #define FLOAT_MULT(a,b) ((a)*(b)) +#define FLOAT_AMULT(a,b) ((a)*(b)) #define FLOAT_MUL32(a,b) ((a)*(b)) #define FLOAT_DIV32(a,b) ((a)/(b)) #define FLOAT_EXTRACT16(a) (a) +#define FLOAT_EXTRACT32(a) (a) #define FLOAT_ADD(a,b) ((a)+(b)) #define FLOAT_SUB(a,b) ((a)-(b)) #define REALFLOAT(x) (x) diff --git a/libspeex/quant_lsp.c b/libspeex/quant_lsp.c index bfca587..d907b98 100644 --- a/libspeex/quant_lsp.c +++ b/libspeex/quant_lsp.c @@ -417,7 +417,7 @@ void lsp_quant_48k(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits) #ifdef FIXED_POINT for (i=0;i<order;i++) - qlsp[i]=PSHR(qlsp[i],2); + qlsp[i]=PSHR16(qlsp[i],2); #else for (i=0;i<order;i++) qlsp[i]=qlsp[i]*0.00097655; diff --git a/libspeex/resample.c b/libspeex/resample.c new file mode 100644 index 0000000..7135a29 --- /dev/null +++ b/libspeex/resample.c @@ -0,0 +1,1062 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: resample.c + Arbitrary resampling code + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + The design goals of this code are: + - Very fast algorithm + - SIMD-friendly algorithm + - Low memory requirement + - Good *perceptual* quality (and not best SNR) + + The code is working, but it's in a very early stage, so it may have + artifacts, noise or subliminal messages from satan. Also, the API + isn't stable and I can actually promise that I *will* change the API + some time in the future. + +TODO list: + - Variable calculation resolution depending on quality setting + - Single vs double in float mode + - 16-bit vs 32-bit (sinc only) in fixed-point mode + - Make sure the filter update works even when changing params + after only a few samples processed +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef OUTSIDE_SPEEX +#include <stdlib.h> +static void *speex_alloc (int size) {return calloc(size,1);} +static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);} +static void speex_free (void *ptr) {free(ptr);} +#include "speex_resampler.h" +#include "arch.h" +#else /* OUTSIDE_SPEEX */ + +#include "speex/speex_resampler.h" +#include "misc.h" +#endif /* OUTSIDE_SPEEX */ + +#include <math.h> + +#ifndef M_PI +#define M_PI 3.14159263 +#endif + +#ifdef FIXED_POINT +#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) +#else +#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 
32767 : floor(.5+(x)))) +#endif + +/*#define float double*/ +#define FILTER_SIZE 64 +#define OVERSAMPLE 8 + +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) + +#ifndef NULL +#define NULL 0 +#endif + +typedef int (*resampler_basic_func)(SpeexResamplerState *, spx_uint32_t , const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *); + +struct SpeexResamplerState_ { + spx_uint32_t in_rate; + spx_uint32_t out_rate; + spx_uint32_t num_rate; + spx_uint32_t den_rate; + + int quality; + spx_uint32_t nb_channels; + spx_uint32_t filt_len; + spx_uint32_t mem_alloc_size; + int int_advance; + int frac_advance; + float cutoff; + spx_uint32_t oversample; + int initialised; + int started; + + /* These are per-channel */ + spx_int32_t *last_sample; + spx_uint32_t *samp_frac_num; + spx_uint32_t *magic_samples; + + spx_word16_t *mem; + spx_word16_t *sinc_table; + spx_uint32_t sinc_table_length; + resampler_basic_func resampler_ptr; + + int in_stride; + int out_stride; +} ; + +static double kaiser12_table[68] = { + 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076, + 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014, + 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601, + 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014, + 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490, + 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546, + 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178, + 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947, + 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058, + 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438, + 0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734, + 0.00001000, 0.00000000}; +/* +static double kaiser12_table[36] = { + 0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741, + 0.86843014, 0.81573067, 
0.75723148, 0.69451601, 0.62920216, 0.56287762, + 0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274, + 0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466, + 0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291, + 0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000}; +*/ +static double kaiser10_table[36] = { + 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446, + 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347, + 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962, + 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451, + 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739, + 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000}; + +static double kaiser8_table[36] = { + 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200, + 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126, + 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272, + 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758, + 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490, + 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000}; + +static double kaiser6_table[36] = { + 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003, + 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565, + 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561, + 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058, + 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600, + 0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000}; + +struct FuncDef { + double *table; + int oversample; +}; + +static struct FuncDef _KAISER12 = {kaiser12_table, 64}; +#define KAISER12 (&_KAISER12) +/*static struct FuncDef _KAISER12 = 
{kaiser12_table, 32}; +#define KAISER12 (&_KAISER12)*/ +static struct FuncDef _KAISER10 = {kaiser10_table, 32}; +#define KAISER10 (&_KAISER10) +static struct FuncDef _KAISER8 = {kaiser8_table, 32}; +#define KAISER8 (&_KAISER8) +static struct FuncDef _KAISER6 = {kaiser6_table, 32}; +#define KAISER6 (&_KAISER6) + +struct QualityMapping { + int base_length; + int oversample; + float downsample_bandwidth; + float upsample_bandwidth; + struct FuncDef *window_func; +}; + + +/* This table maps conversion quality to internal parameters. There are two + reasons that explain why the up-sampling bandwidth is larger than the + down-sampling bandwidth: + 1) When up-sampling, we can assume that the spectrum is already attenuated + close to the Nyquist rate (from an A/D or a previous resampling filter) + 2) Any aliasing that occurs very close to the Nyquist rate will be masked + by the sinusoids/noise just below the Nyquist rate (guaranteed only for + up-sampling). +*/ +static const struct QualityMapping quality_map[11] = { + { 8, 4, 0.830f, 0.860f, KAISER6 }, /* Q0 */ + { 16, 4, 0.850f, 0.880f, KAISER6 }, /* Q1 */ + { 32, 4, 0.882f, 0.910f, KAISER6 }, /* Q2 */ /* 82.3% cutoff ( ~60 dB stop) 6 */ + { 48, 8, 0.895f, 0.917f, KAISER8 }, /* Q3 */ /* 84.9% cutoff ( ~80 dB stop) 8 */ + { 64, 8, 0.921f, 0.940f, KAISER8 }, /* Q4 */ /* 88.7% cutoff ( ~80 dB stop) 8 */ + { 80, 16, 0.922f, 0.940f, KAISER10}, /* Q5 */ /* 89.1% cutoff (~100 dB stop) 10 */ + { 96, 16, 0.940f, 0.945f, KAISER10}, /* Q6 */ /* 91.5% cutoff (~100 dB stop) 10 */ + {128, 16, 0.950f, 0.950f, KAISER10}, /* Q7 */ /* 93.1% cutoff (~100 dB stop) 10 */ + {160, 16, 0.960f, 0.960f, KAISER10}, /* Q8 */ /* 94.5% cutoff (~100 dB stop) 10 */ + {192, 32, 0.968f, 0.968f, KAISER12}, /* Q9 */ /* 95.5% cutoff (~100 dB stop) 10 */ + {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */ +}; +/*8,24,40,56,80,104,128,160,200,256,320*/ +static double compute_func(float x, struct FuncDef *func) +{ + float y, 
frac; + double interp[4]; + int ind; + y = x*func->oversample; + ind = (int)floor(y); + frac = (y-ind); + /* CSE will handle the repeated powers */ + interp[3] = -0.1666666667*frac + 0.1666666667*(frac*frac*frac); + interp[2] = frac + 0.5*(frac*frac) - 0.5*(frac*frac*frac); + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac); + /* Just to make sure we don't have rounding problems */ + interp[1] = 1.f-interp[3]-interp[2]-interp[0]; + + /*sum = frac*accum[1] + (1-frac)*accum[2];*/ + return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3]; +} + +#if 0 +#include <stdio.h> +int main(int argc, char **argv) +{ + int i; + for (i=0;i<256;i++) + { + printf ("%f\n", compute_func(i/256., KAISER12)); + } + return 0; +} +#endif + +#ifdef FIXED_POINT +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6f) + return WORD2INT(32768.*cutoff); + else if (fabs(x) > .5f*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return WORD2INT(32768.*cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func)); +} +#else +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6) + return cutoff; + else if (fabs(x) > .5*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func); +} +#endif + +#ifdef FIXED_POINT +static void cubic_coef(spx_word16_t x, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. 
I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + spx_word16_t x2, x3; + x2 = MULT16_16_P15(x, x); + x3 = MULT16_16_P15(x, x2); + interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15); + interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1)); + interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15); + /* Just to make sure we don't have rounding problems */ + interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3]; + if (interp[2]<32767) + interp[2]+=1; +} +#else +static void cubic_coef(spx_word16_t frac, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + interp[0] = -0.16667f*frac + 0.16667f*frac*frac*frac; + interp[1] = frac + 0.5f*frac*frac - 0.5f*frac*frac*frac; + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[3] = -0.33333f*frac + 0.5f*frac*frac - 0.16667f*frac*frac*frac; + /* Just to make sure we don't have rounding problems */ + interp[2] = 1.-interp[0]-interp[1]-interp[3]; +} +#endif + +static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + int j; + spx_word32_t sum=0; + + /* We already have all the filter coefficients pre-computed in the table */ + const spx_word16_t *ptr; + /* Do the memory part */ + for (j=0;last_sample-N+1+j < 0;j++) + { + sum += 
MULT16_16(mem[last_sample+j],st->sinc_table[samp_frac_num*st->filt_len+j]); + } + + /* Do the new part */ + ptr = in+st->in_stride*(last_sample-N+1+j); + for (;j<N;j++) + { + sum += MULT16_16(*ptr,st->sinc_table[samp_frac_num*st->filt_len+j]); + ptr += st->in_stride; + } + + *out = PSHR32(sum,15); + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + int j; + double sum=0; + + /* We already have all the filter coefficients pre-computed in the table */ + const spx_word16_t *ptr; + /* Do the memory part */ + for (j=0;last_sample-N+1+j < 0;j++) + { + sum += MULT16_16(mem[last_sample+j],(double)st->sinc_table[samp_frac_num*st->filt_len+j]); + } + + /* Do the new part */ + ptr = in+st->in_stride*(last_sample-N+1+j); + for (;j<N;j++) + { + sum += MULT16_16(*ptr,(double)st->sinc_table[samp_frac_num*st->filt_len+j]); + ptr += st->in_stride; + } + + *out = sum; + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + 
st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + int j; + spx_word32_t sum=0; + + /* We need to interpolate the sinc filter */ + spx_word32_t accum[4] = {0.f,0.f, 0.f, 0.f}; + spx_word16_t interp[4]; + const spx_word16_t *ptr; + int offset; + spx_word16_t frac; + offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + /* This code is written like this to make it easy to optimise with SIMD. 
+ For most DSPs, it would be best to split the loops in two because most DSPs + have only two accumulators */ + for (j=0;last_sample-N+1+j < 0;j++) + { + spx_word16_t curr_mem = mem[last_sample+j]; + accum[0] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + ptr = in+st->in_stride*(last_sample-N+1+j); + /* Do the new part */ + for (;j<N;j++) + { + spx_word16_t curr_in = *ptr; + ptr += st->in_stride; + accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); + + *out = PSHR32(sum,15); + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t 
samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + int j; + spx_word32_t sum=0; + + /* We need to interpolate the sinc filter */ + double accum[4] = {0.f,0.f, 0.f, 0.f}; + float interp[4]; + const spx_word16_t *ptr; + float alpha = ((float)samp_frac_num)/st->den_rate; + int offset = samp_frac_num*st->oversample/st->den_rate; + float frac = alpha*st->oversample - offset; + /* This code is written like this to make it easy to optimise with SIMD. + For most DSPs, it would be best to split the loops in two because most DSPs + have only two accumulators */ + for (j=0;last_sample-N+1+j < 0;j++) + { + double curr_mem = mem[last_sample+j]; + accum[0] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + ptr = in+st->in_stride*(last_sample-N+1+j); + /* Do the new part */ + for (;j<N;j++) + { + double curr_in = *ptr; + ptr += st->in_stride; + accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + cubic_coef(frac, interp); + sum = interp[0]*accum[0] + interp[1]*accum[1] + interp[2]*accum[2] + interp[3]*accum[3]; + + *out = PSHR32(sum,15); + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + st->last_sample[channel_index] = last_sample; + 
st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +static void update_filter(SpeexResamplerState *st) +{ + spx_uint32_t old_length; + + old_length = st->filt_len; + st->oversample = quality_map[st->quality].oversample; + st->filt_len = quality_map[st->quality].base_length; + + if (st->num_rate > st->den_rate) + { + /* down-sampling */ + st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate; + /* FIXME: divide the numerator and denominator by a certain amount if they're too large */ + st->filt_len = st->filt_len*st->num_rate / st->den_rate; + /* Round down to make sure we have a multiple of 4 */ + st->filt_len &= (~0x3); + if (2*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (4*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (8*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (16*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (st->oversample < 1) + st->oversample = 1; + } else { + /* up-sampling */ + st->cutoff = quality_map[st->quality].upsample_bandwidth; + } + + /* Choose the resampling type that requires the least amount of memory */ + if (st->den_rate <= st->oversample) + { + spx_uint32_t i; + if (!st->sinc_table) + st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t)); + else if (st->sinc_table_length < st->filt_len*st->den_rate) + { + st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t)); + st->sinc_table_length = st->filt_len*st->den_rate; + } + for (i=0;i<st->den_rate;i++) + { + spx_uint32_t j; + for (j=0;j<st->filt_len;j++) + { + st->sinc_table[i*st->filt_len+j] = sinc(st->cutoff,((j-st->filt_len/2+1)-((float)i)/st->den_rate), st->filt_len, quality_map[st->quality].window_func); + } + } +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_direct_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_direct_double; + else + 
st->resampler_ptr = resampler_basic_direct_single; +#endif + /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/ + } else { + spx_int32_t i; + if (!st->sinc_table) + st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t)); + else if (st->sinc_table_length < st->filt_len*st->oversample+8) + { + st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t)); + st->sinc_table_length = st->filt_len*st->oversample+8; + } + for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++) + st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func); +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_interpolate_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_interpolate_double; + else + st->resampler_ptr = resampler_basic_interpolate_single; +#endif + /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/ + } + st->int_advance = st->num_rate/st->den_rate; + st->frac_advance = st->num_rate%st->den_rate; + + if (!st->mem) + { + spx_uint32_t i; + st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + for (i=0;i<st->nb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + st->mem_alloc_size = st->filt_len-1; + /*speex_warning("init filter");*/ + } else if (!st->started) + { + spx_uint32_t i; + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + for (i=0;i<st->nb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + st->mem_alloc_size = st->filt_len-1; + /*speex_warning("reinit filter");*/ + } else if (st->filt_len > old_length) + { + spx_uint32_t i; + /* Increase the filter length */ + /*speex_warning("increase filter size");*/ + int old_alloc_size = st->mem_alloc_size; + if (st->filt_len-1 > 
st->mem_alloc_size) + { + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + st->mem_alloc_size = st->filt_len-1; + } + for (i=0;i<st->nb_channels;i++) + { + spx_uint32_t j; + /* Copy data going backward */ + for (j=0;j<old_length-1;j++) + st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*old_alloc_size+(old_length-2-j)]; + /* Then put zeros for lack of anything better */ + for (;j<st->filt_len-1;j++) + st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0; + /* Adjust last_sample */ + st->last_sample[i] += (st->filt_len - old_length)/2; + } + } else if (st->filt_len < old_length) + { + spx_uint32_t i; + /* Reduce filter length, this a bit tricky */ + /*speex_warning("decrease filter size (unimplemented)");*/ + /* Adjust last_sample (which will likely end up negative) */ + /*st->last_sample += (st->filt_len - old_length)/2;*/ + for (i=0;i<st->nb_channels;i++) + { + spx_uint32_t j; + st->magic_samples[i] = (old_length - st->filt_len)/2; + /* Copy data going backward */ + for (j=0;j<st->filt_len-1+st->magic_samples[i];j++) + st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]]; + } + } + +} + +SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err); +} + +SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + spx_uint32_t i; + SpeexResamplerState *st; + if (quality > 10 || quality < 0) + { + if (err) + *err = RESAMPLER_ERR_INVALID_ARG; + return NULL; + } + st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState)); + st->initialised = 0; + st->started = 0; + st->in_rate = 0; + st->out_rate = 0; + st->num_rate = 0; + st->den_rate = 0; + 
st->quality = -1; + st->sinc_table_length = 0; + st->mem_alloc_size = 0; + st->filt_len = 0; + st->mem = 0; + st->resampler_ptr = 0; + + st->cutoff = 1.f; + st->nb_channels = nb_channels; + st->in_stride = 1; + st->out_stride = 1; + + /* Per channel data */ + st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(int)); + st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int)); + st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int)); + for (i=0;i<nb_channels;i++) + { + st->last_sample[i] = 0; + st->magic_samples[i] = 0; + st->samp_frac_num[i] = 0; + } + + speex_resampler_set_quality(st, quality); + speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate); + + + update_filter(st); + + st->initialised = 1; + if (err) + *err = RESAMPLER_ERR_SUCCESS; + + return st; +} + +void speex_resampler_destroy(SpeexResamplerState *st) +{ + speex_free(st->mem); + speex_free(st->sinc_table); + speex_free(st->last_sample); + speex_free(st->magic_samples); + speex_free(st->samp_frac_num); + speex_free(st); +} + + + +static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int j=0; + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + spx_uint32_t tmp_out_len = 0; + mem = st->mem + channel_index * st->mem_alloc_size; + st->started = 1; + + /* Handle the case where we have samples left from a reduction in filter length */ + if (st->magic_samples[channel_index]) + { + spx_uint32_t tmp_in_len; + spx_uint32_t tmp_magic; + tmp_in_len = st->magic_samples[channel_index]; + tmp_out_len = *out_len; + /* FIXME: Need to handle the case where the out array is too small */ + /* magic_samples needs to be set to zero to avoid infinite recursion */ + tmp_magic = st->magic_samples[channel_index]; + st->magic_samples[channel_index] = 0; + speex_resampler_process_native(st, channel_index, mem+N-1, 
&tmp_in_len, out, &tmp_out_len); + /*speex_warning_int("extra samples:", tmp_out_len);*/ + /* If we couldn't process all "magic" input samples, save the rest for next time */ + if (tmp_in_len < tmp_magic) + { + spx_uint32_t i; + st->magic_samples[channel_index] = tmp_magic-tmp_in_len; + for (i=0;i<st->magic_samples[channel_index];i++) + mem[N-1+i]=mem[N-1+i+tmp_in_len]; + } + out += tmp_out_len; + } + + /* Call the right resampler through the function ptr */ + out_sample = st->resampler_ptr(st, channel_index, in, in_len, out, out_len); + + if (st->last_sample[channel_index] < (spx_int32_t)*in_len) + *in_len = st->last_sample[channel_index]; + *out_len = out_sample+tmp_out_len; + st->last_sample[channel_index] -= *in_len; + + for (j=0;j<N-1-(spx_int32_t)*in_len;j++) + mem[j] = mem[j+*in_len]; + for (;j<N-1;j++) + mem[j] = in[st->in_stride*(j+*in_len-N+1)]; + + return RESAMPLER_ERR_SUCCESS; +} + +#define FIXED_STACK_ALLOC 1024 + +#ifdef FIXED_POINT +int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; +#ifdef VAR_ARRAYS + spx_word16_t x[*in_len]; + spx_word16_t y[*out_len]; + /*VARDECL(spx_word16_t *x); + VARDECL(spx_word16_t *y); + ALLOC(x, *in_len, spx_word16_t); + ALLOC(y, *out_len, spx_word16_t);*/ + istride_save = st->in_stride; + ostride_save = st->out_stride; + for (i=0;i<*in_len;i++) + x[i] = WORD2INT(in[i*st->in_stride]); + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<*out_len;i++) + out[i*st->out_stride] = y[i]; +#else + spx_word16_t x[FIXED_STACK_ALLOC]; + spx_word16_t y[FIXED_STACK_ALLOC]; + spx_uint32_t ilen=*in_len, olen=*out_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + while (ilen && olen) + { + spx_uint32_t ichunk, ochunk; 
+ ichunk = ilen; + ochunk = olen; + if (ichunk>FIXED_STACK_ALLOC) + ichunk=FIXED_STACK_ALLOC; + if (ochunk>FIXED_STACK_ALLOC) + ochunk=FIXED_STACK_ALLOC; + for (i=0;i<ichunk;i++) + x[i] = WORD2INT(in[i*st->in_stride]); + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, &ichunk, y, &ochunk); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<ochunk;i++) + out[i*st->out_stride] = y[i]; + out += ochunk; + in += ichunk; + ilen -= ichunk; + olen -= ochunk; + } + *in_len -= ilen; + *out_len -= olen; +#endif + return RESAMPLER_ERR_SUCCESS; +} +int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +{ + return speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); +} +#else +int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +{ + return speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); +} +int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; +#ifdef VAR_ARRAYS + spx_word16_t x[*in_len]; + spx_word16_t y[*out_len]; + /*VARDECL(spx_word16_t *x); + VARDECL(spx_word16_t *y); + ALLOC(x, *in_len, spx_word16_t); + ALLOC(y, *out_len, spx_word16_t);*/ + istride_save = st->in_stride; + ostride_save = st->out_stride; + for (i=0;i<*in_len;i++) + x[i] = in[i*st->in_stride]; + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<*out_len;i++) + out[i*st->out_stride] = WORD2INT(y[i]); +#else + spx_word16_t x[FIXED_STACK_ALLOC]; + spx_word16_t 
y[FIXED_STACK_ALLOC]; + spx_uint32_t ilen=*in_len, olen=*out_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + while (ilen && olen) + { + spx_uint32_t ichunk, ochunk; + ichunk = ilen; + ochunk = olen; + if (ichunk>FIXED_STACK_ALLOC) + ichunk=FIXED_STACK_ALLOC; + if (ochunk>FIXED_STACK_ALLOC) + ochunk=FIXED_STACK_ALLOC; + for (i=0;i<ichunk;i++) + x[i] = in[i*st->in_stride]; + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, &ichunk, y, &ochunk); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<ochunk;i++) + out[i*st->out_stride] = WORD2INT(y[i]); + out += ochunk; + in += ichunk; + ilen -= ichunk; + olen -= ochunk; + } + *in_len -= ilen; + *out_len -= olen; +#endif + return RESAMPLER_ERR_SUCCESS; +} +#endif + +int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;i<st->nb_channels;i++) + { + speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return RESAMPLER_ERR_SUCCESS; +} + +int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;i<st->nb_channels;i++) + { + speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return RESAMPLER_ERR_SUCCESS; +} + +int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + return 
speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate); +} + +void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate) +{ + *in_rate = st->in_rate; + *out_rate = st->out_rate; +} + +int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + int fact; + if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den) + return RESAMPLER_ERR_SUCCESS; + + st->in_rate = in_rate; + st->out_rate = out_rate; + st->num_rate = ratio_num; + st->den_rate = ratio_den; + /* FIXME: This is terribly inefficient, but who cares (at least for now)? */ + for (fact=2;fact<=sqrt(IMAX(in_rate, out_rate));fact++) + { + while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) + { + st->num_rate /= fact; + st->den_rate /= fact; + } + } + + if (st->initialised) + update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den) +{ + *ratio_num = st->num_rate; + *ratio_den = st->den_rate; +} + +int speex_resampler_set_quality(SpeexResamplerState *st, int quality) +{ + if (quality > 10 || quality < 0) + return RESAMPLER_ERR_INVALID_ARG; + if (st->quality == quality) + return RESAMPLER_ERR_SUCCESS; + st->quality = quality; + if (st->initialised) + update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +void speex_resampler_get_quality(SpeexResamplerState *st, int *quality) +{ + *quality = st->quality; +} + +void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->in_stride = stride; +} + +void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->in_stride; +} + +void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->out_stride = stride; +} + +void 
speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->out_stride; +} + +int speex_resampler_skip_zeros(SpeexResamplerState *st) +{ + spx_uint32_t i; + for (i=0;i<st->nb_channels;i++) + st->last_sample[i] = st->filt_len/2; + return RESAMPLER_ERR_SUCCESS; +} + +int speex_resampler_reset_mem(SpeexResamplerState *st) +{ + spx_uint32_t i; + for (i=0;i<st->nb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + return RESAMPLER_ERR_SUCCESS; +} + +const char *speex_resampler_strerror(int err) +{ + switch (err) + { + case RESAMPLER_ERR_SUCCESS: + return "Success."; + case RESAMPLER_ERR_ALLOC_FAILED: + return "Memory allocation failed."; + case RESAMPLER_ERR_BAD_STATE: + return "Bad resampler state."; + case RESAMPLER_ERR_INVALID_ARG: + return "Invalid argument."; + case RESAMPLER_ERR_PTR_OVERLAP: + return "Input and output buffers overlap."; + default: + return "Unknown error. Bad error code or strange version mismatch."; + } +} diff --git a/libspeex/sb_celp.c b/libspeex/sb_celp.c index 89ba473..50b9824 100644 --- a/libspeex/sb_celp.c +++ b/libspeex/sb_celp.c @@ -45,6 +45,7 @@ #include "vq.h" #include "ltp.h" #include "misc.h" +#include "math_approx.h" /* Default size for the encoder and decoder stack (can be changed at compile time). This does not apply when using variable-size arrays or alloca. 
*/ @@ -109,12 +110,26 @@ int sb_decoder_ctl(void *state, int request, void *ptr) #ifdef FIXED_POINT static const spx_word16_t gc_quant_bound[16] = {125, 164, 215, 282, 370, 484, 635, 832, 1090, 1428, 1871, 2452, 3213, 4210, 5516, 7228}; +static const spx_word16_t fold_quant_bound[32] = { + 39, 44, 50, 57, 64, 73, 83, 94, + 106, 120, 136, 154, 175, 198, 225, 255, + 288, 327, 370, 420, 476, 539, 611, 692, + 784, 889, 1007, 1141, 1293, 1465, 1660, 1881}; #define LSP_MARGIN 410 #define LSP_DELTA1 6553 #define LSP_DELTA2 1638 #else +static const spx_word16_t gc_quant_bound[16] = { + 0.97979, 1.28384, 1.68223, 2.20426, 2.88829, 3.78458, 4.95900, 6.49787, + 8.51428, 11.15642, 14.61846, 19.15484, 25.09895, 32.88761, 43.09325, 56.46588}; +static const spx_word16_t fold_quant_bound[32] = { + 0.30498, 0.34559, 0.39161, 0.44375, 0.50283, 0.56979, 0.64565, 0.73162, + 0.82903, 0.93942, 1.06450, 1.20624, 1.36685, 1.54884, 1.75506, 1.98875, + 2.25355, 2.55360, 2.89361, 3.27889, 3.71547, 4.21018, 4.77076, 5.40598, + 6.12577, 6.94141, 7.86565, 8.91295, 10.09969, 11.44445, 12.96826, 14.69497}; + #define LSP_MARGIN .05 #define LSP_DELTA1 .2 #define LSP_DELTA2 .05 @@ -126,122 +141,69 @@ static const spx_word16_t gc_quant_bound[16] = {125, 164, 215, 282, 370, 484, 63 #ifdef FIXED_POINT static const spx_word16_t h0[64] = {2, -7, -7, 18, 15, -39, -25, 75, 35, -130, -41, 212, 38, -327, -17, 483, -32, -689, 124, 956, -283, -1307, 543, 1780, -973, -2467, 1733, 3633, -3339, -6409, 9059, 30153, 30153, 9059, -6409, -3339, 3633, 1733, -2467, -973, 1780, 543, -1307, -283, 956, 124, -689, -32, 483, -17, -327, 38, 212, -41, -130, 35, 75, -25, -39, 15, 18, -7, -7, 2}; -static const spx_word16_t h1[64] = {2, 7, -7, -18, 15, 39, -25, -75, 35, 130, -41, -212, 38, 327, -17, -483, -32, 689, 124, -956, -283, 1307, 543, -1780, -973, 2467, 1733, -3633, -3339, 6409, 9059, -30153, 30153, -9059, -6409, 3339, 3633, -1733, -2467, 973, 1780, -543, -1307, 283, 956, -124, -689, 32, 483, 17, -327, -38, 212, 41, 
-130, -35, 75, 25, -39, -15, 18, 7, -7, -2}; - - #else static const float h0[64] = { - 3.596189e-05, -0.0001123515, - -0.0001104587, 0.0002790277, - 0.0002298438, -0.0005953563, - -0.0003823631, 0.00113826, - 0.0005308539, -0.001986177, - -0.0006243724, 0.003235877, - 0.0005743159, -0.004989147, - -0.0002584767, 0.007367171, - -0.0004857935, -0.01050689, - 0.001894714, 0.01459396, - -0.004313674, -0.01994365, - 0.00828756, 0.02716055, - -0.01485397, -0.03764973, - 0.026447, 0.05543245, - -0.05095487, -0.09779096, - 0.1382363, 0.4600981, - 0.4600981, 0.1382363, - -0.09779096, -0.05095487, - 0.05543245, 0.026447, - -0.03764973, -0.01485397, - 0.02716055, 0.00828756, - -0.01994365, -0.004313674, - 0.01459396, 0.001894714, - -0.01050689, -0.0004857935, - 0.007367171, -0.0002584767, - -0.004989147, 0.0005743159, - 0.003235877, -0.0006243724, - -0.001986177, 0.0005308539, - 0.00113826, -0.0003823631, - -0.0005953563, 0.0002298438, - 0.0002790277, -0.0001104587, - -0.0001123515, 3.596189e-05 + 3.596189e-05f, -0.0001123515f, + -0.0001104587f, 0.0002790277f, + 0.0002298438f, -0.0005953563f, + -0.0003823631f, 0.00113826f, + 0.0005308539f, -0.001986177f, + -0.0006243724f, 0.003235877f, + 0.0005743159f, -0.004989147f, + -0.0002584767f, 0.007367171f, + -0.0004857935f, -0.01050689f, + 0.001894714f, 0.01459396f, + -0.004313674f, -0.01994365f, + 0.00828756f, 0.02716055f, + -0.01485397f, -0.03764973f, + 0.026447f, 0.05543245f, + -0.05095487f, -0.09779096f, + 0.1382363f, 0.4600981f, + 0.4600981f, 0.1382363f, + -0.09779096f, -0.05095487f, + 0.05543245f, 0.026447f, + -0.03764973f, -0.01485397f, + 0.02716055f, 0.00828756f, + -0.01994365f, -0.004313674f, + 0.01459396f, 0.001894714f, + -0.01050689f, -0.0004857935f, + 0.007367171f, -0.0002584767f, + -0.004989147f, 0.0005743159f, + 0.003235877f, -0.0006243724f, + -0.001986177f, 0.0005308539f, + 0.00113826f, -0.0003823631f, + -0.0005953563f, 0.0002298438f, + 0.0002790277f, -0.0001104587f, + -0.0001123515f, 3.596189e-05f }; -static const 
float h1[64] = { - 3.596189e-05, 0.0001123515, - -0.0001104587, -0.0002790277, - 0.0002298438, 0.0005953563, - -0.0003823631, -0.00113826, - 0.0005308539, 0.001986177, - -0.0006243724, -0.003235877, - 0.0005743159, 0.004989147, - -0.0002584767, -0.007367171, - -0.0004857935, 0.01050689, - 0.001894714, -0.01459396, - -0.004313674, 0.01994365, - 0.00828756, -0.02716055, - -0.01485397, 0.03764973, - 0.026447, -0.05543245, - -0.05095487, 0.09779096, - 0.1382363, -0.4600981, - 0.4600981, -0.1382363, - -0.09779096, 0.05095487, - 0.05543245, -0.026447, - -0.03764973, 0.01485397, - 0.02716055, -0.00828756, - -0.01994365, 0.004313674, - 0.01459396, -0.001894714, - -0.01050689, 0.0004857935, - 0.007367171, 0.0002584767, - -0.004989147, -0.0005743159, - 0.003235877, 0.0006243724, - -0.001986177, -0.0005308539, - 0.00113826, 0.0003823631, - -0.0005953563, -0.0002298438, - 0.0002790277, 0.0001104587, - -0.0001123515, -3.596189e-05 -}; #endif extern const spx_word16_t lpc_window[]; -static void mix_and_saturate(spx_word32_t *x0, spx_word32_t *x1, spx_word16_t *out, int len) -{ - int i; - for (i=0;i<len;i++) - { - spx_word32_t tmp; -#ifdef FIXED_POINT - tmp=PSHR(x0[i]-x1[i],SIG_SHIFT-1); -#else - tmp=2*(x0[i]-x1[i]); -#endif - if (tmp>32767) - out[i] = 32767; - else if (tmp<-32767) - out[i] = -32767; - else - out[i] = tmp; - } -} void *sb_encoder_init(const SpeexMode *m) { int i; + spx_int32_t tmp; SBEncState *st; const SpeexSBMode *mode; st = (SBEncState*)speex_alloc(sizeof(SBEncState)); if (!st) return NULL; -#if defined(VAR_ARRAYS) || defined (USE_ALLOCA) - st->stack = NULL; -#else - st->stack = (char*)speex_alloc_scratch(SB_ENC_STACK); -#endif st->mode = m; mode = (const SpeexSBMode*)m->mode; st->st_low = speex_encoder_init(mode->nb_mode); +#if defined(VAR_ARRAYS) || defined (USE_ALLOCA) + st->stack = NULL; +#else + /*st->stack = (char*)speex_alloc_scratch(SB_ENC_STACK);*/ + speex_encoder_ctl(st->st_low, SPEEX_GET_STACK, &st->stack); +#endif + st->full_frame_size = 
2*mode->frameSize; st->frame_size = mode->frameSize; st->subframeSize = mode->subframeSize; @@ -254,10 +216,10 @@ void *sb_encoder_init(const SpeexMode *m) st->submodes=mode->submodes; st->submodeSelect = st->submodeID=mode->defaultSubmode; - i=9; - speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &i); - i=1; - speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &i); + tmp=9; + speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &tmp); + tmp=1; + speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &tmp); st->lag_factor = mode->lag_factor; st->lpc_floor = mode->lpc_floor; @@ -265,49 +227,33 @@ void *sb_encoder_init(const SpeexMode *m) st->gamma2=mode->gamma2; st->first=1; - st->x0d=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->x1d=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->high=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y0=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y1=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); + st->high=(spx_word16_t*)speex_alloc((st->windowSize-st->frame_size)*sizeof(spx_word16_t)); st->h0_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->h1_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); - st->g0_mem=(spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - st->g1_mem=(spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - - st->excBuf=(spx_sig_t*)speex_alloc((st->bufSize)*sizeof(spx_sig_t)); - st->exc = st->excBuf + st->bufSize - st->windowSize; - st->res=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->sw=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); st->window= lpc_window; st->lagWindow = (spx_word16_t*)speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); for (i=0;i<st->lpcSize+1;i++) st->lagWindow[i]=16384*exp(-.5*sqr(2*M_PI*st->lag_factor*i)); - st->autocorr = (spx_word16_t*)speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); - st->lpc = 
(spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); - st->bw_lpc1 = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); - st->bw_lpc2 = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); - st->lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->interp_lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->interp_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->interp_lpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - st->low_innov = (spx_word32_t*)speex_alloc((st->frame_size)*sizeof(spx_word32_t)); - speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); - st->innov_save = NULL; + st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); + st->innov_rms_save = NULL; st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); + for (i=0;i<st->lpcSize;i++) + { + st->old_lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1); + } + st->vbr_quality = 8; st->vbr_enabled = 0; st->vbr_max = 0; @@ -331,38 +277,21 @@ void sb_encoder_destroy(void *state) speex_encoder_destroy(st->st_low); #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA)) - speex_free_scratch(st->stack); + /*speex_free_scratch(st->stack);*/ #endif - speex_free(st->x0d); - speex_free(st->x1d); speex_free(st->high); - speex_free(st->y0); - speex_free(st->y1); speex_free(st->h0_mem); speex_free(st->h1_mem); - speex_free(st->g0_mem); - speex_free(st->g1_mem); - 
speex_free(st->excBuf); - speex_free(st->res); - speex_free(st->sw); speex_free(st->lagWindow); - speex_free(st->autocorr); - speex_free(st->lpc); - speex_free(st->bw_lpc1); - speex_free(st->bw_lpc2); - speex_free(st->lsp); - speex_free(st->qlsp); speex_free(st->old_lsp); speex_free(st->old_qlsp); - speex_free(st->interp_lsp); - speex_free(st->interp_qlsp); - speex_free(st->interp_lpc); speex_free(st->interp_qlpc); speex_free(st->pi_gain); + speex_free(st->exc_rms); speex_free(st->mem_sp); speex_free(st->mem_sp2); @@ -383,44 +312,56 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) VARDECL(spx_word16_t *target); VARDECL(spx_word16_t *syn_resp); VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_word16_t *low_exc); + spx_word16_t *low; + spx_word16_t *high; + VARDECL(spx_word16_t *low_exc_rms); + VARDECL(spx_word16_t *low_innov_rms); const SpeexSBMode *mode; - int dtx; + spx_int32_t dtx; spx_word16_t *in = (spx_word16_t*)vin; - + spx_word16_t e_low=0, e_high=0; + VARDECL(spx_coef_t *lpc); + VARDECL(spx_coef_t *interp_lpc); + VARDECL(spx_coef_t *bw_lpc1); + VARDECL(spx_coef_t *bw_lpc2); + VARDECL(spx_lsp_t *lsp); + VARDECL(spx_lsp_t *qlsp); + VARDECL(spx_lsp_t *interp_lsp); + VARDECL(spx_lsp_t *interp_qlsp); + st = (SBEncState*)state; stack=st->stack; mode = (const SpeexSBMode*)(st->mode->mode); - + low = in; + high = in+st->frame_size; + + /* High-band buffering / sync with low band */ + /* Compute the two sub-bands by filtering with QMF h0*/ + qmf_decomp(in, h0, low, high, st->full_frame_size, QMF_ORDER, st->h0_mem, stack); + + if (st->vbr_enabled || st->vad_enabled) { - VARDECL(spx_word16_t *low); - ALLOC(low, st->frame_size, spx_word16_t); - - /* Compute the two sub-bands by filtering with h0 and h1*/ - qmf_decomp(in, h0, st->x0d, st->x1d, st->full_frame_size, QMF_ORDER, st->h0_mem, stack); - - for (i=0;i<st->frame_size;i++) - low[i] = SATURATE(PSHR(st->x0d[i],SIG_SHIFT),32767); - - /* Encode the narrowband part*/ - speex_encode_native(st->st_low, low, 
bits); - - for (i=0;i<st->frame_size;i++) - st->x0d[i] = SHL(low[i],SIG_SHIFT); + /* Need to compute things here before the signal is trashed by the encoder */ + /*FIXME: Are the two signals (low, high) in sync? */ + e_low = compute_rms16(low, st->frame_size); + e_high = compute_rms16(high, st->frame_size); } - /* High-band buffering / sync with low band */ - for (i=0;i<st->windowSize-st->frame_size;i++) - st->high[i] = st->high[st->frame_size+i]; - for (i=0;i<st->frame_size;i++) - st->high[st->windowSize-st->frame_size+i]=SATURATE(st->x1d[i],536854528); - - speex_move(st->excBuf, st->excBuf+st->frame_size, (st->bufSize-st->frame_size)*sizeof(spx_sig_t)); + ALLOC(low_innov_rms, st->nbSubframes, spx_word16_t); + speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, low_innov_rms); + /* Encode the narrowband part*/ + speex_encode_native(st->st_low, low, bits); + high = high - (st->windowSize-st->frame_size); + for (i=0;i<st->windowSize-st->frame_size;i++) + high[i] = st->high[i]; + for (i=0;i<st->windowSize-st->frame_size;i++) + st->high[i] = high[i+st->frame_size]; + ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_word16_t); + ALLOC(low_exc_rms, st->nbSubframes, spx_word16_t); speex_encoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); - speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); + speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc_rms); speex_encoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, &dtx); @@ -429,35 +370,53 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) else dtx=0; + ALLOC(lpc, st->lpcSize, spx_coef_t); + ALLOC(interp_lpc, st->lpcSize, spx_coef_t); + ALLOC(bw_lpc1, st->lpcSize, spx_coef_t); + ALLOC(bw_lpc2, st->lpcSize, spx_coef_t); + + ALLOC(lsp, st->lpcSize, spx_lsp_t); + ALLOC(qlsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_lsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); + { + VARDECL(spx_word16_t *autocorr); VARDECL(spx_word16_t *w_sig); + ALLOC(autocorr, 
st->lpcSize+1, spx_word16_t); ALLOC(w_sig, st->windowSize, spx_word16_t); /* Window for analysis */ - for (i=0;i<st->windowSize;i++) - w_sig[i] = SHR(MULT16_16(SHR((spx_word32_t)(st->high[i]),SIG_SHIFT),st->window[i]),SIG_SHIFT); - + /* FIXME: This is a kludge */ + if (st->subframeSize==80) + { + for (i=0;i<st->windowSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(high[i],st->window[i>>1]),SIG_SHIFT)); + } else { + for (i=0;i<st->windowSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(high[i],st->window[i]),SIG_SHIFT)); + } /* Compute auto-correlation */ - _spx_autocorr(w_sig, st->autocorr, st->lpcSize+1, st->windowSize); - } - st->autocorr[0] = ADD16(st->autocorr[0],MULT16_16_Q15(st->autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */ + _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize); + autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */ - /* Lag windowing: equivalent to filtering in the power-spectrum domain */ - for (i=0;i<st->lpcSize+1;i++) - st->autocorr[i] = MULT16_16_Q14(st->autocorr[i],st->lagWindow[i]); + /* Lag windowing: equivalent to filtering in the power-spectrum domain */ + for (i=0;i<st->lpcSize+1;i++) + autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]); - /* Levinson-Durbin */ - _spx_lpc(st->lpc, st->autocorr, st->lpcSize); + /* Levinson-Durbin */ + _spx_lpc(lpc, autocorr, st->lpcSize); + } /* LPC to LSPs (x-domain) transform */ - roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA1, stack); + roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack); if (roots!=st->lpcSize) { - roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA2, stack); + roots = lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA2, stack); if (roots!=st->lpcSize) { /*If we can't find all LSP's, do some damage control and use a flat filter*/ for (i=0;i<st->lpcSize;i++) { - st->lsp[i]=LSP_SCALING*M_PI*((float)(i+1))/(st->lpcSize+1); + 
lsp[i]=st->old_lsp[i]; } } } @@ -465,7 +424,6 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /* VBR code */ if ((st->vbr_enabled || st->vad_enabled) && !dtx) { - float e_low=0, e_high=0; float ratio; if (st->abr_enabled) { @@ -487,10 +445,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) } - /*FIXME: Are the two signals (low, high) in sync? */ - e_low = compute_rms(st->x0d, st->frame_size); - e_high = compute_rms(st->high, st->frame_size); - ratio = 2*log((1+e_high)/(1+e_low)); + ratio = 2*log((1.f+e_high)/(1.f+e_low)); speex_encoder_ctl(st->st_low, SPEEX_GET_RELATIVE_QUALITY, &st->relative_quality); if (ratio<-4) @@ -500,7 +455,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /*if (ratio>-2)*/ if (st->vbr_enabled) { - int modeid; + spx_int32_t modeid; modeid = mode->nb_modes-1; st->relative_quality+=1.0*(ratio+2); if (st->relative_quality<-1) @@ -522,7 +477,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) speex_encoder_ctl(state, SPEEX_SET_HIGH_MODE, &modeid); if (st->abr_enabled) { - int bitrate; + spx_int32_t bitrate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); st->abr_drift+=(bitrate-st->abr_enabled); st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); @@ -556,23 +511,14 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) if (dtx || st->submodes[st->submodeID] == NULL) { for (i=0;i<st->frame_size;i++) - st->exc[i]=st->sw[i]=VERY_SMALL; + high[i]=VERY_SMALL; for (i=0;i<st->lpcSize;i++) st->mem_sw[i]=0; st->first=1; /* Final signal synthesis from excitation */ - iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp); - -#ifdef RESYNTH - /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - for (i=0;i<st->full_frame_size;i++) - in[i]=SHR(st->y0[i]-st->y1[i], SIG_SHIFT-1); -#endif + iir_mem16(high, 
st->interp_qlpc, high, st->frame_size, st->lpcSize, st->mem_sp, stack); if (dtx) return 0; @@ -582,14 +528,14 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /* LSP quantization */ - SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits); + SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits); if (st->first) { for (i=0;i<st->lpcSize;i++) - st->old_lsp[i] = st->lsp[i]; + st->old_lsp[i] = lsp[i]; for (i=0;i<st->lpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; } ALLOC(mem, st->lpcSize, spx_mem_t); @@ -599,37 +545,33 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) for (sub=0;sub<st->nbSubframes;sub++) { - spx_sig_t *exc, *sp, *res, *sw, *innov_save=NULL; - spx_word16_t filter_ratio; + VARDECL(spx_word16_t *exc); + VARDECL(spx_word16_t *res); + VARDECL(spx_word16_t *sw); + spx_word16_t *sp; + spx_word16_t filter_ratio; /*Q7*/ int offset; - spx_word32_t rl, rh; + spx_word32_t rl, rh; /*Q13*/ spx_word16_t eh=0; offset = st->subframeSize*sub; - sp=st->high+offset; - exc=st->exc+offset; - res=st->res+offset; - sw=st->sw+offset; - /* Pointer for saving innovation */ - if (st->innov_save) - { - innov_save = st->innov_save+2*offset; - for (i=0;i<2*st->subframeSize;i++) - innov_save[i]=0; - } + sp=high+offset; + ALLOC(exc, st->subframeSize, spx_word16_t); + ALLOC(res, st->subframeSize, spx_word16_t); + ALLOC(sw, st->subframeSize, spx_word16_t); /* LSP interpolation (quantized and unquantized) */ - lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes); - lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes); + lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); + lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); - lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN); - lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN); + lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); + 
lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); - lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack); - lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack); + lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); + lsp_to_lpc(interp_qlsp, st->interp_qlpc, st->lpcSize, stack); - bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize); - bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize); + bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); + bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); /* Compute mid-band (4000 Hz for wideband) response of low-band and high-band filters */ @@ -643,24 +585,24 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=EXTRACT16(SATURATE(PDIV32(SHL32(ADD32(rl,82),7),ADD32(82,rh)),32767)); #else filter_ratio=(rl+.01)/(rh+.01); #endif /* Compute "real excitation" */ - fir_mem2(sp, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, st->mem_sp2); + fir_mem16(sp, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, st->mem_sp2, stack); /* Compute energy of low-band and high-band excitation */ - eh = compute_rms(exc, st->subframeSize); + eh = compute_rms16(exc, st->subframeSize); if (!SUBMODE(innovation_quant)) {/* 1 for spectral folding excitation, 0 for stochastic */ - float g; - spx_word16_t el; - el = compute_rms(st->low_innov+offset, st->subframeSize); + spx_word32_t g; /*Q7*/ + spx_word16_t el; /*Q0*/ + el = low_innov_rms[sub]; /* Gain to use if we want to use the low-band excitation for high-band */ - g=eh/(1.+el); + g=PDIV32(MULT16_16(filter_ratio,eh),EXTEND32(ADD16(1,el))); #if 0 { @@ -678,15 +620,10 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) } #endif -#ifdef FIXED_POINT - g *= filter_ratio/128.; -#else - g *= filter_ratio; -#endif /*print_vec(&g, 1, "gain factor");*/ /* Gain quantization */ { - int quant = (int) floor(.5 + 10 + 8.0 * 
log((g+.0001))); + int quant = scal_quant(g, fold_quant_bound, 32); /*speex_warning_int("tata", quant);*/ if (quant<0) quant=0; @@ -694,68 +631,57 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) quant=31; speex_bits_pack(bits, quant, 5); } - + if (st->innov_rms_save) + { + st->innov_rms_save[sub] = eh; + } + st->exc_rms[sub] = eh; } else { - spx_word16_t gc; - spx_word32_t scale; - spx_word16_t el; - el = compute_rms16(low_exc+offset, st->subframeSize); + spx_word16_t gc; /*Q7*/ + spx_word32_t scale; /*Q14*/ + spx_word16_t el; /*Q0*/ + el = low_exc_rms[sub]; /*Q0*/ gc = PDIV32_16(MULT16_16(filter_ratio,1+eh),1+el); /* This is a kludge that cleans up a historical bug */ if (st->subframeSize==80) - gc *= 0.70711; + gc = MULT16_16_P15(QCONST16(0.70711f,15),gc); /*printf ("%f %f %f %f\n", el, eh, filter_ratio, gc);*/ -#ifdef FIXED_POINT { int qgc = scal_quant(gc, gc_quant_bound, 16); speex_bits_pack(bits, qgc, 4); - gc = MULT16_32_Q15(28626,gc_quant_bound[qgc]); + gc = MULT16_16_Q15(QCONST16(0.87360,15),gc_quant_bound[qgc]); } -#else - { - int qgc = (int)floor(.5+3.7*(log(gc)+0.15556)); - if (qgc<0) - qgc=0; - if (qgc>15) - qgc=15; - speex_bits_pack(bits, qgc, 4); - gc = exp((1/3.7)*qgc-0.15556); - } -#endif if (st->subframeSize==80) - gc *= 1.4142; + gc = MULT16_16_P14(QCONST16(1.4142f,14), gc); scale = SHL32(MULT16_16(PDIV32_16(SHL32(EXTEND32(gc),SIG_SHIFT-6),filter_ratio),(1+el)),6); - compute_impulse_response(st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, st->subframeSize, st->lpcSize, stack); + compute_impulse_response(st->interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, st->subframeSize, st->lpcSize, stack); /* Reset excitation */ for (i=0;i<st->subframeSize;i++) - exc[i]=VERY_SMALL; + res[i]=VERY_SMALL; /* Compute zero response (ringing) of A(z/g1) / ( A(z/g2) * Aq(z) ) */ for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sp[i]; - iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem); + iir_mem16(res, st->interp_qlpc, res, st->subframeSize, 
st->lpcSize, mem, stack); for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sw[i]; - filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem); + filter_mem16(res, bw_lpc1, bw_lpc2, res, st->subframeSize, st->lpcSize, mem, stack); /* Compute weighted signal */ for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sw[i]; - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem); + filter_mem16(sp, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack); /* Compute target signal */ for (i=0;i<st->subframeSize;i++) - target[i]=PSHR32(sw[i]-res[i],SIG_SHIFT); - - for (i=0;i<st->subframeSize;i++) - exc[i]=0; + target[i]=SUB16(sw[i],res[i]); signal_div(target, target, scale, st->subframeSize); @@ -764,22 +690,13 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) innov[i]=0; /*print_vec(target, st->subframeSize, "\ntarget");*/ - SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, + SUBMODE(innovation_quant)(target, st->interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); /*print_vec(target, st->subframeSize, "after");*/ signal_mul(innov, innov, scale, st->subframeSize); - for (i=0;i<st->subframeSize;i++) - exc[i] = ADD32(exc[i], innov[i]); - - if (st->innov_save) - { - for (i=0;i<st->subframeSize;i++) - innov_save[2*i]=innov[i]; - } - if (SUBMODE(double_codebook)) { char *tmp_stack=stack; VARDECL(spx_sig_t *innov2); @@ -787,42 +704,44 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;i<st->subframeSize;i++) innov2[i]=0; for (i=0;i<st->subframeSize;i++) - target[i]*=2.5; - SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, + target[i]=MULT16_16_P13(QCONST16(2.5f,13), target[i]); + + SUBMODE(innovation_quant)(target, st->interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov2, syn_resp, bits, stack, 
st->complexity, 0); + signal_mul(innov2, innov2, MULT16_32_P15(QCONST16(0.4f,15),scale), st->subframeSize); + for (i=0;i<st->subframeSize;i++) - innov2[i]*=scale*(1/2.5)/SIG_SCALING; - for (i=0;i<st->subframeSize;i++) - exc[i] = ADD32(exc[i],innov2[i]); + innov[i] = ADD32(innov[i],innov2[i]); stack = tmp_stack; } + for (i=0;i<st->subframeSize;i++) + exc[i] = PSHR32(innov[i],SIG_SHIFT); + + if (st->innov_rms_save) + { + st->innov_rms_save[sub] = MULT16_16_Q15(QCONST16(.70711f, 15), compute_rms(innov, st->subframeSize)); + } + st->exc_rms[sub] = compute_rms16(exc, st->subframeSize); + } + /*Keep the previous memory*/ for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sp[i]; /* Final signal synthesis from excitation */ - iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp); + iir_mem16(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp, stack); /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */ - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw); + filter_mem16(sp, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack); } - -#ifdef RESYNTH - /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - for (i=0;i<st->full_frame_size;i++) - in[i]=SHR(st->y0[i]-st->y1[i], SIG_SHIFT-1); -#endif for (i=0;i<st->lpcSize;i++) - st->old_lsp[i] = st->lsp[i]; + st->old_lsp[i] = lsp[i]; for (i=0;i<st->lpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; st->first=0; @@ -835,26 +754,24 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) void *sb_decoder_init(const SpeexMode *m) { - int tmp; + spx_int32_t tmp; SBDecState *st; const SpeexSBMode *mode; st = (SBDecState*)speex_alloc(sizeof(SBDecState)); if (!st) return NULL; -#if defined(VAR_ARRAYS) || defined (USE_ALLOCA) - 
st->stack = NULL; -#else - st->stack = (char*)speex_alloc_scratch(SB_DEC_STACK); -#endif st->mode = m; mode=(const SpeexSBMode*)m->mode; - st->encode_submode = 1; - - - st->st_low = speex_decoder_init(mode->nb_mode); +#if defined(VAR_ARRAYS) || defined (USE_ALLOCA) + st->stack = NULL; +#else + /*st->stack = (char*)speex_alloc_scratch(SB_DEC_STACK);*/ + speex_decoder_ctl(st->st_low, SPEEX_GET_STACK, &st->stack); +#endif + st->full_frame_size = 2*mode->frameSize; st->frame_size = mode->frameSize; st->subframeSize = mode->subframeSize; @@ -870,29 +787,18 @@ void *sb_decoder_init(const SpeexMode *m) st->first=1; - - st->x0d = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->x1d = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->high = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y0 = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y1 = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->g0_mem = (spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); st->g1_mem = (spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - st->exc = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->excBuf = (spx_sig_t*)speex_alloc((st->subframeSize)*sizeof(spx_sig_t)); + st->excBuf = (spx_word16_t*)speex_alloc((st->subframeSize)*sizeof(spx_word16_t)); - st->qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); - st->interp_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); + st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); st->mem_sp = (spx_mem_t*)speex_alloc((2*st->lpcSize)*sizeof(spx_mem_t)); - st->low_innov = 
(spx_word32_t*)speex_alloc((st->frame_size)*sizeof(spx_word32_t)); - speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); st->innov_save = NULL; @@ -911,23 +817,16 @@ void sb_decoder_destroy(void *state) st = (SBDecState*)state; speex_decoder_destroy(st->st_low); #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA)) - speex_free_scratch(st->stack); + /*speex_free_scratch(st->stack);*/ #endif - speex_free(st->x0d); - speex_free(st->x1d); - speex_free(st->high); - speex_free(st->y0); - speex_free(st->y1); speex_free(st->g0_mem); speex_free(st->g1_mem); - speex_free(st->exc); speex_free(st->excBuf); - speex_free(st->qlsp); speex_free(st->old_qlsp); - speex_free(st->interp_qlsp); speex_free(st->interp_qlpc); speex_free(st->pi_gain); + speex_free(st->exc_rms); speex_free(st->mem_sp); speex_free(state); @@ -943,7 +842,7 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta saved_modeid=st->submodeID; st->submodeID=1; } else { - bw_lpc(GAMMA_SCALING*0.99, st->interp_qlpc, st->interp_qlpc, st->lpcSize); + bw_lpc(QCONST16(0.99f,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize); } st->first=1; @@ -952,25 +851,17 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta /* Final signal synthesis from excitation */ if (!dtx) { - spx_word16_t low_ener; - low_ener = .9*compute_rms(st->exc, st->frame_size); - for (i=0;i<st->frame_size;i++) - st->exc[i] = speex_rand(low_ener, &st->seed); + st->last_ener = MULT16_16_Q15(QCONST16(.9f,15),st->last_ener); } - for (i=0;i<st->frame_size;i++) - st->high[i]=st->exc[i]; + out[i+st->frame_size] = speex_rand(st->last_ener, &st->seed); - iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, - st->mem_sp); + iir_mem16(out+st->frame_size, st->interp_qlpc, out+st->frame_size, st->frame_size, st->lpcSize, + st->mem_sp, stack); /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - 
fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); - + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); if (dtx) { st->submodeID=saved_modeid; @@ -987,26 +878,24 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) int ret; char *stack; VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_word16_t *low_exc); + VARDECL(spx_word16_t *low_exc_rms); VARDECL(spx_coef_t *ak); - int dtx; + VARDECL(spx_lsp_t *qlsp); + VARDECL(spx_lsp_t *interp_qlsp); + spx_int32_t dtx; const SpeexSBMode *mode; spx_word16_t *out = (spx_word16_t*)vout; + spx_word16_t *low_innov_alias; + spx_word32_t exc_ener_sum = 0; st = (SBDecState*)state; stack=st->stack; mode = (const SpeexSBMode*)(st->mode->mode); - { - VARDECL(spx_word16_t *low); - ALLOC(low, st->frame_size, spx_word16_t); - - /* Decode the low-band */ - ret = speex_decode_native(st->st_low, bits, low); - - for (i=0;i<st->frame_size;i++) - st->x0d[i] = SHL((spx_sig_t)low[i], SIG_SHIFT); - } + low_innov_alias = out+st->frame_size; + speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, low_innov_alias); + /* Decode the low-band */ + ret = speex_decode_native(st->st_low, bits, out); speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, &dtx); @@ -1042,7 +931,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } if (st->submodeID != 0 && st->submodes[st->submodeID] == NULL) { - speex_warning("Invalid mode encountered: corrupted stream?"); + speex_notify("Invalid mode encountered. 
The stream is corrupted."); return -2; } } @@ -1057,51 +946,49 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } for (i=0;i<st->frame_size;i++) - st->exc[i]=VERY_SMALL; + out[st->frame_size+i]=VERY_SMALL; st->first=1; /* Final signal synthesis from excitation */ - iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp); - - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); + iir_mem16(out+st->frame_size, st->interp_qlpc, out+st->frame_size, st->frame_size, st->lpcSize, st->mem_sp, stack); - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); return 0; } - for (i=0;i<st->frame_size;i++) - st->exc[i]=0; - ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_word16_t); + ALLOC(low_exc_rms, st->nbSubframes, spx_word16_t); speex_decoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); - speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); + speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc_rms); - SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits); + ALLOC(qlsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); + SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits); if (st->first) { for (i=0;i<st->lpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; } ALLOC(ak, st->lpcSize, spx_coef_t); for (sub=0;sub<st->nbSubframes;sub++) { - spx_sig_t *exc, *sp, *innov_save=NULL; + VARDECL(spx_word32_t *exc); + spx_word16_t *innov_save=NULL; + spx_word16_t *sp; spx_word16_t filter_ratio; spx_word16_t el=0; int offset; spx_word32_t rl=0,rh=0; offset = st->subframeSize*sub; - sp=st->high+offset; - exc=st->exc+offset; + sp=out+st->frame_size+offset; + ALLOC(exc, st->subframeSize, spx_word32_t); /* Pointer for saving innovation */ if 
(st->innov_save) { @@ -1111,12 +998,12 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } /* LSP interpolation */ - lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes); + lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); - lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN); + lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); /* LSP to LPC */ - lsp_to_lpc(st->interp_qlsp, ak, st->lpcSize, stack); + lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack); /* Calculate reponse ratio between the low and high filter in the middle of the band (4000 Hz) */ @@ -1125,13 +1012,13 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) rh = LPC_SCALING; for (i=0;i<st->lpcSize;i+=2) { - rh += st->interp_qlpc[i+1] - st->interp_qlpc[i]; - st->pi_gain[sub] += st->interp_qlpc[i] + st->interp_qlpc[i+1]; + rh += ak[i+1] - ak[i]; + st->pi_gain[sub] += ak[i] + ak[i+1]; } rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=EXTRACT16(SATURATE(PDIV32(SHL32(ADD32(rl,82),7),ADD32(82,rh)),32767)); #else filter_ratio=(rl+.01)/(rh+.01); #endif @@ -1140,60 +1027,32 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) exc[i]=0; if (!SUBMODE(innovation_unquant)) { - float g; + spx_word32_t g; int quant; quant = speex_bits_unpack_unsigned(bits, 5); - g= exp(((float)quant-10)/8.0); + g= spx_exp(MULT16_16(QCONST16(.125f,11),(quant-10))); -#ifdef FIXED_POINT - g /= filter_ratio/128.; -#else - g /= filter_ratio; -#endif - /* High-band excitation using the low-band excitation and a gain */ + g = PDIV32(g, filter_ratio); -#if 0 - for (i=0;i<st->subframeSize;i++) - exc[i]=mode->folding_gain*g*st->low_innov[offset+i]; -#else + for (i=0;i<st->subframeSize;i+=2) { - float tmp=1; - /*static tmp1=0,tmp2=0; - static int seed=1; - el = compute_rms(low_innov+offset, st->subframeSize);*/ - for (i=0;i<st->subframeSize;i++) - { - float 
e=tmp*g*mode->folding_gain*st->low_innov[offset+i]; - tmp *= -1; - exc[i] = e; - /*float r = speex_rand(g*el,&seed); - exc[i] = .5*(r+tmp2 + e-tmp1); - tmp1 = e; - tmp2 = r;*/ - } - + exc[i]=SHL32(MULT16_32_P15(MULT16_16_Q15(mode->folding_gain,low_innov_alias[offset+i]),SHL32(g,6)),SIG_SHIFT); + exc[i+1]=NEG32(SHL32(MULT16_32_P15(MULT16_16_Q15(mode->folding_gain,low_innov_alias[offset+i+1]),SHL32(g,6)),SIG_SHIFT)); } -#endif } else { spx_word16_t gc; spx_word32_t scale; int qgc = speex_bits_unpack_unsigned(bits, 4); - - el = compute_rms16(low_exc+offset, st->subframeSize); - -#ifdef FIXED_POINT - gc = MULT16_32_Q15(28626,gc_quant_bound[qgc]); -#else - gc = exp((1/3.7)*qgc-0.15556); -#endif + + el = low_exc_rms[sub]; + gc = MULT16_16_Q15(QCONST16(0.87360,15),gc_quant_bound[qgc]); if (st->subframeSize==80) - gc *= 1.4142; - - scale = SHL(MULT16_16(PDIV32_16(SHL(gc,SIG_SHIFT-6),filter_ratio),(1+el)),6); + gc = MULT16_16_P14(QCONST16(1.4142f,14),gc); + scale = SHL32(PDIV32(SHL32(MULT16_16(gc, el),3), filter_ratio),SIG_SHIFT-3); SUBMODE(innovation_unquant)(exc, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); @@ -1207,8 +1066,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) innov2[i]=0; SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); - for (i=0;i<st->subframeSize;i++) - innov2[i]*=scale/(float)SIG_SCALING*(1/2.5); + signal_mul(innov2, innov2, MULT16_32_P15(QCONST16(0.4f,15),scale), st->subframeSize); for (i=0;i<st->subframeSize;i++) exc[i] = ADD32(exc[i],innov2[i]); stack = tmp_stack; @@ -1219,27 +1077,25 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) if (st->innov_save) { for (i=0;i<st->subframeSize;i++) - innov_save[2*i]=exc[i]; + innov_save[2*i]=EXTRACT16(PSHR32(exc[i],SIG_SHIFT)); } for (i=0;i<st->subframeSize;i++) sp[i]=st->excBuf[i]; - iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); + iir_mem16(sp, st->interp_qlpc, sp, 
st->subframeSize, st->lpcSize, + st->mem_sp, stack); for (i=0;i<st->subframeSize;i++) - st->excBuf[i]=exc[i]; + st->excBuf[i]=EXTRACT16(PSHR32(exc[i],SIG_SHIFT)); for (i=0;i<st->lpcSize;i++) st->interp_qlpc[i] = ak[i]; - + st->exc_rms[sub] = compute_rms16(st->excBuf, st->subframeSize); + exc_ener_sum = ADD32(exc_ener_sum, DIV32(MULT16_16(st->exc_rms[sub],st->exc_rms[sub]), st->nbSubframes)); } - - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); - + st->last_ener = spx_sqrt(exc_ener_sum); + + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); for (i=0;i<st->lpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; st->first=0; @@ -1254,10 +1110,10 @@ int sb_encoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->full_frame_size; + (*(spx_int32_t*)ptr) = st->full_frame_size; break; case SPEEX_SET_HIGH_MODE: - st->submodeSelect = st->submodeID = (*(int*)ptr); + st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_SET_LOW_MODE: speex_encoder_ctl(st->st_low, SPEEX_SET_LOW_MODE, ptr); @@ -1275,22 +1131,22 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st, SPEEX_SET_QUALITY, ptr); break; case SPEEX_SET_VBR: - st->vbr_enabled = (*(int*)ptr); + st->vbr_enabled = (*(spx_int32_t*)ptr); speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, ptr); break; case SPEEX_GET_VBR: - (*(int*)ptr) = st->vbr_enabled; + (*(spx_int32_t*)ptr) = st->vbr_enabled; break; case SPEEX_SET_VAD: - st->vad_enabled = (*(int*)ptr); + st->vad_enabled = (*(spx_int32_t*)ptr); speex_encoder_ctl(st->st_low, SPEEX_SET_VAD, ptr); break; case SPEEX_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case 
SPEEX_SET_VBR_QUALITY: { - int q; + spx_int32_t q; float qual = (*(float*)ptr)+.6; st->vbr_quality = (*(float*)ptr); if (qual>10) @@ -1311,7 +1167,7 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, &st->vbr_enabled); if (st->vbr_enabled) { - int i=10, rate, target; + spx_int32_t i=10, rate, target; float vbr_qual; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1337,8 +1193,8 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_QUALITY: { - int nb_qual; - int quality = (*(int*)ptr); + spx_int32_t nb_qual; + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1350,16 +1206,16 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_COMPLEXITY: speex_encoder_ctl(st->st_low, SPEEX_SET_COMPLEXITY, ptr); - st->complexity = (*(int*)ptr); + st->complexity = (*(spx_int32_t*)ptr); if (st->complexity<1) st->complexity=1; break; case SPEEX_GET_COMPLEXITY: - (*(int*)ptr) = st->complexity; + (*(spx_int32_t*)ptr) = st->complexity; break; case SPEEX_SET_BITRATE: { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1397,25 +1253,23 @@ int sb_encoder_ctl(void *state, int request, void *ptr) int i; st->first = 1; for (i=0;i<st->lpcSize;i++) - st->lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1); + st->old_lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1); for (i=0;i<st->lpcSize;i++) st->mem_sw[i]=st->mem_sp[i]=st->mem_sp2[i]=0; - for (i=0;i<st->bufSize;i++) - st->excBuf[i]=0; for (i=0;i<QMF_ORDER;i++) - st->h0_mem[i]=st->h1_mem[i]=st->g0_mem[i]=st->g1_mem[i]=0; + st->h0_mem[i]=st->h1_mem[i]=0; } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); - speex_encoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, &ptr); + st->encode_submode = (*(spx_int32_t*)ptr); + speex_encoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - 
(*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: speex_encoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); - (*(int*)ptr) = 2*(*(int*)ptr) + QMF_ORDER - 1; + (*(spx_int32_t*)ptr) = 2*(*(spx_int32_t*)ptr) + QMF_ORDER - 1; break; case SPEEX_SET_PLC_TUNING: speex_encoder_ctl(st->st_low, SPEEX_SET_PLC_TUNING, ptr); @@ -1474,33 +1328,22 @@ int sb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;i<st->full_frame_size;i++) - e[i]=0; - for (i=0;i<st->frame_size;i++) - e[2*i]=2*st->exc[i]; - } - break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;i<st->full_frame_size;i++) - e[i]=0; - for (i=0;i<st->frame_size;i++) - e[2*i]=2*st->exc[i]; + for (i=0;i<st->nbSubframes;i++) + ((spx_word16_t*)ptr)[i] = st->exc_rms[i]; } break; case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_rms_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, ptr); break; - + case SPEEX_GET_STACK: + *((char**)ptr) = st->stack; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; @@ -1515,7 +1358,7 @@ int sb_decoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_SET_HIGH_MODE: - st->submodeID = (*(int*)ptr); + st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_SET_LOW_MODE: speex_decoder_ctl(st->st_low, SPEEX_SET_LOW_MODE, ptr); @@ -1524,20 +1367,20 @@ int sb_decoder_ctl(void *state, int request, void *ptr) speex_decoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, ptr); break; case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->full_frame_size; + (*(spx_int32_t*)ptr) = st->full_frame_size; break; case SPEEX_SET_ENH: speex_decoder_ctl(st->st_low, request, ptr); - st->lpc_enh_enabled = *((int*)ptr); + st->lpc_enh_enabled = 
*((spx_int32_t*)ptr); break; case SPEEX_GET_ENH: - *((int*)ptr) = st->lpc_enh_enabled; + *((spx_int32_t*)ptr) = st->lpc_enh_enabled; break; case SPEEX_SET_MODE: case SPEEX_SET_QUALITY: { - int nb_qual; - int quality = (*(int*)ptr); + spx_int32_t nb_qual; + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1578,18 +1421,19 @@ int sb_decoder_ctl(void *state, int request, void *ptr) st->mem_sp[i]=0; for (i=0;i<QMF_ORDER;i++) st->g0_mem[i]=st->g1_mem[i]=0; + st->last_ener=0; } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); - speex_decoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, &ptr); + st->encode_submode = (*(spx_int32_t*)ptr); + speex_decoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: speex_decoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); - (*(int*)ptr) = 2*(*(int*)ptr); + (*(spx_int32_t*)ptr) = 2*(*(spx_int32_t*)ptr); break; case SPEEX_SET_HIGHPASS: speex_decoder_ctl(st->st_low, SPEEX_SET_HIGHPASS, ptr); @@ -1609,33 +1453,22 @@ int sb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;i<st->full_frame_size;i++) - e[i]=0; - for (i=0;i<st->frame_size;i++) - e[2*i]=2*st->exc[i]; - } - break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;i<st->full_frame_size;i++) - e[i]=0; - for (i=0;i<st->frame_size;i++) - e[2*i]=2*st->exc[i]; + for (i=0;i<st->nbSubframes;i++) + ((spx_word16_t*)ptr)[i] = st->exc_rms[i]; } break; case SPEEX_GET_DTX_STATUS: speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, ptr); break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: speex_decoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, ptr); break; - + case SPEEX_GET_STACK: + 
*((char**)ptr) = st->stack; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; diff --git a/libspeex/sb_celp.h b/libspeex/sb_celp.h index 4da03e4..a0dc3af 100644 --- a/libspeex/sb_celp.h +++ b/libspeex/sb_celp.h @@ -58,37 +58,21 @@ typedef struct SBEncState { spx_word16_t gamma2; /**< Perceptual weighting coef 2 */ char *stack; /**< Temporary allocation stack */ - spx_sig_t *x0d, *x1d; /**< QMF filter signals*/ - spx_sig_t *high; /**< High-band signal (buffer) */ - spx_sig_t *y0, *y1; /**< QMF synthesis signals */ + spx_word16_t *high; /**< High-band signal (buffer) */ spx_word16_t *h0_mem, *h1_mem; - spx_word32_t *g0_mem, *g1_mem; /**< QMF memories */ - spx_sig_t *excBuf; /**< High-band excitation */ - spx_sig_t *exc; /**< High-band excitation (for QMF only)*/ - spx_sig_t *res; /**< Zero-input response (ringing) */ - spx_sig_t *sw; /**< Perceptually weighted signal */ const spx_word16_t *window; /**< LPC analysis window */ spx_word16_t *lagWindow; /**< Auto-correlation window */ - spx_word16_t *autocorr; /**< Auto-correlation (for LPC analysis) */ - spx_coef_t *lpc; /**< LPC coefficients */ - spx_lsp_t *lsp; /**< LSP coefficients */ - spx_lsp_t *qlsp; /**< Quantized LSPs */ spx_lsp_t *old_lsp; /**< LSPs of previous frame */ spx_lsp_t *old_qlsp; /**< Quantized LSPs of previous frame */ - spx_lsp_t *interp_lsp; /**< Interpolated LSPs for current sub-frame */ - spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs for current sub-frame */ - spx_coef_t *interp_lpc; /**< Interpolated LPCs for current sub-frame */ spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs for current sub-frame */ - spx_coef_t *bw_lpc1; /**< Bandwidth-expanded version of LPCs (#1) */ - spx_coef_t *bw_lpc2; /**< Bandwidth-expanded version of LPCs (#2) */ spx_mem_t *mem_sp; /**< Synthesis signal memory */ spx_mem_t *mem_sp2; spx_mem_t *mem_sw; /**< Perceptual signal memory */ spx_word32_t *pi_gain; - spx_sig_t *innov_save; /**< If non-NULL, innovation is 
copied here */ - spx_sig_t *low_innov; /**< Lower-band innovation is copied here magically */ + spx_word16_t *exc_rms; + spx_word16_t *innov_rms_save; /**< If non-NULL, innovation is copied here */ float vbr_quality; /**< Quality setting for VBR encoding */ int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ @@ -125,23 +109,18 @@ typedef struct SBDecState { int lpc_enh_enabled; char *stack; - spx_sig_t *x0d, *x1d; - spx_sig_t *high; - spx_sig_t *y0, *y1; spx_word32_t *g0_mem, *g1_mem; - spx_sig_t *exc; - spx_sig_t *excBuf; - spx_lsp_t *qlsp; + spx_word16_t *excBuf; spx_lsp_t *old_qlsp; - spx_lsp_t *interp_qlsp; spx_coef_t *interp_qlpc; spx_mem_t *mem_sp; spx_word32_t *pi_gain; - spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ - spx_sig_t *low_innov; /** Lower-band innovation is copied here magically */ + spx_word16_t *exc_rms; + spx_word16_t *innov_save; /** If non-NULL, innovation is copied here */ + spx_word16_t last_ener; spx_int32_t seed; int encode_submode; diff --git a/libspeex/speex.c b/libspeex/speex.c index 94829e6..846e021 100644 --- a/libspeex/speex.c +++ b/libspeex/speex.c @@ -86,7 +86,7 @@ int speex_decode_native(void *state, SpeexBits *bits, spx_word16_t *out) int speex_encode(void *state, float *in, SpeexBits *bits) { int i; - int N; + spx_int32_t N; spx_int16_t short_in[MAX_IN_SAMPLES]; speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N); for (i=0;i<N;i++) @@ -111,7 +111,7 @@ int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits) int speex_decode(void *state, SpeexBits *bits, float *out) { int i, ret; - int N; + spx_int32_t N; spx_int16_t short_out[MAX_IN_SAMPLES]; speex_decoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N); ret = (*((SpeexMode**)state))->dec(state, bits, short_out); @@ -136,7 +136,7 @@ int speex_encode(void *state, float *in, SpeexBits *bits) int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits) { int i; - int N; + spx_int32_t N; float float_in[MAX_IN_SAMPLES]; speex_encoder_ctl(state, 
SPEEX_GET_FRAME_SIZE, &N); for (i=0;i<N;i++) @@ -152,7 +152,7 @@ int speex_decode(void *state, SpeexBits *bits, float *out) int speex_decode_int(void *state, SpeexBits *bits, spx_int16_t *out) { int i; - int N; + spx_int32_t N; float float_out[MAX_IN_SAMPLES]; int ret; speex_decoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N); diff --git a/libspeex/speex_callbacks.c b/libspeex/speex_callbacks.c index 0b99188..682322e 100644 --- a/libspeex/speex_callbacks.c +++ b/libspeex/speex_callbacks.c @@ -73,7 +73,7 @@ int speex_inband_handler(SpeexBits *bits, SpeexCallback *callback_list, void *st int speex_std_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_MODE, &m); return 0; @@ -81,7 +81,7 @@ int speex_std_mode_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_low_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_LOW_MODE, &m); return 0; @@ -89,7 +89,7 @@ int speex_std_low_mode_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_high_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_HIGH_MODE, &m); return 0; @@ -97,7 +97,7 @@ int speex_std_high_mode_request_handler(SpeexBits *bits, void *state, void *data int speex_std_vbr_request_handler(SpeexBits *bits, void *state, void *data) { - int vbr; + spx_int32_t vbr; vbr = speex_bits_unpack_unsigned(bits, 1); speex_encoder_ctl(data, SPEEX_SET_VBR, &vbr); return 0; @@ -105,7 +105,7 @@ int speex_std_vbr_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_enh_request_handler(SpeexBits *bits, void *state, void *data) { - int enh; + spx_int32_t enh; enh = speex_bits_unpack_unsigned(bits, 1); speex_decoder_ctl(data, SPEEX_SET_ENH, &enh); 
return 0; @@ -113,7 +113,7 @@ int speex_std_enh_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_vbr_quality_request_handler(SpeexBits *bits, void *state, void *data) { - int qual; + float qual; qual = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_VBR_QUALITY, &qual); return 0; diff --git a/libspeex/speex_header.c b/libspeex/speex_header.c index 7fc2f5a..8e10851 100644 --- a/libspeex/speex_header.c +++ b/libspeex/speex_header.c @@ -133,14 +133,14 @@ SpeexHeader *speex_packet_to_header(char *packet, int size) for (i=0;i<8;i++) if (packet[i]!=h[i]) { - speex_warning ("This doesn't look like a Speex file"); + speex_notify("This doesn't look like a Speex file"); return NULL; } /*FIXME: Do we allow larger headers?*/ if (size < (int)sizeof(SpeexHeader)) { - speex_warning("Speex header too small"); + speex_notify("Speex header too small"); return NULL; } diff --git a/libspeex/testdenoise.c b/libspeex/testdenoise.c index 177227d..42644cb 100644 --- a/libspeex/testdenoise.c +++ b/libspeex/testdenoise.c @@ -24,9 +24,9 @@ int main() speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC_LEVEL, &f); i=0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i); - f=.4; + f=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f); - f=.3; + f=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f); while (1) { @@ -34,7 +34,7 @@ int main() fread(in, sizeof(short), NN, stdin); if (feof(stdin)) break; - vad = speex_preprocess(st, in, NULL); + vad = speex_preprocess_run(st, in); /*fprintf (stderr, "%d\n", vad);*/ fwrite(in, sizeof(short), NN, stdout); count++; diff --git a/libspeex/testecho.c b/libspeex/testecho.c index fc5bf34..60d76d5 100644 --- a/libspeex/testecho.c +++ b/libspeex/testecho.c @@ -18,7 +18,6 @@ int main(int argc, char **argv) { int echo_fd, ref_fd, e_fd; - spx_int32_t noise[NN+1]; short echo_buf[NN], ref_buf[NN], e_buf[NN]; SpeexEchoState *st; SpeexPreprocessState *den; @@ -36,12 +35,13 @@ 
int main(int argc, char **argv) den = speex_preprocess_state_init(NN, 8000); int tmp = 8000; mc_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &tmp); + speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st); while (read(ref_fd, ref_buf, NN*2)) { read(echo_fd, echo_buf, NN*2); - mc_echo_cancel(st, ref_buf, echo_buf, e_buf, noise); - /*speex_preprocess(den, e_buf, noise);*/ + mc_echo_cancellation(st, ref_buf, echo_buf, e_buf); + speex_preprocess_run(den, e_buf); write(e_fd, e_buf, NN*2); } mc_echo_state_destroy(st); diff --git a/libspeex/testenc.c b/libspeex/testenc.c index a7ad409..eabd02c 100644 --- a/libspeex/testenc.c +++ b/libspeex/testenc.c @@ -27,9 +27,9 @@ int main(int argc, char **argv) void *st; void *dec; SpeexBits bits; - int tmp; + spx_int32_t tmp; int bitCount=0; - int skip_group_delay; + spx_int32_t skip_group_delay; SpeexCallback callback; sigpow = 0; diff --git a/libspeex/testenc_uwb.c b/libspeex/testenc_uwb.c index 7512336..e9bf18a 100644 --- a/libspeex/testenc_uwb.c +++ b/libspeex/testenc_uwb.c @@ -28,9 +28,9 @@ int main(int argc, char **argv) void *st; void *dec; SpeexBits bits; - int tmp; + spx_int32_t tmp; int bitCount=0; - int skip_group_delay; + spx_int32_t skip_group_delay; SpeexCallback callback; sigpow = 0; diff --git a/libspeex/testenc_wb.c b/libspeex/testenc_wb.c index 7a19189..8e515cb 100644 --- a/libspeex/testenc_wb.c +++ b/libspeex/testenc_wb.c @@ -28,9 +28,9 @@ int main(int argc, char **argv) void *st; void *dec; SpeexBits bits; - int tmp; + spx_int32_t tmp; int bitCount=0; - int skip_group_delay; + spx_int32_t skip_group_delay; SpeexCallback callback; sigpow = 0; diff --git a/libspeex/testresample.c b/libspeex/testresample.c new file mode 100644 index 0000000..71392cc --- /dev/null +++ b/libspeex/testresample.c @@ -0,0 +1,86 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: testresample.c + Testing the resampling code + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided 
that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include "speex/speex_resampler.h" +#include <math.h> +#include <stdlib.h> + +#define NN 256 + +int main() +{ + spx_uint32_t i; + short *in; + short *out; + float *fin, *fout; + int count = 0; + SpeexResamplerState *st = speex_resampler_init(1, 8000, 12000, 10, NULL); + speex_resampler_set_rate(st, 96000, 44100); + speex_resampler_skip_zeros(st); + + in = malloc(NN*sizeof(short)); + out = malloc(2*NN*sizeof(short)); + fin = malloc(NN*sizeof(float)); + fout = malloc(2*NN*sizeof(float)); + while (1) + { + spx_uint32_t in_len; + spx_uint32_t out_len; + fread(in, sizeof(short), NN, stdin); + if (feof(stdin)) + break; + for (i=0;i<NN;i++) + fin[i]=in[i]; + in_len = NN; + out_len = 2*NN; + /*if (count==2) + speex_resampler_set_quality(st, 10);*/ + speex_resampler_process_float(st, 0, fin, &in_len, fout, &out_len); + for (i=0;i<out_len;i++) + out[i]=floor(.5+fout[i]); + /*speex_warning_int("writing", out_len);*/ + fwrite(out, sizeof(short), out_len, stdout); + count++; + } + speex_resampler_destroy(st); + free(in); + free(out); + free(fin); + free(fout); + return 0; +} + diff --git a/libspeex/vbr.c b/libspeex/vbr.c index bfd1fa6..d24ec0f 100644 --- a/libspeex/vbr.c +++ b/libspeex/vbr.c @@ -47,29 +47,29 @@ const float vbr_nb_thresh[9][11]={ - {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* CNG */ - { 3.5, 2.5, 2.0, 1.2, 0.5, 0.0, -0.5, -0.7, -0.8, -0.9, -1.0}, /* 2 kbps */ - {10.0, 6.5, 5.2, 4.5, 3.9, 3.5, 3.0, 2.5, 2.3, 1.8, 1.0}, /* 6 kbps */ - {11.0, 8.8, 7.5, 6.5, 5.0, 3.9, 3.9, 3.9, 3.5, 3.0, 1.0}, /* 8 kbps */ - {11.0, 11.0, 9.9, 9.0, 8.0, 7.0, 6.5, 6.0, 5.0, 4.0, 2.0}, /* 11 kbps */ - {11.0, 11.0, 11.0, 11.0, 9.5, 9.0, 8.0, 7.0, 6.5, 5.0, 3.0}, /* 15 kbps */ - {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 9.5, 8.5, 8.0, 6.5, 4.0}, /* 18 kbps */ - {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 9.8, 7.5, 5.5}, /* 24 kbps */ - { 8.0, 5.0, 3.7, 3.0, 2.5, 2.0, 1.8, 1.5, 1.0, 
0.0, 0.0} /* 4 kbps */ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* CNG */ + { 3.5f, 2.5f, 2.0f, 1.2f, 0.5f, 0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /* 2 kbps */ + {10.0f, 6.5f, 5.2f, 4.5f, 3.9f, 3.5f, 3.0f, 2.5f, 2.3f, 1.8f, 1.0f}, /* 6 kbps */ + {11.0f, 8.8f, 7.5f, 6.5f, 5.0f, 3.9f, 3.9f, 3.9f, 3.5f, 3.0f, 1.0f}, /* 8 kbps */ + {11.0f, 11.0f, 9.9f, 9.0f, 8.0f, 7.0f, 6.5f, 6.0f, 5.0f, 4.0f, 2.0f}, /* 11 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 9.0f, 8.0f, 7.0f, 6.5f, 5.0f, 3.0f}, /* 15 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 8.0f, 6.5f, 4.0f}, /* 18 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 7.5f, 5.5f}, /* 24 kbps */ + { 8.0f, 5.0f, 3.7f, 3.0f, 2.5f, 2.0f, 1.8f, 1.5f, 1.0f, 0.0f, 0.0f} /* 4 kbps */ }; const float vbr_hb_thresh[5][11]={ - {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */ - {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* 2 kbps */ - {11.0, 11.0, 9.5, 8.5, 7.5, 6.0, 5.0, 3.9, 3.0, 2.0, 1.0}, /* 6 kbps */ - {11.0, 11.0, 11.0, 11.0, 11.0, 9.5, 8.7, 7.8, 7.0, 6.5, 4.0}, /* 10 kbps */ - {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 9.8, 7.5, 5.5} /* 18 kbps */ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* 2 kbps */ + {11.0f, 11.0f, 9.5f, 8.5f, 7.5f, 6.0f, 5.0f, 3.9f, 3.0f, 2.0f, 1.0f}, /* 6 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.7f, 7.8f, 7.0f, 6.5f, 4.0f}, /* 10 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 7.5f, 5.5f} /* 18 kbps */ }; const float vbr_uhb_thresh[2][11]={ - {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */ - { 3.9, 2.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0} /* 2 kbps */ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ + { 
3.9f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.0f} /* 2 kbps */ }; void vbr_init(VBRState *vbr) diff --git a/libspeex/window.c b/libspeex/window.c index 3748f65..65b1917 100644 --- a/libspeex/window.c +++ b/libspeex/window.c @@ -65,30 +65,30 @@ const spx_word16_t lpc_window[200] = { }; #else const spx_word16_t lpc_window[200] = { - 0.080000, 0.080158, 0.080630, 0.081418, 0.082520, 0.083935, 0.085663, 0.087703, - 0.090052, 0.092710, 0.095674, 0.098943, 0.102514, 0.106385, 0.110553, 0.115015, - 0.119769, 0.124811, 0.130137, 0.135744, 0.141628, 0.147786, 0.154212, 0.160902, - 0.167852, 0.175057, 0.182513, 0.190213, 0.198153, 0.206328, 0.214731, 0.223357, - 0.232200, 0.241254, 0.250513, 0.259970, 0.269619, 0.279453, 0.289466, 0.299651, - 0.310000, 0.320507, 0.331164, 0.341965, 0.352901, 0.363966, 0.375151, 0.386449, - 0.397852, 0.409353, 0.420943, 0.432615, 0.444361, 0.456172, 0.468040, 0.479958, - 0.491917, 0.503909, 0.515925, 0.527959, 0.540000, 0.552041, 0.564075, 0.576091, - 0.588083, 0.600042, 0.611960, 0.623828, 0.635639, 0.647385, 0.659057, 0.670647, - 0.682148, 0.693551, 0.704849, 0.716034, 0.727099, 0.738035, 0.748836, 0.759493, - 0.770000, 0.780349, 0.790534, 0.800547, 0.810381, 0.820030, 0.829487, 0.838746, - 0.847800, 0.856643, 0.865269, 0.873672, 0.881847, 0.889787, 0.897487, 0.904943, - 0.912148, 0.919098, 0.925788, 0.932214, 0.938372, 0.944256, 0.949863, 0.955189, - 0.960231, 0.964985, 0.969447, 0.973615, 0.977486, 0.981057, 0.984326, 0.987290, - 0.989948, 0.992297, 0.994337, 0.996065, 0.997480, 0.998582, 0.999370, 0.999842, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 0.998640, 0.994566, 0.987787, 0.978324, 0.966203, - 0.951458, 0.934131, 0.914270, 0.891931, 0.867179, 0.840084, 0.810723, 0.779182, - 0.745551, 0.709930, 0.672424, 0.633148, 0.592223, 0.549781, 0.505964, 0.460932, - 0.414863, 0.367968, 0.320511, 0.272858, 0.225569, 0.179655, 0.137254, 0.103524 + 0.080000f, 0.080158f, 0.080630f, 0.081418f, 0.082520f, 0.083935f, 0.085663f, 0.087703f, + 0.090052f, 0.092710f, 0.095674f, 0.098943f, 0.102514f, 0.106385f, 0.110553f, 0.115015f, + 0.119769f, 0.124811f, 0.130137f, 0.135744f, 0.141628f, 0.147786f, 0.154212f, 0.160902f, + 0.167852f, 0.175057f, 0.182513f, 0.190213f, 0.198153f, 0.206328f, 0.214731f, 0.223357f, + 0.232200f, 0.241254f, 0.250513f, 0.259970f, 0.269619f, 0.279453f, 0.289466f, 0.299651f, + 0.310000f, 0.320507f, 0.331164f, 0.341965f, 0.352901f, 0.363966f, 0.375151f, 0.386449f, + 0.397852f, 0.409353f, 0.420943f, 0.432615f, 0.444361f, 0.456172f, 0.468040f, 0.479958f, + 0.491917f, 0.503909f, 0.515925f, 0.527959f, 0.540000f, 0.552041f, 0.564075f, 0.576091f, + 0.588083f, 0.600042f, 0.611960f, 0.623828f, 0.635639f, 0.647385f, 0.659057f, 0.670647f, + 0.682148f, 0.693551f, 0.704849f, 0.716034f, 0.727099f, 0.738035f, 0.748836f, 0.759493f, + 0.770000f, 0.780349f, 0.790534f, 0.800547f, 0.810381f, 0.820030f, 0.829487f, 0.838746f, + 0.847800f, 0.856643f, 0.865269f, 0.873672f, 0.881847f, 0.889787f, 0.897487f, 0.904943f, + 0.912148f, 0.919098f, 0.925788f, 0.932214f, 0.938372f, 0.944256f, 0.949863f, 0.955189f, + 0.960231f, 0.964985f, 0.969447f, 0.973615f, 0.977486f, 0.981057f, 0.984326f, 0.987290f, + 0.989948f, 0.992297f, 0.994337f, 0.996065f, 0.997480f, 0.998582f, 0.999370f, 0.999842f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 
1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 0.998640f, 0.994566f, 0.987787f, 0.978324f, 0.966203f, + 0.951458f, 0.934131f, 0.914270f, 0.891931f, 0.867179f, 0.840084f, 0.810723f, 0.779182f, + 0.745551f, 0.709930f, 0.672424f, 0.633148f, 0.592223f, 0.549781f, 0.505964f, 0.460932f, + 0.414863f, 0.367968f, 0.320511f, 0.272858f, 0.225569f, 0.179655f, 0.137254f, 0.103524f }; #endif |