
github.com/xiph/speex.git
author    jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>  2007-03-15 16:13:25 +0300
committer jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>  2007-03-15 16:13:25 +0300
commit    4c7f0a5ffd7ea5e52968aad1e89108e124be012b
tree      0c5858c0c5b1f04b9d20173f254ce47a1f098d72 /tmv
parent    57656c5f88e699a8a6b6af442c736b0ef95cbf3d
Merging Trimedia support contributed by Hong Zhiqian (incomplete for now)
git-svn-id: http://svn.xiph.org/trunk/speex@12764 0101bb08-14d6-0310-b084-bc0e0c8e3800
Diffstat (limited to 'tmv')
-rw-r--r--   tmv/_kiss_fft_guts_tm.h     188
-rw-r--r--   tmv/cb_search_tm.h          149
-rw-r--r--   tmv/config.h                 98
-rw-r--r--   tmv/fftwrap_tm.h            233
-rw-r--r--   tmv/filterbank_tm.h         289
-rw-r--r--   tmv/filters_tm.h           1521
-rw-r--r--   tmv/fixed_tm.h              100
-rw-r--r--   tmv/kiss_fft_tm.h           599
-rw-r--r--   tmv/kiss_fftr_tm.h          235
-rw-r--r--   tmv/lpc_tm.h                150
-rw-r--r--   tmv/lsp_tm.h                310
-rw-r--r--   tmv/ltp_tm.h                479
-rw-r--r--   tmv/mdf_tm.h               1192
-rw-r--r--   tmv/misc_tm.h                92
-rw-r--r--   tmv/preprocess_tm.h        1135
-rw-r--r--   tmv/profile_tm.h            407
-rw-r--r--   tmv/quant_lsp_tm.h          448
-rw-r--r--   tmv/speex_config_types.h     31
-rw-r--r--   tmv/vq_tm.h                 494
19 files changed, 8150 insertions, 0 deletions
diff --git a/tmv/_kiss_fft_guts_tm.h b/tmv/_kiss_fft_guts_tm.h
new file mode 100644
index 0000000..9cf2864
--- /dev/null
+++ b/tmv/_kiss_fft_guts_tm.h
@@ -0,0 +1,188 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file _kiss_fft_guts_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _KISS_FFT_GUTS_TM_
+#define _KISS_FFT_GUTS_TM_
+
+#ifdef TM_ASM
+
+#include <ops/custom_defs.h>
+
+#ifdef FIXED_POINT
+
+#undef sround
+#define sround(x) sex16(((x) + (1<<(FRACBITS-1)) ) >> FRACBITS)
+
+#undef MIN
+#undef MAX
+#define MIN(a,b) imin(a,b)
+#define MAX(a,b) imax(a,b)
+
+#define TM_MUL(res,a,b) \
+ { register int a0, a1, b0, b1; \
+ \
+ a0 = sex16((a)); \
+ a1 = asri(16,(a)); \
+ b0 = sex16((b)); \
+ b1 = asri(16,(b)); \
+ (res)= pack16lsb( \
+ sround(ifir16((a),funshift2((b),(b)))), \
+ sround(a0*b0-a1*b1)); \
+ } \
+
+#define TM_ADD(res,a,b) \
+ { (res)=dspidualadd((a),(b)); \
+ } \
+
+#define TM_SUB(res,a,b) \
+ { (res)=dspidualsub((a),(b)); \
+ } \
+
+#define TM_SHR(res,a,shift) \
+ { (res)=dualasr((a),(shift)); \
+ } \
+
+#define TM_DIV(res,c,frac) \
+ { register int c1, c0; \
+ \
+ c1 = asri(16,(c)); \
+ c0 = sex16((c)); \
+ (res) = pack16lsb(sround(c1 * (32767/(frac))), sround(c0 * (32767/(frac))));\
+ } \
+
+#define TM_NEGMSB(res, a) \
+ { (res) = pack16lsb((ineg(asri(16,(a)))), (a)); \
+ } \
+
+#else
+
+#undef MIN
+#undef MAX
+#define MIN(a,b) fmin(a,b)
+#define MAX(a,b) fmax(a,b)
+
+#endif
+#endif
+
+#undef CHECKBUF
+#define CHECKBUF(buf,nbuf,n) \
+ { \
+ if ( nbuf < (size_t)(n) ) { \
+ speex_free(buf); \
+ buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
+ nbuf = (size_t)(n); \
+ } \
+ } \
+
+#undef C_ADD
+#define C_ADD( res, a,b) \
+ { \
+ CHECK_OVERFLOW_OP((a).r,+,(b).r) \
+ CHECK_OVERFLOW_OP((a).i,+,(b).i) \
+ (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
+ } \
+
+
+#undef C_SUB
+#define C_SUB( res, a,b) \
+ { \
+ CHECK_OVERFLOW_OP((a).r,-,(b).r) \
+ CHECK_OVERFLOW_OP((a).i,-,(b).i) \
+ (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
+ } \
+
+#undef C_ADDTO
+#define C_ADDTO( res , a) \
+ { \
+ CHECK_OVERFLOW_OP((res).r,+,(a).r) \
+ CHECK_OVERFLOW_OP((res).i,+,(a).i) \
+ (res).r += (a).r; (res).i += (a).i; \
+ } \
+
+#undef C_SUBFROM
+#define C_SUBFROM( res, a) \
+ { \
+ CHECK_OVERFLOW_OP((res).r,-,(a).r) \
+ CHECK_OVERFLOW_OP((res).i,-,(a).i) \
+ (res).r -= (a).r; (res).i -= (a).i; \
+ } \
+
+#undef kf_cexp
+#define kf_cexp(x,phase) \
+ { (x)->r = KISS_FFT_COS(phase); \
+ (x)->i = KISS_FFT_SIN(phase); } \
+
+#undef kf_cexp2
+#define kf_cexp2(x,phase) \
+ { (x)->r = spx_cos_norm((phase)); \
+ (x)->i = spx_cos_norm((phase)-32768); } \
+
+
+#ifdef FIXED_POINT
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+ { (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+ (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); } \
+
+#undef C_FIXDIV
+#define C_FIXDIV(c,div) \
+ { DIVSCALAR( (c).r , div); \
+ DIVSCALAR( (c).i , div); } \
+
+#undef C_MULBYSCALAR
+#define C_MULBYSCALAR( c, s ) \
+ { (c).r = sround( smul( (c).r , s ) ) ; \
+ (c).i = sround( smul( (c).i , s ) ) ; } \
+
+#else
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+ { (m).r = (a).r*(b).r - (a).i*(b).i; \
+ (m).i = (a).r*(b).i + (a).i*(b).r; } \
+
+
+#undef C_MULBYSCALAR
+#define C_MULBYSCALAR( c, s ) \
+ { (c).r *= (s); \
+ (c).i *= (s); } \
+
+
+
+#endif
+
+#endif
+
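The fixed-point C_MUL and sround macros above amount to an ordinary rounded Q15 complex multiply. The sketch below shows the same arithmetic in portable C as a point of reference; it assumes 16-bit components and FRACBITS == 15 (as the macros suggest) and is an illustration only, not part of the patch.

/* Illustrative sketch (not part of the patch): portable form of the
 * fixed-point C_MUL above, assuming 16-bit Q15 components and FRACBITS == 15. */
#include <stdint.h>

typedef struct { int16_t r, i; } cpx16;

static int16_t sround_ref(int32_t x)       /* matches sround(): round, then shift */
{
    return (int16_t)((x + (1 << 14)) >> 15);
}

static cpx16 c_mul_ref(cpx16 a, cpx16 b)   /* what C_MUL stores into (m) */
{
    cpx16 m;
    m.r = sround_ref((int32_t)a.r * b.r - (int32_t)a.i * b.i);
    m.i = sround_ref((int32_t)a.r * b.i + (int32_t)a.i * b.r);
    return m;
}
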
diff --git a/tmv/cb_search_tm.h b/tmv/cb_search_tm.h
new file mode 100644
index 0000000..15ca1d5
--- /dev/null
+++ b/tmv/cb_search_tm.h
@@ -0,0 +1,149 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file cb_search_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
+static void compute_weighted_codebook(
+ const signed char * restrict shape_cb,
+ const Int16 * restrict r,
+ Int16 * restrict resp,
+ Int16 * restrict resp2,
+ Int32 * restrict E,
+ int shape_cb_size, int subvect_size, char *stack)
+{
+ register int i, j;
+ register int quadsize;
+
+ TMDEBUG_ALIGNMEM(r);
+ TMDEBUG_ALIGNMEM(resp);
+ TMDEBUG_ALIGNMEM(resp2);
+
+ COMPUTEWEIGHTEDCODEBOOK_START();
+
+ quadsize = subvect_size << 2;
+
+ for ( i=0 ; i<shape_cb_size ; i+=4 )
+ { register int e0, e1, e2, e3;
+
+ e0 = e1 = e2 = e3 = 0;
+ for( j=0 ; j<subvect_size ; ++j )
+ {
+ register int k, rr;
+ register int resj0, resj1, resj2, resj3;
+
+ resj0 = resj1 = resj2 = resj3 = 0;
+
+ for ( k=0 ; k<=j ; ++k )
+ { rr = r[j-k];
+
+ resj0 += shape_cb[k] * rr;
+ resj1 += shape_cb[k+subvect_size] * rr;
+ resj2 += shape_cb[k+2*subvect_size] * rr;
+ resj3 += shape_cb[k+3*subvect_size] * rr;
+ }
+
+ resj0 >>= 13;
+ resj1 >>= 13;
+ resj2 >>= 13;
+ resj3 >>= 13;
+
+ e0 += resj0 * resj0;
+ e1 += resj1 * resj1;
+ e2 += resj2 * resj2;
+ e3 += resj3 * resj3;
+
+ resp[j] = resj0;
+ resp[j+subvect_size] = resj1;
+ resp[j+2*subvect_size] = resj2;
+ resp[j+3*subvect_size] = resj3;
+ }
+
+ E[i] = e0;
+ E[i+1] = e1;
+ E[i+2] = e2;
+ E[i+3] = e3;
+
+ resp += quadsize;
+ shape_cb += quadsize;
+ }
+
+#ifndef REMARK_ON
+ (void)resp2;
+ (void)stack;
+#endif
+
+ COMPUTEWEIGHTEDCODEBOOK_STOP();
+}
+
+#define OVERRIDE_TARGET_UPDATE
+static inline void target_update(Int16 * restrict t, Int16 g, Int16 * restrict r, int len)
+{
+ register int n;
+ register int gr1, gr2, t1, t2, r1, r2;
+ register int quadsize;
+
+ TARGETUPDATE_START();
+
+ quadsize = len & 0xFFFFFFFC;
+
+ for ( n=0; n<quadsize ; n+=4 )
+ { gr1 = pack16lsb(PSHR32((g * r[n]),13) , PSHR32((g * r[n+1]),13));
+ gr2 = pack16lsb(PSHR32((g * r[n+2]),13), PSHR32((g * r[n+3]),13));
+
+ t1 = pack16lsb(t[n], t[n+1]);
+ t2 = pack16lsb(t[n+2],t[n+3]);
+
+ r1 = dspidualsub(t1, gr1);
+ r2 = dspidualsub(t2, gr2);
+
+ t[n] = asri(16,r1);
+ t[n+1] = sex16(r1);
+ t[n+2] = asri(16,r2);
+ t[n+3] = sex16(r2);
+ }
+
+ for ( n=quadsize ; n<len ; ++n )
+ { t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
+ }
+
+ TARGETUPDATE_STOP();
+}
+
+#endif
+
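For reference, the four-way unrolled loop in compute_weighted_codebook() above evaluates, for each codebook shape, resp[j] = (sum over k<=j of shape_cb[k]*r[j-k]) >> 13 together with the energy of that response. A single-shape scalar sketch of the same computation follows; it is an illustration only, with the >>13 scaling taken directly from the code above.

/* Illustrative sketch (not part of the patch): scalar form of the per-shape
 * response/energy computed by compute_weighted_codebook() above; the TriMedia
 * version processes four shapes per outer iteration. */
static void weighted_shape_ref(const signed char *shape, const short *r,
                               short *resp, int *E, int subvect_size)
{
    int j, k, e = 0;
    for (j = 0; j < subvect_size; j++) {
        int acc = 0;
        for (k = 0; k <= j; k++)
            acc += shape[k] * r[j - k];   /* causal convolution with r */
        acc >>= 13;                       /* same scaling as above */
        resp[j] = (short)acc;
        e += acc * acc;                   /* accumulate response energy */
    }
    *E = e;
}
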
diff --git a/tmv/config.h b/tmv/config.h
new file mode 100644
index 0000000..0b68fb8
--- /dev/null
+++ b/tmv/config.h
@@ -0,0 +1,98 @@
+#ifndef _CONFIG_H_
+#define _CONFIG_H_
+
+#define USE_COMPACT_KISS_FFT
+//#define USE_KISS_FFT
+
+#ifdef WIN32
+
+//#define FIXED_POINT
+
+#define inline __inline
+#define restrict
+
+#elif defined (__TCS__)
+
+#define FIXED_POINT
+#define PREPROCESS_MDF_FLOAT
+#define TM_ASM
+
+#define TM_DEBUGMEM_ALIGNNMENT 1
+
+#define TM_PROFILE 1
+#define TM_PROFILE_FIRMEM16 0
+#define TM_PROFILE_IIRMEM16 0
+#define TM_PROFILE_FILTERMEM16 0
+#define TM_PROFILE_VQNBEST 0
+#define TM_PROFILE_VQNBESTSIGN 0
+#define TM_PROFILE_COMPUTEQUANTWEIGHTS 0
+#define TM_PROFILE_LSPQUANT 0
+#define TM_PROFILE_LSPWEIGHTQUANT 0
+#define TM_PROFILE_LSPENFORCEMARGIN 0
+#define TM_PROFILE_LSPTOLPC 0
+#define TM_PROFILE_INNERPROD 0
+#define TM_PROFILE_PITCHXCORR 0
+#define TM_PROFILE_LSP_INTERPOLATE 0
+#define TM_PROFILE_CHEBPOLYEVA 0
+#define TM_PROFILE_COMPUTEWEIGHTEDCODEBOOK 0
+#define TM_PROFILE_TARGETUPDATE 0
+#define TM_PROFILE_SPXAUTOCORR 0
+#define TM_PROFILE_COMPUTEPITCHERROR 0
+#define TM_PROFILE_COMPUTERMS16 0
+#define TM_PROFILE_NORMALIZE16 0
+#define TM_PROFILE_BWLPC 0
+#define TM_PROFILE_HIGHPASS 0
+#define TM_PROFILE_SIGNALMUL 0
+#define TM_PROFILE_SIGNALDIV 0
+#define TM_PROFILE_COMPUTEIMPULSERESPONSE 0
+#define TM_PROFILE_PITCHGAINSEARCH3TAPVQ 0
+#define TM_PROFILE_OPENLOOPNBESTPITCH 0
+#define TM_PROFILE_PREPROCESSANALYSIS 0
+#define TM_PROFILE_UPDATENOISEPROB 0
+#define TM_PROFILE_COMPUTEGAINFLOOR 0
+#define TM_PROFILE_FILTERDCNOTCH16 0
+#define TM_PROFILE_MDFINNERPROD 0
+#define TM_PROFILE_SPECTRALMULACCUM 0
+#define TM_PROFILE_WEIGHTEDSPECTRALMULCONJ 0
+#define TM_PROFILE_MDFADJUSTPROP 0
+#define TM_PROFILE_SPEEXECHOGETRESIDUAL 0
+#define TM_PROFILE_MAXIMIZERANGE 0
+#define TM_PROFILE_RENORMRANGE 0
+#define TM_PROFILE_POWERSPECTRUM 0
+#define TM_PROFILE_QMFSYNTH 0
+#define TM_PROFILE_QMFDECOMP 0
+#define TM_PROFILE_FILTERBANKCOMPUTEBANK32 0
+#define TM_PROFILE_FILTERBANKCOMPUTEPSD16 0
+
+#define TM_UNROLL 1
+#define TM_UNROLL_FILTER 1
+#define TM_UNROLL_IIR 1
+#define TM_UNROLL_FIR 1
+#define TM_UNROLL_HIGHPASS 1
+#define TM_UNROLL_SIGNALMUL 1
+#define TM_UNROLL_SIGNALDIV 1
+#define TM_UNROLL_VQNBEST 1
+#define TM_UNROLL_VQSIGNNBEST 1
+#define TM_UNROLL__SPXAUTOCORR 1
+#define TM_UNROLL_COMPUTERMS16 1
+#define TM_UNROLL_COMPUTEIMPULSERESPONSE 1
+#define TM_UNROLL_QMFSYNTH 1
+#define TM_UNROLL_PITCHGAINSEARCH3TAPVQ 1
+#define TM_UNROLL_OPENLOOPNBESTPITCH 1
+#define TM_UNROLL_FILTERBANKCOMPUTEBANK32 1
+#define TM_UNROLL_FILTERBANKCOMPUTEPSD16 1
+#define TM_UNROLL_SPEEXPREPROCESSRUN 1
+#define TM_UNROLL_PREPROCESSANALYSIS 1
+#define TM_UNROLL_UPDATENOISEPROB 1
+#define TM_UNROLL_COMPUTEGAINFLOOR 1
+#define TM_UNROLL_SPEEXECHOGETRESIDUAL 1
+#define TM_UNROLL_SPEEXECHOCANCELLATION 1
+#define TM_UNROLL_FILTERDCNOTCH16 1
+#define TM_UNROLL_MDFINNERPRODUCT 1
+#define TM_UNROLL_SPECTRALMULACCUM 1
+#define TM_UNROLL_MDFADJUSTPROP 1
+
+#endif
+
+#endif
+
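The TM_UNROLL_* switches above are consumed in the optimized sources by wrapping loops in TriMedia unrolling pragmas, as in the pattern below. The function and its body are placeholders for illustration; only the #if guard and pragma usage are taken from the patch (see filters_tm.h further down).

/* Illustrative sketch (not part of the patch): how the config.h unrolling
 * switches gate the TriMedia pragmas. my_copy() is a placeholder function. */
#include "config.h"

void my_copy(short *dst, const short *src, int n)
{
    int i;
#if (TM_UNROLL && TM_UNROLL_FIR)
#pragma TCS_unroll=4
#pragma TCS_unrollexact=1
#endif
    for (i = 0; i < n; i++)
        dst[i] = src[i];
#if (TM_UNROLL && TM_UNROLL_FIR)
#pragma TCS_unrollexact=0
#pragma TCS_unroll=0
#endif
}
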
diff --git a/tmv/fftwrap_tm.h b/tmv/fftwrap_tm.h
new file mode 100644
index 0000000..39d924e
--- /dev/null
+++ b/tmv/fftwrap_tm.h
@@ -0,0 +1,233 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file fftwrap_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_MAXIMIZE_RANGE
+static int maximize_range(Int16 *in, Int16 *out, int bound, int len)
+{
+ register int max_val=0;
+ register int shift=0;
+ register int i, j;
+
+ TMDEBUG_ALIGNMEM(in);
+ TMDEBUG_ALIGNMEM(out);
+
+ MAXIMIZERANGE_START();
+
+ len >>= 1;
+
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int x10, x32, x54, x76;
+
+ x10 = ld32x(in,i);
+ x32 = ld32x(in,i+1);
+ x54 = ld32x(in,i+2);
+ x76 = ld32x(in,i+3);
+
+ x10 = dspidualabs(x10);
+ x32 = dspidualabs(x32);
+ x54 = dspidualabs(x54);
+ x76 = dspidualabs(x76);
+
+ x10 = imax(sex16(x10), asri(16,x10));
+ x32 = imax(sex16(x32), asri(16,x32));
+ x54 = imax(sex16(x54), asri(16,x54));
+ x76 = imax(sex16(x76), asri(16,x76));
+
+ max_val = imax(max_val,x10);
+ max_val = imax(max_val,x32);
+ max_val = imax(max_val,x54);
+ max_val = imax(max_val,x76);
+ }
+
+ while ( max_val <= (bound>>1) && max_val != 0 )
+ { max_val <<= 1;
+ shift++;
+ }
+
+ if ( shift != 0 )
+ {
+ for ( i=0,j=0 ; i<len ; i+=4,j+=16 )
+ {
+ register int x10, x32, x54, x76;
+
+ x10 = ld32x(in,i);
+ x32 = ld32x(in,i+1);
+ x54 = ld32x(in,i+2);
+ x76 = ld32x(in,i+3);
+
+ x10 = dualasl(x10, shift);
+ x32 = dualasl(x32, shift);
+ x54 = dualasl(x54, shift);
+ x76 = dualasl(x76, shift);
+
+ st32d(j,out,x10);
+ st32d(j+4,out,x32);
+ st32d(j+8,out,x54);
+ st32d(j+12,out,x76);
+ }
+ }
+
+ MAXIMIZERANGE_STOP();
+
+ return shift;
+}
+
+#define OVERRIDE_RENORM_RANGE
+static void renorm_range(Int16 *in, Int16 *out, int shift, int len)
+{
+ register int i, j, s, l;
+
+ TMDEBUG_ALIGNMEM(in);
+ TMDEBUG_ALIGNMEM(out);
+
+ RENORMRANGE_START();
+
+ s = (1<<((shift))>>1);
+ s = pack16lsb(s,s);
+
+ len >>= 1;
+ l = len & (int)0xFFFFFFFE;
+
+ for ( i=0,j=0 ; i<l; i+=2,j+=8 )
+ {
+ register int x10, x32;
+
+ x10 = ld32x(in,i);
+ x32 = ld32x(in,i+1);
+
+ x10 = dspidualadd(x10, s);
+ x32 = dspidualadd(x32, s);
+
+ x10 = dualasr(x10, shift);
+ x32 = dualasr(x32, shift);
+
+ st32d(j,out,x10);
+ st32d(j+4,out,x32);
+ }
+
+ if ( len & (int)0x01 )
+ {
+ register int x10;
+
+ x10 = ld32x(in,i);
+ x10 = dspidualadd(x10, s);
+ x10 = dualasr(x10, shift);
+ st32d(j,out,x10);
+ }
+
+ RENORMRANGE_STOP();
+}
+
+#endif
+
+#ifdef USE_COMPACT_KISS_FFT
+#ifdef FIXED_POINT
+
+#define OVERRIDE_POWER_SPECTRUM
+void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
+{
+ register int x10, x32, x54, x76, *x;
+ register int i;
+
+ x = (int*)(X-1);
+
+ TMDEBUG_ALIGNMEM(x);
+
+ POWERSPECTRUM_START();
+
+ x76 = 0;
+ ps[0] = MULT16_16(X[0],X[0]);
+ N >>= 1;
+
+ for( i=1 ; i<N ; i+=4 )
+ {
+ x10 = ld32x(x, i);
+ x32 = ld32x(x, i+1);
+ x54 = ld32x(x, i+2);
+ x76 = ld32x(x, i+3);
+
+ ps[i] = ifir16(x10,x10);
+ ps[i+1] = ifir16(x32,x32);
+ ps[i+2] = ifir16(x54,x54);
+ ps[i+3] = ifir16(x76,x76);
+ }
+
+ x76 = sex16(x76);
+ ps[N] = x76 * x76;
+
+ POWERSPECTRUM_STOP();
+}
+
+#else
+
+#define OVERRIDE_POWER_SPECTRUM
+void power_spectrum(const float * restrict X, float * restrict ps, int N)
+{
+ register int i, j;
+ register float xx;
+
+ POWERSPECTRUM_START();
+
+ xx = X[0];
+
+ ps[0]=MULT16_16(xx,xx);
+
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+ for (i=1,j=1;i<N-1;i+=2,j++)
+ { register float xi, xii;
+
+ xi = X[i];
+ xii = X[i+1];
+
+ ps[j] = MULT16_16(xi,xi) + MULT16_16(xii,xii);
+ }
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+
+ xx = X[i];
+ ps[j]=MULT16_16(xx,xx);
+
+ POWERSPECTRUM_STOP();
+}
+
+#endif
+#endif
+
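maximize_range() and renorm_range() above implement the usual block-scaling scheme around a fixed-point FFT: find the peak magnitude, shift the signal up until the peak approaches bound, remember the shift, then undo it with rounding afterwards. A scalar sketch of that scheme follows, as an illustration only; the TriMedia version above additionally skips the copy when no shift is needed, i.e. it assumes in-place operation.

/* Illustrative sketch (not part of the patch): scalar form of the block
 * scaling performed by maximize_range()/renorm_range() above. */
static int maximize_range_ref(const short *in, short *out, int bound, int len)
{
    int i, shift = 0, max_val = 0;
    for (i = 0; i < len; i++) {
        int v = in[i] < 0 ? -in[i] : in[i];
        if (v > max_val)
            max_val = v;
    }
    while (max_val <= (bound >> 1) && max_val != 0) {  /* same loop as above */
        max_val <<= 1;
        shift++;
    }
    for (i = 0; i < len; i++)
        out[i] = (short)(in[i] << shift);
    return shift;
}

static void renorm_range_ref(const short *in, short *out, int shift, int len)
{
    int i, s = (1 << shift) >> 1;                      /* rounding constant */
    for (i = 0; i < len; i++)
        out[i] = (short)((in[i] + s) >> shift);
}
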
diff --git a/tmv/filterbank_tm.h b/tmv/filterbank_tm.h
new file mode 100644
index 0000000..39d2332
--- /dev/null
+++ b/tmv/filterbank_tm.h
@@ -0,0 +1,289 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file filterbank_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_FILTERBANK_COMPUTE_BANK32
+void filterbank_compute_bank32(FilterBank * restrict bank, spx_word32_t * restrict ps, spx_word32_t * restrict mel)
+{
+ register int i, j, k, banks, len, zero, s;
+ register int * restrict left;
+ register int * restrict right;
+ register int * restrict bleft;
+ register int * restrict bright;
+
+ left = (int*)bank->filter_left;
+ right = (int*)bank->filter_right;
+ bleft = (int*)bank->bank_left;
+ bright = (int*)bank->bank_right;
+
+ TMDEBUG_ALIGNMEM(ps);
+ TMDEBUG_ALIGNMEM(mel);
+ TMDEBUG_ALIGNMEM(left);
+ TMDEBUG_ALIGNMEM(right);
+ TMDEBUG_ALIGNMEM(bleft);
+ TMDEBUG_ALIGNMEM(bright);
+
+ FILTERBANKCOMPUTEBANK32_START();
+
+ banks = bank->nb_banks << 2;
+ zero = 0;
+ len = bank->len;
+ s = (1<<((15))>>1);
+
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<banks ; i+=4 )
+ { st32d(i, mel, zero);
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=1,k=0 ; i<len ; i+=2,j+=2,++k )
+ { register int ps1, ps0, _mel, ps0_msb, ps0_lsb, ps1_msb, ps1_lsb;
+ register int left10, right10, left1, left0, right1, right0;
+ register int il1, ir1, il0, ir0;
+
+ ps0 = ld32x(ps,i);
+ il0 = ld32x(bleft,i);
+ _mel = ld32x(mel,il0);
+ left10 = ld32x(left,k);
+ ir0 = ld32x(bright,i);
+ right10 = ld32x(right,k);
+
+ ps0_msb = ps0 >> 15;
+ ps0_lsb = ps0 & 0x00007fff;
+ left0 = sex16(left10);
+ right0 = sex16(right10);
+
+ _mel += left0 * ps0_msb + ((left0 * ps0_lsb + s ) >> 15);
+ mel[il0]= _mel;
+ _mel = ld32x(mel,ir0);
+ _mel += right0 * ps0_msb + ((right0 * ps0_lsb + s ) >> 15);
+ mel[ir0]= _mel;
+
+ ps1 = ld32x(ps,j);
+ il1 = ld32x(bleft,j);
+ _mel = ld32x(mel,il1);
+ ir1 = ld32x(bright,j);
+
+ left1 = asri(16,left10);
+ right1 = asri(16,right10);
+ ps1_msb = ps1 >> 15;
+ ps1_lsb = ps1 & 0x00007fff;
+
+ _mel += left1 * ps1_msb + ((left1 * ps1_lsb + s ) >> 15);
+ mel[il1]= _mel;
+ _mel = ld32x(mel,ir1);
+ _mel += right1 * ps1_msb + ((right1 * ps1_lsb + s ) >> 15);
+ mel[ir1]= _mel;
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ FILTERBANKCOMPUTEBANK32_STOP();
+}
+
+#define OVERRIDE_FILTERBANK_COMPUTE_PSD16
+void filterbank_compute_psd16(FilterBank * restrict bank, spx_word16_t * restrict mel, spx_word16_t * restrict ps)
+{
+ register int i, j, k, len, s;
+ register int * restrict left;
+ register int * restrict right;
+ register int * restrict bleft;
+ register int * restrict bright;
+
+ left = (int*)bank->filter_left;
+ right = (int*)bank->filter_right;
+ bleft = (int*)bank->bank_left;
+ bright = (int*)bank->bank_right;
+
+ TMDEBUG_ALIGNMEM(ps);
+ TMDEBUG_ALIGNMEM(mel);
+ TMDEBUG_ALIGNMEM(left);
+ TMDEBUG_ALIGNMEM(right);
+ TMDEBUG_ALIGNMEM(bleft);
+ TMDEBUG_ALIGNMEM(bright);
+
+ FILTERBANKCOMPUTEPSD16_START();
+
+ len = bank->len;
+ s = (1<<((15))>>1);
+
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEPSD16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0,k=0 ; i<len ; i+=2,j+=4,++k )
+ {
+ register int mell0, melr0, mel1, mel0, mell1, melr1;
+ register int il1, ir1, il0, ir0;
+ register int left10, right10, lr1, lr0;
+ register int acc0, acc1, ps10;
+
+ acc0 = acc1 = s;
+
+ il0 = ld32x(bleft, i);
+ ir0 = ld32x(bright,i);
+ mell0 = mel[il0];
+ melr0 = mel[ir0];
+ left10 = ld32x(left, k);
+ right10 = ld32x(right, k);
+ mel0 = pack16lsb(mell0, melr0);
+ lr0 = pack16lsb(left10, right10);
+
+ acc0 += ifir16(mel0, lr0);
+ acc0 >>= 15;
+
+ il1 = ld32x(bleft, i+1);
+ ir1 = ld32x(bright,i+1);
+ mell1 = mel[il1];
+ melr1 = mel[ir1];
+ mel1 = pack16lsb(mell1, melr1);
+ lr1 = pack16msb(left10, right10);
+
+ acc1 += ifir16(mel1, lr1);
+ acc1 >>= 15;
+
+ ps10 = pack16lsb(acc1, acc0);
+
+ st32d(j, ps, ps10);
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEPSD16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ FILTERBANKCOMPUTEPSD16_STOP();
+}
+
+#else
+
+#define OVERRIDE_FILTERBANK_COMPUTE_BANK32
+void filterbank_compute_bank32(FilterBank * restrict bank, float * restrict ps, float * restrict mel)
+{
+ register int i, banks, len;
+ register int * restrict bleft, * restrict bright;
+ register float * restrict left, * restrict right;
+
+ banks = bank->nb_banks;
+ len = bank->len;
+ bleft = bank->bank_left;
+ bright= bank->bank_right;
+ left = bank->filter_left;
+ right = bank->filter_right;
+
+ FILTERBANKCOMPUTEBANK32_START();
+
+ memset(mel, 0, banks * sizeof(float));
+
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i)
+ {
+ register int id1, id2;
+ register float psi;
+
+ id1 = bleft[i];
+ id2 = bright[i];
+ psi = ps[i];
+
+ mel[id1] += left[i] * psi;
+ mel[id2] += right[i] * psi;
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEBANK32)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ FILTERBANKCOMPUTEBANK32_STOP();
+}
+
+#define OVERRIDE_FILTERBANK_COMPUTE_PSD16
+void filterbank_compute_psd16(FilterBank * restrict bank, float * restrict mel, float * restrict ps)
+{
+ register int i, len;
+ register int * restrict bleft, * restrict bright;
+ register float * restrict left, * restrict right;
+
+ len = bank->len;
+ bleft = bank->bank_left;
+ bright= bank->bank_right;
+ left = bank->filter_left;
+ right = bank->filter_right;
+
+ FILTERBANKCOMPUTEPSD16_START();
+
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEPSD16)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i )
+ {
+ register float acc;
+ register int id1, id2;
+
+ id1 = bleft[i];
+ id2 = bright[i];
+
+ acc = mel[id1] * left[i];
+ acc += mel[id2] * right[i];
+
+ ps[i] = acc;
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERBANKCOMPUTEPSD16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ FILTERBANKCOMPUTEPSD16_STOP();
+}
+
+
+#endif
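In the fixed-point filterbank above, each 32-bit spectrum value is split into a high part (ps >> 15) and a low part (ps & 0x7fff) so that a 16-bit Q15 filter weight can be applied as w*hi + ((w*lo + s) >> 15), a rounded 16x32 multiply (essentially what Speex's MULT16_32_P15 does). The sketch below writes that identity out in plain C as an illustration only.

/* Illustrative sketch (not part of the patch): the rounded Q15 16x32 multiply
 * used by filterbank_compute_bank32() above (w is a Q15 weight, ps a 32-bit
 * power-spectrum value). */
#include <stdint.h>

static int32_t mult16_32_p15_ref(int16_t w, int32_t ps)
{
    int32_t hi = ps >> 15;        /* ps0_msb in the code above */
    int32_t lo = ps & 0x7fff;     /* ps0_lsb in the code above */
    int32_t s  = 1 << 14;         /* (1<<15)>>1, the rounding constant */
    return w * hi + ((w * lo + s) >> 15);
}
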
diff --git a/tmv/filters_tm.h b/tmv/filters_tm.h
new file mode 100644
index 0000000..8f28102
--- /dev/null
+++ b/tmv/filters_tm.h
@@ -0,0 +1,1521 @@
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define iadd(a,b) ((a) + (b))
+
+#define OVERRIDE_BW_LPC
+void bw_lpc(Int16 gamma, const Int16 *lpc_in, Int16 *lpc_out, int order)
+{
+ register int tmp, g, i;
+
+ TMDEBUG_ALIGNMEM(lpc_in);
+ TMDEBUG_ALIGNMEM(lpc_out);
+
+ BWLPC_START();
+
+ tmp = g = gamma;
+ for ( i=0 ; i<4 ; i+=2,lpc_out+=4 )
+ { register int in10, y1, y0, y10;
+ register int in32, y3, y2, y32;
+
+ in10 = ld32x(lpc_in,i);
+ y0 = ((tmp * sex16(in10)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y1 = ((tmp * asri(16,in10)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y10 = pack16lsb(y1,y0);
+ st32(lpc_out,y10);
+
+ in32 = ld32x(lpc_in,i+1);
+ y2 = ((tmp * sex16(in32)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y3 = ((tmp * asri(16,in32)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y32 = pack16lsb(y3,y2);
+ st32d(4,lpc_out,y32);
+ }
+
+ if ( order == 10 )
+ { register int in10, y1, y0, y10;
+
+ in10 = ld32x(lpc_in,i);
+ y0 = ((tmp * sex16(in10)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y1 = ((tmp * asri(16,in10)) + 16384) >> 15;
+ tmp = ((tmp * g) + 16384) >> 15;
+ y10 = pack16lsb(y1,y0);
+ st32(lpc_out,y10);
+ }
+
+ BWLPC_STOP();
+}
+
+
+#define OVERRIDE_HIGHPASS
+void highpass(const Int16 *x, Int16 *y, int len, int filtID, Int32 *mem)
+{
+ const Int16 Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
+ const Int16 Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
+ register int i;
+ register int den1, den2, num0, num1, num2, den11, den22, mem0, mem1;
+
+ TMDEBUG_ALIGNMEM(mem);
+
+ HIGHPASS_START();
+
+ filtID = imin(4, filtID);
+
+ den1 = -Pcoef[filtID][1];
+ den2 = -Pcoef[filtID][2];
+ num0 = Zcoef[filtID][0];
+ num1 = Zcoef[filtID][1];
+ num2 = Zcoef[filtID][2];
+ den11 = den1 << 1;
+ den22 = den2 << 1;
+ mem0 = mem[0];
+ mem1 = mem[1];
+
+#if (TM_UNROLL && TM_UNROLL_HIGHPASS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i )
+ {
+ register int yi;
+ register int vout, xi, vout_i, vout_d;
+
+ xi = x[i];
+
+ vout = num0 * xi + mem0;
+ vout_i = vout >> 15;
+ vout_d = vout & 0x7FFF;
+ yi = iclipi(PSHR32(vout,14),32767);
+ mem0 = (mem1 + num1 * xi) + (den11 * vout_i) + (((den1 * vout_d) >> 15) << 1);
+ mem1 = (num2 * xi) + (den22 * vout_i) + (((den2 * vout_d) >> 15) << 1);
+
+ y[i] = yi;
+ }
+#if (TM_UNROLL && TM_UNROLL_HIGHPASS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = mem0;
+ mem[1] = mem1;
+
+ HIGHPASS_STOP();
+}
+
+
+#define OVERRIDE_SIGNALMUL
+void signal_mul(const Int32 *x, Int32 *y, Int32 scale, int len)
+{
+ register int i, scale_i, scale_d;
+
+ SIGNALMUL_START();
+
+ scale_i = scale >> 14;
+ scale_d = scale & 0x3FFF;
+
+#if (TM_UNROLL && TM_UNROLL_SIGNALMUL)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i)
+ {
+ register int xi;
+
+ xi = x[i] >> 7;
+
+ y[i] = ((xi * scale_i + ((xi * scale_d) >> 14)) << 7);
+
+ }
+#if (TM_UNROLL && TM_UNROLL_SIGNALMUL)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ SIGNALMUL_STOP();
+}
+
+#define OVERRIDE_SIGNALDIV
+void signal_div(const Int16 *x, Int16 *y, Int32 scale, int len)
+{
+ register int i;
+
+ SIGNALDIV_START();
+
+ if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
+ {
+ register int scale_1;
+ scale = PSHR32(scale, SIG_SHIFT);
+ scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale);
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i)
+ {
+ y[i] = MULT16_16_P15(scale_1, x[i]);
+ }
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
+
+ register int scale_1;
+ scale = PSHR32(scale, SIG_SHIFT-5);
+ scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<len;i++)
+ {
+ y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
+ }
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ } else {
+
+ register int scale_1;
+ scale = PSHR32(scale, SIG_SHIFT-7);
+ scale = imax(5,scale);
+ scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
+
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<len;i++)
+ {
+ y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
+ }
+#if (TM_UNROLL && TM_UNROLL_SIGNALDIV)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ SIGNALDIV_STOP();
+}
+
+
+#define OVERRIDE_COMPUTE_RMS
+Int16 compute_rms(const Int32 *x, int len)
+{
+ register int i;
+ register int sum=0;
+ register int max_val=1;
+ register int sig_shift;
+
+ TMDEBUG_ALIGNMEM(x);
+
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int tmp0, tmp1, tmp2, tmp3;
+
+ tmp0 = ld32x(x,i);
+ tmp1 = ld32x(x,i+1);
+
+ tmp0 = iabs(tmp0);
+ tmp1 = iabs(tmp1);
+
+ tmp2 = ld32x(x,i+2);
+ tmp3 = ld32x(x,i+3);
+
+ tmp2 = iabs(tmp2);
+ tmp3 = iabs(tmp3);
+
+ tmp0 = imax(tmp0, tmp1);
+ max_val = imax(max_val, tmp0);
+ tmp2 = imax(tmp2, tmp3);
+ max_val = imax(max_val, tmp2);
+ }
+
+ sig_shift = 0;
+ while ( max_val>16383 )
+ { sig_shift++;
+ max_val >>= 1;
+ }
+
+
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int acc0, acc1, acc2;
+
+ acc0 = pack16lsb(ld32x(x,i) >> sig_shift, ld32x(x,i+1) >> sig_shift);
+ acc1 = pack16lsb(ld32x(x,i+2) >> sig_shift, ld32x(x,i+3) >> sig_shift);
+ acc2 = ifir16(acc0,acc0) + ifir16(acc1, acc1);
+ sum += acc2 >> 6;
+ }
+
+ return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
+}
+
+#define OVERRIDE_COMPUTE_RMS16
+Int16 compute_rms16(const Int16 *x, int len)
+{
+ register int max_val, i;
+
+ COMPUTERMS16_START();
+
+ max_val = 10;
+
+#if 0
+
+ {
+ register int len2 = len >> 1;
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len2 ; i+=2 )
+ {
+ register int x10, x32;
+
+ x10 = ld32x(x,i);
+ x32 = ld32x(x,i+1);
+
+ x10 = dspidualabs(x10);
+ x32 = dspidualabs(x32);
+
+ x10 = imax(sex16(x10), asri(16,x10));
+ x32 = imax(sex16(x32), asri(16,x32));
+
+ max_val = imax(max_val,x10);
+ max_val = imax(max_val,x32);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ if (max_val>16383)
+ {
+ register int sum = 0;
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len2; i+=2 )
+ {
+ register int x10, x32;
+ register int acc0, acc1;
+
+ x10 = ld32x(x,i);
+ x32 = ld32x(x,i+1);
+
+ x10 = dualasr(x10,1);
+ x32 = dualasr(x32,1);
+
+ acc0 = ifir16(x10,x10);
+ acc1 = ifir16(x32,x32);
+ sum += (acc0 + acc1) >> 6;
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ COMPUTERMS16_STOP();
+ return spx_sqrt(sum/len) << 4;
+ } else
+ {
+ register int sig_shift;
+ register int sum=0;
+
+ sig_shift = mux(max_val < 8192, 1, 0);
+ sig_shift = mux(max_val < 4096, 2, sig_shift);
+ sig_shift = mux(max_val < 2048, 3, sig_shift);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len2 ; i+=2 )
+ {
+ register int x10, x32;
+ register int acc0, acc1;
+
+ x10 = ld32x(x,i);
+ x32 = ld32x(x,i+1);
+
+ x10 = dualasl(x10,sig_shift);
+ x32 = dualasl(x32,sig_shift);
+
+ acc0 = ifir16(x10,x10);
+ acc1 = ifir16(x32,x32);
+ sum += (acc0 + acc1) >> 6;
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ COMPUTERMS16_STOP();
+ return spx_sqrt(sum/len) << (3 - sig_shift);
+ }
+ }
+
+#else
+ {
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i )
+ {
+ register int xi;
+
+ xi = x[i];
+ xi = iabs(xi);
+ max_val = imax(xi,max_val);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ if (max_val>16383)
+ {
+ register int sum = 0;
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int x10, x32;
+ register int acc0, acc1;
+
+ x10 = pack16lsb(x[i+1],x[i]);
+ x32 = pack16lsb(x[i+3],x[i+2]);
+
+ x10 = dualasr(x10,1);
+ x32 = dualasr(x32,1);
+
+ acc0 = ifir16(x10,x10);
+ acc1 = ifir16(x32,x32);
+ sum += (acc0 + acc1) >> 6;
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ COMPUTERMS16_STOP();
+ return spx_sqrt(sum/len) << 4;
+ } else {
+ register int sig_shift;
+ register int sum=0;
+
+ sig_shift = mux(max_val < 8192, 1, 0);
+ sig_shift = mux(max_val < 4096, 2, sig_shift);
+ sig_shift = mux(max_val < 2048, 3, sig_shift);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int x10, x32;
+ register int acc0, acc1;
+
+ x10 = pack16lsb(x[i+1],x[i]);
+ x32 = pack16lsb(x[i+3],x[i+2]);
+
+ x10 = dualasl(x10,sig_shift);
+ x32 = dualasl(x32,sig_shift);
+
+ acc0 = ifir16(x10,x10);
+ acc1 = ifir16(x32,x32);
+ sum += (acc0 + acc1) >> 6;
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTERMS16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ COMPUTERMS16_STOP();
+ return spx_sqrt(sum/len) << (3 - sig_shift);
+ }
+ }
+#endif
+}
+
+int normalize16_9(const Int32* restrict x, Int16 * restrict y, Int32 max_scale)
+{
+ register int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+ register int max_val, m0, m1, m2, m3, m4;
+ register int sig_shift;
+ register int y10, y32, y54, y76;
+
+ TMDEBUG_ALIGNMEM(x);
+
+ x0 = ld32(x);
+ x1 = ld32x(x,1); x2 = ld32x(x,2); x3 = ld32x(x,3); x4 = ld32x(x,4);
+ x5 = ld32x(x,5); x6 = ld32x(x,6); x7 = ld32x(x,7); x8 = ld32x(x,8);
+
+ m0 = imax(iabs(x0), iabs(x1));
+ m1 = imax(iabs(x2), iabs(x3));
+ m2 = imax(iabs(x4), iabs(x5));
+ m3 = imax(iabs(x6), iabs(x7));
+ m4 = imax(m0, iabs(x8));
+ m1 = imax(m1, m2);
+ m3 = imax(m3, m4);
+ max_val = imax(1,imax(m1,m3));
+
+ sig_shift=0;
+ while (max_val>max_scale)
+ { sig_shift++;
+ max_val >>= 1;
+ }
+
+ if ( sig_shift != 0 )
+ {
+ y10 = pack16lsb(x1 >> sig_shift, x0 >> sig_shift);
+ y32 = pack16lsb(x3 >> sig_shift, x2 >> sig_shift);
+ y54 = pack16lsb(x5 >> sig_shift, x4 >> sig_shift);
+ y76 = pack16lsb(x7 >> sig_shift, x6 >> sig_shift);
+
+ y[8] = x8 >> sig_shift;
+ st32(y,y10);
+ st32d(4,y,y32);
+ st32d(8,y,y54);
+ st32d(12,y,y76);
+ }
+ return sig_shift;
+}
+
+int normalize16_mod8(const Int32 * restrict x, Int16 * restrict y, Int32 max_scale,int len)
+{
+ register int i, max_val, sig_shift;
+
+ TMDEBUG_ALIGNMEM(x);
+
+ max_val = 1;
+
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int tmp0, tmp1, tmp2, tmp3;
+
+ tmp0 = ld32x(x,i);
+ tmp1 = ld32x(x,i+1);
+
+ tmp0 = iabs(tmp0);
+ tmp1 = iabs(tmp1);
+
+ tmp2 = ld32x(x,i+2);
+ tmp3 = ld32x(x,i+3);
+
+ tmp2 = iabs(tmp2);
+ tmp3 = iabs(tmp3);
+
+ tmp0 = imax(tmp0, tmp1);
+ max_val = imax(max_val, tmp0);
+ tmp2 = imax(tmp2, tmp3);
+ max_val = imax(max_val, tmp2);
+ }
+
+ sig_shift=0;
+ while (max_val>max_scale)
+ { sig_shift++;
+ max_val >>= 1;
+ }
+
+ if ( sig_shift != 0 )
+ {
+ for ( i=0 ; i<len ; i+=8, y+=8 )
+ {
+ register int x0, x1, x2, x3, x4, x5, x6, x7;
+ register int y10, y32, y54, y76;
+
+ x0 = ld32x(x,i); x1 = ld32x(x,i+1); x2 = ld32x(x,i+2); x3 = ld32x(x,i+3);
+ x4 = ld32x(x,i+4); x5 = ld32x(x,i+5); x6 = ld32x(x,i+6); x7 = ld32x(x,i+7);
+
+ y10 = pack16lsb(x1 >> sig_shift, x0 >> sig_shift);
+ y32 = pack16lsb(x3 >> sig_shift, x2 >> sig_shift);
+ y54 = pack16lsb(x5 >> sig_shift, x4 >> sig_shift);
+ y76 = pack16lsb(x7 >> sig_shift, x6 >> sig_shift);
+
+ st32(y,y10);
+ st32d(4,y,y32);
+ st32d(8,y,y54);
+ st32d(12,y,y76);
+ }
+ }
+ return sig_shift;
+}
+
+
+#define OVERRIDE_NORMALIZE16
+int normalize16(const Int32 *x, Int16 *y, Int32 max_scale, int len)
+{
+ TMDEBUG_ALIGNMEM(x);
+ TMDEBUG_ALIGNMEM(y);
+
+ NORMALIZE16_START();
+
+ if ( len == 9 )
+ { NORMALIZE16_STOP();
+ return normalize16_9(x,y,max_scale);
+ } else
+ { NORMALIZE16_STOP();
+ return normalize16_mod8(x,y,max_scale,len);
+ }
+}
+
+
+void filter_mem16_10(const Int16 *x, const Int16 *num, const Int16 *den, Int16 *y, int N, Int32 *mem)
+{
+ register int i;
+ register int c9, c8, c7, c6, c5;
+ register int c4, c3, c2, c1, c0;
+ register int input;
+ register int output_0, output_1, output_2, output_3, output_4;
+ register int output_5, output_6, output_7, output_8, output_9;
+ register Int16 xi, yi;
+
+ c9 = pack16lsb(-den[9],num[9]);
+ c8 = pack16lsb(-den[8],num[8]);
+ c7 = pack16lsb(-den[7],num[7]);
+ c6 = pack16lsb(-den[6],num[6]);
+ c5 = pack16lsb(-den[5],num[5]);
+ c4 = pack16lsb(-den[4],num[4]);
+ c3 = pack16lsb(-den[3],num[3]);
+ c2 = pack16lsb(-den[2],num[2]);
+ c1 = pack16lsb(-den[1],num[1]);
+ c0 = pack16lsb(-den[0],num[0]);
+
+ output_0 = mem[0];
+ output_1 = mem[1];
+ output_2 = mem[2];
+ output_3 = mem[3];
+ output_4 = mem[4];
+ output_5 = mem[5];
+ output_6 = mem[6];
+ output_7 = mem[7];
+ output_8 = mem[8];
+ output_9 = mem[9];
+
+#if (TM_UNROLL && TM_UNROLL_FILTER)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0 ; i<N ; i++ )
+ {
+ xi = (int)(x[i]);
+ yi = iclipi(iadd(xi,PSHR32(output_0,LPC_SHIFT)),32767);
+
+ input = pack16lsb(yi,xi);
+ output_0= iadd(ifir16(c0,input),output_1);
+ output_1= iadd(ifir16(c1,input),output_2);
+ output_2= iadd(ifir16(c2,input),output_3);
+ output_3= iadd(ifir16(c3,input),output_4);
+ output_4= iadd(ifir16(c4,input),output_5);
+ output_5= iadd(ifir16(c5,input),output_6);
+ output_6= iadd(ifir16(c6,input),output_7);
+ output_7= iadd(ifir16(c7,input),output_8);
+ output_8= iadd(ifir16(c8,input),output_9);
+ output_9= ifir16(c9,input);
+
+ y[i] = yi;
+ }
+
+#if (TM_UNROLL && TM_UNROLL_FILTER)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = output_0;
+ mem[1] = output_1;
+ mem[2] = output_2;
+ mem[3] = output_3;
+ mem[4] = output_4;
+ mem[5] = output_5;
+ mem[6] = output_6;
+ mem[7] = output_7;
+ mem[8] = output_8;
+ mem[9] = output_9;
+}
+
+void filter_mem16_8(const Int16 *x, const Int16 *num, const Int16 *den, Int16 *y, int N, Int32 *mem)
+{
+ register int i;
+ register int c7, c6, c5, c4, c3, c2, c1, c0;
+ register int output_0, output_1, output_2, output_3, output_4, output_5, output_6, output_7;
+ register int input;
+ register Int16 xi, yi;
+
+ c7 = pack16lsb(-den[7],num[7]);
+ c6 = pack16lsb(-den[6],num[6]);
+ c5 = pack16lsb(-den[5],num[5]);
+ c4 = pack16lsb(-den[4],num[4]);
+ c3 = pack16lsb(-den[3],num[3]);
+ c2 = pack16lsb(-den[2],num[2]);
+ c1 = pack16lsb(-den[1],num[1]);
+ c0 = pack16lsb(-den[0],num[0]);
+
+ output_0 = mem[0];
+ output_1 = mem[1];
+ output_2 = mem[2];
+ output_3 = mem[3];
+ output_4 = mem[4];
+ output_5 = mem[5];
+ output_6 = mem[6];
+ output_7 = mem[7];
+
+#if (TM_UNROLL && TM_UNROLL_FILTER)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0 ; i<N ; i++ )
+ {
+ xi = x[i];
+ yi = iclipi(iadd((int)(xi),PSHR32(output_0,LPC_SHIFT)),32767);
+
+ input = pack16lsb(yi,xi);
+ output_0= iadd(ifir16(c0,input),output_1);
+ output_1= iadd(ifir16(c1,input),output_2);
+ output_2= iadd(ifir16(c2,input),output_3);
+ output_3= iadd(ifir16(c3,input),output_4);
+ output_4= iadd(ifir16(c4,input),output_5);
+ output_5= iadd(ifir16(c5,input),output_6);
+ output_6= iadd(ifir16(c6,input),output_7);
+ output_7= ifir16(c7,input);
+
+ y[i] = yi;
+ }
+
+#if (TM_UNROLL && TM_UNROLL_FILTER)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+
+ mem[0] = output_0;
+ mem[1] = output_1;
+ mem[2] = output_2;
+ mem[3] = output_3;
+ mem[4] = output_4;
+ mem[5] = output_5;
+ mem[6] = output_6;
+ mem[7] = output_7;
+}
+
+#define OVERRIDE_FILTER_MEM16
+void filter_mem16(const Int16 *x, const Int16 *num, const Int16 *den, Int16 *y, int N, int ord, Int32 *mem, char *stack)
+{
+ TMDEBUG_ALIGNMEM(x);
+ TMDEBUG_ALIGNMEM(y);
+ TMDEBUG_ALIGNMEM(num);
+ TMDEBUG_ALIGNMEM(den);
+
+ FILTERMEM16_START();
+
+ if(ord==10)
+ filter_mem16_10(x, num, den, y, N, mem);
+ else if (ord==8)
+ filter_mem16_8(x, num, den, y, N, mem);
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ FILTERMEM16_STOP();
+}
+
+void iir_mem16_8(const Int16 *x, const Int16 *den, Int16 *y, int N, Int32 *mem)
+{
+ register int i;
+ register int c67, c45, c23, c01;
+ register int r1, r2, r3;
+ register int y10, y32, y54, y76, yi;
+
+ c67 = pack16lsb(-den[6],-den[7]);
+ c45 = pack16lsb(-den[4],-den[5]);
+ c23 = pack16lsb(-den[2],-den[3]);
+ c01 = pack16lsb(-den[0],-den[1]);
+
+ y10 = mem[0];
+ y32 = mem[1];
+ y54 = mem[2];
+ y76 = mem[3];
+
+#if (TM_UNROLL && TM_UNROLL_IIR)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0 ; i < N ; ++i )
+ {
+ r2 = iadd(ifir16(y10,c67),ifir16(y32,c45));
+ r3 = iadd(ifir16(y54,c23),ifir16(y76,c01));
+ r1 = iadd(r2,r3);
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+
+ yi = iclipi(iadd((int)(x[i]),PSHR32(r1,LPC_SHIFT)),32767);
+ y[i]= yi;
+ y76 = funshift2(yi, y76);
+ }
+
+#if (TM_UNROLL && TM_UNROLL_IIR)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = y10;
+ mem[1] = y32;
+ mem[2] = y54;
+ mem[3] = y76;
+
+}
+
+void iir_mem16_10(const Int16 *x, const Int16 *den, Int16 *y, int N, Int32 *mem)
+{
+ register int i;
+ register int c89, c67, c45, c23, c01;
+ register int r1, r2, r3, r4, r5;
+ register int y10, y32, y54, y76, y98, yi;
+
+ c89 = pack16lsb(-den[8],-den[9]);
+ c67 = pack16lsb(-den[6],-den[7]);
+ c45 = pack16lsb(-den[4],-den[5]);
+ c23 = pack16lsb(-den[2],-den[3]);
+ c01 = pack16lsb(-den[0],-den[1]);
+
+ y10 = mem[0];
+ y32 = mem[1];
+ y54 = mem[2];
+ y76 = mem[3];
+ y98 = mem[4];
+
+#if (TM_UNROLL && TM_UNROLL_IIR)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0 ; i < N ; ++i )
+ {
+ r2 = iadd(ifir16(y10,c89),ifir16(y32,c67));
+ r3 = iadd(ifir16(y54,c45),ifir16(y76,c23));
+ r4 = ifir16(y98,c01);
+ r5 = iadd(r2,r3);
+ r1 = iadd(r4,r5);
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+ y76 = funshift2(y98, y76);
+
+ yi = iclipi(iadd((int)(x[i]),PSHR32(r1,LPC_SHIFT)),32767);
+ y[i]= yi;
+ y98 = funshift2(yi, y98);
+ }
+
+#if (TM_UNROLL && TM_UNROLL_IIR)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = y10;
+ mem[1] = y32;
+ mem[2] = y54;
+ mem[3] = y76;
+ mem[4] = y98;
+}
+
+#define OVERRIDE_IIR_MEM16
+void iir_mem16(const Int16 *x, const Int16 *den, Int16 *y, int N, int ord, Int32 *mem, char *stack)
+{
+ TMDEBUG_ALIGNMEM(den);
+
+ IIRMEM16_START();
+
+ if(ord==10)
+ iir_mem16_10(x, den, y, N, mem);
+ else if (ord==8)
+ iir_mem16_8(x, den, y, N, mem);
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ IIRMEM16_STOP();
+}
+
+void fir_mem16_8(const Int16 *x, const Int16 *num, Int16 *y, int N, Int32 *mem)
+{
+ register int i, N_2;
+ register int c67, c45, c23, c01;
+ register int b0, b1, b2, b3;
+ register int r1, r2, r3;
+ register int x10, x32, x54, x76, xi;
+ register Int16 *a;
+
+ N_2 = N >> 1;
+
+ c67 = ld32x(num,3);
+ c45 = ld32x(num,2);
+ c23 = ld32x(num,1);
+ c01 = ld32x(num,0);
+
+ c67 = funshift2(c67,c67);
+ c45 = funshift2(c45,c45);
+ c23 = funshift2(c23,c23);
+ c01 = funshift2(c01,c01);
+
+ b3 = x76 = ld32x(x,N_2-1);
+ b2 = x54 = ld32x(x,N_2-2);
+ b1 = x32 = ld32x(x,N_2-3);
+ b0 = x10 = ld32x(x,N_2-4);
+
+#if (TM_UNROLL && TM_UNROLL_FILTER > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=N-1 ; i >= 8 ; --i )
+ {
+ xi = asri(16,x76);
+ x76 = funshift2(x76, x54);
+ x54 = funshift2(x54, x32);
+ x32 = funshift2(x32, x10);
+ x10 = pack16lsb(x10, (int)x[i-8]);
+
+ r2 = iadd(ifir16(x10,c67),ifir16(x32,c45));
+ r3 = iadd(ifir16(x54,c23),ifir16(x76,c01));
+ r1 = iadd(r2,r3);
+
+ y[i] = iclipi(iadd(xi,PSHR32(r1,LPC_SHIFT)),32767);
+ }
+ for ( i=7, a=(Int16*)mem ; i>=0 ; --i )
+ {
+ xi = asri(16,x76);
+ x76 = funshift2(x76, x54);
+ x54 = funshift2(x54, x32);
+ x32 = funshift2(x32, x10);
+ x10 = pack16lsb(x10, (int)a[i]);
+
+ r2 = iadd(ifir16(x10,c67),ifir16(x32,c45));
+ r3 = iadd(ifir16(x54,c23),ifir16(x76,c01));
+ r1 = iadd(r2,r3);
+
+ y[i] = iclipi(iadd(xi,PSHR32(r1,LPC_SHIFT)),32767);
+ }
+
+#if (TM_UNROLL && TM_UNROLL_FILTER > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = b0;
+ mem[1] = b1;
+ mem[2] = b2;
+ mem[3] = b3;
+}
+
+void fir_mem16_10(const Int16 *x, const Int16 *num, Int16 *y, int N, Int32 *mem)
+{
+ register int N_2, i;
+ register int c89, c67, c45, c23, c01;
+ register int b0, b1, b2, b3, b4;
+ register int r1, r2, r3, r4, r5;
+ register int x10, x32, x54, x76, x98, xi;
+ register Int16 *a;
+
+ N_2 = N >> 1;
+
+ c89 = ld32x(num,4);
+ c67 = ld32x(num,3);
+ c45 = ld32x(num,2);
+ c23 = ld32x(num,1);
+ c01 = ld32x(num,0);
+
+ c89 = funshift2(c89,c89);
+ c67 = funshift2(c67,c67);
+ c45 = funshift2(c45,c45);
+ c23 = funshift2(c23,c23);
+ c01 = funshift2(c01,c01);
+
+ b4 = x98 = ld32x(x,N_2-1);
+ b3 = x76 = ld32x(x,N_2-2);
+ b2 = x54 = ld32x(x,N_2-3);
+ b1 = x32 = ld32x(x,N_2-4);
+ b0 = x10 = ld32x(x,N_2-5);
+
+#if (TM_UNROLL && TM_UNROLL_FIR > 0)
+#pragma TCS_unroll=5
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=N-1 ; i >= 10 ; --i )
+ {
+ xi = asri(16,x98);
+ x98 = funshift2(x98, x76);
+ x76 = funshift2(x76, x54);
+ x54 = funshift2(x54, x32);
+ x32 = funshift2(x32, x10);
+ x10 = pack16lsb(x10, (int)(x[i-10]));
+
+ r2 = iadd(ifir16(x10,c89),ifir16(x32,c67));
+ r3 = iadd(ifir16(x54,c45),ifir16(x76,c23));
+ r4 = ifir16(x98,c01);
+ r5 = iadd(r2,r3);
+ r1 = iadd(r4,r5);
+
+ y[i] = iclipi(iadd(xi,PSHR32(r1,LPC_SHIFT)),32767);
+ }
+
+ for ( i=9,a =(Int16*)mem ; i>=0 ; --i )
+ {
+ xi = asri(16,x98);
+ x98 = funshift2(x98, x76);
+ x76 = funshift2(x76, x54);
+ x54 = funshift2(x54, x32);
+ x32 = funshift2(x32, x10);
+ x10 = pack16lsb(x10, (int)(a[i]));
+
+ r2 = iadd(ifir16(x10,c89),ifir16(x32,c67));
+ r3 = iadd(ifir16(x54,c45),ifir16(x76,c23));
+ r4 = ifir16(x98,c01);
+ r5 = iadd(r2,r3);
+ r1 = iadd(r4,r5);
+
+ y[i] = iclipi(iadd(xi,PSHR32(r1,LPC_SHIFT)),32767);
+ }
+
+#if (TM_UNROLL && TM_UNROLL_FIR > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = b0;
+ mem[1] = b1;
+ mem[2] = b2;
+ mem[3] = b3;
+ mem[4] = b4;
+
+
+}
+
+#define OVERRIDE_FIR_MEM16
+void fir_mem16(const spx_word16_t *x, const Int16 *num, spx_word16_t *y, int N, int ord, Int32 *mem, char *stack)
+{
+ TMDEBUG_ALIGNMEM(x);
+ TMDEBUG_ALIGNMEM(y);
+ TMDEBUG_ALIGNMEM(num);
+
+ FIRMEM16_START();
+
+ if(ord==10)
+ fir_mem16_10(x, num, y, N, mem);
+ else if (ord==8)
+ fir_mem16_8(x, num, y, N, mem);
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ FIRMEM16_STOP();
+}
+
+
+
+#define OVERRIDE_SYN_PERCEP_ZERO16
+void syn_percep_zero16(const Int16 *xx, const Int16 *ak, const Int16 *awk1, const Int16 *awk2, Int16 *y, int N, int ord, char *stack)
+{
+ register int i,j;
+ VARDECL(Int32 *mem);
+ ALLOC(mem, ord, Int32);
+
+ TMDEBUG_ALIGNMEM(mem);
+
+ for ( i=0,j=0 ; i<ord ; ++i,j+=4 )
+ st32d(j,mem,0);
+ iir_mem16(xx, ak, y, N, ord, mem, stack);
+ for ( i=0,j=0 ; i<ord ; ++i,j+=4 )
+ st32d(j,mem,0);
+ filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
+}
+
+
+#define OVERRIDE_RESIDUE_PERCEP_ZERO16
+void residue_percep_zero16(const Int16 *xx, const Int16 *ak, const Int16 *awk1, const Int16 *awk2, Int16 *y, int N, int ord, char *stack)
+{
+ register int i,j;
+ VARDECL(Int32 *mem);
+ ALLOC(mem, ord, Int32);
+
+ TMDEBUG_ALIGNMEM(mem);
+
+ for ( i=0,j=0 ; i<ord ; ++i,j+=4 )
+ st32d(j,mem,0);
+ filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
+ for ( i=0,j=0 ; i<ord ; ++i,j+=4 )
+ st32d(j,mem,0);
+ fir_mem16(y, awk2, y, N, ord, mem, stack);
+}
+
+
+
+void compute_impulse_response_10(const Int16 *ak, const Int16 *awk1, const Int16 *awk2, Int16 *y, int N)
+{
+ register int awk_01, awk_23, awk_45, awk_67, awk_89;
+ register int y10, y32, y54, y76, y98, yi;
+ register int i, acc0, acc1, N_2;
+
+ N_2 = N << 1;
+
+ awk_01 = ld32(awk1);
+ awk_23 = ld32x(awk1,1);
+ awk_45 = ld32x(awk1,2);
+ awk_67 = ld32x(awk1,3);
+ awk_89 = ld32x(awk1,4);
+
+ y10 = funshift2(awk_01, LPC_SCALING << 16);
+ st32d(0, y, y10);
+ y32 = funshift2(awk_23, awk_01);
+ st32d(4, y, y32);
+ y54 = funshift2(awk_45, awk_23);
+ st32d(8, y, y54);
+ y76 = funshift2(awk_67, awk_45);
+ st32d(12, y, y76);
+ y98 = funshift2(awk_89, awk_67);
+ st32d(16, y, y98);
+ y10 = funshift2(0, awk_89);
+ st32d(20, y, y10);
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=24 ; i<N_2 ; i+=4 )
+ { st32d(i, y, 0);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ y10 = y32 = y54 = y76 = y98 = 0;
+ awk_01 = ld32(awk2);
+ awk_23 = ld32x(awk2,1);
+ awk_45 = ld32x(awk2,2);
+ awk_67 = ld32x(awk2,3);
+ awk_89 = ld32x(awk2,4);
+
+ awk_01 = funshift2(awk_01, awk_01);
+ awk_23 = funshift2(awk_23, awk_23);
+ awk_45 = funshift2(awk_45, awk_45);
+ awk_67 = funshift2(awk_67, awk_67);
+ awk_89 = funshift2(awk_89, awk_89);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ yi = y[i];
+
+ acc0 = ifir16(y10, awk_89) + ifir16(y32, awk_67);
+ acc1 = ifir16(y54, awk_45) + ifir16(y76, awk_23);
+ yi += PSHR32(acc0 + acc1 + ifir16(y98, awk_01),LPC_SHIFT);
+ y[i] = yi;
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+ y76 = funshift2(y98, y76);
+ y98 = funshift2(ineg(yi), y98);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ y10 = y32 = y54 = y76 = y98 = 0;
+ awk_01 = ld32(ak);
+ awk_23 = ld32x(ak,1);
+ awk_45 = ld32x(ak,2);
+ awk_67 = ld32x(ak,3);
+ awk_89 = ld32x(ak,4);
+ awk_01 = funshift2(awk_01, awk_01);
+ awk_23 = funshift2(awk_23, awk_23);
+ awk_45 = funshift2(awk_45, awk_45);
+ awk_67 = funshift2(awk_67, awk_67);
+ awk_89 = funshift2(awk_89, awk_89);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ yi = y[i];
+
+ acc0 = ifir16(y10, awk_89) + ifir16(y32, awk_67);
+ acc1 = ifir16(y54, awk_45) + ifir16(y76, awk_23);
+ yi = PSHR32(SHL32(yi,LPC_SHIFT+1) + acc0 + acc1 + ifir16(y98, awk_01),LPC_SHIFT);
+ y[i] = yi;
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+ y76 = funshift2(y98, y76);
+ y98 = funshift2(ineg(yi), y98);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void compute_impulse_response_8(const Int16 *ak, const Int16 *awk1, const Int16 *awk2, Int16 *y, int N)
+{
+ register int awk_01, awk_23, awk_45, awk_67;
+ register int y10, y32, y54, y76, yi;
+ register int i, acc0, acc1, N_2;
+
+ N_2 = N << 1;
+
+ awk_01 = ld32(awk1);
+ awk_23 = ld32x(awk1,1);
+ awk_45 = ld32x(awk1,2);
+ awk_67 = ld32x(awk1,3);
+
+ y10 = funshift2(awk_01, LPC_SCALING << 16);
+ st32d(0, y, y10);
+ y32 = funshift2(awk_23, awk_01);
+ st32d(4, y, y32);
+ y54 = funshift2(awk_45, awk_23);
+ st32d(8, y, y54);
+ y76 = funshift2(awk_67, awk_45);
+ st32d(12, y, y76);
+ y10 = funshift2(0, awk_67);
+ st32d(16, y, y10);
+ st32d(20, y, 0);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=24 ; i<N_2 ; i+=4 )
+ { st32d(i, y, 0);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ y10 = y32 = y54 = y76 = 0;
+ awk_01 = ld32(awk2);
+ awk_23 = ld32x(awk2,1);
+ awk_45 = ld32x(awk2,2);
+ awk_67 = ld32x(awk2,3);
+
+ awk_01 = funshift2(awk_01, awk_01);
+ awk_23 = funshift2(awk_23, awk_23);
+ awk_45 = funshift2(awk_45, awk_45);
+ awk_67 = funshift2(awk_67, awk_67);
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ yi = y[i];
+
+ acc0 = ifir16(y10, awk_67) + ifir16(y32, awk_45);
+ acc1 = ifir16(y54, awk_23) + ifir16(y76, awk_01);
+ yi += PSHR32(acc0 + acc1,LPC_SHIFT);
+ y[i] = yi;
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+ y76 = funshift2(ineg(yi), y76);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ y10 = y32 = y54 = y76 = 0;
+ awk_01 = ld32(ak);
+ awk_23 = ld32x(ak,1);
+ awk_45 = ld32x(ak,2);
+ awk_67 = ld32x(ak,3);
+ awk_01 = funshift2(awk_01, awk_01);
+ awk_23 = funshift2(awk_23, awk_23);
+ awk_45 = funshift2(awk_45, awk_45);
+ awk_67 = funshift2(awk_67, awk_67);
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ yi = y[i];
+
+ acc0 = ifir16(y10, awk_67) + ifir16(y32, awk_45);
+ acc1 = ifir16(y54, awk_23) + ifir16(y76, awk_01);
+ yi = PSHR32(SHL32(yi,LPC_SHIFT+1) + acc0 + acc1,LPC_SHIFT);
+ y[i] = yi;
+
+ y10 = funshift2(y32, y10);
+ y32 = funshift2(y54, y32);
+ y54 = funshift2(y76, y54);
+ y76 = funshift2(ineg(yi), y76);
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEIMPULSERESPONSE > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+
+#define OVERRIDE_COMPUTE_IMPULSE_RESPONSE
+void compute_impulse_response(const Int16 *ak, const Int16 *awk1, const Int16 *awk2, Int16 *y, int N, int ord, char *stack)
+{
+ TMDEBUG_ALIGNMEM(ak);
+ TMDEBUG_ALIGNMEM(awk1);
+ TMDEBUG_ALIGNMEM(awk2);
+ TMDEBUG_ALIGNMEM(y);
+
+ COMPUTEIMPULSERESPONSE_START();
+ if ( ord == 10 )
+ compute_impulse_response_10(ak,awk1,awk2,y,N);
+ else
+ compute_impulse_response_8(ak,awk1,awk2,y,N);
+
+ (void)stack;
+
+ COMPUTEIMPULSERESPONSE_STOP();
+}
+
+
+#define OVERRIDE_QMFSYNTH
+void qmf_synth(const Int16 *x1, const Int16 *x2, const Int16 *a, Int16 *y, int N, int M, Int32 *mem1, Int32 *mem2, char *stack)
+ /* assumptions:
+ all odd x[i] are zero -- well, actually they are left out of the array now
+ N and M are multiples of 4 */
+{
+ register int i, j;
+ register int M2, N2;
+ VARDECL(int *x12);
+ M2 = M>>1;
+ N2 = N>>1;
+ ALLOC(x12, M2+N2, int);
+
+
+ TMDEBUG_ALIGNMEM(a);
+ TMDEBUG_ALIGNMEM(x12);
+ TMDEBUG_ALIGNMEM(mem1);
+ TMDEBUG_ALIGNMEM(mem2);
+
+ QMFSYNTH_START();
+
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N2 ; ++i )
+ { register int index = N2-1-i;
+ x12[i] = pack16lsb(x1[index],x2[index]);
+ }
+
+ for ( j= 0; j<M2 ; ++j)
+ { register int index = (j << 1) + 1;
+ x12[N2+j] = pack16lsb(mem1[index],mem2[index]);
+ }
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ for (i = 0; i < N2; i += 2)
+ {
+ register int y0, y1, y2, y3;
+ register int x12_0;
+
+ y0 = y1 = y2 = y3 = 0;
+ x12_0 = x12[N2-2-i];
+
+ for (j = 0; j < M2; j += 2)
+ {
+ register int x12_1;
+ register int a10, a11, a0_0;
+ register int _a10, _a11, _a0_0;
+
+ a10 = ld32x(a,j);
+ a11 = pack16msb(a10,a10);
+ a0_0= pack16lsb(a10,ineg(sex16(a10)));
+ x12_1 = x12[N2-1+j-i];
+
+ y0 += ifir16(a0_0,x12_1);
+ y1 += ifir16(a11, x12_1);
+ y2 += ifir16(a0_0,x12_0);
+ y3 += ifir16(a11 ,x12_0);
+
+
+ _a10 = ld32x(a,j+1);
+ _a11 = pack16msb(_a10,_a10);
+ _a0_0 = pack16lsb(_a10,ineg(sex16(_a10)));
+ x12_0 = x12[N2+j-i];
+
+ y0 += ifir16(_a0_0,x12_0);
+ y1 += ifir16(_a11, x12_0);
+ y2 += ifir16(_a0_0,x12_1);
+ y3 += ifir16(_a11 ,x12_1);
+
+ }
+ y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
+ y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
+ y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
+ y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
+ }
+
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i = 0; i < M2; ++i)
+ { mem1[2*i+1] = asri(16,x12[i]);
+ mem2[2*i+1] = sex16(x12[i]);
+ }
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ QMFSYNTH_STOP();
+}
+
+
+#define OVERRIDE_QMFDECOMP
+void qmf_decomp(const Int16 *xx, const Int16 *aa, Int16 *y1, Int16 *y2, int N, int M, Int16 *mem, char *stack)
+{
+ VARDECL(int *_a);
+ VARDECL(int *_x);
+ register int i, j, k, MM, M2, N2;
+ register int _xx10, _mm10;
+ register int *_x2;
+
+ M2=M>>1;
+ N2=N>>1;
+ MM=(M-2)<<1;
+
+ ALLOC(_a, M2, int);
+ ALLOC(_x, N2+M2, int);
+ _x2 = _x + M2 - 1;
+
+ TMDEBUG_ALIGNMEM(xx);
+ TMDEBUG_ALIGNMEM(aa);
+ TMDEBUG_ALIGNMEM(y1);
+ TMDEBUG_ALIGNMEM(y2);
+ TMDEBUG_ALIGNMEM(mem);
+ TMDEBUG_ALIGNMEM(_a);
+ TMDEBUG_ALIGNMEM(_x);
+
+ QMFDECOMP_START();
+
+ _xx10 = ld32(xx);
+ _xx10 = dualasr(_xx10,1);
+ _mm10 = ld32(mem);
+ _x2[0] = pack16lsb(_xx10,_mm10);
+
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<M2 ; ++i )
+ { register int a10;
+
+ a10 = ld32x(aa,i);
+ a10 = funshift2(a10, a10);
+ _a[M2-i-1] = a10;
+ }
+
+ for ( j=1 ; j<N2 ; ++j )
+ { register int _xx32;
+
+ _xx32 = ld32x(xx,j);
+ _xx32 = dualasr(_xx32,1);
+ _x2[j] = funshift2(_xx32, _xx10);
+ _xx10 = _xx32;
+ }
+
+ for ( k=1 ; k<M2; ++k )
+ { register int _mm32;
+
+ _mm32 = ld32x(mem,k);
+ _mm10 = funshift2(_mm10,_mm10);
+ _x2[-k] = pack16lsb(_mm10,_mm32);
+ _mm10 = _mm32;
+ }
+
+
+ for ( i=N2-1,j=0 ; j<MM ; --i,j+=4 )
+ { register int _xx;
+
+ _xx = ld32x(xx,i);
+ _xx = dualasr(_xx,1);
+ _xx = funshift2(_xx,_xx);
+ st32d(j, mem, _xx);
+ }
+#if (TM_UNROLL && TM_UNROLL_QMFSYNTH > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ mem[M-2] = xx[N-M+1] >> 1;
+
+
+ M2 >>= 1;
+ for ( i=0 ; i<N2 ; ++i )
+ { register int y1k, y2k;
+
+ y1k = y2k = 0;
+
+ for ( j=0 ; j<M2 ; j+=2 )
+ { register int _aa, _acc0, _acc1;
+ register int __xx10, __mm10, __acc0, __acc1, __aa;
+ register int _tmp0, _tmp1, _tmp2, _tmp3;
+
+ _xx10 = ld32x(_x, i+j);
+ _mm10 = ld32x(_x2,i-j);
+ _aa = ld32x(_a, j);
+ _mm10 = funshift2(_mm10,_mm10);
+ _acc0 = dspidualadd(_xx10, _mm10);
+ _acc1 = dspidualsub(_xx10, _mm10);
+
+ __xx10 = ld32x(_x, i+j+1);
+ __mm10 = ld32x(_x2,i-j-1);
+ __aa = ld32x(_a, j+1);
+ __mm10 = funshift2(__mm10,__mm10);
+ __acc0 = dspidualadd(__xx10, __mm10);
+ __acc1 = dspidualsub(__xx10, __mm10);
+
+ y1k += ifir16(_aa, _acc0);
+ y1k += ifir16(__aa, __acc0);
+
+ _tmp0 = pack16lsb(_aa,__aa);
+ _tmp1 = pack16msb(_aa,__aa);
+ _tmp2 = pack16lsb(_acc1, __acc1);
+ _tmp3 = pack16msb(_acc1, __acc1);
+
+ y2k -= ifir16(_tmp0, _tmp2);
+ y2k += ifir16(_tmp1, _tmp3);
+
+ }
+
+ y1[i] = iclipi(PSHR32(y1k,15),32767);
+ y2[i] = iclipi(PSHR32(y2k,15),32767);
+ }
+
+ QMFDECOMP_STOP();
+}
+
+#endif
+
diff --git a/tmv/fixed_tm.h b/tmv/fixed_tm.h
new file mode 100644
index 0000000..c8fa968
--- /dev/null
+++ b/tmv/fixed_tm.h
@@ -0,0 +1,100 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file fixed_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_TM_H
+#define FIXED_TM_H
+
+#include <ops/custom_defs.h>
+
+
+#undef SATURATE
+#undef SATURATE16
+#undef SATURATE32
+#define SATURATE(x,a) iclipi(x,a)
+#define SATURATE16(x,a) iclipi(x,a)
+#define SATURATE32(x,a) iclipi(x,a)
+
+#undef EXTEND32
+#define EXTEND32(x) sex16(x)
+
+#undef NEG16
+#undef NEG32
+#define NEG16(x) ineg((int)(x))
+#define NEG32(x) ineg(x)
+
+#undef ABS
+#undef ABS16
+#undef ABS32
+#define ABS(x) iabs(x)
+#define ABS32(x) iabs(x)
+#define ABS16(x) iabs((int)(x))
+
+#undef MIN16
+#undef MIN32
+#define MIN16(a,b) imin((int)(a),(int)(b))
+#define MIN32(a,b) imin(a,b)
+
+#undef MAX16
+#undef MAX32
+#define MAX16(a,b) imax((int)(a),(int)(b))
+#define MAX32(a,b) imax(a,b)
+
+#undef ADD16
+#undef SUB16
+#undef ADD32
+#undef SUB32
+#undef MULT16_16
+#undef MULT16_16_16
+
+#define ADD16(a,b) ((int)(a) + (int)(b))
+#define SUB16(a,b) ((int)(a) - (int)(b))
+#define ADD32(a,b) ((int)(a) + (int)(b))
+#define SUB32(a,b) ((int)(a) - (int)(b))
+#define MULT16_16_16(a,b) ((int)(a) * (int)(b))
+#define MULT16_16(a,b) ((int)(a) * (int)(b))
+
+#if TM_DEBUGMEM_ALIGNNMENT
+#include <stdio.h>
+#define TMDEBUG_ALIGNMEM(x) \
+ { if( ((int)(x) & (int)(0x00000003)) != 0 ) \
+ { printf("memory not align. file: %s, line: %d\n", __FILE__, __LINE__); \
+ } \
+ }
+
+#else
+#define TMDEBUG_ALIGNMEM(x)
+#endif
+
+#endif
+
diff --git a/tmv/kiss_fft_tm.h b/tmv/kiss_fft_tm.h
new file mode 100644
index 0000000..dce2072
--- /dev/null
+++ b/tmv/kiss_fft_tm.h
@@ -0,0 +1,599 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file kiss_fft_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "_kiss_fft_guts_tm.h"
+
+#ifdef TM_ASM
+
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
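+/* In the fixed-point butterflies below, each kiss_fft_cpx is handled as one
+   32-bit word holding the packed (imag, real) halfword pair, and the TM_MUL,
+   TM_ADD, TM_SUB and TM_SHR macros from _kiss_fft_guts_tm.h perform the
+   complex arithmetic directly on these packed values. */
+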
+#define OVERRIDE_KFBFLY2
+static void kf_bfly2(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict Fout2;
+ register int * restrict tw1 = (int*)st->twiddles;
+ register int i, j;
+ register int _inv = !st->inverse;
+
+ Fout2 = (int*)Fout + m;
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride )
+ { register int tw_10, ff_10, f2_10;
+
+ ff_10 = ld32x(Fout, i);
+ f2_10 = ld32x(Fout2, i);
+ tw_10 = ld32(tw1);
+
+ if ( _inv )
+ { TM_SHR(f2_10, f2_10, 1);
+ TM_SHR(ff_10, ff_10, 1);
+ }
+
+ TM_MUL(tw_10, tw_10, f2_10);
+ TM_SUB(f2_10, ff_10, tw_10);
+ TM_ADD(ff_10, ff_10, tw_10);
+
+ st32d(j, Fout2, f2_10);
+ st32d(j, Fout, ff_10);
+ }
+}
+
+#define OVERRIDE_KFBFLY4
+static void kf_bfly4(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ const int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict tw3;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int * restrict Fout3;
+ register int i, j;
+ register int fstride2, fstride3;
+ register int _inv = !st->inverse;
+
+ tw3 = tw2 = tw1 = (int*)st->twiddles;
+ fstride2 = fstride << 1;
+ fstride3 = fstride * 3;
+
+ Fout1 = (int*)Fout + m;
+ Fout2 = (int*)Fout + (m << 1);
+ Fout3 = (int*)Fout + (m * 3);
+
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2,tw3+=fstride3 )
+ { register int sc0, sc1, sc2, sc3, sc4, sc5;
+ register int ff0;
+
+ sc0 = ld32x(Fout1,i);
+ sc3 = ld32(tw1);
+ sc1 = ld32x(Fout2, i);
+ sc4 = ld32(tw2);
+ sc2 = ld32x(Fout3, i);
+ sc5 = ld32(tw3);
+ ff0 = ld32x(Fout,i);
+
+ if ( _inv )
+ {
+ TM_ADD(sc0, sc0, 0x00020002);
+ TM_ADD(sc1, sc1, 0x00020002);
+ TM_ADD(sc2, sc2, 0x00020002);
+ TM_ADD(ff0, ff0, 0x00020002);
+ TM_SHR(sc0, sc0, 2);
+ TM_SHR(sc1, sc1, 2);
+ TM_SHR(sc2, sc2, 2);
+ TM_SHR(ff0, ff0, 2);
+ }
+
+ TM_MUL(sc0, sc0, sc3);
+ TM_MUL(sc1, sc1, sc4);
+ TM_MUL(sc2, sc2, sc5);
+ TM_SUB(sc5, ff0, sc1);
+ TM_ADD(ff0, ff0, sc1);
+ TM_ADD(sc3, sc0, sc2);
+ TM_SUB(sc4, sc0, sc2);
+ TM_SUB(sc1, ff0, sc3);
+ TM_ADD(ff0, ff0, sc3);
+
+ st32d(j, Fout2, sc1);
+ st32d(j, Fout, ff0);
+
+ sc5 = funshift2(sc5, sc5);
+
+ if ( _inv )
+ { TM_ADD(ff0, sc5, sc4);
+ TM_SUB(sc1, sc5, sc4);
+ } else
+ { TM_ADD(sc1, sc5, sc4);
+ TM_SUB(ff0, sc5, sc4);
+ }
+
+ sc0 = funshift2(sc1, ff0);
+ sc2 = funshift2(ff0, sc1);
+
+ st32d(j, Fout1, sc0);
+ st32d(j, Fout3, sc2);
+ }
+}
+
+
+#define OVERRIDE_KFBFLY3
+static void kf_bfly3(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int epi;
+ register int i, j;
+ register int fstride2;
+ register int _inv = !st->inverse;
+
+ tw1 = tw2 = (int*)st->twiddles;
+ Fout1 = (int*)Fout + m;
+ Fout2 = (int*)Fout + (m << 1);
+ epi = tw1[fstride*m];
+ epi = pack16lsb(epi,epi);
+ fstride2 = fstride << 1;
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2 )
+ { register int sc0, sc1, sc2, sc3, sc4, sc5;
+ register int ff0;
+
+ sc1 = ld32x(Fout1,i);
+ sc2 = ld32x(Fout2,i);
+ sc3 = ld32(tw1);
+ sc4 = ld32(tw2);
+ ff0 = ld32x(Fout,i);
+
+ if ( _inv )
+ {
+ TM_DIV(sc1, sc1, 3);
+ TM_DIV(sc2, sc2, 3);
+ TM_DIV(ff0, ff0, 3);
+ }
+
+ TM_MUL(sc1, sc1, sc3);
+ TM_MUL(sc2, sc2, sc4);
+ TM_ADD(sc3, sc1, sc2);
+ TM_SUB(sc0, sc1, sc2);
+ TM_SHR(sc4, sc3, 1);
+ TM_SUB(sc1, ff0, sc4);
+
+ sc0 = dspidualmul(sc0, epi);
+ sc0 = funshift2(sc0, sc0);
+
+ TM_ADD(ff0, ff0, sc3);
+ TM_ADD(sc4, sc1, sc0);
+ TM_SUB(sc5, sc1, sc0);
+
+ sc1 = funshift2(sc4, sc5);
+ sc2 = funshift2(sc5, sc4);
+ sc2 = funshift2(sc2, sc2);
+
+ st32d(j, Fout1, sc1);
+ st32d(j, Fout, ff0);
+ st32d(j, Fout2, sc2);
+ }
+}
+
+
+#define OVERRIDE_KFBFLY5
+static void kf_bfly5(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict tw3;
+ register int * restrict tw4;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int * restrict Fout3;
+ register int * restrict Fout4;
+ register int fstride2, fstride3, fstride4;
+ register int i, j;
+ register int yab_msb, yab_lsb, yba_msb, yba_lsb;
+ register int _inv = !st->inverse;
+
+
+ Fout1=(int*)Fout+m;
+ Fout2=(int*)Fout+(m<<1);
+ Fout3=(int*)Fout+(3 *m);
+ Fout4=(int*)Fout+(m<<2);
+
+ tw1 = tw2 = tw3 = tw4 = (int*)st->twiddles;
+
+ i = tw1[fstride*m];
+ yab_lsb = tw1[fstride*(m<<1)];
+ yab_msb = pack16msb(i, yab_lsb);
+ yab_lsb = pack16lsb(i, yab_lsb);
+ yba_msb = funshift2(-sex16(yab_msb), yab_msb);
+ yba_lsb = funshift2(yab_lsb, yab_lsb);
+
+ fstride2 = fstride << 1;
+ fstride3 = fstride * 3;
+ fstride4 = fstride << 2;
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2,tw3+=fstride3,tw4+=fstride4 )
+ { register int sc0, sc1, sc2, sc3, sc4, sc5, sc6;
+ register int sc7, sc8, sc9, sc10, sc11, sc12;
+ register int ff0, sc78_msb, sc78_lsb, sc90_msb, sc90_lsb;
+
+ sc0 = ld32x(Fout,i);
+ sc1 = ld32x(Fout1,i);
+ sc2 = ld32x(Fout2,i);
+ sc3 = ld32x(Fout3,i);
+ sc4 = ld32x(Fout4,i);
+ sc5 = ld32(tw1);
+ sc6 = ld32(tw2);
+ sc7 = ld32(tw3);
+ sc8 = ld32(tw4);
+
+ if ( _inv )
+ {
+ TM_DIV(sc0, sc0, 5);
+ TM_DIV(sc1, sc1, 5);
+ TM_DIV(sc2, sc2, 5);
+ TM_DIV(sc3, sc3, 5);
+ TM_DIV(sc4, sc4, 5);
+ }
+
+ ff0 = sc0;
+
+ TM_MUL(sc1, sc1, sc5);
+ TM_MUL(sc2, sc2, sc6);
+ TM_MUL(sc3, sc3, sc7);
+ TM_MUL(sc4, sc4, sc8);
+ TM_ADD(sc7, sc1, sc4);
+ TM_SUB(sc10,sc1, sc4);
+ TM_ADD(sc8, sc2, sc3);
+ TM_SUB(sc9, sc2, sc3);
+
+ TM_ADD(ff0, ff0, sc7);
+ TM_ADD(ff0, ff0, sc8);
+ st32d(j, Fout, ff0);
+
+ sc78_msb = pack16msb(sc7,sc8);
+ sc78_lsb = pack16lsb(sc7,sc8);
+ sc90_msb = pack16msb(sc10,sc9);
+ sc90_lsb = pack16lsb(sc10,sc9);
+
+ sc5 = pack16lsb( sround(ifir16(sc78_msb,yab_lsb)), sround(ifir16(sc78_lsb,yab_lsb)));
+ sc6 = pack16lsb(-sround(ifir16(sc90_lsb,yab_msb)), sround(ifir16(sc90_msb,yab_msb)));
+
+ TM_ADD(sc5, sc5, sc0);
+ TM_SUB(sc1, sc5, sc6);
+ TM_ADD(sc4, sc5, sc6);
+ st32d(j, Fout1, sc1);
+ st32d(j, Fout4, sc4);
+
+ sc11 = pack16lsb( sround(ifir16(sc78_msb,yba_lsb)), sround(ifir16(sc78_lsb,yba_lsb)));
+ sc12 = pack16lsb(-sround(ifir16(sc90_lsb,yba_msb)), sround(ifir16(sc90_msb,yba_msb)));
+
+ TM_ADD(sc11, sc11, sc0);
+ TM_ADD(sc2, sc11, sc12);
+ TM_SUB(sc3, sc11, sc12);
+ st32d(j, Fout2, sc2);
+ st32d(j, Fout3, sc3);
+
+ }
+}
+
+
+#define OVERRIDE_KF_BFLY_GENERIC
+static void kf_bfly_generic(
+ kiss_fft_cpx * restrict Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
+{
+ register int _inv = !st->inverse;
+ register int i, j, k, l;
+ register int * restrict twiddles = (int*)st->twiddles;
+ register int Norig = st->nfft;
+
+ CHECKBUF(scratchbuf,nscratchbuf,p);
+
+ for ( i=0; i<m; ++i )
+ { register int sc10;
+
+ for ( j=0,k=i ; j<p ; ++j,k+=m )
+ { register int f10;
+
+ f10 = ld32x(Fout,k);
+
+ if ( _inv )
+ { TM_DIV(f10, f10, p);
+ }
+
+ st32d(j<<2, scratchbuf, f10);
+ }
+
+ for ( j=0,k=i,sc10=ld32(scratchbuf) ; j<p ; ++j,k+=m )
+ {
+ register int twidx = 0;
+ register int f10;
+
+ for ( l=1,f10 = sc10 ; l<p ; ++l )
+ { register int tw, sc;
+
+ twidx += fstride * k;
+ if ( twidx>=Norig )
+ { twidx -= Norig;
+ }
+
+ sc = ld32x(scratchbuf,l);
+ tw = ld32x(twiddles,twidx);
+
+ TM_MUL(sc, sc, tw);
+ TM_ADD(f10, f10, sc);
+ }
+ st32d(k<<2, Fout, f10);
+ }
+ }
+}
+
+#else
+
+#define OVERRIDE_KFBFLY2
+static void kf_bfly2(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register kiss_fft_cpx * restrict Fout2;
+ register kiss_fft_cpx * restrict tw1 = st->twiddles;
+
+ Fout2 = Fout + m;
+
+ do
+ {
+ register kiss_fft_cpx _fout2, _fout, t;
+
+ _fout2 = *Fout2;
+ _fout = *Fout;
+
+ C_MUL ( t, _fout2, *tw1);
+ C_SUB (_fout2, _fout, t);
+ C_ADD (_fout, _fout, t);
+
+ *Fout2 = _fout2;
+ *Fout = _fout;
+
+ tw1 += fstride;
+ ++Fout2;
+ ++Fout;
+
+ } while ( --m );
+}
+
+#define OVERRIDE_KFBFLY4
+static void kf_bfly4(
+ kiss_fft_cpx * Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register kiss_fft_cpx * restrict tw1,* restrict tw2,* restrict tw3;
+ register kiss_fft_cpx * restrict Fout1, * restrict Fout2, * restrict Fout3;
+ register int _inv = !st->inverse;
+
+ tw3 = tw2 = tw1 = st->twiddles;
+
+ Fout1 = Fout + m;
+ Fout2 = Fout + (m << 1);
+ Fout3 = Fout + (m * 3);
+
+ do {
+
+ register kiss_fft_cpx _fout;
+ register kiss_fft_cpx sc0, sc1, sc2, sc3, sc4, sc5;
+
+ _fout = *Fout;
+
+ C_MUL( sc0,*Fout1, *tw1);
+ C_MUL( sc1,*Fout2, *tw2);
+ C_MUL( sc2,*Fout3, *tw3);
+ C_SUB( sc5, _fout, sc1);
+ C_ADD( _fout, _fout, sc1);
+ C_ADD( sc3, sc0, sc2);
+ C_SUB( sc4, sc0, sc2);
+ C_SUB(*Fout2, _fout, sc3);
+ C_ADD( *Fout, _fout, sc3);
+
+ tw1 += fstride;
+ tw2 += (fstride << 1);
+ tw3 += (fstride * 3);
+
+ if ( _inv )
+ {
+ Fout1->r = sc5.r + sc4.i;
+ Fout1->i = sc5.i - sc4.r;
+ Fout3->r = sc5.r - sc4.i;
+ Fout3->i = sc5.i + sc4.r;
+ }
+ else
+ { Fout1->r = sc5.r - sc4.i;
+ Fout1->i = sc5.i + sc4.r;
+ Fout3->r = sc5.r + sc4.i;
+ Fout3->i = sc5.i - sc4.r;
+ }
+
+
+ ++Fout; ++Fout1; ++Fout2; ++Fout3;
+
+ } while(--m);
+}
+
+#define OVERRIDE_KFBFLY3
+static void kf_bfly3(
+ kiss_fft_cpx * Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register kiss_fft_cpx * restrict Fout1, * restrict Fout2;
+ register kiss_fft_cpx * restrict tw1,* restrict tw2;
+ register float epi;
+
+ tw1 = tw2 = st->twiddles;
+ epi = st->twiddles[fstride*m].i;
+ Fout1 = Fout + m;
+ Fout2 = Fout + (m << 1);
+
+ do {
+
+ register kiss_fft_cpx _fout;
+ register kiss_fft_cpx sc0, sc1, sc2, sc3;
+
+ _fout = *Fout;
+
+ C_MUL( sc1, *Fout1, *tw1);
+ C_MUL( sc2, *Fout2, *tw2);
+ C_ADD( sc3, sc1, sc2);
+ C_SUB( sc0, sc1, sc2);
+ tw1 += fstride;
+ tw2 += (fstride << 1);
+
+ sc1.r = _fout.r - HALF_OF(sc3.r);
+ sc1.i = _fout.i - HALF_OF(sc3.i);
+
+ C_MULBYSCALAR(sc0, epi);
+ C_ADD(*Fout, _fout, sc3);
+
+ Fout2->r = sc1.r + sc0.i;
+ Fout2->i = sc1.i - sc0.r;
+
+		Fout1->r = sc1.r - sc0.i;
+		Fout1->i = sc1.i + sc0.r;
+
+ ++Fout; ++Fout1; ++Fout2;
+
+ } while(--m);
+}
+
+#define OVERRIDE_KFBFLY5
+static void kf_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register kiss_fft_cpx * restrict Fout1,* restrict Fout2,* restrict Fout3,* restrict Fout4;
+ register int u;
+ register kiss_fft_cpx *tw;
+ register float yar, yai, ybr, ybi;
+
+ Fout1=Fout+m;
+ Fout2=Fout+(m<<1);
+ Fout3=Fout+(m*3);
+ Fout4=Fout+(m<<2);
+
+ tw = st->twiddles;
+ yar = tw[fstride*m].r;
+ yai = tw[fstride*m].i;
+ ybr = tw[fstride*2*m].r;
+ ybi = tw[fstride*2*m].i;
+
+ for ( u=0; u<m; ++u )
+ {
+ register kiss_fft_cpx sc0, sc1, sc2, sc3, sc4, sc5, sc6, sc7, sc8, sc9, sc10, sc11, sc12;
+
+ sc0 = *Fout;
+
+ C_MUL( sc1,*Fout1, tw[u*fstride]);
+ C_MUL( sc2,*Fout2, tw[2*u*fstride]);
+ C_MUL( sc3,*Fout3, tw[3*u*fstride]);
+ C_MUL( sc4,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( sc7, sc1, sc4);
+ C_SUB( sc10, sc1, sc4);
+ C_ADD( sc8, sc2, sc3);
+ C_SUB( sc9, sc2, sc3);
+
+ Fout->r = sc0.r + sc7.r + sc8.r;
+ Fout->i = sc0.i + sc7.i + sc8.i;
+
+ sc5.r = sc0.r + S_MUL(sc7.r,yar) + S_MUL(sc8.r,ybr);
+ sc5.i = sc0.i + S_MUL(sc7.i,yar) + S_MUL(sc8.i,ybr);
+
+ sc6.r = S_MUL(sc10.i,yai) + S_MUL(sc9.i,ybi);
+ sc6.i = -S_MUL(sc10.r,yai) - S_MUL(sc9.r,ybi);
+
+ C_SUB(*Fout1,sc5,sc6);
+ C_ADD(*Fout4,sc5,sc6);
+
+ sc11.r = sc0.r + S_MUL(sc7.r,ybr) + S_MUL(sc8.r,yar);
+ sc11.i = sc0.i + S_MUL(sc7.i,ybr) + S_MUL(sc8.i,yar);
+ sc12.r = - S_MUL(sc10.i,ybi) + S_MUL(sc9.i,yai);
+ sc12.i = S_MUL(sc10.r,ybi) - S_MUL(sc9.r,yai);
+ C_ADD(*Fout2,sc11,sc12);
+ C_SUB(*Fout3,sc11,sc12);
+
+ ++Fout1; ++Fout2; ++Fout3; ++Fout4;
+ }
+}
+
+
+#endif
+
+#endif
diff --git a/tmv/kiss_fftr_tm.h b/tmv/kiss_fftr_tm.h
new file mode 100644
index 0000000..5b2b2cf
--- /dev/null
+++ b/tmv/kiss_fftr_tm.h
@@ -0,0 +1,235 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file kiss_fftr_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "_kiss_fft_guts_tm.h"
+
+#ifdef TM_ASM
+
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define TM_NDIV(res,c,frac) \
+ { register int c1, c0; \
+ \
+ c1 = -asri(16,(c)); \
+ c0 = sex16((c)); \
+ (res) = pack16lsb(sround(c1 * (32767/(frac))), sround(c0 * (32767/(frac))));\
+ }
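+/* TM_NDIV conjugates the packed complex value c (negating the imaginary high
+   halfword) while scaling both halves by 1/frac, mirroring the float code's
+   fpnk.r = ...r, fpnk.i = -...i followed by HALF_OF. */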
+
+
+#define OVERRIDE_KISS_FFTR
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar * restrict timedata, kiss_fft_cpx * restrict freqdata)
+{
+ register int ncfft, ncfft2, k;
+ register int * restrict tmpbuf;
+ register int * restrict twiddles;
+
+ ncfft = st->substate->nfft;
+ ncfft2 = ncfft >> 1;
+ tmpbuf = (int*)st->tmpbuf;
+ twiddles = (int*)st->super_twiddles;
+
+ TMDEBUG_ALIGNMEM(timedata);
+ TMDEBUG_ALIGNMEM(freqdata);
+ TMDEBUG_ALIGNMEM(tmpbuf);
+ TMDEBUG_ALIGNMEM(twiddles);
+
+ kiss_fft(st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf);
+
+ {
+ register int tdcr, tdci;
+ tdcr = sround(st->tmpbuf[0].r * (32767/2));
+ tdci = sround(st->tmpbuf[0].i * (32767/2));
+
+ freqdata[0].r = tdcr + tdci;
+ freqdata[ncfft].r = tdcr - tdci;
+ freqdata[ncfft].i = freqdata[0].i = 0;
+ }
+
+ for ( k=1 ; k <= ncfft2 ; ++k )
+ {
+ register int fpk, fpnk, i, tw, f1k, f2k;
+ register int fq1, fq2;
+
+ i = ncfft-k;
+
+ fpk = ld32x(tmpbuf,k);
+ tw = ld32x(twiddles,k);
+ fpnk = ld32x(tmpbuf,i);
+
+ TM_DIV(fpk, fpk, 2);
+ TM_NDIV(fpnk,fpnk,2);
+
+ TM_ADD( f1k, fpk , fpnk );
+ TM_SUB( f2k, fpk , fpnk );
+ TM_MUL( tw , f2k, tw );
+ TM_ADD( fq1, f1k, tw );
+ TM_SHR( fq1, fq1, 1 );
+ TM_SUB( fq2, f1k, tw );
+ TM_NEGMSB( fq2, fq2 );
+ TM_SHR( fq2, fq2, 1 );
+
+
+ st32d( k<<2, freqdata, fq1 );
+ st32d( i<<2, freqdata, fq2 );
+ }
+}
+
+#define OVERRIDE_KISS_FFTRI
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx * restrict freqdata,kiss_fft_scalar * restrict timedata)
+{
+ register int k, ncfft, ncfft2;
+ register int * restrict tmpbuf;
+ register int * restrict twiddles;
+
+ ncfft = st->substate->nfft;
+ ncfft2 = ncfft >> 1;
+ tmpbuf = (int*)st->tmpbuf;
+ twiddles = (int*)st->super_twiddles;
+
+ TMDEBUG_ALIGNMEM(freqdata);
+ TMDEBUG_ALIGNMEM(timedata);
+ TMDEBUG_ALIGNMEM(tmpbuf);
+ TMDEBUG_ALIGNMEM(twiddles);
+
+ {
+ register int fqr, fqnr;
+
+ fqr = freqdata[0].r;
+ fqnr = freqdata[ncfft].r;
+
+ st->tmpbuf[0].r = fqr + fqnr;
+ st->tmpbuf[0].i = fqr - fqnr;
+ }
+
+ for ( k=1 ; k <= ncfft2 ; ++k )
+ {
+ register int fk, fnkc, i, tw, fek, fok, tmp;
+ register int tbk, tbn;
+
+ i = ncfft-k;
+
+ fk = ld32x(freqdata,k);
+ tw = ld32x(twiddles,k);
+ fnkc = pack16lsb(-freqdata[i].i, freqdata[i].r);
+
+ TM_ADD (fek, fk, fnkc);
+ TM_SUB (tmp, fk, fnkc);
+ TM_MUL (fok, tmp, tw );
+ TM_ADD (tbk, fek, fok);
+ TM_SUB (tbn, fek, fok);
+ TM_NEGMSB(tbn, tbn);
+
+ st32d(k<<2, tmpbuf, tbk);
+ st32d(i<<2, tmpbuf, tbn);
+ }
+ kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
+
+#else
+
+#define OVERRIDE_KISS_FFTR
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar * restrict timedata,kiss_fft_cpx * restrict freqdata)
+{
+ register kiss_fft_cpx fpnk, fpk, f1k, f2k, twk;
+ register int k, ncfft;
+ register kiss_fft_cpx * restrict tmpbuf, * restrict tw;
+ register float tdcr, tdci;
+
+ ncfft = st->substate->nfft;
+ tmpbuf= st->tmpbuf;
+ tw = st->super_twiddles;
+
+ kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, tmpbuf );
+
+ tdcr = tmpbuf[0].r;
+ tdci = tmpbuf[0].i;
+
+ freqdata[0].r = tdcr + tdci;
+ freqdata[ncfft].r = tdcr - tdci;
+ freqdata[ncfft].i = freqdata[0].i = 0;
+
+ for ( k=1;k <= ncfft/2 ; ++k )
+ {
+ fpk = tmpbuf[k];
+ fpnk.r = tmpbuf[ncfft-k].r;
+ fpnk.i = -tmpbuf[ncfft-k].i;
+
+ C_ADD( f1k, fpk , fpnk );
+ C_SUB( f2k, fpk , fpnk );
+ C_MUL( twk, f2k , tw[k]);
+
+ freqdata[k].r = HALF_OF(f1k.r + twk.r);
+ freqdata[k].i = HALF_OF(f1k.i + twk.i);
+ freqdata[ncfft-k].r = HALF_OF(f1k.r - twk.r);
+ freqdata[ncfft-k].i = HALF_OF(twk.i - f1k.i);
+ }
+}
+
+#define OVERRIDE_KISS_FFTRI
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx * restrict freqdata,kiss_fft_scalar * restrict timedata)
+{
+ register int k, ncfft;
+ register kiss_fft_cpx * restrict tmpbuf, * restrict tw;
+
+
+ ncfft = st->substate->nfft;
+ tmpbuf= st->tmpbuf;
+ tw = st->super_twiddles;
+
+ tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+ tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+
+ for (k = 1; k <= ncfft / 2; ++k)
+ {
+ register kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+ fk = freqdata[k];
+ fnkc.r = freqdata[ncfft - k].r;
+ fnkc.i = -freqdata[ncfft - k].i;
+
+ C_ADD (fek, fk, fnkc);
+ C_SUB (tmp, fk, fnkc);
+ C_MUL (fok,tmp,tw[k]);
+ C_ADD (tmpbuf[k],fek, fok);
+ C_SUB (tmp, fek, fok);
+ tmpbuf[ncfft - k].r = tmp.r;
+ tmpbuf[ncfft - k].i = -tmp.i;
+ }
+ kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
+
+#endif
+#endif
+
diff --git a/tmv/lpc_tm.h b/tmv/lpc_tm.h
new file mode 100644
index 0000000..f5e9bcd
--- /dev/null
+++ b/tmv/lpc_tm.h
@@ -0,0 +1,150 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file lpc_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_SPEEX_AUTOCORR
+void _spx_autocorr(const Int16 *x, Int16 *ac, int lag, int n)
+{
+ register int i, j;
+ register int shift, ac_shift;
+ register int n_2;
+ register int ac0;
+
+ TMDEBUG_ALIGNMEM(x);
+ TMDEBUG_ALIGNMEM(ac);
+
+ _SPX_AUTOCORR_START();
+
+ n_2 = n >> 1;
+ ac0 = n + 1;
+
+#if (TM_UNROLL && TM_UNROLL__SPXAUTOCORR)
+#pragma TCS_unroll=5
+#pragma TCS_unrollexact=1
+#endif
+ for ( j=0 ; j<n_2 ; j+=4 )
+ { register int x10, x32, x54, x76;
+
+ x10 = ld32x(x,j);
+ x32 = ld32x(x,j+1);
+ x54 = ld32x(x,j+2);
+ x76 = ld32x(x,j+3);
+
+ ac0 += ifir16(x10, x10) >> 8;
+ ac0 += ifir16(x32, x32) >> 8;
+ ac0 += ifir16(x54, x54) >> 8;
+ ac0 += ifir16(x76, x76) >> 8;
+ }
+#if (TM_UNROLL && TM_UNROLL__SPXAUTOCORR)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ shift = 8;
+ while (shift && ac0<0x40000000)
+ { shift--;
+ ac0 <<= 1;
+ }
+
+ ac_shift = 18;
+ while (ac_shift && ac0<0x40000000)
+ { ac_shift--;
+ ac0 <<= 1;
+ }
+
+ if ( shift == 0 )
+ {
+ for ( i=0 ; i<lag ; ++i )
+ {
+ register int acc0, acc1, acc2;
+ register int k, l, m;
+ register int x10, x32, y10, y32;
+
+ acc2 = acc1 = acc0 = 0;
+
+ for ( j=i ; j<16 ; ++j )
+ { acc0 += (x[j] * x[j-i]);
+ }
+
+ for ( k=16,l=8,m=16-i ; k<n ; k+=4,l+=2,m+=4 )
+ {
+ x10 = ld32x(x,l);
+ y10 = pack16lsb(x[m+1],x[m]);
+ x32 = ld32x(x,l+1);
+ y32 = pack16lsb(x[m+3],x[m+2]);
+
+ acc1 += ifir16(x10,y10);
+ acc2 += ifir16(x32,y32);
+ }
+
+ ac[i] = (acc0 + acc1 + acc2) >> ac_shift;
+ }
+ } else
+ {
+ for ( i=0 ; i<lag ; ++i )
+ {
+ register int acc0, acc1, acc2;
+ register int k, l, m;
+ register int x10, x32, y10, y32;
+
+ acc2 = acc1 = acc0 = 0;
+
+ for ( j=i ; j<16 ; ++j )
+ { acc0 += (x[j] * x[j-i]) >> shift;
+ }
+
+ for ( k=16,l=8,m=16-i ; k<n ; k+=4,l+=2,m+=4 )
+ {
+ x10 = ld32x(x,l);
+ y10 = pack16lsb(x[m+1],x[m]);
+ x32 = ld32x(x,l+1);
+ y32 = pack16lsb(x[m+3],x[m+2]);
+
+ acc1 += ifir16(x10,y10) >> shift;
+ acc2 += ifir16(x32,y32) >> shift;
+ }
+
+ ac[i] = (acc0 + acc1 + acc2) >> ac_shift;
+ }
+ }
+
+ _SPX_AUTOCORR_STOP();
+}
+
+#endif
diff --git a/tmv/lsp_tm.h b/tmv/lsp_tm.h
new file mode 100644
index 0000000..acb0d91
--- /dev/null
+++ b/tmv/lsp_tm.h
@@ -0,0 +1,310 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file lsp_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_LSP_INTERPOLATE
+void lsp_interpolate(Int16 *old_lsp, Int16 *new_lsp, Int16 *interp_lsp, int len, int subframe, int nb_subframes)
+{
+ register int tmp = DIV32_16(SHL32(EXTEND32(1 + subframe),14),nb_subframes);
+ register int tmp2 = 16384-tmp;
+ register int in_0, in_1, factor, out_1, out_2, olsp, nlsp, ilsp;
+ register int i;
+
+ TMDEBUG_ALIGNMEM(old_lsp);
+ TMDEBUG_ALIGNMEM(new_lsp);
+ TMDEBUG_ALIGNMEM(interp_lsp);
+
+ LSPINTERPOLATE_START();
+
+ factor = pack16lsb(tmp,tmp2);
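+	/* factor packs (tmp, 16384-tmp) so that one ifir16 per LSP computes
+	   tmp*new_lsp[i] + (16384-tmp)*old_lsp[i] in a single step */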
+
+ len >>= 1;
+ for ( i=0 ; i<len ; ++i )
+ {
+ olsp = ld32x(old_lsp,i); // olsp[i+1],olsp[i]
+ nlsp = ld32x(new_lsp,i); // nlsp[i+1],nlsp[i]
+
+ in_0 = pack16lsb(nlsp,olsp);
+ in_1 = pack16msb(nlsp,olsp);
+
+ out_1 = 8192 + ifir16(in_0,factor);
+ out_2 = 8192 + ifir16(in_1,factor);
+
+ ilsp = pack16lsb(out_2 >> 14, out_1 >> 14);
+ st32d(i << 2, interp_lsp, ilsp);
+
+ }
+
+ LSPINTERPOLATE_STOP();
+}
+
+
+#define OVERRIDE_CHEB_POLY_EVA
+static inline Int32 cheb_poly_eva(Int16 *coef, Int16 x, int m, char *stack)
+{
+ register int c10, c32, c54;
+ register int sum, b0, f0, f1, f2, f3;
+ register int xx, f32, f10;
+
+ CHEBPOLYEVA_START();
+
+ xx = sex16(x);
+ b0 = iclipi(xx,16383);
+
+#if 0
+ c10 = ld32(coef);
+ c32 = ld32x(coef,1);
+ c54 = ld32x(coef,2);
+#else
+ c10 = pack16lsb(coef[1],coef[0]);
+ c32 = pack16lsb(coef[3],coef[2]);
+ c54 = pack16lsb(coef[5],coef[4]);
+#endif
+
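+	/* Chebyshev recurrence T_{n+1}(x) = 2x*T_n(x) - T_{n-1}(x) in Q14:
+	   b0 ~ T_1(x), f0 = T_2(x), f1 = T_3(x), f2 = T_4(x) (and f3 = T_5(x)
+	   in the m != 4 branch) */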
+ f0 = ((xx * b0) >> 13) - 16384;
+ f1 = ((xx * f0) >> 13) - b0;
+ f2 = ((xx * f1) >> 13) - f0;
+
+ if ( m == 4 )
+ { sum = sex16(c54);
+ f32 = pack16lsb(xx,f0);
+ f10 = pack16lsb(f1,f2);
+
+ } else
+ { sum = asri(16,c54);
+ sum += ((sex16(c54) * xx) + 8192) >> 14;
+
+ f3 = ((xx * f2) >> 13) - f1;
+ f32 = pack16lsb(f0,f1);
+ f10 = pack16lsb(f2,f3);
+ }
+
+ sum += (ifir16(c32,f32) + 8192) >> 14;
+ sum += (ifir16(c10,f10) + 8192) >> 14;
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ CHEBPOLYEVA_STOP();
+ return sum;
+}
+
+
+#define OVERRIDE_LSP_ENFORCE_MARGIN
+void lsp_enforce_margin(Int16 *lsp, int len, Int16 margin)
+{
+ register int i;
+ register int m = margin;
+ register int m2 = 25736-margin;
+ register int lsp0, lsp1, lsp2;
+
+ TMDEBUG_ALIGNMEM(lsp);
+
+ LSPENFORCEMARGIN_START();
+
+ lsp0 = ld32(lsp);
+ lsp1 = asri(16,lsp0);
+ lsp0 = sex16(lsp0);
+ lsp2 = lsp[len-1];
+
+ if ( lsp0 < m )
+ { lsp0 = m;
+ lsp[0] = m;
+ }
+
+ if ( lsp2 > m2 )
+ { lsp2 = m2;
+ lsp[len-1] = m2;
+ }
+
+ for ( i=1 ; i<len-1 ; ++i )
+ {
+ lsp0 += m;
+ lsp2 = lsp[i+1];
+ m2 = lsp2 - m;
+
+ if ( lsp1 < lsp0 )
+ { lsp1 = lsp0;
+ lsp[i] = lsp0;
+ }
+
+ if ( lsp1 > m2 )
+ { lsp1 = (lsp1 >> 1) + (m2 >> 1);
+ lsp[i] = lsp1;
+ }
+
+ lsp0 = lsp1;
+ lsp1 = lsp2;
+ }
+
+ LSPENFORCEMARGIN_STOP();
+}
+
+
+#define OVERRIDE_LSP_TO_LPC
+void lsp_to_lpc(Int16 *freq, Int16 *ak,int lpcrdr, char *stack)
+{
+ VARDECL(Int16 *freqn);
+ VARDECL(int **xp);
+ VARDECL(int *xpmem);
+ VARDECL(int **xq);
+ VARDECL(int *xqmem);
+
+ register int i, j, k;
+ register int xout1,xout2,xin;
+ register int m;
+
+ LSPTOLPC_START();
+
+ m = lpcrdr>>1;
+
+ /*
+
+ Reconstruct P(z) and Q(z) by cascading second order polynomials
+	of the form 1 - 2cos(w)z^(-1) + z^(-2), where w is the LSP frequency.
+ In the time domain this is:
+
+ y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)
+
+ This is what the ALLOCS below are trying to do:
+
+ int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP
+ int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP
+
+ These matrices store the output of each stage on each row. The
+ final (m-th) row has the output of the final (m-th) cascaded
+ 2nd order filter. The first row is the impulse input to the
+ system (not written as it is known).
+
+ The version below takes advantage of the fact that a lot of the
+	outputs are zero or known, for example if we put an impulse
+	into the first section and then "clock" it 10 times, only the first 3
+	output samples are non-zero (it's an FIR filter).
+ */
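+ /* The direct-form coefficients are then recovered from A(z) = (P(z)+Q(z))/2,
+    with the 1/2 folded into the 0.5 impulse (xin); the final loop applies the
+    (1 + z^-1) factor to the P outputs and (1 - z^-1) to the Q outputs as it
+    combines them. */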
+
+ ALLOC(xp, (m+1), int*);
+ ALLOC(xpmem, (m+1)*(lpcrdr+1+2), int);
+
+ ALLOC(xq, (m+1), int*);
+ ALLOC(xqmem, (m+1)*(lpcrdr+1+2), int);
+
+ for ( i=0; i<=m; i++ )
+ { xp[i] = xpmem + i*(lpcrdr+1+2);
+ xq[i] = xqmem + i*(lpcrdr+1+2);
+ }
+
+ /* work out 2cos terms in Q14 */
+
+ ALLOC(freqn, lpcrdr, Int16);
+ for ( j=0,k=0 ; j<m ; ++j,k+=2 )
+ { register int f;
+
+ f = ld32x(freq,j);
+
+ freqn[k] = ANGLE2X(sex16(f));
+ freqn[k+1] = ANGLE2X(asri(16,f));
+ }
+
+ #define QIMP 21 /* scaling for impulse */
+ xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */
+
+ /* first col and last non-zero values of each row are trivial */
+
+ for(i=0;i<=m;i++)
+ { xp[i][1] = 0;
+ xp[i][2] = xin;
+ xp[i][2+2*i] = xin;
+ xq[i][1] = 0;
+ xq[i][2] = xin;
+ xq[i][2+2*i] = xin;
+ }
+
+ /* 2nd row (first output row) is trivial */
+
+ xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
+ xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);
+
+ xout1 = xout2 = 0;
+
+ for( i=1 ; i<m ; ++i)
+ { register int f, f0, f1, m0, m1;
+
+ k = 2*(i+1)-1;
+ f = ld32x(freqn,i);
+ f0 = sex16(f);
+ f1 = asri(16,f);
+
+ for( j=1 ; j<k ; ++j)
+ { register int _m0, _m1;
+
+ _m0 = MULT16_32_Q14(f0,xp[i][j+1]);
+ xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], _m0), xp[i][j]);
+
+ _m1 = MULT16_32_Q14(f1,xq[i][j+1]);
+ xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], _m1), xq[i][j]);
+ }
+
+
+ m0 = MULT16_32_Q14(f0,xp[i][j+1]);
+ xp[i+1][j+2] = SUB32(xp[i][j], m0);
+ m1 = MULT16_32_Q14(f1,xq[i][j+1]);
+ xq[i+1][j+2] = SUB32(xq[i][j], m1);
+ }
+
+
+ for( i=0,j=3 ; i<lpcrdr ; ++j,++i )
+ { register int _a0, _xp0, _xq0;
+
+ _xp0 = xp[m][j];
+ _xq0 = xq[m][j];
+
+ _a0 = PSHR32(_xp0 + xout1 + _xq0 - xout2, QIMP-13);
+ xout1 = _xp0;
+ xout2 = _xq0;
+
+ ak[i] = iclipi(_a0,32767);
+ }
+
+ LSPTOLPC_STOP();
+}
+
+
+#endif
diff --git a/tmv/ltp_tm.h b/tmv/ltp_tm.h
new file mode 100644
index 0000000..eccd701
--- /dev/null
+++ b/tmv/ltp_tm.h
@@ -0,0 +1,479 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file ltp_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_INNER_PROD
+Int32 inner_prod(const Int16 * restrict x, const Int16 * restrict y, int len)
+{
+ register int sum = 0;
+
+ INNERPROD_START();
+
+	/* word-aligned fast path: 32-bit loads, eight samples per iteration */
+	if ( (((int)x & 0x03) == 0) && (((int)y & 0x03) == 0) )
+ {
+ register int i;
+
+ len >>= 1;
+ for ( i=0 ; i<len ; i+=4 )
+ {
+ register int x0, x1, y0, y1, x2, x3, y2, y3;
+
+			x0 = ld32x(x,i);
+			y0 = ld32x(y,i);
+			x1 = ld32x(x,i+1);
+			y1 = ld32x(y,i+1);
+			sum += (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;
+
+			x2 = ld32x(x,i+2);
+			y2 = ld32x(y,i+2);
+			x3 = ld32x(x,i+3);
+			y3 = ld32x(y,i+3);
+			sum += (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;
+
+ }
+ } else
+ {
+ len >>= 3;
+ while( len-- )
+ {
+ register int x0, x1, x2, x3, y0, y1, y2, y3;
+
+ x0 = pack16lsb(x[0],x[1]);
+ y0 = pack16lsb(y[0],y[1]);
+ x1 = pack16lsb(x[2],x[3]);
+ y1 = pack16lsb(y[2],y[3]);
+ sum += (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;
+
+ x2 = pack16lsb(x[4],x[5]);
+ y2 = pack16lsb(y[4],y[5]);
+ x3 = pack16lsb(x[6],x[7]);
+ y3 = pack16lsb(y[6],y[7]);
+ sum += (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;
+
+ x += 8;
+ y += 8;
+ }
+ }
+
+ INNERPROD_STOP();
+ return sum;
+}
+
+#define OVERRIDE_PITCH_XCORR
+void pitch_xcorr(const Int16 *_x, const Int16 *_y, Int32 *corr, int len, int nb_pitch, char *stack)
+{
+ register int sum_1, sum_2, sum_3, sum_4;
+ register int y10, y32, y54, y76, y21, y43, y65;
+ register int x10, x32;
+ register int i, j, k, limit;
+
+ TMDEBUG_ALIGNMEM(_x);
+ TMDEBUG_ALIGNMEM(_y);
+
+ PITCHXCORR_START();
+
+ limit = nb_pitch >> 1;
+ len >>= 1;
+
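+	/* Each outer iteration produces four correlation lags; the funshift2 calls
+	   below build the halfword-shifted views of y needed for the two odd lags. */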
+ for (i=0 ; i<limit ; i+=2 )
+ {
+ sum_1 = sum_2 = sum_3 = sum_4 = 0;
+
+ y10 = ld32x(_y,i);
+ y32 = ld32x(_y,i+1);
+
+ for ( j=0 ; j<len ; j+=2 )
+ {
+ x10 = ld32x(_x,j);
+ x32 = ld32x(_x,j+1);
+ y54 = ld32x(_y,i+j+2);
+ y76 = ld32x(_y,i+j+3);
+
+ sum_1 += (ifir16(x10,y10) + ifir16(x32,y32)) >> 6;
+ sum_3 += (ifir16(x10,y32) + ifir16(x32,y54)) >> 6;
+
+ y21 = funshift2(y32,y10);
+ y43 = funshift2(y54,y32);
+ y65 = funshift2(y76,y54);
+
+ sum_2 += (ifir16(x10,y21) + ifir16(x32,y43)) >> 6;
+ sum_4 += (ifir16(x10,y43) + ifir16(x32,y65)) >> 6;
+
+ y10 = y54;
+ y32 = y76;
+
+ }
+
+ k = i << 1;
+ corr[nb_pitch-1-k]=sum_1;
+ corr[nb_pitch-2-k]=sum_2;
+ corr[nb_pitch-3-k]=sum_3;
+ corr[nb_pitch-4-k]=sum_4;
+ }
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ PITCHXCORR_STOP();
+}
+
+#ifndef ttisim
+#define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
+static int pitch_gain_search_3tap_vq
+(
+ const signed char *gain_cdbk,
+ int gain_cdbk_size,
+ Int16 *C16,
+ Int16 max_gain
+)
+{
+ register int pp = 0x00400040, p=64;
+ register int g10, g2, g20, g21, g02, g22, g01;
+ register int cb0, cb1, cb2, cb5432;
+ register int C10, C32, C54, C76, C98, C83, C2;
+ register int acc0, acc1, acc2, acc3, sum, gsum, bsum=-VERY_LARGE32;
+ register int i, best_cdbk=0;
+ register Int16 tmp;
+
+ TMDEBUG_ALIGNMEM(C16);
+ TMDEBUG_ALIGNMEM(gain_cdbk+2);
+
+ PITCHGAINSEARCH3TAPVQ_START();
+
+ tmp = ild16(gain_cdbk);
+ C98 = ld32x(C16,4);
+ C32 = ld32x(C16,1);
+ C10 = ld32(C16);
+ C54 = ld32x(C16,2);
+ C76 = ld32x(C16,3);
+
+ cb0 = sex8(tmp);
+ cb1 = sex8(tmp>>8);
+ C83 = funshift2(C98,C32);
+ C2 = sex16(C32);
+ gain_cdbk += 2;
+
+
+#if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<gain_cdbk_size ; ++i )
+ {
+ cb5432 = ld32x(gain_cdbk,i);
+ cb2 = sex8(cb5432);
+ gsum = sex8(cb5432>>8);
+ sum = 0;
+
+ g10 = pack16lsb(cb1 + 32, cb0 + 32);
+ g2 = cb2 + 32;
+ g02 = pack16lsb(g10, g2);
+ acc0 = dspidualmul(g10,pp);
+ sum += ifir16(acc0,C10);
+ sum += p * g2 * C2;
+
+ g22 = pack16lsb(g02, g02);
+ g01 = funshift2(g10, g10);
+
+ acc1 = dspidualmul(g22, g01);
+ sum -= ifir16(acc1, C54);
+ acc2 = dspidualmul(g10, g10);
+ sum -= ifir16(acc2, C76);
+
+ g20 = pack16lsb(g2, g10);
+ g21 = funshift2(g2, g10);
+ acc3 = dspidualmul(g20, g21);
+ sum -= ifir16(acc3, C83);
+
+
+ if ( sum>bsum && gsum<=max_gain )
+ { bsum = sum;
+ best_cdbk=i;
+ }
+
+ cb0 = sex8(cb5432 >> 16);
+ cb1 = asri(24,cb5432);
+ }
+#if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ PITCHGAINSEARCH3TAPVQ_STOP();
+ return best_cdbk;
+}
+#endif
+
+#define OVERRIDE_COMPUTE_PITCH_ERROR
+#ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
+inline Int32 compute_pitch_error(Int16 *C, Int16 *g, Int16 pitch_control)
+{
+ register int c10, c32, c54, c76, c98, c83;
+ register int g10, g32, g02, g22, g01, g21, g20;
+ register int pp, tmp0, tmp1, tmp2, tmp3;
+ register int sum = 0;
+
+
+ COMPUTEPITCHERROR_START();
+
+ g10 = ld32(g);
+ g32 = ld32x(g,1);
+ pp = pack16lsb(pitch_control,pitch_control);
+ c10 = ld32(C);
+ c32 = ld32x(C,1);
+ g02 = pack16lsb(g10,g32);
+ g22 = pack16lsb(g32,g32);
+ g01 = funshift2(g10,g10);
+ tmp0 = dspidualmul(g10,pp);
+ sum += ifir16(tmp0, c10);
+ sum += pitch_control * sex16(g32) * sex16(c32);
+ c54 = ld32x(C,2);
+ c76 = ld32x(C,3);
+ c98 = ld32x(C,4);
+ tmp1 = dspidualmul(g22,g01);
+ sum -= ifir16(tmp1, c54);
+ tmp2 = dspidualmul(g10,g10);
+ sum -= ifir16(tmp2,c76);
+ c83 = funshift2(c98,c32);
+ g20 = funshift2(g02,g02);
+ g21 = funshift2(g02,g10);
+ tmp3 = dspidualmul(g20,g21);
+ sum -= ifir16(tmp3,c83);
+
+ COMPUTEPITCHERROR_STOP();
+ return sum;
+}
+#endif
+
+#define OVERRIDE_OPEN_LOOP_NBEST_PITCH
+void open_loop_nbest_pitch(Int16 *sw, int start, int end, int len, int *pitch, Int16 *gain, int N, char *stack)
+{
+ VARDECL(int *best_score);
+ VARDECL(int *best_ener);
+ VARDECL(Int32 *corr);
+ VARDECL(Int16 *corr16);
+ VARDECL(Int16 *ener16);
+ register int i, j, k, l, N4, N2;
+ register int _sw10, _sw32, _s0, _s2, limit;
+ register int *energy;
+ register int cshift=0, eshift=0;
+ register int scaledown = 0;
+ register int e0, _energy0;
+
+ ALLOC(corr16, end-start+1, Int16);
+ ALLOC(ener16, end-start+1, Int16);
+ ALLOC(corr, end-start+1, Int32);
+ ALLOC(best_score, N, int);
+ ALLOC(best_ener, N, int);
+ energy = corr;
+ N4 = N << 2;
+ N2 = N >> 1;
+
+ TMDEBUG_ALIGNMEM(sw);
+ TMDEBUG_ALIGNMEM(pitch);
+ TMDEBUG_ALIGNMEM(gain);
+ TMDEBUG_ALIGNMEM(best_score);
+ TMDEBUG_ALIGNMEM(best_ener);
+ TMDEBUG_ALIGNMEM(corr16);
+ TMDEBUG_ALIGNMEM(ener16);
+
+ OPENLOOPNBESTPITCH_START();
+
+ for ( i=0 ; i<N4 ; i+=4 )
+ { st32d(i,best_score,-1);
+ st32d(i,best_ener,0);
+ st32d(i,pitch,start);
+ }
+
+ for ( j=asri(1,-end) ; j<N2 ; ++j )
+ { register int _sw10;
+
+ _sw10 = ld32x(sw,j);
+ _sw10 = dspidualabs(_sw10);
+
+ if ( _sw10 & 0xC000C000 )
+ { scaledown = 1;
+ break;
+ }
+ }
+
+ if ( scaledown )
+ {
+ for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )
+ { register int _sw10;
+
+ _sw10 = ld32x(sw,j);
+ _sw10 = dualasr(_sw10,1);
+ st32d(k, sw, _sw10);
+ }
+ }
+
+ energy[0] = _energy0 = inner_prod(sw-start, sw-start, len);
+ e0 = inner_prod(sw, sw, len);
+
+ j=asri(1,-start-1); k=j+20;
+ _sw10 = ld32x(sw,j);
+ _sw32 = ld32x(sw,k);
+ limit = end-1-start;
+
+ for ( i=1,--j,--k ; i<limit ; i+=2,--j,--k )
+ { register int _energy1, __sw10, __sw32, __s0, __s2;
+
+ _s0 = sex16(_sw10);
+ _s2 = sex16(_sw32);
+ _energy1 = (_energy0 + ((_s0 * _s0) >> 6)) - ((_s2 * _s2) >> 6);
+ _energy0 = imax(0,_energy1);
+ energy[i] = _energy0;
+ __sw10 = ld32x(sw,j);
+ __sw32 = ld32x(sw,k);
+ __s0 = asri(16,__sw10);
+ __s2 = asri(16,__sw32);
+ _energy1 = (_energy0 + ((__s0 * __s0) >> 6)) - ((__s2 * __s2) >> 6);
+ _energy0 = imax(0,_energy1);
+ energy[i+1] = _energy0;
+ _sw10 = __sw10;
+ _sw32 = __sw32;
+ }
+
+ _s0 = sex16(_sw10);
+ _s2 = sex16(_sw32);
+ _energy0 = imax(0,(_energy0 + ((_s0 * _s0) >> 6)) - ((_s2 * _s2) >> 6));
+ energy[i] = _energy0;
+
+
+ eshift = normalize16(energy, ener16, 32766, end-start+1);
+	/* In fixed-point, this actually overwrites the energy array (aliased to corr) */
+ pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
+ /* Normalize to 180 so we can square it and it still fits in 16 bits */
+ cshift = normalize16(corr, corr16, 180, end-start+1);
+ /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
+
+ if ( scaledown )
+ {
+ for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )
+ { register int _sw10;
+
+ _sw10 = ld32x(sw,j);
+ _sw10 = dualasl(_sw10,1);
+ st32d(k, sw, _sw10);
+ }
+ }
+
+ /* Search for the best pitch prediction gain */
+ for ( i=start,l=0 ; i<end ; i+=2,++l )
+ { register int _corr16, _c0, _c1;
+ register int _ener16, _e0, _e1;
+
+ _corr16 = ld32x(corr16,l);
+ _corr16 = dspidualmul(_corr16,_corr16);
+ _c0 = sex16(_corr16);
+ _c1 = asri(16,_corr16);
+
+ _ener16 = ld32x(ener16,l);
+ _ener16 = dspidualadd(_ener16,0x00010001);
+ _e0 = sex16(_ener16);
+ _e1 = asri(16,_ener16);
+
+ /* Instead of dividing the tmp by the energy, we multiply on the other side */
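+		/* i.e. compare corr^2 * best_ener against best_score * energy rather than
+		   forming corr^2 / energy, which would cost a divide per candidate lag */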
+
+ if ( (_c0 * best_ener[N-1]) > (best_score[N-1] * _e0) )
+ {
+ best_score[N-1] = _c0;
+ best_ener[N-1] = _e0;
+ pitch[N-1] = i;
+
+ for( j=0 ; j<N-1 ; ++j )
+ { if ( (_c0 * best_ener[j]) > best_score[j] * _e0 )
+ { for( k=N-1 ; k>j ; --k )
+ {
+ best_score[k]=best_score[k-1];
+ best_ener[k]=best_ener[k-1];
+ pitch[k]=pitch[k-1];
+ }
+
+ best_score[j]=_c0;
+ best_ener[j]=_e0;
+ pitch[j]=i;
+ break;
+ }
+ }
+ }
+
+ if ( (_c1 * best_ener[N-1]) > (best_score[N-1] * _e1) )
+ {
+ best_score[N-1] = _c1;
+ best_ener[N-1] = _e1;
+ pitch[N-1] = i+1;
+
+ for( j=0 ; j<N-1 ; ++j )
+ { if ( (_c1 * best_ener[j]) > best_score[j] * _e1 )
+ { for( k=N-1 ; k>j ; --k )
+ {
+ best_score[k]=best_score[k-1];
+ best_ener[k]=best_ener[k-1];
+ pitch[k]=pitch[k-1];
+ }
+
+ best_score[j]=_c1;
+ best_ener[j]=_e1;
+ pitch[j]=i+1;
+ break;
+ }
+ }
+ }
+ }
+
+ /* Compute open-loop gain if necessary */
+ if (gain)
+ {
+ for (j=0;j<N;j++)
+ {
+ spx_word16_t g;
+ i=pitch[j];
+ g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
+ gain[j] = imax(0,g);
+ }
+ }
+
+ OPENLOOPNBESTPITCH_STOP();
+}
+
+
+#endif
+
diff --git a/tmv/mdf_tm.h b/tmv/mdf_tm.h
new file mode 100644
index 0000000..cc82bf8
--- /dev/null
+++ b/tmv/mdf_tm.h
@@ -0,0 +1,1192 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file mdf_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+// The power spectrum computation has been moved to fftwrap.c so that the optimisation can be shared between mdf.c and preprocess.c
+#define OVERRIDE_POWER_SPECTRUM
+
+#ifdef FIXED_POINT
+
+#else
+
+#define OVERRIDE_FILTER_DC_NOTCH16
+void filter_dc_notch16(
+ const spx_int16_t * restrict in,
+ float radius,
+ float * restrict out,
+ int len,
+ float * restrict mem
+)
+{
+ register int i;
+ register float den2, r1;
+ register float mem0, mem1;
+
+ FILTERDCNOTCH16_START();
+
+ r1 = 1 - radius;
+ den2 = (radius * radius) + (0.7 * r1 * r1);
+ mem0 = mem[0];
+ mem1 = mem[1];
+
+#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<len ; ++i )
+ {
+ register float vin = in[i];
+ register float vout = mem0 + vin;
+ register float rvout = radius * vout;
+
+ mem0 = mem1 + 2 * (-vin + rvout);
+ mem1 = vin - (den2 * vout);
+
+ out[i] = rvout;
+ }
+#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ mem[0] = mem0;
+ mem[1] = mem1;
+
+ FILTERDCNOTCH16_STOP();
+}
+
+#define OVERRIDE_MDF_INNER_PROD
+float mdf_inner_prod(
+ const float * restrict x,
+ const float * restrict y,
+ int len
+)
+{
+ register float sum = 0;
+
+ MDFINNERPROD_START();
+
+ len >>= 1;
+
+#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ while(len--)
+ {
+ register float acc0, acc1;
+
+ acc0 = (*x++) * (*y++);
+ acc1 = (*x++) * (*y++);
+
+ sum += acc0 + acc1;
+ }
+#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ MDFINNERPROD_STOP();
+
+ return sum;
+}
+
+#define OVERRIDE_SPECTRAL_MUL_ACCUM
+void spectral_mul_accum(
+ const float * restrict X,
+ const float * restrict Y,
+ float * restrict acc,
+ int N, int M
+)
+{
+ register int i, j;
+ register float Xi, Yi, Xii, Yii;
+ register int _N;
+
+ SPECTRALMULACCUM_START();
+
+ acc[0] = X[0] * Y[0];
+ _N = N-1;
+
+ for ( i=1 ; i<_N ; i+=2 )
+ {
+ Xi = X[i];
+ Yi = Y[i];
+ Xii = X[i+1];
+ Yii = Y[i+1];
+
+ acc[i] = (Xi * Yi - Xii * Yii);
+ acc[i+1]= (Xii * Yi + Xi * Yii);
+ }
+
+ acc[_N] = X[_N] * Y[_N];
+
+ for ( j=1,X+=N,Y+=N ; j<M ; j++ )
+ {
+ acc[0] += X[0] * Y[0];
+
+ for ( i=1 ; i<N-1 ; i+=2 )
+ {
+ Xi = X[i];
+ Yi = Y[i];
+ Xii = X[i+1];
+ Yii = Y[i+1];
+
+ acc[i] += (Xi * Yi - Xii * Yii);
+ acc[i+1]+= (Xii * Yi + Xi * Yii);
+ }
+
+ acc[_N] += X[_N] * Y[_N];
+ X += N;
+ Y += N;
+ }
+
+ SPECTRALMULACCUM_STOP();
+}
+
+#define OVERRIDE_WEIGHTED_SPECTRAL_MUL_CONJ
+void weighted_spectral_mul_conj(
+ const float * restrict w,
+ const float p,
+ const float * restrict X,
+ const float * restrict Y,
+ float * restrict prod,
+ int N
+)
+{
+ register int i, j;
+ register int _N;
+
+ WEIGHTEDSPECTRALMULCONJ_START();
+
+ prod[0] = p * w[0] * X[0] * Y[0];
+ _N = N-1;
+
+ for (i=1,j=1;i<_N;i+=2,j++)
+ {
+ register float W;
+ register float Xi, Yi, Xii, Yii;
+
+ Xi = X[i];
+ Yi = Y[i];
+ Xii = X[i+1];
+ Yii = Y[i+1];
+ W = p * w[j];
+
+
+ prod[i] = W * (Xi * Yi + Xii * Yii);
+ prod[i+1]= W * (Xi * Yii - Xii * Yi);
+ }
+
+ prod[_N] = p * w[j] * X[_N] * Y[_N];
+
+ WEIGHTEDSPECTRALMULCONJ_STOP();
+}
+
+#define OVERRIDE_MDF_ADJUST_PROP
+void mdf_adjust_prop(
+ const float * restrict W,
+ int N,
+ int M,
+ float * restrict prop
+)
+{
+ register int i, j;
+ register float max_sum = 1;
+ register float prop_sum = 1;
+
+ MDFADJUSTPROP_START();
+
+ for ( i=0 ; i<M ; ++i )
+ {
+ register float tmp = 1;
+ register int k = i * N;
+ register int l = k + N;
+ register float propi;
+
+#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( j=k ; j<l ; ++j )
+ {
+ register float wi = W[j];
+
+ tmp += wi * wi;
+ }
+#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ propi = spx_sqrt(tmp);
+ prop[i]= propi;
+ max_sum= fmux(propi > max_sum, propi, max_sum);
+ }
+
+ for ( i=0 ; i<M ; ++i )
+ {
+ register float propi = prop[i];
+
+ propi += .1f * max_sum;
+ prop_sum += propi;
+ prop[i] = propi;
+ }
+
+ prop_sum = 0.99f / prop_sum;
+ for ( i=0 ; i<M ; ++i )
+ { prop[i] = prop_sum * prop[i];
+ }
+
+ MDFADJUSTPROP_STOP();
+}
+
+
+#define OVERRIDE_SPEEX_ECHO_GET_RESIDUAL
+void speex_echo_get_residual(
+ SpeexEchoState * restrict st,
+ float * restrict residual_echo,
+ int len
+)
+{
+ register int i;
+ register float leak2, leake;
+ register int N;
+ register float * restrict window;
+ register float * restrict last_y;
+ register float * restrict y;
+
+ SPEEXECHOGETRESIDUAL_START();
+
+ window = st->window;
+ last_y = st->last_y;
+ y = st->y;
+ N = st->window_size;
+
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { y[i] = window[i] * last_y[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ spx_fft(st->fft_table, st->y, st->Y);
+ power_spectrum(st->Y, residual_echo, N);
+
+ leake = st->leak_estimate;
+ leak2 = fmux(leake > .5, 1, 2 * leake);
+ N = st->frame_size;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ { residual_echo[i] *= leak2;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ residual_echo[N] *= leak2;
+
+#ifndef NO_REMARK
+ (void)len;
+#endif
+
+ SPEEXECHOGETRESIDUAL_STOP();
+}
+#endif
+
+
+void mdf_preemph(
+ SpeexEchoState * restrict st,
+ spx_word16_t * restrict x,
+ const spx_int16_t * restrict far_end,
+ int framesize
+)
+{
+ register spx_word16_t preemph = st->preemph;
+ register spx_word16_t memX = st->memX;
+ register spx_word16_t memD = st->memD;
+ register spx_word16_t * restrict input = st->input;
+ register int i;
+#ifdef FIXED_POINT
+	register int saturated = st->saturated;
+	register int M = st->M;	/* saturation freezes adaptation for M+1 frames */
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<framesize ; ++i )
+ {
+ register spx_int16_t far_endi = far_end[i];
+ register spx_word32_t tmp32;
+ register spx_word16_t inputi = input[i];
+
+ tmp32 = SUB32(EXTEND32(far_endi), EXTEND32(MULT16_16_P15(preemph,memX)));
+
+#ifdef FIXED_POINT
+ saturated = mux(iabs(tmp32) > 32767, M+1, saturated);
+ tmp32 = iclipi(tmp32,32767);
+#endif
+
+ x[i] = EXTRACT16(tmp32);
+ memX = far_endi;
+ tmp32 = SUB32(EXTEND32(inputi), EXTEND32(MULT16_16_P15(preemph, memD)));
+
+#ifdef FIXED_POINT
+ saturated = mux( ((tmp32 > 32767) && (saturated == 0)), 1,
+ mux( ((tmp32 <-32767) && (saturated == 0)), 1, saturated ));
+ tmp32 = iclipi(tmp32,32767);
+#endif
+ memD = inputi;
+ input[i] = tmp32;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ st->memD = memD;
+ st->memX = memX;
+
+#ifdef FIXED_POINT
+ st->saturated = saturated;
+#endif
+}
+
+void mdf_sub(
+ spx_word16_t * restrict dest,
+ const spx_word16_t * restrict src1,
+ const spx_word16_t * restrict src2,
+ int framesize
+)
+{
+ register int i;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+#ifdef FIXED_POINT
+ for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
+ { register int src1i, src2i, desti;
+
+ src1i = ld32d(src1,i);
+ src2i = ld32d(src2,i);
+ desti = dspidualsub(src1i,src2i);
+ st32d(i, dest, desti);
+ }
+#else
+ for ( i=0 ; i<framesize ; ++i )
+ { dest[i] = src1[i] - src2[i];
+ }
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void mdf_sub_int(
+ spx_word16_t * restrict dest,
+ const spx_int16_t * restrict src1,
+ const spx_int16_t * restrict src2,
+ int framesize
+)
+{
+ register int i, j;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+#ifdef FIXED_POINT
+ for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
+ { register int src1i, src2i, desti;
+
+ src1i = ld32d(src1,i);
+ src2i = ld32d(src2,i);
+ desti = dspidualsub(src1i,src2i);
+ st32d(i, dest, desti);
+ }
+#else
+ for ( i=0,j=0 ; i<framesize ; i+=2,++j )
+ { register int src1i, src2i, desti;
+
+
+ src1i = ld32d(src1,j);
+ src2i = ld32d(src2,j);
+ desti = dspidualsub(src1i,src2i);
+
+ dest[i] = sex16(desti);
+ dest[i+1] = asri(16,desti);
+ }
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void mdf_compute_weight_gradient(
+ SpeexEchoState * restrict st,
+ spx_word16_t * restrict X,
+ int N,
+ int M
+)
+{
+ register int i, j;
+ register spx_word32_t * restrict PHI = st->PHI;
+
+ for (j=M-1;j>=0;j--)
+ {
+ register spx_word32_t * restrict W = &(st->W[j*N]);
+
+ weighted_spectral_mul_conj(
+ st->power_1,
+ FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15),
+ &X[(j+1)*N],
+ st->E,
+ st->PHI,
+ N);
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { W[i] = ADD32(W[i],PHI[i]);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+}
+
+void mdf_update_weight(
+ SpeexEchoState * restrict st,
+ int N,
+ int M,
+ int framesize
+)
+{
+ register int j;
+ register int cancel_count = st->cancel_count;
+ register spx_word16_t * restrict wtmp = st->wtmp;
+#ifdef FIXED_POINT
+ register spx_word16_t * restrict wtmp2 = st->wtmp2;
+ register int i;
+#endif
+
+ for ( j=0 ; j<M ; j++ )
+ {
+ register spx_word32_t * restrict W = &(st->W[j*N]);
+
+ if (j==0 || cancel_count%(M-1) == j-1)
+ {
+#ifdef FIXED_POINT
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; i++ )
+ wtmp2[i] = EXTRACT16(PSHR32(W[i],NORMALIZE_SCALEDOWN+16));
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ spx_ifft(st->fft_table, wtmp2, wtmp);
+ memset(wtmp, 0, framesize * sizeof(spx_word16_t));
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=framesize; i<N ; ++i)
+ { wtmp[i]=SHL16(wtmp[i],NORMALIZE_SCALEUP);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ spx_fft(st->fft_table, wtmp, wtmp2);
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { W[i] -= SHL32(EXTEND32(wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+#else
+ spx_ifft(st->fft_table, W, wtmp);
+ memset(&wtmp[framesize], 0, (N-framesize) * sizeof(spx_word16_t));
+ spx_fft(st->fft_table, wtmp, W);
+#endif
+ }
+ }
+}
+
+#ifdef TWO_PATH
+// The first four parameters are passed in registers,
+// so functions with four parameters generate faster code.
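+// For instance, the call in speex_echo_cancellation() below,
+//     See = mdf_update_foreground(st, Dbf, Sff, See);
+// keeps all four arguments in registers and avoids stack traffic.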
+spx_word32_t mdf_update_foreground(
+ SpeexEchoState * restrict st,
+ spx_word32_t Dbf,
+ spx_word32_t Sff,
+ spx_word32_t See
+)
+{
+ register spx_word32_t Davg1 = st->Davg1;
+ register spx_word32_t Davg2 = st->Davg2;
+ register spx_word32_t Dvar1 = st->Dvar1;
+ register spx_word32_t Dvar2 = st->Dvar2;
+ register spx_word16_t * restrict input = st->input;
+ register int framesize = st->frame_size;
+ register spx_word16_t * restrict xx = st->x + framesize;
+ register spx_word16_t * restrict y = st->y + framesize;
+ register spx_word16_t * restrict ee = st->e + framesize;
+ register int update_foreground;
+ register int i;
+ register int N = st->window_size;
+ register int M = st->M;
+
+#ifdef FIXED_POINT
+ register spx_word32_t sc0 = SUB32(Sff,See);
+ register spx_float_t sc1 = FLOAT_MUL32U(Sff,Dbf);
+
+ Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),Davg1), MULT16_32_Q15(QCONST16(.4f,15),sc0));
+ Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),Davg2), MULT16_32_Q15(QCONST16(.15f,15),sc0));
+ Dvar1 = FLOAT_ADD(
+ FLOAT_MULT(VAR1_SMOOTH,Dvar1),
+ FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff),
+ MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
+ Dvar2 = FLOAT_ADD(
+ FLOAT_MULT(VAR2_SMOOTH,Dvar2),
+ FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff),
+ MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
+#else
+ register spx_word32_t sc0 = Sff - See;
+ register spx_word32_t sc1 = Sff * Dbf;
+
+ Davg1 = .6*Davg1 + .4*sc0;
+ Davg2 = .85*Davg2 + .15*sc0;
+ Dvar1 = VAR1_SMOOTH*Dvar1 + .16*sc1;
+ Dvar2 = VAR2_SMOOTH*Dvar2 + .0225*sc1;
+#endif
+
+ update_foreground =
+ mux( FLOAT_GT(FLOAT_MUL32U(sc0, VABS(sc0)), sc1), 1,
+ mux( FLOAT_GT(FLOAT_MUL32U(Davg1, VABS(Davg1)), FLOAT_MULT(VAR1_UPDATE,(Dvar1))), 1,
+ mux( FLOAT_GT(FLOAT_MUL32U(Davg2, VABS(Davg2)), FLOAT_MULT(VAR2_UPDATE,(Dvar2))), 1, 0)));
+
+ if ( update_foreground )
+ {
+ register spx_word16_t * restrict windowf = st->window + framesize;
+ register spx_word16_t * restrict window = st->window;
+
+ st->Davg1 = st->Davg2 = 0;
+ st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
+
+ memcpy(st->foreground, st->W, N*M*sizeof(spx_word32_t));
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<framesize ; ++i)
+ { register spx_word16_t wi = window[i];
+ register spx_word16_t wfi = windowf[i];
+ register spx_word16_t ei = ee[i];
+ register spx_word16_t yi = y[i];
+
+ ee[i] = MULT16_16_Q15(wfi,ei) + MULT16_16_Q15(wi,yi);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ } else
+ {
+ register int reset_background;
+
+ reset_background =
+ mux( FLOAT_GT(FLOAT_MUL32U(-(sc0),VABS(sc0)), FLOAT_MULT(VAR_BACKTRACK,sc1)), 1,
+ mux( FLOAT_GT(FLOAT_MUL32U(-(Davg1), VABS(Davg1)), FLOAT_MULT(VAR_BACKTRACK,Dvar1)), 1,
+ mux( FLOAT_GT(FLOAT_MUL32U(-(Davg2), VABS(Davg2)), FLOAT_MULT(VAR_BACKTRACK,Dvar2)), 1, 0)));
+
+ if ( reset_background )
+ {
+ memcpy(st->W, st->foreground, N*M*sizeof(spx_word32_t));
+ memcpy(y, ee, framesize * sizeof(spx_word16_t));
+ mdf_sub(xx,input,y,framesize);
+ See = Sff;
+ st->Davg1 = st->Davg2 = 0;
+ st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
+ } else
+ {
+ st->Davg1 = Davg1;
+ st->Davg2 = Davg2;
+ st->Dvar1 = Dvar1;
+ st->Dvar2 = Dvar2;
+ }
+ }
+
+ return See;
+}
+#endif
+
+void mdf_compute_error_signal(
+ SpeexEchoState * restrict st,
+ const spx_int16_t * restrict in,
+ spx_int16_t * restrict out,
+ int framesize
+)
+{
+ register spx_word16_t preemph = st->preemph;
+ register spx_word16_t memE = st->memE;
+ register int saturated = st->saturated;
+ register spx_word16_t * restrict e = st->e;
+ register spx_word16_t * restrict ee = st->e + framesize;
+ register spx_word16_t * restrict input = st->input;
+ register spx_word16_t * restrict xx = st->x + framesize;
+ register int i;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<framesize ; ++i )
+ {
+ register spx_word32_t tmp_out;
+ register spx_int16_t ini = in[i];
+ register int flg;
+
+#ifdef FIXED_POINT
+
+#ifdef TWO_PATH
+ tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
+ tmp_out = iclipi(tmp_out,32767);
+#else
+ tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(st->y[framesize+i]));
+ tmp_out = iclipi(tmp_out,32767);
+#endif
+
+#else
+#ifdef TWO_PATH
+ tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
+#else
+ tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(st->y[framesize+i]));
+#endif
+ tmp_out =
+ fmux( tmp_out > 32767, 32767,
+ fmux( tmp_out < -32768, -32768, tmp_out));
+#endif
+
+ tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(preemph,memE)));
+ flg = iabs(ini) >= 32000;
+ tmp_out = VMUX( flg, 0, tmp_out);
+ saturated = mux( flg && (saturated == 0), 1, saturated);
+
+ out[i] = (spx_int16_t)tmp_out;
+ memE = tmp_out;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ st->memE = memE;
+ st->saturated = saturated;
+ memset(e, 0, framesize * sizeof(spx_word16_t));
+ memcpy(ee, xx, framesize * sizeof(spx_word16_t));
+}
+
+inline int mdf_check(
+ SpeexEchoState * restrict st,
+ spx_int16_t * out,
+ spx_word32_t Syy,
+ spx_word32_t Sxx,
+ spx_word32_t See,
+ spx_word32_t Sff,
+ spx_word32_t Sdd
+)
+{
+ register int N = st->window_size;
+ register spx_word32_t N1e9 = N * 1e9;
+ register int screwed_up = st->screwed_up;
+ register int framesize = st->frame_size;
+
+ if (!(Syy>=0 && Sxx>=0 && See >= 0)
+#ifndef FIXED_POINT
+ || !(Sff < N1e9 && Syy < N1e9 && Sxx < N1e9 )
+#endif
+ )
+ {
+ screwed_up += 50;
+ memset(out, 0, framesize * sizeof(spx_int16_t));
+
+ } else
+ { screwed_up = mux( SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)), screwed_up+1, 0);
+ }
+
+ st->screwed_up = screwed_up;
+
+ return screwed_up;
+}
+
+void mdf_smooth(
+ spx_word32_t * restrict power,
+ spx_word32_t * restrict Xf,
+ int framesize,
+ int M
+)
+{
+ register spx_word16_t ss, ss_1, pf, xff;
+ register int j;
+
+#ifdef FIXED_POINT
+ ss=DIV32_16(11469,M);
+ ss_1 = SUB16(32767,ss);
+#else
+ ss=.35/M;
+ ss_1 = 1-ss;
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( j=0 ; j<framesize ; ++j )
+ { register spx_word32_t pi = power[j];
+ register spx_word32_t xfi = Xf[j];
+
+ power[j] = MULT16_32_Q15(ss_1,pi) + 1 + MULT16_32_Q15(ss,xfi);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ pf = power[framesize];
+ xff = Xf[framesize];
+ power[framesize] = MULT16_32_Q15(ss_1,pf) + 1 + MULT16_32_Q15(ss,xff);
+}
+
+void mdf_compute_filtered_spectra_crosscorrelations(
+ SpeexEchoState * restrict st,
+ spx_word32_t Syy,
+ spx_word32_t See,
+ int framesize
+)
+{
+ register spx_float_t Pey = FLOAT_ONE;
+ register spx_float_t Pyy = FLOAT_ONE;
+ register spx_word16_t spec_average = st->spec_average;
+ register spx_word32_t * restrict pRf = st->Rf;
+ register spx_word32_t * restrict pYf = st->Yf;
+ register spx_word32_t * restrict pEh = st->Eh;
+ register spx_word32_t * restrict pYh = st->Yh;
+ register spx_word16_t beta0 = st->beta0;
+ register spx_word16_t beta_max = st->beta_max;
+ register spx_float_t alpha, alpha_1;
+ register spx_word32_t tmp32, tmpx;
+ register spx_float_t sPey = st->Pey;
+ register spx_float_t sPyy = st->Pyy;
+ register spx_float_t tmp;
+ register spx_word16_t leak_estimate;
+ register int j;
+ register spx_float_t Eh, Yh;
+ register spx_word32_t _Ehj, _Rfj, _Yfj, _Yhj;
+
+#ifdef FIXED_POINT
+ register spx_word16_t spec_average1 = SUB16(32767,spec_average);
+#else
+ register spx_word16_t spec_average1 = 1 - spec_average;
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (j=framesize; j>0 ; --j)
+ {
+ _Ehj = pEh[j];
+ _Rfj = pRf[j];
+ _Yfj = pYf[j];
+ _Yhj = pYh[j];
+
+ Eh = PSEUDOFLOAT(_Rfj - _Ehj);
+ Yh = PSEUDOFLOAT(_Yfj - _Yhj);
+
+ Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
+ Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
+
+ pEh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
+ pYh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ _Ehj = pEh[0];
+ _Rfj = pRf[0];
+ _Yfj = pYf[0];
+ _Yhj = pYh[0];
+
+ Eh = PSEUDOFLOAT(_Rfj - _Ehj);
+ Yh = PSEUDOFLOAT(_Yfj - _Yhj);
+
+ Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
+ Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
+
+ pEh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
+ pYh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
+
+ Pyy = FLOAT_SQRT(Pyy);
+ Pey = FLOAT_DIVU(Pey,Pyy);
+
+ tmp32 = MULT16_32_Q15(beta0,Syy);
+ tmpx = MULT16_32_Q15(beta_max,See);
+ tmp32 = VMUX(tmp32 > tmpx, tmpx, tmp32);
+ alpha = FLOAT_DIV32(tmp32, See);
+ alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
+
+ sPey = FLOAT_ADD(FLOAT_MULT(alpha_1,sPey) , FLOAT_MULT(alpha,Pey));
+ sPyy = FLOAT_ADD(FLOAT_MULT(alpha_1,sPyy) , FLOAT_MULT(alpha,Pyy));
+ tmp = FLOAT_MULT(MIN_LEAK,sPyy);
+
+#ifndef FIXED_POINT
+ sPyy = VMUX(FLOAT_LT(sPyy, FLOAT_ONE), FLOAT_ONE, sPyy);
+ sPey = VMUX(FLOAT_LT(sPey, tmp), tmp, sPey);
+ sPey = VMUX(FLOAT_LT(sPey, sPyy), sPey, sPyy);
+#else
+ sPyy = FLOAT_LT(sPyy, FLOAT_ONE) ? FLOAT_ONE : sPyy;
+ sPey = FLOAT_LT(sPey, tmp) ? tmp : sPey;
+ sPey = FLOAT_LT(sPey, sPyy) ? sPey : sPyy;
+#endif
+
+ leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(sPey, sPyy),14));
+
+ leak_estimate = VMUX( leak_estimate > 16383, 32767, SHL16(leak_estimate,1));
+ st->Pey = sPey;
+ st->Pyy = sPyy;
+ st->leak_estimate = leak_estimate;
+}
+
+inline spx_word16_t mdf_compute_RER(
+ spx_word32_t See,
+ spx_word32_t Syy,
+ spx_word32_t Sey,
+ spx_word32_t Sxx,
+ spx_word16_t leake
+)
+{
+ register spx_word16_t RER;
+
+#ifdef FIXED_POINT
+ register spx_word32_t tmp32;
+ register spx_word32_t tmp;
+ spx_float_t bound = PSEUDOFLOAT(Sey);
+
+ tmp32 = MULT16_32_Q15(leake,Syy);
+ tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
+
+ bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
+ tmp = FLOAT_EXTRACT32(bound);
+ tmp32 = imux( FLOAT_GT(bound, PSEUDOFLOAT(See)), See,
+ imux( tmp32 < tmp, tmp, tmp32));
+
+ tmp = SHR32(See,1);
+ tmp32 = imux(tmp32 > tmp, tmp, tmp32);
+ RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
+#else
+ register spx_word32_t r0;
+
+ r0 = (Sey * Sey)/(1 + See * Syy);
+
+ RER = (.0001*Sxx + 3.* MULT16_32_Q15(leake,Syy)) / See;
+ RER = fmux( RER < r0, r0, RER);
+ RER = fmux( RER > .5, .5, RER);
+#endif
+
+ return RER;
+}
+
+void mdf_adapt(
+ SpeexEchoState * restrict st,
+ spx_word16_t RER,
+ spx_word32_t Syy,
+ spx_word32_t See,
+ spx_word32_t Sxx
+)
+{
+ register spx_float_t * restrict power_1 = st->power_1;
+ register spx_word32_t * restrict power = st->power;
+ register int adapted = st->adapted;
+ register spx_word32_t sum_adapt = st->sum_adapt;
+ register spx_word16_t leake = st->leak_estimate;
+ register int framesize = st->frame_size;
+ register int i;
+ register int M = st->M;
+
+ adapted = mux( !adapted && sum_adapt > QCONST32(M,15) &&
+ MULT16_32_Q15(leake,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy), 1, adapted);
+
+ if ( adapted )
+ { register spx_word32_t * restrict Yf = st->Yf;
+ register spx_word32_t * restrict Rf = st->Rf;
+ register spx_word32_t r, e, e2;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<framesize ; ++i )
+ {
+ r = SHL32(Yf[i],3);
+ r = MULT16_32_Q15(leake,r);
+ e = SHL32(Rf[i],3)+1;
+
+#ifdef FIXED_POINT
+ e2 = SHR32(e,1);
+ r = mux( r > e2, e2, r);
+#else
+ e2 = e * .5;
+ r = fmux( r > e2, e2, r);
+#endif
+
+ r = MULT16_32_Q15(QCONST16(.7,15),r) +
+ MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
+
+ power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[i]+10)),WEIGHT_SHIFT+16);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ r = SHL32(Yf[framesize],3);
+ r = MULT16_32_Q15(leake,r);
+ e = SHL32(Rf[framesize],3)+1;
+
+#ifdef FIXED_POINT
+ e2 = SHR32(e,1);
+ r = mux( r > e2, e2, r);
+#else
+ e2 = e * .5;
+ r = fmux( r > e2, e2, r);
+#endif
+
+ r = MULT16_32_Q15(QCONST16(.7,15),r) +
+ MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
+
+ power_1[framesize] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[framesize]+10)),WEIGHT_SHIFT+16);
+
+ } else
+ {
+ register spx_word16_t adapt_rate=0;
+ register int N = st->window_size;
+
+ if ( Sxx > SHR32(MULT16_16(N, 1000),6) )
+ { register spx_word32_t tmp32, tmp32q;
+
+ tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
+#ifdef FIXED_POINT
+ tmp32q = SHR32(See,2);
+ tmp32 = mux(tmp32 > tmp32q, tmp32q, tmp32);
+#else
+ tmp32q = 0.25 * See;
+ tmp32 = fmux(tmp32 > tmp32q, tmp32q, tmp32);
+#endif
+ adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
+ }
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<framesize;i++)
+ power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[i],10)),WEIGHT_SHIFT+1);
+#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ power_1[framesize] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[framesize],10)),WEIGHT_SHIFT+1);
+ sum_adapt = ADD32(sum_adapt,adapt_rate);
+ }
+
+ st->sum_adapt = sum_adapt;
+ st->adapted = adapted;
+}
+
+#define OVERRIDE_ECHO_CANCELLATION
+void speex_echo_cancellation(
+ SpeexEchoState * restrict st,
+ const spx_int16_t * restrict in,
+ const spx_int16_t * restrict far_end,
+ spx_int16_t * restrict out
+)
+{
+ register int framesize = st->frame_size;
+ register spx_word16_t * restrict x = st->x;
+ register spx_word16_t * restrict xx = st->x + framesize;
+ register spx_word16_t * restrict yy = st->y + framesize;
+ register spx_word16_t * restrict ee = st->e + framesize;
+ register spx_word32_t Syy, See, Sxx, Sdd, Sff;
+ register spx_word16_t RER;
+ register spx_word32_t Sey;
+ register int j;
+ register int N,M;
+#ifdef TWO_PATH
+ register spx_word32_t Dbf;
+#endif
+
+ N = st->window_size;
+ M = st->M;
+ st->cancel_count++;
+
+ filter_dc_notch16(in, st->notch_radius, st->input, framesize, st->notch_mem);
+ mdf_preemph(st, xx, far_end, framesize);
+
+ {
+
+ register spx_word16_t * restrict X = st->X;
+
+ for ( j=M-1 ; j>=0 ; j-- )
+ { register spx_word16_t * restrict Xdes = &(X[(j+1)*N]);
+ register spx_word16_t * restrict Xsrc = &(X[j*N]);
+
+ memcpy(Xdes, Xsrc, N * sizeof(spx_word16_t));
+ }
+
+ spx_fft(st->fft_table, x, X);
+ memcpy(st->last_y, st->x, N * sizeof(spx_word16_t));
+ Sxx = mdf_inner_prod(xx, xx, framesize);
+ memcpy(x, xx, framesize * sizeof(spx_word16_t));
+
+#ifdef TWO_PATH
+ spectral_mul_accum(st->X, st->foreground, st->Y, N, M);
+ spx_ifft(st->fft_table, st->Y, st->e);
+ mdf_sub(xx, st->input, ee, framesize);
+ Sff = mdf_inner_prod(xx, xx, framesize);
+#endif
+
+ mdf_adjust_prop (st->W, N, M, st->prop);
+
+ if (st->saturated == 0)
+ { mdf_compute_weight_gradient(st, X, N, M);
+ } else
+ { st->saturated--;
+ }
+ }
+
+ mdf_update_weight(st, N, M, framesize);
+ spectral_mul_accum(st->X, st->W, st->Y, N, M);
+ spx_ifft(st->fft_table, st->Y, st->y);
+
+#ifdef TWO_PATH
+ mdf_sub(xx, ee, yy, framesize);
+ Dbf = 10+mdf_inner_prod(xx, xx, framesize);
+#endif
+
+ mdf_sub(xx, st->input, yy, framesize);
+ See = mdf_inner_prod(xx, xx, framesize);
+
+#ifndef TWO_PATH
+ Sff = See;
+#else
+ See = mdf_update_foreground(st,Dbf,Sff,See);
+#endif
+
+
+ mdf_compute_error_signal(st, in, out, framesize);
+ Sey = mdf_inner_prod(ee, yy, framesize);
+ Syy = mdf_inner_prod(yy, yy, framesize);
+ Sdd = mdf_inner_prod(st->input, st->input, framesize);
+
+ if ( mdf_check(st,out,Syy,Sxx,See,Sff,Sdd) >= 50 )
+ { speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
+ speex_echo_state_reset(st);
+ return;
+ }
+
+ See = MAX32(See, SHR32(MULT16_16(N, 100),6));
+ spx_fft(st->fft_table, st->e, st->E);
+ memset(st->y, 0, framesize * sizeof(spx_word16_t));
+ spx_fft(st->fft_table, st->y, st->Y);
+ power_spectrum(st->E, st->Rf, N);
+ power_spectrum(st->Y, st->Yf, N);
+ power_spectrum(st->X, st->Xf, N);
+
+ mdf_smooth(st->power,st->Xf,framesize, M);
+ mdf_compute_filtered_spectra_crosscorrelations(st,Syy,See,framesize);
+ RER = mdf_compute_RER(See,Syy,Sey,Sxx,st->leak_estimate);
+ mdf_adapt(st, RER, Syy, See, Sxx);
+
+ if ( st->adapted )
+ { register spx_word16_t * restrict last_yy = st->last_y + framesize;
+
+ memcpy(st->last_y, last_yy, framesize * sizeof(spx_word16_t));
+ mdf_sub_int(last_yy, in, out, framesize);
+
+ }
+}
+
+
+
diff --git a/tmv/misc_tm.h b/tmv/misc_tm.h
new file mode 100644
index 0000000..4d6cea7
--- /dev/null
+++ b/tmv/misc_tm.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file misc_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <tmml.h>
+
+
+#if TM_PROFILE
+int __profile_begin;
+int __profile_end;
+#endif
+
+#define OVERRIDE_SPEEX_ALLOC
+void *speex_alloc (int size)
+{
+ void *ptr;
+
+ if ( tmmlMalloc(0, size, (pVoid*)&ptr, tmmlMallocCacheAligned | tmmlMallocCleared) != TM_OK )
+ { return NULL;
+ }
+
+ return ptr;
+}
+
+
+#define OVERRIDE_SPEEX_ALLOC_SCRATCH
+void *speex_alloc_scratch (int size)
+{
+ void *ptr;
+
+ if ( tmmlMalloc(0, size, (pVoid*)&ptr, tmmlMallocCacheAligned | tmmlMallocCleared) != TM_OK )
+ { return NULL;
+ }
+
+ return ptr;
+}
+
+
+#define OVERRIDE_SPEEX_REALLOC
+void *speex_realloc (void *ptr, int size)
+{
+ if ( tmmlRealloc(0, size, (pVoid)ptr, (pVoid*)&ptr, tmmlMallocCacheAligned | tmmlMallocCleared) != TM_OK )
+ { return NULL;
+ }
+
+ return ptr;
+}
+
+
+#define OVERRIDE_SPEEX_FREE
+void speex_free (void *ptr)
+{
+ tmmlFree(ptr);
+}
+
+
+#define OVERRIDE_SPEEX_FREE_SCRATCH
+void speex_free_scratch (void *ptr)
+{
+ tmmlFree(ptr);
+}
+
diff --git a/tmv/preprocess_tm.h b/tmv/preprocess_tm.h
new file mode 100644
index 0000000..f903b73
--- /dev/null
+++ b/tmv/preprocess_tm.h
@@ -0,0 +1,1135 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file preprocess_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+#define OVERRIDE_PREPROCESS_ANALYSIS
+static void preprocess_analysis(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
+{
+ register int i, j, framesize = st->frame_size;
+ register int N = st->ps_size;
+ register int N3 = 2*N - framesize;
+ register int N4 = framesize - N3;
+ register int * restrict ps = st->ps;
+ register int * restrict frame;
+ register int * restrict inbuf;
+ register int * restrict ptr;
+ register int max_val;
+
+ frame = (int*)(st->frame);
+ inbuf = (int*)(st->inbuf);
+ ptr = (int*)(st->frame+N3);
+
+ TMDEBUG_ALIGNMEM(x);
+ TMDEBUG_ALIGNMEM(frame);
+ TMDEBUG_ALIGNMEM(inbuf);
+
+ PREPROCESSANAYLSIS_START();
+
+ N3 >>= 1;
+ framesize >>= 1;
+ max_val = 0;
+
+ for ( i=0,j=0 ; i<N3 ; i+=2,j+=8 )
+ { register int r1, r2;
+
+ r1 = ld32x(inbuf,i);
+ r2 = ld32x(inbuf,i+1);
+
+ st32d(j, frame, r1);
+ st32d(j+4, frame, r2);
+ }
+
+ for ( i=0,j=0 ; i<framesize ; i+=2,j+=8 )
+ { register int r1, r2;
+
+ r1 = ld32x(x, i);
+ r2 = ld32x(x, i+1);
+
+ st32d(j, ptr, r1);
+ st32d(j+4,ptr, r2);
+ }
+
+ for ( i=0,j=0,ptr=(int*)(x+N4) ; i<N3 ; i+=2,j+=8 )
+ { register int r1, r2;
+
+ r1 = ld32x(ptr, i);
+ r2 = ld32x(ptr, i+1);
+
+ st32d(j, inbuf, r1);
+ st32d(j+4,inbuf, r2);
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0,ptr=(int*)st->window ; i<N ; ++i,j+=4 )
+ { register int f10, w10, r0, r1;
+
+ f10 = ld32x(frame, i);
+ w10 = ld32x(ptr, i);
+
+ r0 = (sex16(f10) * sex16(w10)) >> 15;
+ r1 = (asri(16,f10) * asri(16,w10)) >> 15;
+
+ max_val = imax(iabs(sex16(r0)), max_val);
+ max_val = imax(iabs(sex16(r1)), max_val);
+
+ r0 = pack16lsb(r1, r0);
+ st32d(j, frame, r0);
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ max_val = 14 - spx_ilog2(max_val);
+ st->frame_shift = max_val;
+
+ if ( max_val != 0 )
+ {
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<N ; ++i,j+=4 )
+ { register int f10;
+
+ f10 = ld32x(frame, i);
+ f10 = dualasl(f10, max_val);
+ st32d(j, frame, f10);
+
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ spx_fft(st->fft_lookup, st->frame, st->ft);
+ power_spectrum(st->ft, ps, N << 1);
+
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,ptr=(int*)st->ps,max_val<<=1,j=((1<<((max_val))>>1)) ;i<N ; ++i )
+ {
+ ps[i] = (ps[i] + j) >> max_val;
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ filterbank_compute_bank32(st->bank, ps, ps+N);
+
+ PREPROCESSANAYLSIS_STOP();
+}
+
+#define _MULT16_32_Q15(a,b,c) ADD32(MULT16_16((a),(b)), SHR(MULT16_16((a),(c)),15))
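+/* _MULT16_32_Q15(a,b,c) is MULT16_32_Q15(a,x) for a 32-bit x that the code
+ * below has split as x = (b<<15) + c, with b = x>>15 and c = x & 0x7fff:
+ * it evaluates a*b + ((a*c)>>15), which equals (a*x)>>15, so the loop can
+ * keep the split halves in the *_msb / *_lsb register pairs.
+ */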
+
+#define OVERRIDE_UPDATE_NOISE_PROB
+static void update_noise_prob(SpeexPreprocessState * restrict st)
+{
+ register int i;
+ register int min_range, nb_adapt;
+ register int N = st->ps_size;
+ register int * restrict Smin = (int*)st->Smin;
+ register int * restrict Stmp = (int*)st->Stmp;
+ register int * restrict S = (int*)st->S;
+
+ UPDATENOISEPROB_START();
+
+ {
+ register int psi_lsb, psi_msb, ips_lsb, ips_msb, psii_lsb, psii_msb;
+ register int psiii_lsb, psiii_msb;
+ register int q8, q05, q2, q1;
+ register int *ps = (int*)st->ps;
+ register int si_lsb, si_msb, sii_lsb, sii_msb;
+
+ q8 = QCONST16(.8f,15);
+ q05 = QCONST16(.05f,15);
+ q2 = QCONST16(.2f,15);
+ q1 = QCONST16(.1f,15);
+
+ ips_lsb = ps[0];
+ psi_lsb = ps[1];
+ si_lsb = S[0];
+ ips_msb = ips_lsb >> 15;
+ psi_msb = psi_lsb >> 15;
+ si_msb = si_lsb >> 15;
+
+ ips_lsb &= 0x00007fff;
+ psi_lsb &= 0x00007fff;
+ si_lsb &= 0x00007fff;
+
+ S[0] = _MULT16_32_Q15(q8,si_msb,si_lsb) + _MULT16_32_Q15(q2,ips_msb,ips_lsb);
+
+ for ( i=1 ; i<N-1 ; i+=2 )
+ {
+ psii_lsb = ps[i+1];
+ si_lsb = S[i];
+
+ psii_msb = psii_lsb >> 15;
+ si_msb = si_lsb >> 15;
+ si_lsb &= 0x00007fff;
+ psii_lsb &= 0x00007fff;
+
+ S[i]= _MULT16_32_Q15(q8,si_msb,si_lsb) +
+ _MULT16_32_Q15(q05,ips_msb,ips_lsb) +
+ _MULT16_32_Q15(q1,psi_msb,psi_lsb) +
+ _MULT16_32_Q15(q05,psii_msb,psii_lsb);
+
+ psiii_lsb = ps[i+2];
+ sii_lsb = S[i+1];
+
+ sii_msb = sii_lsb >> 15;
+ psiii_msb= psiii_lsb >> 15;
+ sii_lsb &= 0x00007fff;
+ psiii_lsb&= 0x00007fff;
+
+ S[i+1]= _MULT16_32_Q15(q8,sii_msb,sii_lsb) +
+ _MULT16_32_Q15(q05,psi_msb,psi_lsb) +
+ _MULT16_32_Q15(q1,psii_msb,psii_lsb) +
+ _MULT16_32_Q15(q05,psiii_msb,psiii_lsb);
+
+ ips_lsb = psii_lsb;
+ ips_msb = psii_msb;
+ psi_lsb = psiii_lsb;
+ psi_msb = psiii_msb;
+ }
+
+ S[N-1] = MULT16_32_Q15(q8,S[N-1]) + MULT16_32_Q15(q2,ps[N-1]);
+ }
+
+ nb_adapt = st->nb_adapt;
+
+ if ( nb_adapt==1 )
+ { for ( i=0 ; i<N ; ++i )
+ Smin[i] = Stmp[i] = 0;
+
+ }
+
+ min_range = mux(nb_adapt < 100, 15,
+ mux(nb_adapt < 1000, 50,
+ mux(nb_adapt < 10000, 150, 300)));
+
+ if ( st->min_count > min_range )
+ {
+ st->min_count = 0;
+
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ { register int si, stmpi;
+
+ si = S[i];
+ stmpi = Stmp[i];
+
+ Smin[i] = imin(stmpi,si);
+ Stmp[i] = si;
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ } else
+ {
+
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ { register int si, stmpi, smini;
+
+ si = S[i];
+ stmpi = Stmp[i];
+ smini = Smin[i];
+
+ Smin[i] = imin(smini,si);
+ Stmp[i] = imin(stmpi,si);
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+
+ {
+ register int q4;
+ register int * restrict update_prob = (int*)st->update_prob;
+
+ q4 = QCONST16(.4f,15);
+
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ { register int si;
+ register int smini;
+
+ si = S[i];
+ smini = Smin[i];
+ update_prob[i] = mux(MULT16_32_Q15(q4,si) > ADD32(smini,20), 1, 0);
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ UPDATENOISEPROB_STOP();
+}
+
+#else
+
+#define OVERRIDE_PREPROCESS_ANALYSIS
+static void preprocess_analysis(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
+{
+ register int i;
+ register int framesize = st->frame_size;
+ register int N = st->ps_size;
+ register int N3 = 2*N - framesize;
+ register int N4 = framesize - N3;
+ register float * restrict ps = st->ps;
+ register float * restrict frame = st->frame;
+ register float * restrict inbuf = st->inbuf;
+
+ PREPROCESSANAYLSIS_START();
+
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N3 ; ++i )
+ {
+ frame[i] = inbuf[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<framesize ; ++i )
+ { frame[N3+i] = x[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,x+=N4 ; i<N3 ; ++i )
+ { inbuf[i] = x[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ inbuf = st->window;
+
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<2*N ; ++i )
+ {
+ frame[i] = frame[i] * inbuf[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ spx_fft(st->fft_lookup, frame, st->ft);
+ power_spectrum(st->ft, ps, N << 1);
+ filterbank_compute_bank32(st->bank, ps, ps+N);
+
+ PREPROCESSANAYLSIS_STOP();
+}
+
+
+#define OVERRIDE_UPDATE_NOISE_PROB
+static void update_noise_prob(SpeexPreprocessState * restrict st)
+{
+
+ register float * restrict S = st->S;
+ register float * restrict ps = st->ps;
+ register int N = st->ps_size;
+ register int min_range;
+ register int i;
+ register int nb_adapt;
+ register float * restrict Smin = st->Smin;
+ register float * restrict Stmp = st->Stmp;
+
+ UPDATENOISEPROB_START();
+
+ {
+ register float ips, psi;
+
+ ips = ps[0];
+ psi = ps[1];
+
+ S[0] = .8f * S[0] + .2f * ips;
+
+ for ( i=1 ; i<N-1 ; i+=2 )
+ {
+ register float psii, psiii;
+
+ psii = ps[i+1];
+ psiii = ps[i+2];
+ S[i] = .8f * S[i] + .05f * ips + .1f * psi + .05f * psii;
+ S[i+1] = .8f * S[i+1] + .05f * psi + .1f * psii + .05f * psiii;
+ ips = psii;
+ psi = psiii;
+ }
+
+ S[N-1] = .8f * S[N-1] + .2f * ps[N-1];
+ }
+
+ nb_adapt = st->nb_adapt;
+
+ if ( nb_adapt==1 )
+ {
+ for (i=0;i<N;i++)
+ Smin[i] = st->Stmp[i] = 0;
+ }
+
+ min_range = mux(nb_adapt < 100, 15,
+ mux(nb_adapt < 1000, 50,
+ mux(nb_adapt < 10000, 150, 300)));
+
+
+ if ( st->min_count > min_range )
+ {
+ st->min_count = 0;
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ register float stmpi, si;
+
+ stmpi = Stmp[i];
+ si = S[i];
+
+ Smin[i] = fmin(stmpi,si);
+ Stmp[i] = si;
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ } else
+ {
+ register float * restrict Smin = st->Smin;
+
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N ; ++i )
+ {
+ register float stmpi, si, smini;
+
+ stmpi = Stmp[i];
+ si = S[i];
+ smini = Smin[i];
+
+ Smin[i] = fmin(smini,si);
+ Stmp[i] = fmin(stmpi,si);
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ {
+ register int * restrict update_prob = (int*)st->update_prob;
+
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { register float si;
+ register float smini;
+
+ si = S[i];
+ smini = Smin[i];
+ update_prob[i] = mux( (.4 * si) > (smini + 20.f), 1, 0);
+
+ }
+#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ UPDATENOISEPROB_STOP();
+}
+
+
+#define OVERRIDE_COMPUTE_GAIN_FLOOR
+static void compute_gain_floor(
+ int noise_suppress,
+ int effective_echo_suppress,
+ float * restrict noise,
+ float * restrict echo,
+ float * gain_floor,
+ int len
+)
+{
+ register int i;
+ register float echo_floor;
+ register float noise_floor;
+
+ COMPUTEGAINFLOOR_START();
+
+ noise_floor = exp(.2302585f*noise_suppress);
+ echo_floor = exp(.2302585f*effective_echo_suppress);
+
+#if (TM_UNROLL && TM_UNROLL_COMPUTEGAINFLOOR)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<len;i++)
+ { register float noisei, echoi;
+
+ noisei = noise[i];
+ echoi = echo[i];
+
+ gain_floor[i] = FRAC_SCALING * sqrt(noise_floor * noisei + echo_floor * echoi) / sqrt(1+noisei+echoi);
+
+ }
+#if (TM_UNROLL && TM_UNROLL_COMPUTEGAINFLOOR)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ COMPUTEGAINFLOOR_STOP();
+}
+
+#endif
+
+static inline spx_word32_t hypergeom_gain(spx_word32_t xx);
+static inline spx_word16_t qcurve(spx_word16_t x);
+static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len);
+void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
+
+#ifndef FIXED_POINT
+static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft);
+#endif
+
+void preprocess_residue_echo(
+ SpeexPreprocessState * restrict st,
+ int N,
+ int NM
+)
+{
+ if (st->echo_state)
+ {
+ register spx_word32_t * restrict r_echo = st->residual_echo;
+ register spx_word32_t * restrict e_noise = st->echo_noise;
+ register int i;
+
+#ifndef FIXED_POINT
+ register spx_word32_t r;
+#endif
+
+ speex_echo_get_residual(st->echo_state, r_echo, N);
+
+#ifndef FIXED_POINT
+ r = r_echo[0];
+ if (!(r >=0 && r < N*1e9f) )
+ {
+ memset(r_echo, 0, N * sizeof(spx_word32_t));
+ }
+#endif
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { register spx_word32_t eni = e_noise[i];
+ e_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),eni), r_echo[i]);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ filterbank_compute_bank32(st->bank, e_noise, e_noise+N);
+
+ } else
+ { memset(st->echo_noise, 0, (NM) * sizeof(spx_word32_t));
+ }
+}
+
+void preprocess_update_noise(
+ SpeexPreprocessState * restrict st,
+ spx_word32_t * restrict ps,
+ int N
+)
+{
+ register spx_word16_t beta, beta_1;
+ register int * restrict up = st->update_prob;
+ register spx_word32_t * restrict noise = st->noise;
+ register int i;
+
+ beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
+ beta_1 = Q15_ONE-beta;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ { register spx_word32_t ni = noise[i];
+ register spx_word32_t psi = ps[i];
+
+ if ( !up[i] || psi < PSHR32(ni, NOISE_SHIFT) )
+ { noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,ni) +
+ MULT16_32_Q15(beta,SHL32(psi,NOISE_SHIFT)));
+ }
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ filterbank_compute_bank32(st->bank, noise, noise+N);
+}
+
+void preprocess_compute_SNR(
+ SpeexPreprocessState * restrict st,
+ spx_word32_t * restrict ps,
+ int NM
+)
+{
+ register spx_word32_t * restrict noise = st->noise;
+ register spx_word32_t * restrict echo = st->echo_noise;
+ register spx_word32_t * restrict reverb = st->reverb_estimate;
+ register spx_word16_t * restrict post = st->post;
+ register spx_word32_t * restrict old_ps = st->old_ps;
+ register spx_word16_t * restrict prior = st->prior;
+ register int i;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<NM ; i++)
+ {
+ register spx_word16_t gamma;
+ register spx_word32_t tot_noise;
+ register spx_word16_t posti;
+ register spx_word32_t opsi;
+ register spx_word16_t priori;
+
+ tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(noise[i],NOISE_SHIFT)), echo[i]) , reverb[i]);
+
+ posti = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
+ posti = MIN16(posti, QCONST16(100.f,SNR_SHIFT));
+ post[i] = posti;
+
+ opsi = old_ps[i];
+
+ gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(opsi,ADD32(opsi,tot_noise))));
+
+ priori = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,posti)), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(opsi,tot_noise))), 15));
+ prior[i]=MIN16(priori, QCONST16(100.f,SNR_SHIFT));
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+spx_word32_t preprocess_smooth_SNR(
+ SpeexPreprocessState * restrict st,
+ int N,
+ int NM
+)
+{
+ register spx_word16_t * restrict zeta = st->zeta;
+ register spx_word16_t * restrict prior = st->prior;
+ register spx_word32_t Zframe;
+ register spx_word16_t iprior, priori;
+ register int _N = N-1;
+ register int i;
+
+ iprior = prior[0];
+ priori = prior[1];
+ zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),zeta[0]), MULT16_16(QCONST16(.3f,15),iprior)),15);
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=1 ; i<_N ; i++)
+ { register spx_word16_t zetai = zeta[i];
+ register spx_word16_t priorii = prior[i+1];
+
+ zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),zetai), MULT16_16(QCONST16(.15f,15),priori)),
+ MULT16_16(QCONST16(.075f,15),iprior)), MULT16_16(QCONST16(.075f,15),priorii)),15);
+
+ iprior = priori;
+ priori = priorii;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ for (i=_N; i<NM ; i++)
+ { register spx_word16_t zetai = zeta[i];
+
+ priori = prior[i];
+ zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),zetai), MULT16_16(QCONST16(.3f,15),priori)),15);
+ }
+
+ Zframe = 0;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=N ; i<NM ; i++ )
+ { Zframe = ADD32(Zframe, EXTEND32(zeta[i]));
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ return Zframe;
+}
+
+void preprocess_compute_emgain(
+ SpeexPreprocessState * restrict st,
+ spx_word32_t * restrict ps,
+ spx_word16_t Pframe,
+ int NM
+)
+{
+ register spx_word16_t * restrict zeta = st->zeta;
+ register spx_word16_t * restrict prior = st->prior;
+ register spx_word16_t * restrict gain = st->gain;
+ register spx_word32_t * restrict old_ps = st->old_ps;
+ register spx_word16_t * restrict post = st->post;
+ register spx_word16_t * restrict gain2 = st->gain2;
+ register int i;
+ register int N=st->ps_size;
+
+ for ( i=N ; i<NM ; ++i )
+ {
+ register spx_word32_t theta;
+ register spx_word32_t MM;
+ register spx_word16_t prior_ratio;
+ register spx_word16_t P1;
+ register spx_word16_t q;
+
+#ifdef FIXED_POINT
+ register spx_word16_t tmp;
+#endif
+ register spx_word16_t priori = prior[i];
+
+ prior_ratio = PDIV32_16(SHL32(EXTEND32(priori), 15), ADD16(priori, SHL32(1,SNR_SHIFT)));
+ theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(post[i]),EXPIN_SHIFT-SNR_SHIFT));
+
+ MM = hypergeom_gain(theta);
+ gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
+ old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(gain[i])),ps[i]);
+
+ P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (zeta[i]));
+ q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
+
+#ifdef FIXED_POINT
+ theta = MIN32(theta, EXTEND32(32767));
+ tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+priori),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
+ tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp);
+ tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
+ gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
+#else
+ gain2[i]=1/(1.f + (q/(1.f-q))*(1+priori)*exp(-theta));
+#endif
+ }
+
+ filterbank_compute_psd16(st->bank,gain2+N, gain2);
+ filterbank_compute_psd16(st->bank,gain+N, gain);
+}
+
+void preprocess_compute_linear_gain(
+ SpeexPreprocessState * restrict st,
+ spx_word32_t * restrict ps,
+ int N
+)
+{
+ register spx_word16_t * restrict gain_floor = st->gain_floor;
+ register spx_word16_t * restrict prior = st->prior;
+ register spx_word16_t * restrict gain = st->gain;
+ register spx_word32_t * restrict old_ps = st->old_ps;
+ register spx_word16_t * restrict post = st->post;
+ register spx_word16_t * restrict gain2 = st->gain2;
+ register int i;
+
+ filterbank_compute_psd16(st->bank,gain_floor+N,gain_floor);
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N;i++)
+ {
+ register spx_word32_t MM;
+ register spx_word32_t theta;
+ register spx_word16_t prior_ratio;
+ register spx_word16_t tmp;
+ register spx_word16_t p;
+ register spx_word16_t g;
+ register spx_word16_t gfi = gain_floor[i];
+
+ prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(prior[i], SHL32(1,SNR_SHIFT)));
+ theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(post[i]),EXPIN_SHIFT-SNR_SHIFT));
+ MM = hypergeom_gain(theta);
+
+ g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
+ p = gain2[i];
+
+ g = VMUX( MULT16_16_Q15(QCONST16(.333f,15),g) > gain[i], MULT16_16(3,gain[i]), g);
+
+ old_ps[i]= MULT16_32_P15(QCONST16(.2f,15),old_ps[i]) +
+ MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(g)),ps[i]);
+
+ g = VMUX( g < gfi, gfi, g );
+ gain[i] = g;
+
+ tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(g),15))) +
+ MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(gfi),15)));
+
+ gain2[i]=SQR16_Q15(tmp);
+
+ /* Use this if you want a log-domain MMSE estimator instead */
+ /* gain2[i] = pow(g, p) * pow(gfi,1.f-p);*/
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+
+#if 0
+void preprocess_compute_bark_gain(
+ SpeexPreprocessState * restrict st,
+ int N,
+ int NM
+)
+{
+ register spx_word16_t * restrict gain_floor = st->gain_floor;
+ register spx_word16_t * restrict gain = st->gain;
+ register spx_word16_t * restrict gain2 = st->gain2;
+ register int i;
+
+ for (i=N;i<NM;i++)
+ {
+ register spx_word16_t tmp;
+ register spx_word16_t p = gain2[i];
+ register spx_word16_t gaini;
+ register spx_word16_t gfi = gain_floor[i];
+
+ gaini = MAX16(gain[i], gfi);
+
+ gain[i] = gaini;
+
+ tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(gaini),15))) +
+ MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(gfi),15)));
+
+ gain2[i]=SQR16_Q15(tmp);
+ }
+
+ filterbank_compute_psd16(st->bank,gain2+N, gain2);
+}
+#endif
+
+void preprocess_apply_gain(
+ SpeexPreprocessState * restrict st,
+ int N
+)
+{
+ register spx_word16_t * restrict ft = st->ft;
+ register spx_word16_t * restrict gain2 = st->gain2;
+ register int j, i;
+
+ ft[0] = MULT16_16_P15(gain2[0],ft[0]);
+
+ for (i=1,j=1; i<N ; i++,j+=2)
+ {
+ register spx_word16_t gain2i = gain2[i];
+ register spx_word16_t ftj = ft[j];
+ register spx_word16_t ftjj = ft[j+1];
+
+ ft[j] = MULT16_16_P15(gain2i,ftj);
+ ft[j+1] = MULT16_16_P15(gain2i,ftjj);
+ }
+
+ ft[(N<<1)-1] = MULT16_16_P15(gain2[N-1],ft[(N<<1)-1]);
+}
+
+#ifdef FIXED_POINT
+void preprocess_scale(
+ SpeexPreprocessState * restrict st,
+ int N
+)
+{
+ register spx_word16_t * restrict frame = st->frame;
+ register int shift = st->frame_shift;
+ register int i;
+ register int N2 = N << 1;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N2 ;i++)
+ { register spx_word16_t framei = frame[i];
+
+ frame[i] = PSHR16(framei,shift);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+#else
+
+void preprocess_apply_agc(
+ SpeexPreprocessState * restrict st,
+ int N
+)
+{
+ register spx_word16_t max_sample=0;
+ register spx_word16_t * restrict frame = st->frame;
+ register int i;
+ register int N2 = N << 1;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for (i=0;i<N2;i++)
+ { register spx_word16_t framei = VABS(frame[i]);
+
+ max_sample = VMUX( framei > max_sample, framei, max_sample);
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ if ( max_sample > 28000.f )
+ {
+ float damp = 28000.f/max_sample;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i< N2 ; i++ )
+ { frame[i] *= damp;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+}
+#endif
+
+
+void preprocess_update(
+ SpeexPreprocessState * restrict st,
+ spx_int16_t * restrict x,
+ int N
+)
+{
+ register spx_word16_t * restrict frame = st->frame;
+ register spx_word16_t * restrict window = st->window;
+ register spx_word16_t * restrict outbuf = st->outbuf;
+ register int framesize = st->frame_size;
+ register int N2 = N << 1;
+ register int N3 = N2 - framesize;
+ register int N4 = (framesize) - N3;
+ register int i;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<N2 ; i++)
+ { register spx_word16_t fi = frame[i];
+ register spx_word16_t wi = window[i];
+
+ frame[i] = MULT16_16_Q15(fi, wi);
+ }
+ for (i=0;i<N3;i++)
+ { x[i] = outbuf[i] + frame[i];
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+ for ( i=0;i<N4;i++)
+ { x[N3+i] = frame[N3+i];
+ }
+
+ memcpy(outbuf, frame+framesize, (N3) * sizeof(spx_word16_t));
+}
+
+#define OVERRIDE_SPEEX_PREPROCESS_RUN
+int speex_preprocess_run(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
+{
+ register int i, N, M, NM;
+ register spx_word32_t * restrict ps=st->ps;
+ register spx_word32_t Zframe;
+ register spx_word16_t Pframe;
+
+ st->nb_adapt++;
+ st->min_count++;
+ N = st->ps_size;
+ M = st->nbands;
+ NM = N + M;
+
+ preprocess_residue_echo(st, N, NM);
+ preprocess_analysis(st, x);
+ update_noise_prob(st);
+ preprocess_update_noise(st, ps, N);
+
+ if ( st->nb_adapt == 1 )
+ { memcpy(st->old_ps, ps, (NM) * sizeof(spx_word32_t));
+ }
+
+ preprocess_compute_SNR(st, ps, NM);
+ Zframe = preprocess_smooth_SNR(st, N, NM);
+
+
+ {
+ register spx_word16_t effective_echo_suppress;
+
+ Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,M)));
+ effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress),
+ MULT16_16(Pframe, st->echo_suppress_active)),15));
+ compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
+
+ }
+
+ preprocess_compute_emgain(st, ps, Pframe, NM);
+ preprocess_compute_linear_gain(st, ps, N);
+
+
+ if (!st->denoise_enabled)
+ {
+ register spx_word16_t * restrict gain2 = st->gain2;
+
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0 ; i<NM ; i++ )
+ { gain2[i] = Q15_ONE;
+ }
+#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+ }
+
+ preprocess_apply_gain(st, N);
+
+#ifndef FIXED_POINT
+ if (st->agc_enabled)
+ { speex_compute_agc(st, Pframe, st->ft);
+ }
+#endif
+
+
+ spx_ifft(st->fft_lookup, st->ft, st->frame);
+
+#ifdef FIXED_POINT
+ preprocess_scale(st, N);
+#endif
+
+#ifndef FIXED_POINT
+ if ( st->agc_enabled )
+ { preprocess_apply_agc(st, N);
+ }
+#endif
+
+ preprocess_update(st, x, N);
+
+ if ( st->vad_enabled )
+ {
+ if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
+ { st->was_speech=1;
+ return 1;
+
+ } else
+ { st->was_speech=0;
+ return 0;
+ }
+ } else
+ { return 1;
+ }
+}
diff --git a/tmv/profile_tm.h b/tmv/profile_tm.h
new file mode 100644
index 0000000..8588e26
--- /dev/null
+++ b/tmv/profile_tm.h
@@ -0,0 +1,407 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file profile_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+* @remarks This file provides some facilities for measuring clock cycles.
+* Use it if you are unable to compile with the TriMedia profiling options.
+* A usage sketch follows the PROFILE_START/PROFILE_STOP definitions below.
+*/
+
+extern int __profile_begin;
+extern int __profile_end;
+
+#if TM_PROFILE
+#define PROFILE_START() \
+ { \
+ __profile_begin = cycles(); \
+ } \
+
+#define PROFILE_STOP() \
+ { \
+ __profile_end = cycles(); \
+ printf("%s\t%d\n", __FUNCTION__, __profile_end - __profile_begin); \
+ } \
+
+#else
+#define PROFILE_START()
+#define PROFILE_STOP()
+#endif
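+
+/*
+ * Usage sketch: an instrumented routine brackets its body with the matching
+ * START/STOP pair; each pair expands to PROFILE_START()/PROFILE_STOP() only
+ * when the corresponding TM_PROFILE_* switch is set, and to nothing
+ * otherwise, so a disabled switch adds no overhead. For example,
+ * mdf_inner_prod() in mdf_tm.h does:
+ *
+ *     MDFINNERPROD_START();
+ *     ... accumulate the inner product ...
+ *     MDFINNERPROD_STOP();
+ *     return sum;
+ */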
+
+#if TM_PROFILE_SPXAUTOCORR
+#define _SPX_AUTOCORR_START() PROFILE_START()
+#define _SPX_AUTOCORR_STOP() PROFILE_STOP()
+#else
+#define _SPX_AUTOCORR_START()
+#define _SPX_AUTOCORR_STOP()
+#endif
+
+#if TM_PROFILE_INNERPROD
+#define INNERPROD_START() PROFILE_START()
+#define INNERPROD_STOP() PROFILE_STOP()
+#else
+#define INNERPROD_START()
+#define INNERPROD_STOP()
+#endif
+
+#if TM_PROFILE_PITCHXCORR
+#define PITCHXCORR_START() PROFILE_START()
+#define PITCHXCORR_STOP() PROFILE_STOP()
+#else
+#define PITCHXCORR_START()
+#define PITCHXCORR_STOP()
+#endif
+
+#if TM_PROFILE_COMPUTEPITCHERROR
+#define COMPUTEPITCHERROR_START() PROFILE_START()
+#define COMPUTEPITCHERROR_STOP() PROFILE_STOP()
+#else
+#define COMPUTEPITCHERROR_START()
+#define COMPUTEPITCHERROR_STOP()
+#endif
+
+#if TM_PROFILE_PITCHGAINSEARCH3TAPVQ
+#define PITCHGAINSEARCH3TAPVQ_START() PROFILE_START()
+#define PITCHGAINSEARCH3TAPVQ_STOP() PROFILE_STOP()
+#else
+#define PITCHGAINSEARCH3TAPVQ_START()
+#define PITCHGAINSEARCH3TAPVQ_STOP()
+#endif
+
+#if TM_PROFILE_OPENLOOPNBESTPITCH
+#define OPENLOOPNBESTPITCH_START() PROFILE_START()
+#define OPENLOOPNBESTPITCH_STOP() PROFILE_STOP()
+#else
+#define OPENLOOPNBESTPITCH_START()
+#define OPENLOOPNBESTPITCH_STOP()
+#endif
+
+
+#if TM_PROFILE_LSP_INTERPOLATE
+#define LSPINTERPOLATE_START() PROFILE_START()
+#define LSPINTERPOLATE_STOP() PROFILE_STOP()
+#else
+#define LSPINTERPOLATE_START()
+#define LSPINTERPOLATE_STOP()
+#endif
+
+#if TM_PROFILE_CHEBPOLYEVA
+#define CHEBPOLYEVA_START() PROFILE_START()
+#define CHEBPOLYEVA_STOP() PROFILE_STOP()
+#else
+#define CHEBPOLYEVA_START()
+#define CHEBPOLYEVA_STOP()
+#endif
+
+
+#if TM_PROFILE_COMPUTEQUANTWEIGHTS
+#define COMPUTEQUANTWEIGHTS_START() PROFILE_START()
+#define COMPUTEQUANTWEIGHTS_STOP() PROFILE_STOP()
+#else
+#define COMPUTEQUANTWEIGHTS_START()
+#define COMPUTEQUANTWEIGHTS_STOP()
+#endif
+
+#if TM_PROFILE_LSPQUANT
+#define LSPQUANT_START() PROFILE_START()
+#define LSPQUANT_STOP() PROFILE_STOP()
+#else
+#define LSPQUANT_START()
+#define LSPQUANT_STOP()
+#endif
+
+#if TM_PROFILE_LSPWEIGHTQUANT
+#define LSPWEIGHTQUANT_START() PROFILE_START()
+#define LSPWEIGHTQUANT_STOP() PROFILE_STOP()
+#else
+#define LSPWEIGHTQUANT_START()
+#define LSPWEIGHTQUANT_STOP()
+#endif
+
+#if TM_PROFILE_FIRMEM16
+#define FIRMEM16_START() PROFILE_START()
+#define FIRMEM16_STOP() PROFILE_STOP()
+#else
+#define FIRMEM16_START()
+#define FIRMEM16_STOP()
+#endif
+
+#if TM_PROFILE_IIRMEM16
+#define IIRMEM16_START() PROFILE_START()
+#define IIRMEM16_STOP() PROFILE_STOP()
+#else
+#define IIRMEM16_START()
+#define IIRMEM16_STOP()
+#endif
+
+#if TM_PROFILE_FILTERMEM16
+#define FILTERMEM16_START() PROFILE_START()
+#define FILTERMEM16_STOP() PROFILE_STOP()
+#else
+#define FILTERMEM16_START()
+#define FILTERMEM16_STOP()
+#endif
+
+#if TM_PROFILE_COMPUTERMS16
+#define COMPUTERMS16_START() PROFILE_START()
+#define COMPUTERMS16_STOP() PROFILE_STOP()
+#else
+#define COMPUTERMS16_START()
+#define COMPUTERMS16_STOP()
+#endif
+
+#if TM_PROFILE_NORMALIZE16
+#define NORMALIZE16_START() PROFILE_START()
+#define NORMALIZE16_STOP() PROFILE_STOP()
+#else
+#define NORMALIZE16_START()
+#define NORMALIZE16_STOP()
+#endif
+
+#if TM_PROFILE_BWLPC
+#define BWLPC_START() PROFILE_START()
+#define BWLPC_STOP() PROFILE_STOP()
+#else
+#define BWLPC_START()
+#define BWLPC_STOP()
+#endif
+
+#if TM_PROFILE_HIGHPASS
+#define HIGHPASS_START() PROFILE_START()
+#define HIGHPASS_STOP() PROFILE_STOP()
+#else
+#define HIGHPASS_START()
+#define HIGHPASS_STOP()
+#endif
+
+#if TM_PROFILE_SIGNALMUL
+#define SIGNALMUL_START() PROFILE_START()
+#define SIGNALMUL_STOP() PROFILE_STOP()
+#else
+#define SIGNALMUL_START()
+#define SIGNALMUL_STOP()
+#endif
+
+#if TM_PROFILE_SIGNALDIV
+#define SIGNALDIV_START() PROFILE_START()
+#define SIGNALDIV_STOP() PROFILE_STOP()
+#else
+#define SIGNALDIV_START()
+#define SIGNALDIV_STOP()
+#endif
+
+#if TM_PROFILE_COMPUTEIMPULSERESPONSE
+#define COMPUTEIMPULSERESPONSE_START() PROFILE_START()
+#define COMPUTEIMPULSERESPONSE_STOP() PROFILE_STOP()
+#else
+#define COMPUTEIMPULSERESPONSE_START()
+#define COMPUTEIMPULSERESPONSE_STOP()
+#endif
+
+#if TM_PROFILE_COMPUTEWEIGHTEDCODEBOOK
+#define COMPUTEWEIGHTEDCODEBOOK_START() PROFILE_START()
+#define COMPUTEWEIGHTEDCODEBOOK_STOP() PROFILE_STOP()
+#else
+#define COMPUTEWEIGHTEDCODEBOOK_START()
+#define COMPUTEWEIGHTEDCODEBOOK_STOP()
+#endif
+
+#if TM_PROFILE_TARGETUPDATE
+#define TARGETUPDATE_START() PROFILE_START()
+#define TARGETUPDATE_STOP() PROFILE_STOP()
+#else
+#define TARGETUPDATE_START()
+#define TARGETUPDATE_STOP()
+#endif
+
+
+#if TM_PROFILE_VQNBEST
+#define VQNBEST_START() PROFILE_START()
+#define VQNBEST_STOP() PROFILE_STOP()
+#else
+#define VQNBEST_START()
+#define VQNBEST_STOP()
+#endif
+
+#if TM_PROFILE_VQNBESTSIGN
+#define VQNBESTSIGN_START() PROFILE_START()
+#define VQNBESTSIGN_STOP() PROFILE_STOP()
+#else
+#define VQNBESTSIGN_START()
+#define VQNBESTSIGN_STOP()
+#endif
+
+#if TM_PROFILE_PREPROCESSANALYSIS
+#define PREPROCESSANAYLSIS_START() PROFILE_START()
+#define PREPROCESSANAYLSIS_STOP() PROFILE_STOP()
+#else
+#define PREPROCESSANAYLSIS_START()
+#define PREPROCESSANAYLSIS_STOP()
+#endif
+
+#if TM_PROFILE_UPDATENOISEPROB
+#define UPDATENOISEPROB_START() PROFILE_START()
+#define UPDATENOISEPROB_STOP() PROFILE_STOP()
+#else
+#define UPDATENOISEPROB_START()
+#define UPDATENOISEPROB_STOP()
+#endif
+
+#if TM_PROFILE_COMPUTEGAINFLOOR
+#define COMPUTEGAINFLOOR_START() PROFILE_START()
+#define COMPUTEGAINFLOOR_STOP() PROFILE_STOP()
+#else
+#define COMPUTEGAINFLOOR_START()
+#define COMPUTEGAINFLOOR_STOP()
+#endif
+
+#if TM_PROFILE_FILTERDCNOTCH16
+#define FILTERDCNOTCH16_START() PROFILE_START()
+#define FILTERDCNOTCH16_STOP() PROFILE_STOP()
+#else
+#define FILTERDCNOTCH16_START()
+#define FILTERDCNOTCH16_STOP()
+#endif
+
+#if TM_PROFILE_MDFINNERPROD
+#define MDFINNERPROD_START() PROFILE_START()
+#define MDFINNERPROD_STOP() PROFILE_STOP()
+#else
+#define MDFINNERPROD_START()
+#define MDFINNERPROD_STOP()
+#endif
+
+#if TM_PROFILE_SPECTRALMULACCUM
+#define SPECTRALMULACCUM_START() PROFILE_START()
+#define SPECTRALMULACCUM_STOP() PROFILE_STOP()
+#else
+#define SPECTRALMULACCUM_START()
+#define SPECTRALMULACCUM_STOP()
+#endif
+
+#if TM_PROFILE_WEIGHTEDSPECTRALMULCONJ
+#define WEIGHTEDSPECTRALMULCONJ_START() PROFILE_START()
+#define WEIGHTEDSPECTRALMULCONJ_STOP() PROFILE_STOP()
+#else
+#define WEIGHTEDSPECTRALMULCONJ_START()
+#define WEIGHTEDSPECTRALMULCONJ_STOP()
+#endif
+
+#if TM_PROFILE_MDFADJUSTPROP
+#define MDFADJUSTPROP_START() PROFILE_START()
+#define MDFADJUSTPROP_STOP() PROFILE_STOP()
+#else
+#define MDFADJUSTPROP_START()
+#define MDFADJUSTPROP_STOP()
+#endif
+
+#if TM_PROFILE_SPEEXECHOGETRESIDUAL
+#define SPEEXECHOGETRESIDUAL_START() PROFILE_START()
+#define SPEEXECHOGETRESIDUAL_STOP() PROFILE_STOP()
+#else
+#define SPEEXECHOGETRESIDUAL_START()
+#define SPEEXECHOGETRESIDUAL_STOP()
+#endif
+
+#if TM_PROFILE_LSPENFORCEMARGIN
+#define LSPENFORCEMARGIN_START() PROFILE_START()
+#define LSPENFORCEMARGIN_STOP() PROFILE_STOP()
+#else
+#define LSPENFORCEMARGIN_START()
+#define LSPENFORCEMARGIN_STOP()
+#endif
+
+#if TM_PROFILE_LSPTOLPC
+#define LSPTOLPC_START() PROFILE_START()
+#define LSPTOLPC_STOP() PROFILE_STOP()
+#else
+#define LSPTOLPC_START()
+#define LSPTOLPC_STOP()
+#endif
+
+#if TM_PROFILE_MAXIMIZERANGE
+#define MAXIMIZERANGE_START() PROFILE_START()
+#define MAXIMIZERANGE_STOP() PROFILE_STOP()
+#else
+#define MAXIMIZERANGE_START()
+#define MAXIMIZERANGE_STOP()
+#endif
+
+#if TM_PROFILE_RENORMRANGE
+#define RENORMRANGE_START() PROFILE_START()
+#define RENORMRANGE_STOP() PROFILE_STOP()
+#else
+#define RENORMRANGE_START()
+#define RENORMRANGE_STOP()
+#endif
+
+#if TM_PROFILE_POWERSPECTRUM
+#define POWERSPECTRUM_START() PROFILE_START()
+#define POWERSPECTRUM_STOP() PROFILE_STOP()
+#else
+#define POWERSPECTRUM_START()
+#define POWERSPECTRUM_STOP()
+#endif
+
+#if TM_PROFILE_QMFSYNTH
+#define QMFSYNTH_START() PROFILE_START()
+#define QMFSYNTH_STOP() PROFILE_STOP()
+#else
+#define QMFSYNTH_START()
+#define QMFSYNTH_STOP()
+#endif
+
+#if TM_PROFILE_QMFDECOMP
+#define QMFDECOMP_START() PROFILE_START()
+#define QMFDECOMP_STOP() PROFILE_STOP()
+#else
+#define QMFDECOMP_START()
+#define QMFDECOMP_STOP()
+#endif
+
+#if TM_PROFILE_FILTERBANKCOMPUTEBANK32
+#define FILTERBANKCOMPUTEBANK32_START() PROFILE_START()
+#define FILTERBANKCOMPUTEBANK32_STOP() PROFILE_STOP()
+#else
+#define FILTERBANKCOMPUTEBANK32_START()
+#define FILTERBANKCOMPUTEBANK32_STOP()
+#endif
+
+#if TM_PROFILE_FILTERBANKCOMPUTEPSD16
+#define FILTERBANKCOMPUTEPSD16_START() PROFILE_START()
+#define FILTERBANKCOMPUTEPSD16_STOP() PROFILE_STOP()
+#else
+#define FILTERBANKCOMPUTEPSD16_START()
+#define FILTERBANKCOMPUTEPSD16_STOP()
+#endif
+
+
diff --git a/tmv/quant_lsp_tm.h b/tmv/quant_lsp_tm.h
new file mode 100644
index 0000000..e704eae
--- /dev/null
+++ b/tmv/quant_lsp_tm.h
@@ -0,0 +1,448 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file quant_lsp_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_COMPUTE_QUANT_WEIGHTS
+static void compute_quant_weights(Int16 *qlsp, Int16 *qw, int order)
+{
+ int qlspi, qlspii;
+ int w1, w2;
+ int i;
+
+ TMDEBUG_ALIGNMEM(qlsp);
+ TMDEBUG_ALIGNMEM(qw);
+
+ COMPUTEQUANTWEIGHTS_START();
+
+ --order;
+
+ qlspi = (int)qlsp[0];
+ qlspii = (int)qlsp[1];
+ w1 = qlspi;
+ w2 = qlspii - qlspi;
+
+ qw[0] = 81920 / (300 + imin(w1,w2));
+
+ for ( i=1 ; i<order ; ++i )
+ { qlspi = qlspii;
+ qlspii = qlsp[i+1];
+
+ w1 = w2;
+ w2 = qlspii - qlspi;
+
+ qw[i] = 81920 / (300 + imin(w1,w2));
+ }
+
+ w1 = LSP_PI - qlspii;
+ qw[i] = 81920 / (300 + imin(w1,w2));
+
+ COMPUTEQUANTWEIGHTS_STOP();
+}
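For reference, a plain-C sketch of what the unrolled routine above computes: each weight is derived from the spacing of adjacent quantized LSPs, with LSP_PI closing the interval at the top (Int16 and LSP_PI as used above).

/* Reference form: qw[i] = 81920 / (300 + min(left gap, right gap)). */
static void compute_quant_weights_ref(const Int16 *qlsp, Int16 *qw, int order)
{
   int i;
   for ( i=0 ; i<order ; ++i )
   {  int w1 = (i == 0)         ? qlsp[0]          : qlsp[i]   - qlsp[i-1];
      int w2 = (i == order - 1) ? LSP_PI - qlsp[i] : qlsp[i+1] - qlsp[i];
      qw[i] = 81920 / (300 + (w1 < w2 ? w1 : w2));
   }
}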
+
+
+
+#define OVERRIDE_LSP_QUANT
+static int lsp_quant(Int16 *x, const signed char *cdbk, int nbVec, int nbDim)
+{
+ register int best_dist=VERY_LARGE32;
+ register int best_id=0;
+ register int i, j;
+ register int dt0, dt1, dt2, dt3;
+ register int cb0, cb1, cb2, cb3, xx;
+ register int ptr_inc = nbDim * 3;
+ register int five = 5;
+ const signed char *ptr;
+
+ TMDEBUG_ALIGNMEM(x);
+
+ LSPQUANT_START();
+
+ for ( i=0, ptr=cdbk ; i<nbVec ; i+=4, ptr += ptr_inc )
+ { dt3 = dt2 = dt1 = dt0 = 0;
+
+ for ( j=0 ; j <nbDim ; j += 2 )
+ {
+ xx = ld32x(x,j>>1);
+ cb0 = pack16lsb((int)ptr[1], (int)ptr[0]);
+ cb0 = dualasl(cb0,five);
+ cb0 = dspidualsub(xx,cb0);
+ dt0 += ifir16(cb0,cb0);
+
+ cb1 = pack16lsb((int)ptr[nbDim+1], (int)ptr[nbDim]);
+ cb1 = dualasl(cb1,five);
+ cb1 = dspidualsub(xx,cb1);
+ dt1 += ifir16(cb1, cb1);
+
+ cb2 = pack16lsb((int)ptr[nbDim*2+1], (int)ptr[nbDim*2]);
+ cb2 = dualasl(cb2,five);
+ cb2 = dspidualsub(xx,cb2);
+ dt2 += ifir16(cb2, cb2);
+
+ cb3 = pack16lsb((int)ptr[nbDim*3+1], (int)ptr[nbDim*3]);
+ cb3 = dualasl(cb3,five);
+ cb3 = dspidualsub(xx,cb3);
+ dt3 += ifir16(cb3, cb3);
+
+ ptr += 2;
+ }
+
+ if ( dt0<best_dist )
+ { best_dist = dt0;
+ best_id = i;
+ }
+
+ if ( dt1<best_dist )
+ { best_dist = dt1;
+ best_id = i+1;
+ }
+
+ if ( dt2<best_dist )
+ { best_dist = dt2;
+ best_id = i+2;
+ }
+
+ if ( dt3<best_dist )
+ { best_dist = dt3;
+ best_id = i+3;
+ }
+ }
+
+ for ( j=0,ptr=cdbk+best_id*nbDim ; j<nbDim ; j+=2 )
+ { xx = ld32x(x,j>>1);
+ cb0 = pack16lsb((int)ptr[j+1], (int)ptr[j]);
+ cb0 = dualasl(cb0,five);
+ dt0 = dspidualsub(xx,cb0);
+ st32d(j<<1, x, dt0);
+ }
+
+ LSPQUANT_STOP();
+ return best_id;
+}
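The packed operations above (pack16lsb pairs two 16-bit values into one word, dualasl shifts both halves left, dspidualsub subtracts both halves, ifir16 is a dual 16-bit inner product) evaluate four codebook rows per outer iteration. A scalar sketch of the same search, kept close to the generic Speex routine, may make the intent easier to follow:

static int lsp_quant_ref(Int16 *x, const signed char *cdbk, int nbVec, int nbDim)
{
   int i, j, best_id = 0;
   int best_dist = VERY_LARGE32;

   for ( i=0 ; i<nbVec ; ++i )
   {  int dist = 0;
      for ( j=0 ; j<nbDim ; ++j )
      {  int d = x[j] - (cdbk[i*nbDim+j] << 5);  /* codebook entries are stored scaled down by 32 */
         dist += d * d;
      }
      if ( dist<best_dist )
      {  best_dist = dist;
         best_id = i;
      }
   }

   for ( j=0 ; j<nbDim ; ++j )                   /* leave the quantization residual in x */
      x[j] -= (Int16)(cdbk[best_id*nbDim+j] << 5);

   return best_id;
}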
+
+
+#define OVERRIDE_LSP_WEIGHT_QUANT
+static int lsp_weight_quant(Int16 *x, Int16 *weight, const signed char *cdbk, int nbVec, int nbDim)
+{
+ register int best_dist=VERY_LARGE32;
+ register int best_id=0;
+ register int i, j;
+ register int dt1, dt2, dt3, dt4;
+ register int cb1, cb2, cb3, cb4, wt, xx;
+ register int ptr_inc = nbDim * 3;
+ const signed char *ptr;
+
+ LSPWEIGHTQUANT_START();
+
+ for ( i=0, ptr=cdbk ; i<nbVec ; i+=4, ptr += ptr_inc )
+ { dt4 = dt3 = dt2 = dt1 = 0;
+
+ for ( j=0 ; j<nbDim ; ++j )
+ { wt = weight[j];
+ xx = x[j];
+
+ cb1 = xx - (ptr[0] << 5);
+ cb2 = xx - (ptr[nbDim] << 5);
+ cb3 = xx - (ptr[nbDim*2] << 5);
+ cb4 = xx - (ptr[nbDim*3] << 5);
+
+ ++ptr;
+
+ cb1 *= cb1;
+ cb2 *= cb2;
+ cb3 *= cb3;
+ cb4 *= cb4;
+
+ dt1 += (wt * (cb1 >> 15)) + ((wt * (cb1 & 0x7fff)) >> 15);
+ dt2 += (wt * (cb2 >> 15)) + ((wt * (cb2 & 0x7fff)) >> 15);
+ dt3 += (wt * (cb3 >> 15)) + ((wt * (cb3 & 0x7fff)) >> 15);
+ dt4 += (wt * (cb4 >> 15)) + ((wt * (cb4 & 0x7fff)) >> 15);
+
+ }
+
+ if ( dt1<best_dist )
+ { best_dist = dt1;
+ best_id = i;
+ }
+
+ if ( dt2<best_dist )
+ { best_dist = dt2;
+ best_id = i+1;
+ }
+
+ if ( dt3<best_dist )
+ { best_dist = dt3;
+ best_id = i+2;
+ }
+
+ if ( dt4<best_dist )
+ { best_dist = dt4;
+ best_id = i+3;
+ }
+ }
+
+ for ( j=0 ; j<nbDim ; ++j )
+ { x[j] = x[j] - ((Int16)cdbk[best_id*nbDim+j] << 5);
+ }
+
+ LSPWEIGHTQUANT_STOP();
+
+ return best_id;
+}
+
+#if 0
+// TODO: unroll loops
+#define OVERRIDE_LSP_QUANT_NB
+void lsp_quant_nb(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)
+{
+ int i;
+ int id;
+ spx_word16_t quant_weight[10];
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i];
+
+ compute_quant_weights(qlsp, quant_weight, order);
+
+ for (i=0;i<order;i++)
+ qlsp[i]=SUB16(qlsp[i],LSP_LINEAR(i));
+
+#ifndef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i] = LSP_SCALE*qlsp[i];
+#endif
+ id = lsp_quant(qlsp, cdbk_nb, NB_CDBK_SIZE, order);
+ speex_bits_pack(bits, id, 6);
+
+ for (i=0;i<order;i++)
+ qlsp[i]*=2;
+
+ id = lsp_weight_quant(qlsp, quant_weight, cdbk_nb_low1, NB_CDBK_SIZE_LOW1, 5);
+ speex_bits_pack(bits, id, 6);
+
+ for (i=0;i<5;i++)
+ qlsp[i]*=2;
+
+ id = lsp_weight_quant(qlsp, quant_weight, cdbk_nb_low2, NB_CDBK_SIZE_LOW2, 5);
+ speex_bits_pack(bits, id, 6);
+
+ id = lsp_weight_quant(qlsp+5, quant_weight+5, cdbk_nb_high1, NB_CDBK_SIZE_HIGH1, 5);
+ speex_bits_pack(bits, id, 6);
+
+ for (i=5;i<10;i++)
+ qlsp[i]*=2;
+
+ id = lsp_weight_quant(qlsp+5, quant_weight+5, cdbk_nb_high2, NB_CDBK_SIZE_HIGH2, 5);
+ speex_bits_pack(bits, id, 6);
+
+#ifdef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i]=PSHR16(qlsp[i],2);
+#else
+ for (i=0;i<order;i++)
+ qlsp[i]=qlsp[i] * .00097656;
+#endif
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i]-qlsp[i];
+}
+
+
+#define OVERRIDE_LSP_UNQUANT_NB
+void lsp_unquant_nb(spx_lsp_t *lsp, int order, SpeexBits *bits)
+{
+ int i, id;
+ for (i=0;i<order;i++)
+ lsp[i]=LSP_LINEAR(i);
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<10;i++)
+ lsp[i] = ADD32(lsp[i], LSP_DIV_256(cdbk_nb[id*10+i]));
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i] = ADD16(lsp[i], LSP_DIV_512(cdbk_nb_low1[id*5+i]));
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i] = ADD32(lsp[i], LSP_DIV_1024(cdbk_nb_low2[id*5+i]));
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i+5] = ADD32(lsp[i+5], LSP_DIV_512(cdbk_nb_high1[id*5+i]));
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i+5] = ADD32(lsp[i+5], LSP_DIV_1024(cdbk_nb_high2[id*5+i]));
+}
+
+#define OVERRIDE_LSP_QUANT_LBR
+void lsp_quant_lbr(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)
+{
+ int i;
+ int id;
+ spx_word16_t quant_weight[10];
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i];
+
+ compute_quant_weights(qlsp, quant_weight, order);
+
+ for (i=0;i<order;i++)
+ qlsp[i]=SUB16(qlsp[i],LSP_LINEAR(i));
+#ifndef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i]=qlsp[i]*LSP_SCALE;
+#endif
+ id = lsp_quant(qlsp, cdbk_nb, NB_CDBK_SIZE, order);
+ speex_bits_pack(bits, id, 6);
+
+ for (i=0;i<order;i++)
+ qlsp[i]*=2;
+
+ id = lsp_weight_quant(qlsp, quant_weight, cdbk_nb_low1, NB_CDBK_SIZE_LOW1, 5);
+ speex_bits_pack(bits, id, 6);
+
+ id = lsp_weight_quant(qlsp+5, quant_weight+5, cdbk_nb_high1, NB_CDBK_SIZE_HIGH1, 5);
+ speex_bits_pack(bits, id, 6);
+
+#ifdef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i] = PSHR16(qlsp[i],1);
+#else
+ for (i=0;i<order;i++)
+ qlsp[i] = qlsp[i]*0.0019531;
+#endif
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i]-qlsp[i];
+}
+
+#define OVERRIDE_LSP_UNQUANT_LBR
+void lsp_unquant_lbr(spx_lsp_t *lsp, int order, SpeexBits *bits)
+{
+ int i, id;
+ for (i=0;i<order;i++)
+ lsp[i]=LSP_LINEAR(i);
+
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<10;i++)
+ lsp[i] += LSP_DIV_256(cdbk_nb[id*10+i]);
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i] += LSP_DIV_512(cdbk_nb_low1[id*5+i]);
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<5;i++)
+ lsp[i+5] += LSP_DIV_512(cdbk_nb_high1[id*5+i]);
+
+}
+
+extern const signed char high_lsp_cdbk[];
+extern const signed char high_lsp_cdbk2[];
+
+#define OVERRIDE_LSP_UNQUANT_HIGH
+void lsp_unquant_high(spx_lsp_t *lsp, int order, SpeexBits *bits)
+{
+
+ int i, id;
+ for (i=0;i<order;i++)
+ lsp[i]=LSP_LINEAR_HIGH(i);
+
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<order;i++)
+ lsp[i] += LSP_DIV_256(high_lsp_cdbk[id*order+i]);
+
+
+ id=speex_bits_unpack_unsigned(bits, 6);
+ for (i=0;i<order;i++)
+ lsp[i] += LSP_DIV_512(high_lsp_cdbk2[id*order+i]);
+}
+
+#define OVERRIDE_LSP_QUANT_HIGH
+void lsp_quant_high(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)
+{
+ int i;
+ int id;
+ spx_word16_t quant_weight[10];
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i];
+
+ compute_quant_weights(qlsp, quant_weight, order);
+
+ /* quant_weight[0] = 10/(qlsp[1]-qlsp[0]);
+ quant_weight[order-1] = 10/(qlsp[order-1]-qlsp[order-2]);
+ for (i=1;i<order-1;i++)
+ {
+ tmp1 = 10/(qlsp[i]-qlsp[i-1]);
+ tmp2 = 10/(qlsp[i+1]-qlsp[i]);
+ quant_weight[i] = tmp1 > tmp2 ? tmp1 : tmp2;
+ }*/
+
+ for (i=0;i<order;i++)
+ qlsp[i]=SUB16(qlsp[i],LSP_LINEAR_HIGH(i));
+#ifndef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i] = qlsp[i]*LSP_SCALE;
+#endif
+ id = lsp_quant(qlsp, high_lsp_cdbk, 64, order);
+ speex_bits_pack(bits, id, 6);
+
+ for (i=0;i<order;i++)
+ qlsp[i]*=2;
+
+ id = lsp_weight_quant(qlsp, quant_weight, high_lsp_cdbk2, 64, order);
+ speex_bits_pack(bits, id, 6);
+
+#ifdef FIXED_POINT
+ for (i=0;i<order;i++)
+ qlsp[i] = PSHR16(qlsp[i],1);
+#else
+ for (i=0;i<order;i++)
+ qlsp[i] = qlsp[i]*0.0019531;
+#endif
+
+ for (i=0;i<order;i++)
+ qlsp[i]=lsp[i]-qlsp[i];
+}
+#endif
+
+#endif
diff --git a/tmv/speex_config_types.h b/tmv/speex_config_types.h
new file mode 100644
index 0000000..e63303f
--- /dev/null
+++ b/tmv/speex_config_types.h
@@ -0,0 +1,31 @@
+#ifndef __SPEEX_TYPES_H__
+#define __SPEEX_TYPES_H__
+
+#ifdef __TCS__
+
+#include <tmNxTypes.h>
+
+
+
+typedef Int16 spx_int16_t;
+typedef UInt16 spx_uint16_t;
+typedef Int32 spx_int32_t;
+typedef UInt32 spx_uint32_t;
+
+#ifdef FIXED_POINT
+#define VMUX(a,b,c) mux((a),(b),(c))
+#define VABS(a) iabs((a))
+#define VMAX(a,b) imax((a),(b))
+#define VMIN(a,b) imin((a),(b))
+#else
+#define VMUX(a,b,c) fmux((a),(b),(c))
+#define VABS(a) fabs((a))
+#define VMAX(a,b) fmax((a),(b))
+#define VMIN(a,b) fmin((a),(b))
+#endif
+
+#endif
+
+
+#endif
+
diff --git a/tmv/vq_tm.h b/tmv/vq_tm.h
new file mode 100644
index 0000000..c8c6109
--- /dev/null
+++ b/tmv/vq_tm.h
@@ -0,0 +1,494 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file vq_tm.h
+ @author Hong Zhiqian
+ @brief Various compatibility routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <ops/custom_defs.h>
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+inline void vq_nbest_dist(int i, int N, int dist, int *used, int *nbest, Int32 *best_dist)
+{
+ register int k;
+
+ if (i<N || dist<best_dist[N-1])
+ {
+ for (k=N-1; (k >= 1) && (k > *used || dist < best_dist[k-1]); k--)
+ { best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+
+ best_dist[k]=dist;
+ nbest[k]=i;
+ (*used)++; /* bump the number of filled slots, not the pointer */
+ }
+}
+
+void vq_nbest_5(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int in10, in32, in4;
+ int used = 0;
+
+ in10 = pack16lsb(in[1],in[0]); /* Note: memory is not aligned here */
+ in32 = pack16lsb(in[3],in[2]);
+ in4 = sex16(in[4]);
+
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0,j=0 ; i<entries ; i+=2,j+=5 )
+ {
+ register int dist1, dist2;
+ register int cb10, cb32, cb54, cb76, cb98, cb87, cb65;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+
+ dist1 = sex16(cb54) * in4;
+ dist1 += ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist1 = (E[i] >> 1) - dist1;
+
+ cb65 = funshift2(cb76,cb54);
+ cb87 = funshift2(cb98,cb76);
+ dist2 = asri(16,cb98) * in4;
+ dist2 += ifir16(in10,cb65) + ifir16(in32,cb87);
+ dist2 = (E[i+1] >> 1) - dist2;
+
+ vq_nbest_dist(i,N,dist1,&used,nbest,best_dist);
+ vq_nbest_dist(i+1,N,dist2,&used,nbest,best_dist);
+ }
+
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
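All of the vq_nbest_* kernels in this file rank codewords by dist = E[i]/2 - <in, c_i>, where E[i] is assumed, as in the generic Speex vq code, to hold the precomputed energy (sum of squares) of codeword c_i. For a fixed input x this is equivalent to a Euclidean nearest-neighbour search, since

\[
\|x - c_i\|^2 \;=\; \|x\|^2 - 2\langle x, c_i\rangle + E_i
            \;=\; \|x\|^2 + 2\Bigl(\tfrac{E_i}{2} - \langle x, c_i\rangle\Bigr),
\]

and the \(\|x\|^2\) term is common to every entry, so minimizing \(E_i/2 - \langle x, c_i\rangle\) selects the same N best codewords as the true squared distance.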
+
+
+void vq_nbest_8(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int in10, in32, in54, in76;
+ int used = 0;
+
+ in10 = pack16lsb(in[1],in[0]); /* Note: memory is not aligned here */
+ in32 = pack16lsb(in[3],in[2]);
+ in54 = pack16lsb(in[5],in[4]);
+ in76 = pack16lsb(in[7],in[6]);
+
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<entries ; ++i,j+=4 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+ dist = (E[i] >> 1) - dist;
+
+ vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+
+}
+
+
+void vq_nbest_10(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int in10, in32, in54, in76, in98;
+ int used = 0;
+
+ in10 = pack16lsb(in[1],in[0]);
+ in32 = pack16lsb(in[3],in[2]);
+ in54 = pack16lsb(in[5],in[4]);
+ in76 = pack16lsb(in[7],in[6]);
+ in98 = pack16lsb(in[9],in[8]);
+
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<entries ; ++i,j+=5 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76, cb98;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+ dist += ifir16(in98,cb98);
+ dist = (E[i] >> 1) - dist;
+
+ vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void vq_nbest_20(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int in10, in32, in54, in76, in98, in_10, in_32, in_54, in_76, in_98;
+ int used = 0;
+
+ in10 = pack16lsb(in[1],in[0]); /* Note: memory is not aligned here */
+ in32 = pack16lsb(in[3],in[2]);
+ in54 = pack16lsb(in[5],in[4]);
+ in76 = pack16lsb(in[7],in[6]);
+ in98 = pack16lsb(in[9],in[8]);
+ in_10 = pack16lsb(in[11],in[10]);
+ in_32 = pack16lsb(in[13],in[12]);
+ in_54 = pack16lsb(in[15],in[14]);
+ in_76 = pack16lsb(in[17],in[16]);
+ in_98 = pack16lsb(in[19],in[18]);
+
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<entries ; ++i,j+=10 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76, cb98, cb_10, cb_32, cb_54, cb_76, cb_98;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+ cb_10 = ld32x(codebook,j+5);
+ cb_32 = ld32x(codebook,j+6);
+ cb_54 = ld32x(codebook,j+7);
+ cb_76 = ld32x(codebook,j+8);
+ cb_98 = ld32x(codebook,j+9);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+ dist += ifir16(in98,cb98) + ifir16(in_10,cb_10);
+ dist += ifir16(in_32,cb_32) + ifir16(in_54,cb_54);
+ dist += ifir16(in_76,cb_76) + ifir16(in_98,cb_98);
+
+ dist = (E[i] >> 1) - dist;
+ vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+
+#define OVERRIDE_VQ_NBEST
+void vq_nbest (Int16 *in, const Int16 *codebook, int len, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist, char *stack)
+{
+ TMDEBUG_ALIGNMEM(codebook);
+
+ VQNBEST_START();
+ if( len==5 )
+ vq_nbest_5(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==8 )
+ vq_nbest_8(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==10 )
+ vq_nbest_10(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==20 )
+ vq_nbest_20(in,codebook,entries,E,N,nbest,best_dist);
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ VQNBEST_STOP();
+}
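A usage sketch of the dispatcher above, for a hypothetical 64-entry, 10-dimensional codebook whose per-row energies have been precomputed into E; the stack argument is unused in this override and is only kept for signature compatibility:

/* Hypothetical buffers; in Speex these come from the codebook tables. */
Int16  target[10];          /* vector to quantize                    */
Int16  codebook[64*10];     /* 64 rows of 10 coefficients            */
Int32  E[64];               /* E[i] = sum of squares of row i        */
int    nbest[3];            /* indices of the 3 closest rows         */
Int32  best_dist[3];        /* their distances, best first           */

void example_search(void)
{
   vq_nbest(target, codebook, 10, 64, E, 3, nbest, best_dist, (char *)0);
}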
+
+inline void vq_nbest_sign_dist(int i, int N, int dist, int sign, int entries, int *used, int *nbest, Int32 *best_dist)
+{
+ register int k;
+
+ if (i<N || dist<best_dist[N-1])
+ { for (k=N-1; (k >= 1) && (k > *used || dist < best_dist[k-1]); k--)
+ {
+ best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+
+ if ( sign ) i += entries;
+ best_dist[k]=dist;
+ (*used)++; /* bump the number of filled slots, not the pointer */
+ nbest[k] = i;
+ }
+}
+
+void vq_nbest_sign_5(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int in10, in32, in4;
+ int used = 0;
+
+ in10 = ld32(in);
+ in32 = ld32x(in,1);
+ in4 = sex16(in[4]);
+
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0,j=0 ; i<entries ; i+=2,j+=5 )
+ {
+ register int dist1, dist2, sign1, sign2;
+ register int cb10, cb32, cb54, cb76, cb98, cb87, cb65;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+
+ dist1 = sex16(cb54) * in4;
+ dist1 += ifir16(in10,cb10) + ifir16(in32,cb32);
+
+ sign1 = mux(dist1>0,0,1);
+ dist1 = iflip(dist1>0,dist1);
+ dist1 = (E[i] >> 1) + dist1;
+
+ cb65 = funshift2(cb76,cb54);
+ cb87 = funshift2(cb98,cb76);
+ dist2 = asri(16,cb98) * in4;
+ dist2 += ifir16(in10,cb65) + ifir16(in32,cb87);
+
+ sign2 = mux(dist2>0,0,1);
+ dist2 = iflip(dist2>0,dist2);
+ dist2 = (E[i+1] >> 1) + dist2; /* second lane uses the energy of entry i+1 */
+
+ vq_nbest_sign_dist(i,N,dist1,sign1,entries,&used,nbest,best_dist);
+ vq_nbest_sign_dist(i+1,N,dist2,sign2,entries,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void vq_nbest_sign_8(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int sign;
+ register int in10, in32, in54, in76;
+ int used = 0;
+
+ in10 = ld32(in);
+ in32 = ld32x(in,1);
+ in54 = ld32x(in,2);
+ in76 = ld32x(in,3);
+
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+
+ for ( i=0,j=0 ; i<entries ; ++i,j+=4 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+
+ sign = mux(dist>0,0,1);
+ dist = iflip(dist>0,dist);
+ dist = (E[i] >> 1) + dist;
+
+ vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void vq_nbest_sign_10(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int sign;
+ register int in10, in32, in54, in76, in98;
+ int used = 0;
+
+ in10 = ld32(in);
+ in32 = ld32x(in,1);
+ in54 = ld32x(in,2);
+ in76 = ld32x(in,3);
+ in98 = ld32x(in,4);
+
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unroll=4
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<entries ; ++i,j+=5 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76, cb98;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+ dist += ifir16(in98,cb98);
+
+ sign = mux(dist>0,0,1);
+ dist = iflip(dist>0,dist);
+ dist = (E[i] >> 1) + dist;
+
+ vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+void vq_nbest_sign_20(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
+{
+ register int i, j;
+ register int sign;
+ register int in10, in32, in54, in76, in98, in_10, in_32, in_54, in_76, in_98;
+ int used = 0;
+
+ in10 = ld32(in);
+ in32 = ld32x(in,1);
+ in54 = ld32x(in,2);
+ in76 = ld32x(in,3);
+ in98 = ld32x(in,4);
+ in_10 = ld32x(in,5);
+ in_32 = ld32x(in,6);
+ in_54 = ld32x(in,7);
+ in_76 = ld32x(in,8);
+ in_98 = ld32x(in,9);
+
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unroll=2
+#pragma TCS_unrollexact=1
+#endif
+ for ( i=0,j=0 ; i<entries ; ++i,j+=10 )
+ {
+ register int dist;
+ register int cb10, cb32, cb54, cb76, cb98, cb_10, cb_32, cb_54, cb_76, cb_98;
+
+ cb10 = ld32x(codebook,j);
+ cb32 = ld32x(codebook,j+1);
+ cb54 = ld32x(codebook,j+2);
+ cb76 = ld32x(codebook,j+3);
+ cb98 = ld32x(codebook,j+4);
+ cb_10 = ld32x(codebook,j+5);
+ cb_32 = ld32x(codebook,j+6);
+ cb_54 = ld32x(codebook,j+7);
+ cb_76 = ld32x(codebook,j+8);
+ cb_98 = ld32x(codebook,j+9);
+
+ dist = ifir16(in10,cb10) + ifir16(in32,cb32);
+ dist += ifir16(in54,cb54) + ifir16(in76,cb76);
+ dist += ifir16(in98,cb98) + ifir16(in_10,cb_10);
+ dist += ifir16(in_32,cb_32) + ifir16(in_54,cb_54);
+ dist += ifir16(in_76,cb_76) + ifir16(in_98,cb_98);
+
+ sign = mux(dist>0,0,1);
+ dist = iflip(dist>0,dist);
+ dist = (E[i] >> 1) + dist;
+
+ vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
+ }
+#if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
+#pragma TCS_unrollexact=0
+#pragma TCS_unroll=0
+#endif
+}
+
+#define OVERRIDE_VQ_NBEST_SIGN
+void vq_nbest_sign (Int16 *in, const Int16 *codebook, int len, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist, char *stack)
+{
+ TMDEBUG_ALIGNMEM(in);
+ TMDEBUG_ALIGNMEM(codebook);
+
+ VQNBESTSIGN_START();
+
+ if( len==5 )
+ vq_nbest_sign_5(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==8 )
+ vq_nbest_sign_8(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==10 )
+ vq_nbest_sign_10(in,codebook,entries,E,N,nbest,best_dist);
+ else if ( len==20 )
+ vq_nbest_sign_20(in,codebook,entries,E,N,nbest,best_dist);
+
+#ifndef REMARK_ON
+ (void)stack;
+#endif
+
+ VQNBESTSIGN_STOP();
+}
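When the correlation with a codeword is negative, the *_sign kernels flip the distance and record the hit as index + entries, so a caller can recover both pieces afterwards. A small sketch of that convention (a hypothetical helper illustrating the encoding used above):

/* Split a raw index returned in nbest[] into codeword index and sign flag. */
void split_sign(int raw_index, int entries, int *index, int *sign)
{
   *sign  = (raw_index >= entries);   /* 1 means the codeword is negated */
   *index = *sign ? raw_index - entries : raw_index;
}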
+
+#endif
+