diff options
author | Jean-Marc Valin <jean-marc.valin@octasic.com> | 2011-05-25 00:39:15 +0400 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@octasic.com> | 2011-05-25 00:39:15 +0400 |
commit | 61a67f3865477bc20a78d17edbc7fb6eab67700f (patch) | |
tree | 065ef817870fbb08e550d46def9496c521b8440b | |
parent | f7c106d5a00427d9b3c44f2295c11f92130a1d45 (diff) |
Feature extraction
-rw-r--r-- | opus_sources.mk | 3 | ||||
-rw-r--r-- | src/features.c | 90 | ||||
-rw-r--r-- | src/opus_encoder.c | 3 |
3 files changed, 88 insertions, 8 deletions
diff --git a/opus_sources.mk b/opus_sources.mk index 077d2117..ad967944 100644 --- a/opus_sources.mk +++ b/opus_sources.mk @@ -1,2 +1,3 @@ OPUS_SOURCES = src/opus_decoder.c \ -src/opus_encoder.c +src/opus_encoder.c \ +src/features.c diff --git a/src/features.c b/src/features.c index 89add4f3..7981a482 100644 --- a/src/features.c +++ b/src/features.c @@ -32,19 +32,44 @@ #include "kiss_fft.h" #include "celt.h" #include "modes.h" +#include "arch.h" +#include "features.h" +#include "quant_bands.h" #define NBANDS 17 const int bands[NBANDS+1] = {1, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96,112,136,160}; -void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x) +float dct_table[128] = { + 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, + 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, + 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654, + -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851, + 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760, + -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760, + 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631, + 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330, + 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, + 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, + 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664, + -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806, + 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969, + -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969, + 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292, + 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300, +}; + +static void feature_analysis(CELTEncoder *celt_enc, const celt_word16 *x, + celt_word16 *features, celt_word16 *mem) { int i; const CELTMode *mode; const kiss_fft_state *kfft; kiss_fft_cpx in[480], out[480]; const celt_word16 *window; - celt_word32 E[NBANDS+1]; + celt_word32 E[NBANDS]; + celt_word16 logE[NBANDS]; + celt_word16 BFCC[16]; int overlap = 120; int N = 480; @@ -70,11 +95,62 @@ void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x) for (i=0;i<NBANDS;i++) { int j; - E[i] = 0; + celt_word32 sum = 0; for (j=bands[i];j<bands[i+1];j++) - E[i] = E[i] + MULT_32_32_Q31(out[ j].r, out[ j].r) - + MULT_32_32_Q31(out[ j].i, out[ j].i) - + MULT_32_32_Q31(out[N-j].r, out[N-j].r) - + MULT_32_32_Q31(out[N-j].i, out[N-j].i); + sum = sum + MULT32_32_Q31(out[ j].r, out[ j].r) + + MULT32_32_Q31(out[ j].i, out[ j].i) + + MULT32_32_Q31(out[N-j].r, out[N-j].r) + + MULT32_32_Q31(out[N-j].i, out[N-j].i); + E[i] = MAX32(EPSILON, sum); + //printf ("%f ", E[i]); + } + amp2Log2(mode, NBANDS, NBANDS, E, logE, 1); + for (i=0;i<NBANDS;i++) + logE[i] = MAX32(logE[i], -14.); + //for (i=0;i<16;i++) + // printf ("%f ", logE[i]); + + for (i=0;i<8;i++) + { + int j; + float sum = 0; + for (j=0;j<16;j++) + sum += dct_table[i*16+j]*logE[j]; + BFCC[i] = sum; + //printf ("%f ", BFCC[i]); + } + for (i=0;i<7;i++) + features[i] = BFCC[i+1]; + + for (i=0;i<8;i++) + features[7+i] = .707*(BFCC[i] - mem[i+8]); + for (i=0;i<8;i++) + features[15+i] = .5*(BFCC[i] - 2*mem[i+8] + mem[i]); + for (i=0;i<8;i++) + { + mem[i+8] = mem[i]; + mem[i] = BFCC[i]; } + for (i=0;i<23;i++) + printf ("%f ", features[i]); + + printf("\n"); +} + +void feature_analysis_fixed(CELTEncoder *celt_enc, const celt_int16 *x) +{ + /* FIXME: Get rid of this static var ASAP! */ + static float mem[16]; + float features[23]; +#ifdef FIXED_POINT + feature_analysis(celt_enc, x); +#else + int i; + int N = 960-120; + celt_word16 x2[960-120]; + + for (i=0;i<N;i++) + x2[i] = x[i]; + feature_analysis(celt_enc, x2, features, mem); +#endif } diff --git a/src/opus_encoder.c b/src/opus_encoder.c index f306606a..65dd929d 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -38,6 +38,7 @@ #include "entenc.h" #include "modes.h" #include "silk_API.h" +#include "features.h" /* Transition tables for the voice and audio modes. First column is the middle (memoriless) threshold. The second column is the hysteresis @@ -180,6 +181,8 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, silk_enc = (char*)st+st->silk_enc_offset; celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); + feature_analysis_fixed(celt_enc, pcm); + if (st->user_bitrate_bps==OPUS_BITRATE_AUTO) st->bitrate_bps = 60*st->Fs/frame_size + st->Fs*st->channels; else |