Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jean-Marc Valin <jean-marc.valin@octasic.com> 2011-05-25 00:39:15 +0400
committer Jean-Marc Valin <jean-marc.valin@octasic.com> 2011-05-25 00:39:15 +0400
commit 61a67f3865477bc20a78d17edbc7fb6eab67700f (patch)
tree 065ef817870fbb08e550d46def9496c521b8440b
parent f7c106d5a00427d9b3c44f2295c11f92130a1d45 (diff)
Feature extraction
-rw-r--r-- opus_sources.mk 3
-rw-r--r-- src/features.c 90
-rw-r--r-- src/opus_encoder.c 3
3 files changed, 88 insertions, 8 deletions
diff --git a/opus_sources.mk b/opus_sources.mk
index 077d2117..ad967944 100644
--- a/opus_sources.mk
+++ b/opus_sources.mk
@@ -1,2 +1,3 @@
OPUS_SOURCES = src/opus_decoder.c \
-src/opus_encoder.c
+src/opus_encoder.c \
+src/features.c
diff --git a/src/features.c b/src/features.c
index 89add4f3..7981a482 100644
--- a/src/features.c
+++ b/src/features.c
@@ -32,19 +32,44 @@
#include "kiss_fft.h"
#include "celt.h"
#include "modes.h"
+#include "arch.h"
+#include "features.h"
+#include "quant_bands.h"
#define NBANDS 17
const int bands[NBANDS+1] =
{1, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96,112,136,160};
-void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x)
+float dct_table[128] = {
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
+ -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+ 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+ -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
+ 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+ 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
+ -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
+ 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
+ -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
+ 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
+ 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
+};
+
+static void feature_analysis(CELTEncoder *celt_enc, const celt_word16 *x,
+ celt_word16 *features, celt_word16 *mem)
{
int i;
const CELTMode *mode;
const kiss_fft_state *kfft;
kiss_fft_cpx in[480], out[480];
const celt_word16 *window;
- celt_word32 E[NBANDS+1];
+ celt_word32 E[NBANDS];
+ celt_word16 logE[NBANDS];
+ celt_word16 BFCC[16];
int overlap = 120;
int N = 480;
@@ -70,11 +95,62 @@ void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x)
for (i=0;i<NBANDS;i++)
{
int j;
- E[i] = 0;
+ celt_word32 sum = 0;
for (j=bands[i];j<bands[i+1];j++)
- E[i] = E[i] + MULT_32_32_Q31(out[ j].r, out[ j].r)
- + MULT_32_32_Q31(out[ j].i, out[ j].i)
- + MULT_32_32_Q31(out[N-j].r, out[N-j].r)
- + MULT_32_32_Q31(out[N-j].i, out[N-j].i);
+ sum = sum + MULT32_32_Q31(out[ j].r, out[ j].r)
+ + MULT32_32_Q31(out[ j].i, out[ j].i)
+ + MULT32_32_Q31(out[N-j].r, out[N-j].r)
+ + MULT32_32_Q31(out[N-j].i, out[N-j].i);
+ E[i] = MAX32(EPSILON, sum);
+ //printf ("%f ", E[i]);
+ }
+ amp2Log2(mode, NBANDS, NBANDS, E, logE, 1);
+ for (i=0;i<NBANDS;i++)
+ logE[i] = MAX32(logE[i], -14.);
+ //for (i=0;i<16;i++)
+ // printf ("%f ", logE[i]);
+
+ for (i=0;i<8;i++)
+ {
+ int j;
+ float sum = 0;
+ for (j=0;j<16;j++)
+ sum += dct_table[i*16+j]*logE[j];
+ BFCC[i] = sum;
+ //printf ("%f ", BFCC[i]);
+ }
+ for (i=0;i<7;i++)
+ features[i] = BFCC[i+1];
+
+ for (i=0;i<8;i++)
+ features[7+i] = .707*(BFCC[i] - mem[i+8]);
+ for (i=0;i<8;i++)
+ features[15+i] = .5*(BFCC[i] - 2*mem[i+8] + mem[i]);
+ for (i=0;i<8;i++)
+ {
+ mem[i+8] = mem[i];
+ mem[i] = BFCC[i];
}
+ for (i=0;i<23;i++)
+ printf ("%f ", features[i]);
+
+ printf("\n");
+}
+
+void feature_analysis_fixed(CELTEncoder *celt_enc, const celt_int16 *x)
+{
+ /* FIXME: Get rid of this static var ASAP! */
+ static float mem[16];
+ float features[23];
+#ifdef FIXED_POINT
+ feature_analysis(celt_enc, x);
+#else
+ int i;
+ int N = 960-120;
+ celt_word16 x2[960-120];
+
+ for (i=0;i<N;i++)
+ x2[i] = x[i];
+ feature_analysis(celt_enc, x2, features, mem);
+#endif
}
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index f306606a..65dd929d 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -38,6 +38,7 @@
#include "entenc.h"
#include "modes.h"
#include "silk_API.h"
+#include "features.h"
/* Transition tables for the voice and audio modes. First column is the
middle (memoriless) threshold. The second column is the hysteresis
@@ -180,6 +181,8 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size,
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+ feature_analysis_fixed(celt_enc, pcm);
+
if (st->user_bitrate_bps==OPUS_BITRATE_AUTO)
st->bitrate_bps = 60*st->Fs/frame_size + st->Fs*st->channels;
else