Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jan Buethe <jbuethe@amazon.de>, 2023-11-29 17:45:50 +0300
committer: Jean-Marc Valin <jmvalin@amazon.com>, 2023-12-01 00:23:19 +0300
commit: 799b5a6688ecde7e7c0a09baa862bcce021b0a4e (patch)
tree: c899685a05ab5c1fee24e3834eb6823182943fb9
parent: 2f4445b54a80a450a38af5734a0e2e768e53fc02 (diff)
added LACE to SILK decoder (--enable-osce)
-rw-r--r--  Makefile.am  3
-rw-r--r--  configure.ac  2
-rw-r--r--  dnn/osce.h  13
-rw-r--r--  lpcnet_headers.mk  4
-rw-r--r--  lpcnet_sources.mk  8
-rw-r--r--  silk/decode_frame.c  15
-rw-r--r--  silk/init_decoder.c  8
-rw-r--r--  silk/osce_config.h  4
-rw-r--r--  silk/silk_enhancer.c  89
-rw-r--r--  silk/silk_enhancer.h  9
-rw-r--r--  silk/structs.h  7
-rw-r--r--  silk_headers.mk  3
-rw-r--r--  silk_sources.mk  3
-rw-r--r--  src/opus_encoder.c  5
14 files changed, 126 insertions, 47 deletions
diff --git a/Makefile.am b/Makefile.am
index 1b772446..ba37506f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -25,6 +25,9 @@ endif
if ENABLE_DRED
LPCNET_SOURCES += $(DRED_SOURCES)
endif
+if ENABLE_OSCE
+LPCNET_SOURCES += $(OSCE_SOURCES)
+endif
if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
diff --git a/configure.ac b/configure.ac
index 2c301c58..3ceeff48 100644
--- a/configure.ac
+++ b/configure.ac
@@ -913,6 +913,8 @@ AS_IF([test "$enable_osce" = "yes"], [
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
])
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes"])
+
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
AC_ARG_ENABLE([extra-programs],
diff --git a/dnn/osce.h b/dnn/osce.h
index a1d200cf..74836594 100644
--- a/dnn/osce.h
+++ b/dnn/osce.h
@@ -28,9 +28,22 @@ typedef struct
float window[LACE_OVERLAP_SIZE];
} LACE;
+typedef LACE OSCEModel;
+
typedef struct NOLACE NOLACE;
typedef struct NoLACEState NoLACEState;
void init_lace(LACE *hLACE);
+void lace_process_20ms_frame(
+ LACE* hLACE,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const float *numbits,
+ const int *periods
+);
+
+#define init_osce(x) init_lace(x)
+
#endif
\ No newline at end of file
diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk
index da610ca1..b85495d1 100644
--- a/lpcnet_headers.mk
+++ b/lpcnet_headers.mk
@@ -29,3 +29,7 @@ dnn/dred_rdovae_enc_data.h \
dnn/dred_rdovae_dec.h \
dnn/dred_rdovae_dec_data.h \
dnn/dred_rdovae_stats_data.h
+
+OSCE_HEAD= \
+dnn/osce.h \
+dnn/silk_enhancer.h \ No newline at end of file
diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk
index 9b8863ad..17258ae7 100644
--- a/lpcnet_sources.mk
+++ b/lpcnet_sources.mk
@@ -23,6 +23,14 @@ silk/dred_encoder.c \
silk/dred_coding.c \
silk/dred_decoder.c
+OSCE_SOURCES = \
+dnn/osce.c \
+dnn/nndsp.c \
+silk/silk_enhancer.c \
+dnn/nnet.c \
+dnn/parse_lpcnet_weights.c \
+dnn/lace_data.c
+
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
diff --git a/silk/decode_frame.c b/silk/decode_frame.c
index 564c5033..20665e71 100644
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -97,12 +97,7 @@ opus_int silk_decode_frame(
/********************************************************/
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
-#ifdef ENABLE_OSCE
- /********************************************************/
- /* Run SILK enhancer */
- /********************************************************/
- silk_enhancer( psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
-#endif
+
/********************************************************/
/* Update PLC state */
/********************************************************/
@@ -135,6 +130,14 @@ opus_int silk_decode_frame(
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+#ifdef ENABLE_OSCE
+ /********************************************************/
+ /* Run SILK enhancer */
+ /********************************************************/
+ silk_enhancer( psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
+#endif
+
/************************************************/
/* Comfort noise generation / estimation */
/************************************************/
diff --git a/silk/init_decoder.c b/silk/init_decoder.c
index 16c03dcd..fc8dbdd8 100644
--- a/silk/init_decoder.c
+++ b/silk/init_decoder.c
@@ -31,6 +31,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
/************************/
/* Init Decoder State */
/************************/
@@ -52,6 +56,10 @@ opus_int silk_init_decoder(
/* Reset PLC state */
silk_PLC_Reset( psDec );
+#ifdef ENABLE_OSCE
+ init_osce(&psDec->osce.model);
+#endif
+
return(0);
}
diff --git a/silk/osce_config.h b/silk/osce_config.h
index 559f4258..014435e9 100644
--- a/silk/osce_config.h
+++ b/silk/osce_config.h
@@ -3,7 +3,7 @@
#define OSCE_FEATURES_MAX_HISTORY 350
-#define OSCE_FEATURE_DIM (93+64)
+#define OSCE_FEATURE_DIM 93
#define OSCE_MAX_FEATURE_FRAMES 4
#define OSCE_CLEAN_SPEC_NUM_BANDS 64
@@ -30,7 +30,5 @@
#define OSCE_LOG_GAIN_START 92
#define OSCE_LOG_GAIN_LENGTH 1
-#define OSCE_EMBED_PITCH_START 93
-#define OSCE_EMBED_PITCH_LENGTH 64
#endif
\ No newline at end of file
diff --git a/silk/silk_enhancer.c b/silk/silk_enhancer.c
index c0d6a316..03b4b2ab 100644
--- a/silk/silk_enhancer.c
+++ b/silk/silk_enhancer.c
@@ -230,7 +230,7 @@ static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int
apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);
/* log and scaling */
- for (i = 0; i < 161; i++)
+ for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++)
{
spec[i] = 0.3f * log(spec[i] + 1e-9f);
}
@@ -290,42 +290,49 @@ static void calculate_acorr(float *acorr, float *signal, int lag)
}
}
-static int pitch_postprocessing(silk_OSCE_struct *psOSCE, int lag, int type)
+static int pitch_postprocessing(silk_OSCE_features *psFeatures, int lag, int type)
{
int new_lag;
+
+#ifdef OSCE_HANGOVER_BUGFIX
+#define TESTBIT 1
+#else
+#define TESTBIT 0
+#endif
+
/* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
- if (type != TYPE_VOICED && psOSCE->last_type == TYPE_VOICED && 0)
+ if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT)
/* enter hangover */
{
new_lag = OSCE_NO_PITCH_VALUE;
- if (psOSCE->pitch_hangover_count < OSCE_PITCH_HANGOVER)
+ if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
{
- new_lag = psOSCE->last_lag;
- psOSCE->pitch_hangover_count = (psOSCE->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+ new_lag = psFeatures->last_lag;
+ psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
}
}
- else if (type != TYPE_VOICED && psOSCE->pitch_hangover_count && 0)
+ else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT)
/* continue hangover */
{
- new_lag = psOSCE->last_lag;
- psOSCE->pitch_hangover_count = (psOSCE->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+ new_lag = psFeatures->last_lag;
+ psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
}
else if (type != TYPE_VOICED)
/* unvoiced frame after hangover */
{
new_lag = OSCE_NO_PITCH_VALUE;
- psOSCE->pitch_hangover_count = 0;
+ psFeatures->pitch_hangover_count = 0;
}
else
/* voiced frame: update last_lag */
{
new_lag = lag;
- psOSCE->last_lag = lag;
- psOSCE->pitch_hangover_count = 0;
+ psFeatures->last_lag = lag;
+ psFeatures->pitch_hangover_count = 0;
}
/* buffer update */
- psOSCE->last_type = type;
+ psFeatures->last_type = type;
/* with the current setup this should never happen (but who knows...) */
celt_assert(new_lag)
@@ -343,7 +350,7 @@ static void calculate_features(
int num_subframes, num_samples;
float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
float *frame, *features;
- silk_OSCE_struct *psOSCE;
+ silk_OSCE_features *psFeatures;
int i, n, k;
#ifdef WRITE_FEATURES
static FILE *f_feat = NULL;
@@ -353,23 +360,30 @@ static void calculate_features(
}
#endif
- (void) num_bits; /* TODO: implement num_bits embedding */
-
+ //OPUS_CLEAR(buffer, 1);
memset(buffer, 0, sizeof(buffer));
num_subframes = psDec->nb_subfr;
num_samples = num_subframes * 80;
- psOSCE = &psDec->osce;
+ psFeatures = &psDec->osce.features;
+
+ /* smooth bit count */
+ psFeatures->numbits[0] = num_bits;
+#ifdef OSCE_NUMBITS_BUGFIX
+ psFeatures->numbits[1] = 0.9 * psFeatures->numbits[1] + 0.1 * num_bits;
+#else
+ psFeatures->numbits[1] = num_bits;
+#endif
for (n = 0; n < num_samples; n++)
{
buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
}
- OPUS_COPY(buffer, psOSCE->signal_history, OSCE_FEATURES_MAX_HISTORY);
+ OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
for (k = 0; k < num_subframes; k++)
{
- features = &psOSCE->features[k * OSCE_FEATURE_DIM];
+ features = &psFeatures->features[k * OSCE_FEATURE_DIM];
frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
memset(features, 0, OSCE_FEATURE_DIM); /* precaution */
@@ -394,10 +408,10 @@ static void calculate_features(
}
/* pitch hangover and zero value replacement */
- psOSCE->lags[k] = pitch_postprocessing(psOSCE, psDecCtrl->pitchL[k], psDec->indices.signalType);
+ psFeatures->lags[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
/* auto-correlation around pitch lag */
- calculate_acorr(features + OSCE_ACORR_START, frame, psOSCE->lags[k]);
+ calculate_acorr(features + OSCE_ACORR_START, frame, psFeatures->lags[k]);
/* ltp */
celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
@@ -415,7 +429,7 @@ static void calculate_features(
}
/* buffer update */
- OPUS_COPY(psOSCE->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
+ OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
}
void silk_enhancer(
@@ -426,15 +440,27 @@ void silk_enhancer(
int arch /* I Run-time architecture */
)
{
+ float in_buffer[320];
+ float out_buffer[320];
+ int i;
+
(void) arch;
- (void) num_bits;
/* ToDo: decide when to enhance (20 ms frame, 16kHz) */
calculate_features(psDec, psDecCtrl, xq, num_bits);
+ /* scale input */
+ for (i = 0; i < 320; i++)
+ {
+ in_buffer[i] = ((float) xq[i]) / (1U<<15);
+ }
+
+ lace_process_20ms_frame(&psDec->osce.model, out_buffer, in_buffer, psDec->osce.features.features, psDec->osce.features.numbits, psDec->osce.features.lags);
+
+
#ifdef WRITE_FEATURES
- int i, k;
+ int k;
static FILE *flpc = NULL;
static FILE *fgain = NULL;
@@ -443,6 +469,7 @@ void silk_enhancer(
static FILE *fnoisy16k = NULL;
static FILE* f_numbits = NULL;
static FILE* f_numbits_smooth = NULL;
+ static FILE* f_noisy = NULL;
if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
@@ -452,10 +479,10 @@ void silk_enhancer(
if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
- psDec->osce.num_bits_smooth = 0.9 * psDec->osce.num_bits_smooth + 0.1 * num_bits;
+ psDec->osce.features.num_bits_smooth = 0.9 * psDec->osce.features.num_bits_smooth + 0.1 * num_bits;
fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
- fwrite(&(psDec->osce.num_bits_smooth), sizeof(psDec->osce.num_bits_smooth), 1, f_numbits_smooth);
+ fwrite(&(psDec->osce.features.num_bits_smooth), sizeof(psDec->osce.features.num_bits_smooth), 1, f_numbits_smooth);
for (k = 0; k < psDec->nb_subfr; k++)
{
@@ -494,6 +521,16 @@ void silk_enhancer(
fwrite(xq, psDec->nb_subfr * psDec->subfr_length, sizeof(xq[0]), fnoisy16k);
#endif
+
+ /* scale output */
+ for (i = 0; i < 320; i++)
+ {
+ float tmp = round((1U<<15) * out_buffer[i]);
+ if (tmp > INT16_MAX) tmp = INT16_MAX;
+ if (tmp < INT16_MIN) tmp = INT16_MIN;
+ xq[i] = (opus_int16) tmp;
+ }
+
}
#endif
\ No newline at end of file
diff --git a/silk/silk_enhancer.h b/silk/silk_enhancer.h
index ab1eba11..95a76e13 100644
--- a/silk/silk_enhancer.h
+++ b/silk/silk_enhancer.h
@@ -1,11 +1,6 @@
#ifndef SILK_ENHANCER_H
#define SILK_ENHANCER_H
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
#include "main.h"
void silk_enhancer(
@@ -16,7 +11,5 @@ void silk_enhancer(
int arch /* I Run-time architecture */
);
-#ifdef __cplusplus
-}
-#endif
+
#endif
diff --git a/silk/structs.h b/silk/structs.h
index 137611f9..50d3d65e 100644
--- a/silk/structs.h
+++ b/silk/structs.h
@@ -46,6 +46,7 @@ POSSIBILITY OF SUCH DAMAGE.
#ifdef ENABLE_OSCE
#include "osce_config.h"
+#include "osce.h"
#endif
#ifdef __cplusplus
@@ -249,9 +250,15 @@ typedef struct {
int last_lag;
int last_type;
int lags[OSCE_MAX_FEATURE_FRAMES];
+ float numbits[2];
float signal_history[OSCE_FEATURES_MAX_HISTORY];
float features[OSCE_FEATURE_DIM * OSCE_MAX_FEATURE_FRAMES];
+} silk_OSCE_features;
+
+typedef struct {
+ silk_OSCE_features features;
+ OSCEModel model;
} silk_OSCE_struct;
#endif
diff --git a/silk_headers.mk b/silk_headers.mk
index 30749222..2588067c 100644
--- a/silk_headers.mk
+++ b/silk_headers.mk
@@ -41,5 +41,4 @@ silk/float/structs_FLP.h \
silk/float/SigProc_FLP.h \
silk/mips/macros_mipsr1.h \
silk/mips/NSQ_del_dec_mipsr1.h \
-silk/mips/sigproc_fix_mipsr1.h \
-silk/silk_enhancer.h
+silk/mips/sigproc_fix_mipsr1.h
diff --git a/silk_sources.mk b/silk_sources.mk
index ec9833a8..3780b164 100644
--- a/silk_sources.mk
+++ b/silk_sources.mk
@@ -75,8 +75,7 @@ silk/stereo_decode_pred.c \
silk/stereo_encode_pred.c \
silk/stereo_find_predictor.c \
silk/stereo_quant_pred.c \
-silk/LPC_fit.c \
-silk/silk_enhancer.c
+silk/LPC_fit.c
SILK_SOURCES_X86_RTCD = \
silk/x86/x86_silk_map.c
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 3ec8c5ca..0ff20904 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -130,8 +130,13 @@ struct OpusEncoder {
middle (memoriless) threshold. The second column is the hysteresis
(difference with the middle) */
static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
+#ifdef ENABLE_OSCE
+ 1000, 700, /* NB<->MB */
+ 1000, 700, /* MB<->WB */
+#else
9000, 700, /* NB<->MB */
9000, 700, /* MB<->WB */
+#endif
13500, 1000, /* WB<->SWB */
14000, 2000, /* SWB<->FB */
};