diff options
author | Jan Buethe <jbuethe@amazon.de> | 2023-11-29 17:45:50 +0300 |
---|---|---|
committer | Jan Buethe <jbuethe@amazon.de> | 2023-11-29 17:45:50 +0300 |
commit | dee887dd9b9b613996747a01f2055f6343e61863 (patch) | |
tree | 967750dd19356ca8c559849f0981f949841e1ff0 | |
parent | e516f2c270dd6e3c23f09ed0be7d3b05bdbaf426 (diff) |
added LACE to SILK decoder (--enable-osce)
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | dnn/osce.h | 13 | ||||
-rw-r--r-- | lpcnet_headers.mk | 4 | ||||
-rw-r--r-- | lpcnet_sources.mk | 8 | ||||
-rw-r--r-- | silk/decode_frame.c | 15 | ||||
-rw-r--r-- | silk/init_decoder.c | 8 | ||||
-rw-r--r-- | silk/osce_config.h | 4 | ||||
-rw-r--r-- | silk/silk_enhancer.c | 89 | ||||
-rw-r--r-- | silk/silk_enhancer.h | 9 | ||||
-rw-r--r-- | silk/structs.h | 7 | ||||
-rw-r--r-- | silk_headers.mk | 3 | ||||
-rw-r--r-- | silk_sources.mk | 3 | ||||
-rw-r--r-- | src/opus_encoder.c | 5 |
14 files changed, 126 insertions, 47 deletions
diff --git a/Makefile.am b/Makefile.am index 452f6d22..62f2dfec 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,6 +25,9 @@ endif if ENABLE_DRED LPCNET_SOURCES += $(DRED_SOURCES) endif +if ENABLE_OSCE +LPCNET_SOURCES += $(OSCE_SOURCES) +endif if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) diff --git a/configure.ac b/configure.ac index 293b1e4d..8d46d3b2 100644 --- a/configure.ac +++ b/configure.ac @@ -856,6 +856,8 @@ AS_IF([test "$enable_osce" = "yes"], [ AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement]) ]) +AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes"]) + AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"]) AC_ARG_ENABLE([extra-programs], @@ -28,9 +28,22 @@ typedef struct float window[LACE_OVERLAP_SIZE]; } LACE; +typedef LACE OSCEModel; + typedef struct NOLACE NOLACE; typedef struct NoLACEState NoLACEState; void init_lace(LACE *hLACE); +void lace_process_20ms_frame( + LACE* hLACE, + float *x_out, + const float *x_in, + const float *features, + const float *numbits, + const int *periods +); + +#define init_osce(x) init_lace(x) + #endif
\ No newline at end of file diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk index be8cf301..3e88c5b9 100644 --- a/lpcnet_headers.mk +++ b/lpcnet_headers.mk @@ -26,3 +26,7 @@ dnn/dred_rdovae_enc_data.h \ dnn/dred_rdovae_dec.h \ dnn/dred_rdovae_dec_data.h \ dnn/dred_rdovae_stats_data.h + +OSCE_HEAD= \ +dnn/osce.h \ +dnn/silk_enhancer.h
\ No newline at end of file diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk index 09b8b462..b2eb4935 100644 --- a/lpcnet_sources.mk +++ b/lpcnet_sources.mk @@ -21,3 +21,11 @@ dnn/dred_rdovae_stats_data.c \ silk/dred_encoder.c \ silk/dred_coding.c \ silk/dred_decoder.c + +OSCE_SOURCES = \ +dnn/osce.c \ +dnn/nndsp.c \ +silk/silk_enhancer.c \ +dnn/nnet.c \ +dnn/parse_lpcnet_weights.c \ +dnn/lace_data.c
\ No newline at end of file diff --git a/silk/decode_frame.c b/silk/decode_frame.c index 564c5033..20665e71 100644 --- a/silk/decode_frame.c +++ b/silk/decode_frame.c @@ -97,12 +97,7 @@ opus_int silk_decode_frame( /********************************************************/ silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); -#ifdef ENABLE_OSCE - /********************************************************/ - /* Run SILK enhancer */ - /********************************************************/ - silk_enhancer( psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch ); -#endif + /********************************************************/ /* Update PLC state */ /********************************************************/ @@ -135,6 +130,14 @@ opus_int silk_decode_frame( silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + +#ifdef ENABLE_OSCE + /********************************************************/ + /* Run SILK enhancer */ + /********************************************************/ + silk_enhancer( psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch ); +#endif + /************************************************/ /* Comfort noise generation / estimation */ /************************************************/ diff --git a/silk/init_decoder.c b/silk/init_decoder.c index 16c03dcd..fc8dbdd8 100644 --- a/silk/init_decoder.c +++ b/silk/init_decoder.c @@ -31,6 +31,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + /************************/ /* Init Decoder State */ /************************/ @@ -52,6 +56,10 @@ opus_int silk_init_decoder( /* Reset PLC state */ silk_PLC_Reset( psDec ); +#ifdef ENABLE_OSCE + init_osce(&psDec->osce.model); +#endif + return(0); } diff --git a/silk/osce_config.h b/silk/osce_config.h index 559f4258..014435e9 100644 --- a/silk/osce_config.h +++ b/silk/osce_config.h @@ -3,7 +3,7 @@ #define OSCE_FEATURES_MAX_HISTORY 350 -#define OSCE_FEATURE_DIM (93+64) +#define OSCE_FEATURE_DIM 93 #define OSCE_MAX_FEATURE_FRAMES 4 #define OSCE_CLEAN_SPEC_NUM_BANDS 64 @@ -30,7 +30,5 @@ #define OSCE_LOG_GAIN_START 92 #define OSCE_LOG_GAIN_LENGTH 1 -#define OSCE_EMBED_PITCH_START 93 -#define OSCE_EMBED_PITCH_LENGTH 64 #endif
\ No newline at end of file diff --git a/silk/silk_enhancer.c b/silk/silk_enhancer.c index c0d6a316..03b4b2ab 100644 --- a/silk/silk_enhancer.c +++ b/silk/silk_enhancer.c @@ -230,7 +230,7 @@ static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS); /* log and scaling */ - for (i = 0; i < 161; i++) + for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++) { spec[i] = 0.3f * log(spec[i] + 1e-9f); } @@ -290,42 +290,49 @@ static void calculate_acorr(float *acorr, float *signal, int lag) } } -static int pitch_postprocessing(silk_OSCE_struct *psOSCE, int lag, int type) +static int pitch_postprocessing(silk_OSCE_features *psFeatures, int lag, int type) { int new_lag; + +#ifdef OSCE_HANGOVER_BUGFIX +#define TESTBIT 1 +#else +#define TESTBIT 0 +#endif + /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */ - if (type != TYPE_VOICED && psOSCE->last_type == TYPE_VOICED && 0) + if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT) /* enter hangover */ { new_lag = OSCE_NO_PITCH_VALUE; - if (psOSCE->pitch_hangover_count < OSCE_PITCH_HANGOVER) + if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER) { - new_lag = psOSCE->last_lag; - psOSCE->pitch_hangover_count = (psOSCE->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; } } - else if (type != TYPE_VOICED && psOSCE->pitch_hangover_count && 0) + else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT) /* continue hangover */ { - new_lag = psOSCE->last_lag; - psOSCE->pitch_hangover_count = (psOSCE->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; } else if (type != TYPE_VOICED) /* unvoiced frame after hangover */ { new_lag = OSCE_NO_PITCH_VALUE; - psOSCE->pitch_hangover_count = 0; + psFeatures->pitch_hangover_count = 0; } else /* voiced frame: update last_lag */ { new_lag = lag; - psOSCE->last_lag = lag; - psOSCE->pitch_hangover_count = 0; + psFeatures->last_lag = lag; + psFeatures->pitch_hangover_count = 0; } /* buffer update */ - psOSCE->last_type = type; + psFeatures->last_type = type; /* with the current setup this should never happen (but who knows...) */ celt_assert(new_lag) @@ -343,7 +350,7 @@ static void calculate_features( int num_subframes, num_samples; float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80]; float *frame, *features; - silk_OSCE_struct *psOSCE; + silk_OSCE_features *psFeatures; int i, n, k; #ifdef WRITE_FEATURES static FILE *f_feat = NULL; @@ -353,23 +360,30 @@ static void calculate_features( } #endif - (void) num_bits; /* TODO: implement num_bits embedding */ - + //OPUS_CLEAR(buffer, 1); memset(buffer, 0, sizeof(buffer)); num_subframes = psDec->nb_subfr; num_samples = num_subframes * 80; - psOSCE = &psDec->osce; + psFeatures = &psDec->osce.features; + + /* smooth bit count */ + psFeatures->numbits[0] = num_bits; +#ifdef OSCE_NUMBITS_BUGFIX + psFeatures->numbits[1] = 0.9 * psFeatures->numbits[1] + 0.1 * num_bits; +#else + psFeatures->numbits[1] = num_bits; +#endif for (n = 0; n < num_samples; n++) { buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15); } - OPUS_COPY(buffer, psOSCE->signal_history, OSCE_FEATURES_MAX_HISTORY); + OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY); for (k = 0; k < num_subframes; k++) { - features = &psOSCE->features[k * OSCE_FEATURE_DIM]; + features = &psFeatures->features[k * OSCE_FEATURE_DIM]; frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80]; memset(features, 0, OSCE_FEATURE_DIM); /* precaution */ @@ -394,10 +408,10 @@ static void calculate_features( } /* pitch hangover and zero value replacement */ - psOSCE->lags[k] = pitch_postprocessing(psOSCE, psDecCtrl->pitchL[k], psDec->indices.signalType); + psFeatures->lags[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType); /* auto-correlation around pitch lag */ - calculate_acorr(features + OSCE_ACORR_START, frame, psOSCE->lags[k]); + calculate_acorr(features + OSCE_ACORR_START, frame, psFeatures->lags[k]); /* ltp */ celt_assert(OSCE_LTP_LENGTH == LTP_ORDER) @@ -415,7 +429,7 @@ static void calculate_features( } /* buffer update */ - OPUS_COPY(psOSCE->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY); + OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY); } void silk_enhancer( @@ -426,15 +440,27 @@ void silk_enhancer( int arch /* I Run-time architecture */ ) { + float in_buffer[320]; + float out_buffer[320]; + int i; + (void) arch; - (void) num_bits; /* ToDo: decide when to enhance (20 ms frame, 16kHz) */ calculate_features(psDec, psDecCtrl, xq, num_bits); + /* scale input */ + for (i = 0; i < 320; i++) + { + in_buffer[i] = ((float) xq[i]) / (1U<<15); + } + + lace_process_20ms_frame(&psDec->osce.model, out_buffer, in_buffer, psDec->osce.features.features, psDec->osce.features.numbits, psDec->osce.features.lags); + + #ifdef WRITE_FEATURES - int i, k; + int k; static FILE *flpc = NULL; static FILE *fgain = NULL; @@ -443,6 +469,7 @@ void silk_enhancer( static FILE *fnoisy16k = NULL; static FILE* f_numbits = NULL; static FILE* f_numbits_smooth = NULL; + static FILE* f_noisy = NULL; if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");} if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");} @@ -452,10 +479,10 @@ void silk_enhancer( if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");} if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");} - psDec->osce.num_bits_smooth = 0.9 * psDec->osce.num_bits_smooth + 0.1 * num_bits; + psDec->osce.features.num_bits_smooth = 0.9 * psDec->osce.features.num_bits_smooth + 0.1 * num_bits; fwrite(&num_bits, sizeof(num_bits), 1, f_numbits); - fwrite(&(psDec->osce.num_bits_smooth), sizeof(psDec->osce.num_bits_smooth), 1, f_numbits_smooth); + fwrite(&(psDec->osce.features.num_bits_smooth), sizeof(psDec->osce.features.num_bits_smooth), 1, f_numbits_smooth); for (k = 0; k < psDec->nb_subfr; k++) { @@ -494,6 +521,16 @@ void silk_enhancer( fwrite(xq, psDec->nb_subfr * psDec->subfr_length, sizeof(xq[0]), fnoisy16k); #endif + + /* scale output */ + for (i = 0; i < 320; i++) + { + float tmp = round((1U<<15) * out_buffer[i]); + if (tmp > INT16_MAX) tmp = INT16_MAX; + if (tmp < INT16_MIN) tmp = INT16_MIN; + xq[i] = (opus_int16) tmp; + } + } #endif
\ No newline at end of file diff --git a/silk/silk_enhancer.h b/silk/silk_enhancer.h index ab1eba11..95a76e13 100644 --- a/silk/silk_enhancer.h +++ b/silk/silk_enhancer.h @@ -1,11 +1,6 @@ #ifndef SILK_ENHANCER_H #define SILK_ENHANCER_H -#ifdef __cplusplus -extern "C" -{ -#endif - #include "main.h" void silk_enhancer( @@ -16,7 +11,5 @@ void silk_enhancer( int arch /* I Run-time architecture */ ); -#ifdef __cplusplus -} -#endif + #endif diff --git a/silk/structs.h b/silk/structs.h index 137611f9..50d3d65e 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -46,6 +46,7 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef ENABLE_OSCE #include "osce_config.h" +#include "osce.h" #endif #ifdef __cplusplus @@ -249,9 +250,15 @@ typedef struct { int last_lag; int last_type; int lags[OSCE_MAX_FEATURE_FRAMES]; + float numbits[2]; float signal_history[OSCE_FEATURES_MAX_HISTORY]; float features[OSCE_FEATURE_DIM * OSCE_MAX_FEATURE_FRAMES]; +} silk_OSCE_features; + +typedef struct { + silk_OSCE_features features; + OSCEModel model; } silk_OSCE_struct; #endif diff --git a/silk_headers.mk b/silk_headers.mk index 30749222..2588067c 100644 --- a/silk_headers.mk +++ b/silk_headers.mk @@ -41,5 +41,4 @@ silk/float/structs_FLP.h \ silk/float/SigProc_FLP.h \ silk/mips/macros_mipsr1.h \ silk/mips/NSQ_del_dec_mipsr1.h \ -silk/mips/sigproc_fix_mipsr1.h \ -silk/silk_enhancer.h +silk/mips/sigproc_fix_mipsr1.h diff --git a/silk_sources.mk b/silk_sources.mk index eba50d44..d63ad969 100644 --- a/silk_sources.mk +++ b/silk_sources.mk @@ -75,8 +75,7 @@ silk/stereo_decode_pred.c \ silk/stereo_encode_pred.c \ silk/stereo_find_predictor.c \ silk/stereo_quant_pred.c \ -silk/LPC_fit.c \ -silk/silk_enhancer.c +silk/LPC_fit.c SILK_SOURCES_X86_RTCD = \ silk/x86/x86_silk_map.c diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 27b3196a..3f1a7fa6 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -130,8 +130,13 @@ struct OpusEncoder { middle (memoriless) threshold. The second column is the hysteresis (difference with the middle) */ static const opus_int32 mono_voice_bandwidth_thresholds[8] = { +#ifdef ENABLE_OSCE + 1000, 700, /* NB<->MB */ + 1000, 700, /* MB<->WB */ +#else 9000, 700, /* NB<->MB */ 9000, 700, /* MB<->WB */ +#endif 13500, 1000, /* WB<->SWB */ 14000, 2000, /* SWB<->FB */ }; |