diff options
author | Jan Buethe <jbuethe@amazon.de> | 2023-12-11 14:40:28 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-12-20 07:01:23 +0300 |
commit | 6f28d4958648a91bdaa5e6287622c815304e961d (patch) | |
tree | 459404c3060e8ee57b01f5ba536144f56828d0c5 | |
parent | 0a2213c49678be23cdc2ef3d7768e93ad869be6c (diff) |
added osce training data dumping option
-rw-r--r-- | configure.ac | 22 | ||||
-rw-r--r-- | dnn/osce.c | 11 | ||||
-rw-r--r-- | src/opus_demo.c | 81 | ||||
-rw-r--r-- | src/opus_encoder.c | 22 |
4 files changed, 127 insertions, 9 deletions
```diff
diff --git a/configure.ac b/configure.ac
index 3ceeff48..84ce651d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc],
     [AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
     [enable_deep_plc=no])
 
-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
       AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
 ])
-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
 
 has_float_approx=no
 case "$host_cpu" in
@@ -904,16 +904,30 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [
       AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
 ])
 
+AC_ARG_ENABLE([osce-training-data],
+    AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+    [enable_osc_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+      AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+      AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
 AC_ARG_ENABLE([osce],
     AS_HELP_STRING([--enable-osce], [enables feature output for SILK enhancement]),,
     [enable_osce=no]
 )
 
-AS_IF([test "$enable_osce" = "yes"], [
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
       AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
 ])
 
-AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes"])
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
 
 AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
 
```
```diff
diff --git a/dnn/osce.c b/dnn/osce.c
--- a/dnn/osce.c
+++ b/dnn/osce.c
@@ -17,6 +17,10 @@
 #define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0)
 #endif
 
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
+
 #define CLIP(a, min, max) (((a) < (min) ? (min) : (a)) > (max) ? (max) : (a))
 
 #define MAX(a, b) ((a) < (b) ? (b) : (a))
@@ -864,7 +868,7 @@ void osce_enhance_frame(
             celt_assert(0 && "method not defined");
     }
 
-#ifdef WRITE_FEATURES
+#ifdef ENABLE_OSCE_TRAINING_DATA
     int k;
 
     static FILE *flpc = NULL;
@@ -874,7 +878,6 @@ void osce_enhance_frame(
     static FILE *fnoisy16k = NULL;
     static FILE* f_numbits = NULL;
     static FILE* f_numbits_smooth = NULL;
-    static FILE* f_noisy = NULL;
 
     if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
     if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
@@ -884,10 +887,8 @@ void osce_enhance_frame(
     if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
     if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
 
-    psDec->osce.features.num_bits_smooth = 0.9 * psDec->osce.features.num_bits_smooth + 0.1 * num_bits;
-
     fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
-    fwrite(&(psDec->osce.features.num_bits_smooth), sizeof(psDec->osce.features.num_bits_smooth), 1, f_numbits_smooth);
+    fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);
 
     for (k = 0; k < psDec->nb_subfr; k++)
     {
diff --git a/src/opus_demo.c b/src/opus_demo.c
index 535d39d6..d1c2481e 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -254,6 +254,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err
 }
 #endif
 
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#define COMPLEXITY_MIN 0
+#define COMPLEXITY_MAX 10
+
+#define PACKET_LOSS_PERC_MIN 0
+#define PACKET_LOSS_PERC_MAX 50
+#define PACKET_LOSS_PERC_STEP 5
+
+#define CBR_BITRATE_LIMIT 8000
+
+#define NUM_BITRATES 102
+static int bitrates[NUM_BITRATES] = {
+    6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480,
+    6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816,
+    6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171,
+    7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541,
+    7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802,
+    7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179,
+    8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980,
+    9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851,
+    9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,
+    11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,
+    14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,
+    22000, 32000, 64000
+};
+
+static int randint(int min, int max, int step)
+{
+    double r = ((double) rand())/ (RAND_MAX + 1.);
+    int d;
+
+    d = ((int) ((max + 1 - min) * r / step) * step) + min;
+
+    return d;
+}
+
+static void new_random_setting(OpusEncoder *enc)
+{
+    int bitrate_bps;
+    int complexity;
+    int packet_loss_perc;
+    int use_vbr;
+
+    bitrate_bps = bitrates[randint(0, NUM_BITRATES - 1, 1)];
+    complexity = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);
+    packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);
+    use_vbr = bitrate_bps < CBR_BITRATE_LIMIT ? 1 : randint(0, 1, 1);
+
+    if (1)
+    {
+        printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr);
+    }
+
+    opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+    opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+    opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
+    opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
+}
+
+#endif
+
 int main(int argc, char *argv[])
 {
     int err;
@@ -316,6 +378,10 @@ int main(int argc, char *argv[])
     int lost_count=0;
     FILE *packet_loss_file=NULL;
     int dred_duration=0;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+    int silk_random_switching = 0;
+    int silk_frame_counter = 0;
+#endif
 #ifdef USE_WEIGHTS_FILE
     int blob_len;
     unsigned char *blob_data;
@@ -546,6 +612,12 @@ int main(int argc, char *argv[])
             mode_list = celt_hq_test;
             nb_modes_in_list = 4;
             args++;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+        } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){
+            silk_random_switching = atoi( argv[ args + 1 ] );
+            printf("switching encoding parameters every %dth frame\n", silk_random_switching);
+            args += 2;
+#endif
         } else {
             printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
             print_usage( argv );
@@ -764,6 +836,15 @@ int main(int argc, char *argv[])
                 opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
                 frame_size = mode_list[curr_mode][2];
             }
+#ifdef ENABLE_OSCE_TRAINING_DATA
+            if (silk_random_switching)
+            {
+                silk_frame_counter += 1;
+                if (silk_frame_counter % silk_random_switching == 0) {
+                    new_random_setting(enc);
+                }
+            }
+#endif
             num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
             curr_read = (int)num_read;
             tot_in += curr_read;
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 78c1ed7f..357d1e41 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -50,6 +50,9 @@
 #else
 #include "float/structs_FLP.h"
 #endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
 
 #define MAX_ENCODER_BUFFER 480
 
@@ -1698,6 +1701,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     if (st->application == OPUS_APPLICATION_VOIP)
     {
        hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+       /* write out high pass filtered clean signal*/
+       static FILE *fout =NULL;
+       if (fout == NULL)
+       {
+         fout = fopen("clean_hp.s16", "wb");
+       }
+
+       {
+         int idx;
+         opus_int16 tmp;
+         for (idx = 0; idx < frame_size; idx++)
+         {
+            tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
+            fwrite(&tmp, sizeof(tmp), 1, fout);
+         }
+       }
+#endif
     } else {
        dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
     }
```