Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Buethe <jbuethe@amazon.de>2023-12-11 14:40:28 +0300
committerJean-Marc Valin <jmvalin@amazon.com>2023-12-20 07:01:23 +0300
commit6f28d4958648a91bdaa5e6287622c815304e961d (patch)
tree459404c3060e8ee57b01f5ba536144f56828d0c5
parent0a2213c49678be23cdc2ef3d7768e93ad869be6c (diff)
added osce training data dumping option
-rw-r--r--configure.ac22
-rw-r--r--dnn/osce.c11
-rw-r--r--src/opus_demo.c81
-rw-r--r--src/opus_encoder.c22
4 files changed, 127 insertions, 9 deletions
diff --git a/configure.ac b/configure.ac
index 3ceeff48..84ce651d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc],
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
[enable_deep_plc=no])
-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
])
-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
has_float_approx=no
case "$host_cpu" in
@@ -904,16 +904,30 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
])
+AC_ARG_ENABLE([osce-training-data],
+ AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+ [enable_osce_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
AC_ARG_ENABLE([osce],
AS_HELP_STRING([--enable-osce], [enables feature output for SILK enhancement]),,
[enable_osce=no]
)
-AS_IF([test "$enable_osce" = "yes"], [
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
])
-AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes"])
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
diff --git a/dnn/osce.c b/dnn/osce.c
index 093f4f2b..d51cb34d 100644
--- a/dnn/osce.c
+++ b/dnn/osce.c
@@ -17,6 +17,10 @@
#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0)
#endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
+
#define CLIP(a, min, max) (((a) < (min) ? (min) : (a)) > (max) ? (max) : (a))
#define MAX(a, b) ((a) < (b) ? (b) : (a))
@@ -864,7 +868,7 @@ void osce_enhance_frame(
celt_assert(0 && "method not defined");
}
-#ifdef WRITE_FEATURES
+#ifdef ENABLE_OSCE_TRAINING_DATA
int k;
static FILE *flpc = NULL;
@@ -874,7 +878,6 @@ void osce_enhance_frame(
static FILE *fnoisy16k = NULL;
static FILE* f_numbits = NULL;
static FILE* f_numbits_smooth = NULL;
- static FILE* f_noisy = NULL;
if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
@@ -884,10 +887,8 @@ void osce_enhance_frame(
if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
- psDec->osce.features.num_bits_smooth = 0.9 * psDec->osce.features.num_bits_smooth + 0.1 * num_bits;
-
fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
- fwrite(&(psDec->osce.features.num_bits_smooth), sizeof(psDec->osce.features.num_bits_smooth), 1, f_numbits_smooth);
+ fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);
for (k = 0; k < psDec->nb_subfr; k++)
{
diff --git a/src/opus_demo.c b/src/opus_demo.c
index 535d39d6..d1c2481e 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -254,6 +254,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err
}
#endif
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#define COMPLEXITY_MIN 0
+#define COMPLEXITY_MAX 10
+/* Range and step of the randomly drawn OPUS_SET_PACKET_LOSS_PERC value */
+#define PACKET_LOSS_PERC_MIN 0
+#define PACKET_LOSS_PERC_MAX 50
+#define PACKET_LOSS_PERC_STEP 5
+/* Bitrates below this limit always get VBR enabled in new_random_setting() */
+#define CBR_BITRATE_LIMIT 8000
+/* Candidate bitrates (bps) that new_random_setting() picks from at random */
+#define NUM_BITRATES 102
+static int bitrates[NUM_BITRATES] = {
+ 6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480,
+ 6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816,
+ 6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171,
+ 7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541,
+ 7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802,
+ 7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179,
+ 8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980,
+ 9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851,
+ 9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,
+ 11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,
+ 14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,
+ 22000, 32000, 64000
+};
+
+static int randint(int min, int max, int step)
+{
+    /* Uniform draw from {min, min + step, ...} capped at max; step must be > 0. */
+    double r = ((double) rand())/ (RAND_MAX + 1.);
+    int num_values = (max - min) / step + 1;
+
+    /* r is in [0, 1), so the scaled index is one of 0 .. num_values - 1. */
+    return min + step * (int) (num_values * r);
+}
+
+static void new_random_setting(OpusEncoder *enc)
+{
+    /* Draws happen in a fixed order: bitrate, complexity, loss, then (maybe) VBR. */
+    const int rate = bitrates[randint(0, NUM_BITRATES - 1, 1)];
+    const int cplx = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);
+    const int loss = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);
+    int vbr;
+
+    /* Rates below CBR_BITRATE_LIMIT always use VBR; otherwise VBR is drawn at random. */
+    if (rate < CBR_BITRATE_LIMIT) {
+        vbr = 1;
+    } else {
+        vbr = randint(0, 1, 1);
+    }
+
+    printf("changing settings to %d\t%d\t%d\t%d\n", rate, cplx, loss, vbr);
+
+    opus_encoder_ctl(enc, OPUS_SET_BITRATE(rate));
+    opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(cplx));
+    opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(loss));
+    opus_encoder_ctl(enc, OPUS_SET_VBR(vbr));
+}
+
+#endif
+
int main(int argc, char *argv[])
{
int err;
@@ -316,6 +378,10 @@ int main(int argc, char *argv[])
int lost_count=0;
FILE *packet_loss_file=NULL;
int dred_duration=0;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ int silk_random_switching = 0;
+ int silk_frame_counter = 0;
+#endif
#ifdef USE_WEIGHTS_FILE
int blob_len;
unsigned char *blob_data;
@@ -546,6 +612,12 @@ int main(int argc, char *argv[])
mode_list = celt_hq_test;
nb_modes_in_list = 4;
args++;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){
+ silk_random_switching = atoi( argv[ args + 1 ] );
+ printf("switching encoding parameters every %dth frame\n", silk_random_switching);
+ args += 2;
+#endif
} else {
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
print_usage( argv );
@@ -764,6 +836,15 @@ int main(int argc, char *argv[])
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
frame_size = mode_list[curr_mode][2];
}
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ if (silk_random_switching)
+ {
+ silk_frame_counter += 1;
+ if (silk_frame_counter % silk_random_switching == 0) {
+ new_random_setting(enc);
+ }
+ }
+#endif
num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
curr_read = (int)num_read;
tot_in += curr_read;
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 78c1ed7f..357d1e41 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -50,6 +50,9 @@
#else
#include "float/structs_FLP.h"
#endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
#define MAX_ENCODER_BUFFER 480
@@ -1698,6 +1701,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->application == OPUS_APPLICATION_VOIP)
{
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ /* write out high pass filtered clean signal*/
+ static FILE *fout =NULL;
+ if (fout == NULL)
+ {
+ fout = fopen("clean_hp.s16", "wb");
+ }
+
+ {
+ int idx;
+ opus_int16 tmp;
+ for (idx = 0; idx < frame_size; idx++)
+ {
+ tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
+ fwrite(&tmp, sizeof(tmp), 1, fout);
+ }
+ }
+#endif
} else {
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}