From 7d328f5bfaa321d823ff4d11b62d5357c99e0693 Mon Sep 17 00:00:00 2001 From: Jan Buethe Date: Wed, 8 Nov 2023 14:03:39 +0100 Subject: Merge LACE/NoLACE under OSCE framework --- .github/workflows/autotools.yml | 8 +- .github/workflows/dred.yml | 2 +- .gitlab-ci.yml | 8 +- CMakeLists.txt | 47 +- Makefile.am | 6 + autogen.sh | 2 +- cmake/OpusSources.cmake | 2 + configure.ac | 29 +- dnn/adaconvtest.c | 449 +++++++ dnn/meson.build | 5 + dnn/nndsp.c | 412 ++++++ dnn/nndsp.h | 141 ++ dnn/nnet.c | 9 +- dnn/nnet_arch.h | 16 + dnn/osce.c | 1411 ++++++++++++++++++++ dnn/osce.h | 81 ++ dnn/osce_config.h | 62 + dnn/osce_features.c | 454 +++++++ dnn/osce_features.h | 50 + dnn/osce_structs.h | 124 ++ dnn/torch/osce/create_testvectors.py | 165 +++ dnn/torch/osce/data/silk_enhancement_set.py | 6 +- dnn/torch/osce/export_model_weights.py | 101 +- dnn/torch/osce/models/lace.py | 2 +- dnn/torch/osce/models/no_lace.py | 4 +- .../osce/utils/layers/limited_adaptive_comb1d.py | 18 +- .../osce/utils/layers/limited_adaptive_conv1d.py | 15 +- dnn/torch/osce/utils/silk_features.py | 16 +- dnn/torch/osce/utils/spec.py | 1 + .../weight-exchange/wexchange/c_export/c_writer.py | 14 +- .../weight-exchange/wexchange/torch/__init__.py | 1 + dnn/torch/weight-exchange/wexchange/torch/torch.py | 157 ++- dnn/write_lpcnet_weights.c | 15 + lpcnet_headers.mk | 9 + lpcnet_sources.mk | 7 + meson.build | 1 + meson_options.txt | 1 + silk/API.h | 16 +- silk/control.h | 5 + silk/dec_API.c | 57 + silk/decode_frame.c | 47 +- silk/init_decoder.c | 33 +- silk/main.h | 7 + silk/structs.h | 17 + silk_sources.mk | 2 +- src/opus_decoder.c | 18 +- src/opus_demo.c | 85 ++ src/opus_encoder.c | 24 + tests/test_opus_api.c | 4 +- 49 files changed, 4062 insertions(+), 104 deletions(-) create mode 100644 dnn/adaconvtest.c create mode 100644 dnn/nndsp.c create mode 100644 dnn/nndsp.h create mode 100644 dnn/osce.c create mode 100644 dnn/osce.h create mode 100644 dnn/osce_config.h create mode 100644 dnn/osce_features.c create mode 100644 dnn/osce_features.h create mode 100644 dnn/osce_structs.h create mode 100644 dnn/torch/osce/create_testvectors.py diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml index 91d332bf..bb66d5b0 100644 --- a/.github/workflows/autotools.yml +++ b/.github/workflows/autotools.yml @@ -29,6 +29,12 @@ jobs: compiler: gcc, buildconfig: --enable-assertions --enable-custom-modes } + - { + name: "Linux/GCC/EnableDNN", + os: ubuntu-latest, + compiler: gcc, + buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce + } steps: - uses: actions/checkout@v3 # No AutoMake on Mac so let's install it @@ -42,4 +48,4 @@ jobs: - name: Build run: make -j 2 - name: Test - run: make check -j 2 \ No newline at end of file + run: make check -j 2 diff --git a/.github/workflows/dred.yml b/.github/workflows/dred.yml index 52ac2571..ac703dd1 100644 --- a/.github/workflows/dred.yml +++ b/.github/workflows/dred.yml @@ -74,7 +74,7 @@ jobs: run: mkdir build - name: Configure working-directory: ./build - run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON + run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON - name: Build working-directory: ./build run: cmake --build . 
-j 2 --config ${{ matrix.config.config }} --target package diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0117c46e..92f578bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -64,9 +64,9 @@ autoconf: - !reference [.snippets, git_prep] script: - ./autogen.sh - - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx + - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce - make -j16 - - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16 + - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16 cache: paths: - "src/*.o" @@ -87,7 +87,7 @@ cmake: script: - ./autogen.sh - mkdir build - - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON + - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON - cmake --build build - cd build && ctest --output-on-failure -j 16 @@ -101,7 +101,7 @@ cmake: script: - ./autogen.sh - mkdir builddir - - meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir + - meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir - meson compile -C builddir - meson test -C builddir #- meson dist --no-tests -C builddir diff --git a/CMakeLists.txt b/CMakeLists.txt index 073d7de8..06e9b675 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,10 @@ set(OPUS_DRED_HELP_STR "enable DRED.") option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF) add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR}) +set(OPUS_OSCE_HELP_STR "enable OSCE.") +option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF) +add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR}) + if(APPLE) set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.") option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF) @@ -364,8 +368,6 @@ endif() add_sources_group(opus silk ${silk_headers} ${silk_sources}) add_sources_group(opus celt ${celt_headers} ${celt_sources}) -add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) -add_sources_group(opus lpcnet ${dred_headers} ${dred_sources}) if(OPUS_FIXED_POINT) add_sources_group(opus silk ${silk_sources_fixed}) @@ -380,11 +382,26 @@ if(NOT OPUS_ENABLE_FLOAT_API) target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API) endif() +if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE) + add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) + set(OPUS_DNN TRUE) +else() + set(OPUS_DNN FALSE) +endif() + +if (OPUS_DNN) + add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) + target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC) +endif() + if (OPUS_DRED) + add_sources_group(opus lpcnet ${dred_headers} ${dred_sources}) target_compile_definitions(opus PRIVATE ENABLE_DRED) - if(NOT OPUS_DEEP_PLC) - target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC) - endif() +endif() + +if (OPUS_OSCE) + add_sources_group(opus lpcnet ${osce_headers} ${osce_sources}) + target_compile_definitions(opus PRIVATE ENABLE_OSCE) endif() if(NOT OPUS_DISABLE_INTRINSICS) @@ -405,7 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) endif() add_sources_group(opus celt 
${celt_sources_x86_rtcd}) add_sources_group(opus silk ${silk_sources_x86_rtcd}) - add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd}) + endif() endif() if(SSE1_SUPPORTED) @@ -427,7 +446,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) if(SSE2_SUPPORTED) if(OPUS_X86_MAY_HAVE_SSE2) add_sources_group(opus celt ${celt_sources_sse2}) - add_sources_group(opus lpcnet ${dnn_sources_sse2}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_sse2}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2) if(NOT MSVC) set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2) @@ -445,7 +466,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) if(OPUS_X86_MAY_HAVE_SSE4_1) add_sources_group(opus celt ${celt_sources_sse4_1}) add_sources_group(opus silk ${silk_sources_sse4_1}) - add_sources_group(opus lpcnet ${dnn_sources_sse4_1}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_sse4_1}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1) if(NOT MSVC) set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1) @@ -471,7 +494,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) add_sources_group(opus celt ${celt_sources_avx2}) add_sources_group(opus silk ${silk_sources_avx2}) add_sources_group(opus silk ${silk_sources_float_avx2}) - add_sources_group(opus lpcnet ${dnn_sources_avx2}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_avx2}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2) if(MSVC) set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2") @@ -524,7 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) add_sources_group(opus celt ${celt_sources_arm_neon_intr}) add_sources_group(opus silk ${silk_sources_arm_neon_intr}) - add_sources_group(opus lpcnet ${dnn_sources_arm_neon}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_arm_neon}) + endif() # silk arm neon depends on main_Fix.h target_include_directories(opus PRIVATE silk/fixed) diff --git a/Makefile.am b/Makefile.am index d09c1771..4fd821a5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,6 +25,9 @@ endif if ENABLE_DRED LPCNET_SOURCES += $(DRED_SOURCES) endif +if ENABLE_OSCE +LPCNET_SOURCES += $(OSCE_SOURCES) +endif if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) @@ -132,6 +135,9 @@ endif if ENABLE_DRED LPCNET_HEAD += $(DRED_HEAD) endif +if ENABLE_OSCE +LPCNET_HEAD += $(OSCE_HEAD) +endif libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES) libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@ diff --git a/autogen.sh b/autogen.sh index b7482c2d..1987e38b 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -dnn/download_model.sh df63771 +dnn/download_model.sh 591c8ba echo "Updating build configuration files, please wait...." 
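For reference, each of the three build systems touched by this patch gains an OSCE switch; a minimal sketch of enabling it from the command line (the flag names are the ones added in the hunks above and in configure.ac below, the rest of each command is illustrative):

    # autotools
    ./configure --enable-osce

    # CMake
    cmake .. -DOPUS_OSCE=ON

    # Meson
    meson setup -Denable-osce=true builddir
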
diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake
index 74e4eaed..0cf24557 100644
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -42,8 +42,10 @@ get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10) get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers) get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
+get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
 get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources) get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
+get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
 get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd) get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2) get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
diff --git a/configure.ac b/configure.ac
index b4c5f2a5..84ce651d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc], [AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],, [enable_deep_plc=no])
-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
 AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC]) ])
-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
 has_float_approx=no case "$host_cpu" in
@@ -904,6 +904,31 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [ AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float]) ])
+AC_ARG_ENABLE([osce-training-data],
+ AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+ [enable_osce_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
+AC_ARG_ENABLE([osce],
+ AS_HELP_STRING([--enable-osce], [enable Opus Speech Coding Enhancement]),,
+ [enable_osce=no]
+)
+
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
+])
+
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
+
 AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"]) AC_ARG_ENABLE([extra-programs],
diff --git a/dnn/adaconvtest.c b/dnn/adaconvtest.c
new file mode 100644
index 00000000..722e4aff
--- /dev/null
+++ b/dnn/adaconvtest.c
@@ -0,0 +1,449 @@
+#include "lace_data.h"
+#include "nolace_data.h"
+#include "osce.h"
+#include "nndsp.h"
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+void adaconv_compare(
+ const char * prefix,
+ int num_frames,
+ AdaConvState* hAdaConv,
+ LinearLayer *kernel_layer,
+ LinearLayer *gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain
+)
+{
+ char feature_file[256];
+ char x_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + float window[40]; + + init_adaconv_state(hAdaConv); + compute_overlap_window(window, 40); + + FILE *f_features, *f_x_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != frame_size * in_channels) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != frame_size * out_channels) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim, + frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding, + filter_gain_a, filter_gain_b, shape_gain, window, 0); + + mse = 0; + for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size * out_channels)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + + +void adacomb_compare( + const char * prefix, + int num_frames, + AdaCombState* hAdaComb, + LinearLayer *kernel_layer, + LinearLayer *gain_layer, + LinearLayer *global_gain_layer, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit +) +{ + char feature_file[256]; + char x_in_file[256]; + char p_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + int pitch_lag; + float window[40]; + + init_adacomb_state(hAdaComb); + compute_overlap_window(window, 40); + + FILE *f_features, *f_x_in, *f_p_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(p_in_file, prefix); + strcat(p_in_file, "_p_in.s32"); + f_p_in = fopen(p_in_file, "r"); + if (f_p_in == NULL) + { + sprintf(message, "could not open file %s", p_in_file); + perror(message); + exit(1); + } + + 
strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer, + pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0); + + + mse = 0; + for (i_sample = 0; i_sample < frame_size; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + +void adashape_compare( + const char * prefix, + int num_frames, + AdaShapeState* hAdaShape, + LinearLayer *alpha1, + LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k +) +{ + char feature_file[256]; + char x_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + + init_adashape_state(hAdaShape); + + FILE *f_features, *f_x_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim, + frame_size, avg_pool_k, 0); + + mse = 0; + for (i_sample = 0; i_sample < frame_size; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + + +int main() +{ + LACELayers hLACE; + NOLACELayers hNoLACE; + 
+ AdaConvState hAdaConv; + AdaCombState hAdaComb; + AdaShapeState hAdaShape; + + init_adaconv_state(&hAdaConv); + + init_lacelayers(&hLACE, lacelayers_arrays); + init_nolacelayers(&hNoLACE, nolacelayers_arrays); + + printf("\ntesting lace.af1 (1 in, 1 out)...\n"); + adaconv_compare( + "testvectors/lace_af1", + 5, + &hAdaConv, + &hLACE.lace_af1_kernel, + &hLACE.lace_af1_gain, + LACE_AF1_FEATURE_DIM, + LACE_AF1_FRAME_SIZE, + LACE_AF1_OVERLAP_SIZE, + LACE_AF1_IN_CHANNELS, + LACE_AF1_OUT_CHANNELS, + LACE_AF1_KERNEL_SIZE, + LACE_AF1_LEFT_PADDING, + LACE_AF1_FILTER_GAIN_A, + LACE_AF1_FILTER_GAIN_B, + LACE_AF1_SHAPE_GAIN + ); + + + printf("\ntesting nolace.af1 (1 in, 2 out)...\n"); + adaconv_compare( + "testvectors/nolace_af1", + 5, + &hAdaConv, + &hNoLACE.nolace_af1_kernel, + &hNoLACE.nolace_af1_gain, + NOLACE_AF1_FEATURE_DIM, + NOLACE_AF1_FRAME_SIZE, + NOLACE_AF1_OVERLAP_SIZE, + NOLACE_AF1_IN_CHANNELS, + NOLACE_AF1_OUT_CHANNELS, + NOLACE_AF1_KERNEL_SIZE, + NOLACE_AF1_LEFT_PADDING, + NOLACE_AF1_FILTER_GAIN_A, + NOLACE_AF1_FILTER_GAIN_B, + NOLACE_AF1_SHAPE_GAIN + ); + + + printf("testing nolace.af4 (2 in, 1 out)...\n"); + adaconv_compare( + "testvectors/nolace_af4", + 5, + &hAdaConv, + &hNoLACE.nolace_af4_kernel, + &hNoLACE.nolace_af4_gain, + NOLACE_AF4_FEATURE_DIM, + NOLACE_AF4_FRAME_SIZE, + NOLACE_AF4_OVERLAP_SIZE, + NOLACE_AF4_IN_CHANNELS, + NOLACE_AF4_OUT_CHANNELS, + NOLACE_AF4_KERNEL_SIZE, + NOLACE_AF4_LEFT_PADDING, + NOLACE_AF4_FILTER_GAIN_A, + NOLACE_AF4_FILTER_GAIN_B, + NOLACE_AF4_SHAPE_GAIN + ); + + printf("\ntesting nolace.af2 (2 in, 2 out)...\n"); + adaconv_compare( + "testvectors/nolace_af2", + 5, + &hAdaConv, + &hNoLACE.nolace_af2_kernel, + &hNoLACE.nolace_af2_gain, + NOLACE_AF2_FEATURE_DIM, + NOLACE_AF2_FRAME_SIZE, + NOLACE_AF2_OVERLAP_SIZE, + NOLACE_AF2_IN_CHANNELS, + NOLACE_AF2_OUT_CHANNELS, + NOLACE_AF2_KERNEL_SIZE, + NOLACE_AF2_LEFT_PADDING, + NOLACE_AF2_FILTER_GAIN_A, + NOLACE_AF2_FILTER_GAIN_B, + NOLACE_AF2_SHAPE_GAIN + ); + + printf("\ntesting lace.cf1...\n"); + adacomb_compare( + "testvectors/lace_cf1", + 5, + &hAdaComb, + &hLACE.lace_cf1_kernel, + &hLACE.lace_cf1_gain, + &hLACE.lace_cf1_global_gain, + LACE_CF1_FEATURE_DIM, + LACE_CF1_FRAME_SIZE, + LACE_CF1_OVERLAP_SIZE, + LACE_CF1_KERNEL_SIZE, + LACE_CF1_LEFT_PADDING, + LACE_CF1_FILTER_GAIN_A, + LACE_CF1_FILTER_GAIN_B, + LACE_CF1_LOG_GAIN_LIMIT + ); + + printf("\ntesting nolace.tdshape1...\n"); + adashape_compare( + "testvectors/nolace_tdshape1", + 5, + &hAdaShape, + &hNoLACE.nolace_tdshape1_alpha1, + &hNoLACE.nolace_tdshape1_alpha2, + NOLACE_TDSHAPE1_FEATURE_DIM, + NOLACE_TDSHAPE1_FRAME_SIZE, + NOLACE_TDSHAPE1_AVG_POOL_K + ); + + return 0; +} + +/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . 
-I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */ \ No newline at end of file diff --git a/dnn/meson.build b/dnn/meson.build index 6e520fbc..737d4a02 100644 --- a/dnn/meson.build +++ b/dnn/meson.build @@ -5,6 +5,11 @@ if opt_enable_dred dnn_sources += dred_sources endif +osce_sources = sources['OSCE_SOURCES'] +if opt_enable_osce + dnn_sources += osce_sources +endif + dnn_sources_sse2 = sources['DNN_SOURCES_SSE2'] dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1'] dnn_sources_avx2 = sources['DNN_SOURCES_AVX2'] diff --git a/dnn/nndsp.c b/dnn/nndsp.c new file mode 100644 index 00000000..bfbf5735 --- /dev/null +++ b/dnn/nndsp.c @@ -0,0 +1,412 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "nndsp.h" +#include "arch.h" +#include "nnet.h" +#include "os_support.h" +#include "pitch.h" + +#include + +#ifndef M_PI +#define M_PI 3.141592653589793f +#endif + +#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel)) + +void init_adaconv_state(AdaConvState *hAdaConv) +{ + OPUS_CLEAR(hAdaConv, 1); +} + +void init_adacomb_state(AdaCombState *hAdaComb) +{ + OPUS_CLEAR(hAdaComb, 1); +} + +void init_adashape_state(AdaShapeState *hAdaShape) +{ + OPUS_CLEAR(hAdaShape, 1); +} + +void compute_overlap_window(float *window, int overlap_size) +{ + int i_sample; + for (i_sample=0; i_sample < overlap_size; i_sample++) + { + window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size); + } +} + +#ifdef DEBUG_NNDSP +void print_float_vector(const char* name, const float *vec, int length) +{ + for (int i = 0; i < length; i ++) + { + printf("%s[%d]: %f\n", name, i, vec[i]); + } +} +#endif + +static void scale_kernel( + float *kernel, + int in_channels, + int out_channels, + int kernel_size, + float *gain +) +/* normalizes (p-norm) kernel over input channel and kernel dimension */ +{ + float norm; + int i_in_channels, i_out_channels, i_kernel; + + for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++) + { + norm = 0; + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++) + { + for (i_kernel = 0; i_kernel < kernel_size; i_kernel++) + { + norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)]; + } + } +#ifdef DEBUG_NNDSP + printf("kernel norm: %f, %f\n", norm, sqrt(norm)); +#endif + norm = 1.f / (1e-6f + sqrt(norm)); + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++) + { + for (i_kernel = 0; i_kernel < kernel_size; i_kernel++) + { + + kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels]; + } + } + } +} + +static void transform_gains( + float *gains, + int num_gains, + float filter_gain_a, + float filter_gain_b +) +{ + int i; + for (i = 0; i < num_gains; i++) + { + gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b); + } +} + +void adaconv_process_frame( + AdaConvState* hAdaConv, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + int feature_dim, + int frame_size, + int overlap_size, + int in_channels, + int out_channels, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float shape_gain, + float *window, + int arch +) +{ + float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS]; + float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; + float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)]; + float kernel0[ADACONV_MAX_KERNEL_SIZE]; + float kernel1[ADACONV_MAX_KERNEL_SIZE]; + float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE]; + float channel_buffer1[ADACONV_MAX_FRAME_SIZE]; + float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS]; + float *p_input; + int i_in_channels, i_out_channels, i_sample; + + (void) feature_dim; /* ToDo: figure out whether we might need this information */ + + celt_assert(shape_gain == 1); + celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. 
Non-causal version not difficult to implement but will require third loop */ + celt_assert(kernel_size < frame_size); + + OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS); + OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS); + OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)); + +#ifdef DEBUG_NNDSP + print_float_vector("x_in", x_in, in_channels * frame_size); +#endif + + /* prepare input */ + for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++) + { + OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size); + OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size); + } + p_input = input_buffer + kernel_size; + + + /* calculate new kernel and new gain */ + compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch); + compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch); +#ifdef DEBUG_NNDSP + print_float_vector("features", features, feature_dim); + print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size); + print_float_vector("adaconv_gain_raw", gain_buffer, out_channels); +#endif + transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b); + scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer); + +#ifdef DEBUG_NNDSP + print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size); + print_float_vector("adaconv_gain", gain_buffer, out_channels); +#endif + + /* calculate overlapping part using kernel from last frame */ + + for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++) + { + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++) + { + OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE); + OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE); + + OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size); + OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size); + celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch); + celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch); + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample]; + output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample]; + } + for (i_sample = overlap_size; i_sample < frame_size; i_sample++) + { + output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample]; + } + } + } + + OPUS_COPY(x_out, output_buffer, out_channels * frame_size); + +#ifdef DEBUG_NNDSP + print_float_vector("x_out", x_out, out_channels * frame_size); +#endif + + /* buffer update */ + for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++) + { + OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size); + } + OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * 
out_channels); +} + +void adacomb_process_frame( + AdaCombState* hAdaComb, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + const LinearLayer *global_gain_layer, + int pitch_lag, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit, + float *window, + int arch +) +{ + float output_buffer[ADACOMB_MAX_FRAME_SIZE]; + float output_buffer_last[ADACOMB_MAX_FRAME_SIZE]; + float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE]; + float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE]; + float gain, global_gain; + float *p_input; + int i_sample; + float kernel[16]; + float last_kernel[16]; + + (void) feature_dim; /* ToDo: figure out whether we might need this information */ + + OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE); + OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE); + OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE); + + OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG); + OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size); + p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG; + + /* calculate new kernel and new gain */ + compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch); + compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch); + compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch); +#ifdef DEBUG_NNDSP + print_float_vector("features", features, feature_dim); + print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size); + print_float_vector("adacomb_gain_raw", &gain, 1); + print_float_vector("adacomb_global_gain_raw", &global_gain, 1); +#endif + gain = exp(log_gain_limit - gain); + global_gain = exp(filter_gain_a * global_gain + filter_gain_b); + scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain); + +#ifdef DEBUG_NNDSP + print_float_vector("adacomb_kernel", kernel_buffer, kernel_size); + print_float_vector("adacomb_gain", &gain, 1); +#endif + + OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE); + OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE); + OPUS_COPY(kernel, kernel_buffer, kernel_size); + OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size); + + celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch); + + celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch); + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample]; + } + + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample]; + } + + for (i_sample = overlap_size; i_sample < frame_size; i_sample++) + { + output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]); + } + OPUS_COPY(x_out, output_buffer, frame_size); + +#ifdef DEBUG_NNDSP + print_float_vector("x_out", x_out, frame_size); +#endif + + /* buffer update */ + OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size); + OPUS_COPY(hAdaComb->history, p_input + 
frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG); + hAdaComb->last_pitch_lag = pitch_lag; + hAdaComb->last_global_gain = global_gain; +} + + +void adashape_process_frame( + AdaShapeState *hAdaShape, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *alpha1, + const LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k, + int arch +) +{ + float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE]; + float out_buffer[ADASHAPE_MAX_FRAME_SIZE]; + int i, k; + int tenv_size; + float mean; + float *tenv; + + celt_assert(frame_size % avg_pool_k == 0); + celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM); + + tenv_size = frame_size / avg_pool_k; + tenv = in_buffer + feature_dim; + OPUS_CLEAR(tenv, tenv_size + 1); + + OPUS_COPY(in_buffer, features, feature_dim); + + /* calculate temporal envelope */ + mean = 0; + for (i = 0; i < tenv_size; i++) + { + for (k = 0; k < avg_pool_k; k++) + { + tenv[i] += fabs(x_in[i * avg_pool_k + k]); + } + tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f); + mean += tenv[i]; + } + mean /= tenv_size; + for (i = 0; i < tenv_size; i++) + { + tenv[i] -= mean; + } + tenv[tenv_size] = mean; +#ifdef DEBUG_NNDSP + print_float_vector("tenv", tenv, tenv_size + 1); +#endif + + /* calculate temporal weights */ +#ifdef DEBUG_NNDSP + print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1); +#endif + compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, feature_dim + tenv_size + 1, ACTIVATION_LINEAR, arch); +#ifdef DEBUG_NNDSP + print_float_vector("alpha1_out", out_buffer, frame_size); +#endif + /* compute leaky ReLU by hand. ToDo: try tanh activation */ + for (i = 0; i < frame_size; i ++) + { + in_buffer[i] = out_buffer[i] >= 0 ? out_buffer[i] : 0.2f * out_buffer[i]; + } +#ifdef DEBUG_NNDSP + print_float_vector("post_alpha1", in_buffer, frame_size); +#endif + compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch); + + /* shape signal */ + for (i = 0; i < frame_size; i ++) + { + x_out[i] = exp(out_buffer[i]) * x_in[i]; + } + +} diff --git a/dnn/nndsp.h b/dnn/nndsp.h new file mode 100644 index 00000000..f00094b6 --- /dev/null +++ b/dnn/nndsp.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef NNDSP_H +#define NNDSP_H + +#include "opus_types.h" +#include "nnet.h" +#include + + +#define ADACONV_MAX_KERNEL_SIZE 16 +#define ADACONV_MAX_INPUT_CHANNELS 2 +#define ADACONV_MAX_OUTPUT_CHANNELS 2 +#define ADACONV_MAX_FRAME_SIZE 80 +#define ADACONV_MAX_OVERLAP_SIZE 40 + +#define ADACOMB_MAX_LAG 300 +#define ADACOMB_MAX_KERNEL_SIZE 16 +#define ADACOMB_MAX_FRAME_SIZE 80 +#define ADACOMB_MAX_OVERLAP_SIZE 40 + +#define ADASHAPE_MAX_INPUT_DIM 512 +#define ADASHAPE_MAX_FRAME_SIZE 160 + +/*#define DEBUG_NNDSP*/ +#ifdef DEBUG_NNDSP +#include +#endif + + +void print_float_vector(const char* name, const float *vec, int length); + +typedef struct { + float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS]; + float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; + float last_gain; +} AdaConvState; + + +typedef struct { + float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG]; + float last_kernel[ADACOMB_MAX_KERNEL_SIZE]; + float last_global_gain; + int last_pitch_lag; +} AdaCombState; + + +typedef struct { + float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM]; + float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE]; +} AdaShapeState; + +void init_adaconv_state(AdaConvState *hAdaConv); + +void init_adacomb_state(AdaCombState *hAdaComb); + +void init_adashape_state(AdaShapeState *hAdaShape); + +void compute_overlap_window(float *window, int overlap_size); + +void adaconv_process_frame( + AdaConvState* hAdaConv, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + int feature_dim, /* not strictly necessary */ + int frame_size, + int overlap_size, + int in_channels, + int out_channels, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float shape_gain, + float *window, + int arch +); + +void adacomb_process_frame( + AdaCombState* hAdaComb, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + const LinearLayer *global_gain_layer, + int pitch_lag, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit, + float *window, + int arch +); + +void adashape_process_frame( + AdaShapeState *hAdaShape, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *alpha1, + const LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k, + int arch +); + +#endif diff --git a/dnn/nnet.c b/dnn/nnet.c index e794e450..7ba623ca 100644 --- a/dnn/nnet.c +++ b/dnn/nnet.c @@ -41,6 +41,10 @@ #include "os_support.h" #include "vec.h" +#ifdef ENABLE_OSCE +#include "osce_config.h" +#endif + #ifdef NO_OPTIMIZATIONS #if defined(_MSC_VER) #pragma message ("Compiling without any vectorization. 
This code will be very slow") @@ -59,8 +63,11 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float compute_activation(output, output, layer->nb_outputs, activation, arch); } +#ifdef ENABLE_OSCE +#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS) +#else #define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS) - +#endif void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch) { diff --git a/dnn/nnet_arch.h b/dnn/nnet_arch.h index 12a467e5..694a3608 100644 --- a/dnn/nnet_arch.h +++ b/dnn/nnet_arch.h @@ -64,13 +64,29 @@ static OPUS_INLINE float relu(float x) return x < 0 ? 0 : x; } +/*#define HIGH_ACCURACY */ + void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation) { int i; if (activation == ACTIVATION_SIGMOID) { +#ifdef HIGH_ACCURACY + for (int n=0; n +#include "osce.h" +#include "osce_features.h" +#include "os_support.h" +#include "nndsp.h" +#include "float_cast.h" +#include "arch.h" + +#ifdef OSCE_DEBUG +#include +/*#define WRITE_FEATURES*/ +/*#define DEBUG_LACE*/ +/*#define DEBUG_NOLACE*/ +#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0) +#endif + +#ifdef ENABLE_OSCE_TRAINING_DATA +#include +#endif + +#define CLIP(a, min, max) (((a) < (min) ? (min) : (a)) > (max) ? (max) : (a)) + +extern const WeightArray lacelayers_arrays[]; +extern const WeightArray nolacelayers_arrays[]; + +/* LACE */ + +#ifndef DISABLE_LACE + +static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale) +{ + float x; + (void) dim; + + numbits = logscale ? 
log(numbits) : numbits; + x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2; + + emb[0] = sin(x * LACE_NUMBITS_SCALE_0 - 0.5f); + emb[1] = sin(x * LACE_NUMBITS_SCALE_1 - 0.5f); + emb[2] = sin(x * LACE_NUMBITS_SCALE_2 - 0.5f); + emb[3] = sin(x * LACE_NUMBITS_SCALE_3 - 0.5f); + emb[4] = sin(x * LACE_NUMBITS_SCALE_4 - 0.5f); + emb[5] = sin(x * LACE_NUMBITS_SCALE_5 - 0.5f); + emb[6] = sin(x * LACE_NUMBITS_SCALE_6 - 0.5f); + emb[7] = sin(x * LACE_NUMBITS_SCALE_7 - 0.5f); +} + + +static int init_lace(LACE *hLACE, const WeightArray *weights) +{ + int ret = 0; + OPUS_CLEAR(hLACE, 1); + celt_assert(weights != NULL); + + ret = init_lacelayers(&hLACE->layers, weights); + + compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE); + + return ret; +} + +static void reset_lace_state(LACEState *state) +{ + OPUS_CLEAR(state, 1); + + init_adacomb_state(&state->cf1_state); + init_adacomb_state(&state->cf2_state); + init_adaconv_state(&state->af1_state); +} + +static void lace_feature_net( + LACE *hLACE, + LACEState *state, + float *output, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float input_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)]; + float output_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)]; + float numbits_embedded[2 * LACE_NUMBITS_EMBEDDING_DIM]; + int i_subframe; + + compute_lace_numbits_embedding(numbits_embedded, numbits[0], LACE_NUMBITS_EMBEDDING_DIM, + log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1); + compute_lace_numbits_embedding(numbits_embedded + LACE_NUMBITS_EMBEDDING_DIM, numbits[1], LACE_NUMBITS_EMBEDDING_DIM, + log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1); + + /* scaling and dimensionality reduction */ + for (i_subframe = 0; i_subframe < 4; i_subframe ++) + { + OPUS_COPY(input_buffer, features + i_subframe * LACE_NUM_FEATURES, LACE_NUM_FEATURES); + OPUS_COPY(input_buffer + LACE_NUM_FEATURES, hLACE->layers.lace_pitch_embedding.float_weights + periods[i_subframe] * LACE_PITCH_EMBEDDING_DIM, LACE_PITCH_EMBEDDING_DIM); + OPUS_COPY(input_buffer + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * LACE_NUMBITS_EMBEDDING_DIM); + + compute_generic_conv1d( + &hLACE->layers.lace_fnet_conv1, + output_buffer + i_subframe * LACE_HIDDEN_FEATURE_DIM, + NULL, + input_buffer, + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM, + ACTIVATION_TANH, + arch); + } + + /* subframe accumulation */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_HIDDEN_FEATURE_DIM); + compute_generic_conv1d( + &hLACE->layers.lace_fnet_conv2, + output_buffer, + state->feature_net_conv2_state, + input_buffer, + 4 * LACE_HIDDEN_FEATURE_DIM, + ACTIVATION_TANH, + arch + ); + + /* tconv upsampling */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM); + compute_generic_dense( + &hLACE->layers.lace_fnet_tconv, + output_buffer, + input_buffer, + ACTIVATION_LINEAR, + arch + ); + + /* GRU */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM); + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + compute_generic_gru( + &hLACE->layers.lace_fnet_gru_input, + &hLACE->layers.lace_fnet_gru_recurrent, + state->feature_net_gru_state, + input_buffer + i_subframe * LACE_COND_DIM, + arch + ); + OPUS_COPY(output + i_subframe * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM); + } +} + + +static void lace_process_20ms_frame( + LACE* hLACE, + LACEState *state, + float *x_out, + const float *x_in, + const float *features, + const float *numbits, + const int 
*periods, + int arch +) +{ + float feature_buffer[4 * LACE_COND_DIM]; + float output_buffer[4 * LACE_FRAME_SIZE]; + int i_subframe, i_sample; + +#ifdef DEBUG_LACE + static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL; + static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods; + + + FINIT(f_features, "debug/c_features.f32", "wb"); + FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb"); + FINIT(f_xin, "debug/c_x_in.f32", "wb"); + FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb"); + FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb"); + FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb"); + FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb"); + FINIT(f_postaf1, "debug/c_post_af1.f32", "wb"); + FINIT(f_numbits, "debug/c_numbits.f32", "wb"); + FINIT(f_periods, "debug/c_periods.s32", "wb"); + + fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin); + fwrite(numbits, sizeof(*numbits), 2, f_numbits); + fwrite(periods, sizeof(*periods), 4, f_periods); +#endif + + /* pre-emphasis */ + for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++) + { + output_buffer[i_sample] = x_in[i_sample] - LACE_PREEMPH * state->preemph_mem; + state->preemph_mem = x_in[i_sample]; + } + + /* run feature encoder */ + lace_feature_net(hLACE, state, feature_buffer, features, numbits, periods, arch); +#ifdef DEBUG_LACE + fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features); + fwrite(feature_buffer, sizeof(*feature_buffer), 4 * LACE_COND_DIM, f_encfeatures); + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph); +#endif + + /* 1st comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adacomb_process_frame( + &state->cf1_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_cf1_kernel, + &hLACE->layers.lace_cf1_gain, + &hLACE->layers.lace_cf1_global_gain, + periods[i_subframe], + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_CF1_KERNEL_SIZE, + LACE_CF1_LEFT_PADDING, + LACE_CF1_FILTER_GAIN_A, + LACE_CF1_FILTER_GAIN_B, + LACE_CF1_LOG_GAIN_LIMIT, + hLACE->window, + arch); + } + +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1); +#endif + + /* 2nd comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adacomb_process_frame( + &state->cf2_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_cf2_kernel, + &hLACE->layers.lace_cf2_gain, + &hLACE->layers.lace_cf2_global_gain, + periods[i_subframe], + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_CF2_KERNEL_SIZE, + LACE_CF2_LEFT_PADDING, + LACE_CF2_FILTER_GAIN_A, + LACE_CF2_FILTER_GAIN_B, + LACE_CF2_LOG_GAIN_LIMIT, + hLACE->window, + arch); + } +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2); +#endif + + /* final adaptive filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adaconv_process_frame( + &state->af1_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_af1_kernel, + &hLACE->layers.lace_af1_gain, + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_AF1_IN_CHANNELS, + LACE_AF1_OUT_CHANNELS, + LACE_AF1_KERNEL_SIZE, + 
LACE_AF1_LEFT_PADDING, + LACE_AF1_FILTER_GAIN_A, + LACE_AF1_FILTER_GAIN_B, + LACE_AF1_SHAPE_GAIN, + hLACE->window, + arch); + } +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1); +#endif + + /* de-emphasis */ + for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++) + { + x_out[i_sample] = output_buffer[i_sample] + LACE_PREEMPH * state->deemph_mem; + state->deemph_mem = x_out[i_sample]; + } +#ifdef DEBUG_LACE + fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph); +#endif +} + +#endif /* #ifndef DISABLE_LACE */ + + +/* NoLACE */ +#ifndef DISABLE_NOLACE + +static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale) +{ + float x; + (void) dim; + + numbits = logscale ? log(numbits) : numbits; + x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2; + + emb[0] = sin(x * NOLACE_NUMBITS_SCALE_0 - 0.5f); + emb[1] = sin(x * NOLACE_NUMBITS_SCALE_1 - 0.5f); + emb[2] = sin(x * NOLACE_NUMBITS_SCALE_2 - 0.5f); + emb[3] = sin(x * NOLACE_NUMBITS_SCALE_3 - 0.5f); + emb[4] = sin(x * NOLACE_NUMBITS_SCALE_4 - 0.5f); + emb[5] = sin(x * NOLACE_NUMBITS_SCALE_5 - 0.5f); + emb[6] = sin(x * NOLACE_NUMBITS_SCALE_6 - 0.5f); + emb[7] = sin(x * NOLACE_NUMBITS_SCALE_7 - 0.5f); +} + +static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights) +{ + int ret = 0; + OPUS_CLEAR(hNoLACE, 1); + celt_assert(weights != NULL); + + ret = init_nolacelayers(&hNoLACE->layers, weights); + + compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE); + + return ret; +} + +static void reset_nolace_state(NoLACEState *state) +{ + OPUS_CLEAR(state, 1); + + init_adacomb_state(&state->cf1_state); + init_adacomb_state(&state->cf2_state); + init_adaconv_state(&state->af1_state); + init_adaconv_state(&state->af2_state); + init_adaconv_state(&state->af3_state); + init_adaconv_state(&state->af4_state); + init_adashape_state(&state->tdshape1_state); + init_adashape_state(&state->tdshape2_state); + init_adashape_state(&state->tdshape3_state); +} + +static void nolace_feature_net( + NoLACE *hNoLACE, + NoLACEState *state, + float *output, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float input_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)]; + float output_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)]; + float numbits_embedded[2 * NOLACE_NUMBITS_EMBEDDING_DIM]; + int i_subframe; + + compute_nolace_numbits_embedding(numbits_embedded, numbits[0], NOLACE_NUMBITS_EMBEDDING_DIM, + log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1); + compute_nolace_numbits_embedding(numbits_embedded + NOLACE_NUMBITS_EMBEDDING_DIM, numbits[1], NOLACE_NUMBITS_EMBEDDING_DIM, + log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1); + + /* scaling and dimensionality reduction */ + for (i_subframe = 0; i_subframe < 4; i_subframe ++) + { + OPUS_COPY(input_buffer, features + i_subframe * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES); + OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[i_subframe] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM); + OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * NOLACE_NUMBITS_EMBEDDING_DIM); + + compute_generic_conv1d( + &hNoLACE->layers.nolace_fnet_conv1, + output_buffer + i_subframe * NOLACE_HIDDEN_FEATURE_DIM, + NULL, + input_buffer, + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * 
NOLACE_NUMBITS_EMBEDDING_DIM, + ACTIVATION_TANH, + arch); + } + + /* subframe accumulation */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_HIDDEN_FEATURE_DIM); + compute_generic_conv1d( + &hNoLACE->layers.nolace_fnet_conv2, + output_buffer, + state->feature_net_conv2_state, + input_buffer, + 4 * NOLACE_HIDDEN_FEATURE_DIM, + ACTIVATION_TANH, + arch + ); + + /* tconv upsampling */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM); + compute_generic_dense( + &hNoLACE->layers.nolace_fnet_tconv, + output_buffer, + input_buffer, + ACTIVATION_LINEAR, + arch + ); + + /* GRU */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM); + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + compute_generic_gru( + &hNoLACE->layers.nolace_fnet_gru_input, + &hNoLACE->layers.nolace_fnet_gru_recurrent, + state->feature_net_gru_state, + input_buffer + i_subframe * NOLACE_COND_DIM, + arch + ); + OPUS_COPY(output + i_subframe * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM); + } +} + + +static void nolace_process_20ms_frame( + NoLACE* hNoLACE, + NoLACEState *state, + float *x_out, + const float *x_in, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float feature_buffer[4 * NOLACE_COND_DIM]; + float feature_transform_buffer[4 * NOLACE_COND_DIM]; + float x_buffer1[8 * NOLACE_FRAME_SIZE]; + float x_buffer2[8 * NOLACE_FRAME_SIZE]; + int i_subframe, i_sample; + NOLACELayers *layers = &hNoLACE->layers; + +#ifdef DEBUG_NOLACE + static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL; + static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods; + static FILE *f_ffpostcf1, *f_fpostcf2, *f_fpostaf1; + + + FINIT(f_features, "debug/c_features.f32", "wb"); + FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb"); + FINIT(f_xin, "debug/c_x_in.f32", "wb"); + FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb"); + FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb"); + FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb"); + FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb"); + FINIT(f_postaf1, "debug/c_post_af1.f32", "wb"); + FINIT(f_numbits, "debug/c_numbits.f32", "wb"); + FINIT(f_periods, "debug/c_periods.s32", "wb"); + + fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin); + fwrite(numbits, sizeof(*numbits), 2, f_numbits); + fwrite(periods, sizeof(*periods), 4, f_periods); +#endif + + /* pre-emphasis */ + for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++) + { + x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem; + state->preemph_mem = x_in[i_sample]; + } + + /* run feature encoder */ + nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch); +#ifdef DEBUG_NOLACE + fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features); + fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures); + fwrite(output_buffer, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph); +#endif + + /* 1st comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + /* modifies signal in place */ + adacomb_process_frame( + &state->cf1_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_cf1_kernel, + &hNoLACE->layers.nolace_cf1_gain, + &hNoLACE->layers.nolace_cf1_global_gain, + periods[i_subframe], + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + 
NOLACE_CF1_KERNEL_SIZE, + NOLACE_CF1_LEFT_PADDING, + NOLACE_CF1_FILTER_GAIN_A, + NOLACE_CF1_FILTER_GAIN_B, + NOLACE_CF1_LOG_GAIN_LIMIT, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_cf1, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_cf1_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1); +#endif + + /* 2nd comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + /* modifies signal in place */ + adacomb_process_frame( + &state->cf2_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_cf2_kernel, + &hNoLACE->layers.nolace_cf2_gain, + &hNoLACE->layers.nolace_cf2_global_gain, + periods[i_subframe], + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_CF2_KERNEL_SIZE, + NOLACE_CF2_LEFT_PADDING, + NOLACE_CF2_FILTER_GAIN_A, + NOLACE_CF2_FILTER_GAIN_B, + NOLACE_CF2_LOG_GAIN_LIMIT, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_cf2, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_cf2_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2); +#endif + + /* final adaptive filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adaconv_process_frame( + &state->af1_state, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af1_kernel, + &hNoLACE->layers.nolace_af1_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF1_IN_CHANNELS, + NOLACE_AF1_OUT_CHANNELS, + NOLACE_AF1_KERNEL_SIZE, + NOLACE_AF1_LEFT_PADDING, + NOLACE_AF1_FILTER_GAIN_A, + NOLACE_AF1_FILTER_GAIN_B, + NOLACE_AF1_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af1, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af1_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1); +#endif + + /* first shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF1_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape1_state, + x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape1_alpha1, + &layers->nolace_tdshape1_alpha2, + NOLACE_TDSHAPE1_FEATURE_DIM, + NOLACE_TDSHAPE1_FRAME_SIZE, + NOLACE_TDSHAPE1_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + 
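+            /* af2: 2-in/2-out adaptive convolution mixing the shaped channel pair in x_buffer2 back into x_buffer1 */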
&state->af2_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af2_kernel, + &hNoLACE->layers.nolace_af2_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF2_IN_CHANNELS, + NOLACE_AF2_OUT_CHANNELS, + NOLACE_AF2_KERNEL_SIZE, + NOLACE_AF2_LEFT_PADDING, + NOLACE_AF2_FILTER_GAIN_A, + NOLACE_AF2_FILTER_GAIN_B, + NOLACE_AF2_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af2, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af2_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2); +#endif + + /* second shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF2_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape2_state, + x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape2_alpha1, + &layers->nolace_tdshape2_alpha2, + NOLACE_TDSHAPE2_FEATURE_DIM, + NOLACE_TDSHAPE2_FRAME_SIZE, + NOLACE_TDSHAPE2_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + &state->af3_state, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af3_kernel, + &hNoLACE->layers.nolace_af3_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF3_IN_CHANNELS, + NOLACE_AF3_OUT_CHANNELS, + NOLACE_AF3_KERNEL_SIZE, + NOLACE_AF3_LEFT_PADDING, + NOLACE_AF3_FILTER_GAIN_A, + NOLACE_AF3_FILTER_GAIN_B, + NOLACE_AF3_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af3, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af3_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + + /* third shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF3_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape3_state, + x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape3_alpha1, + &layers->nolace_tdshape3_alpha2, + NOLACE_TDSHAPE3_FEATURE_DIM, + NOLACE_TDSHAPE3_FRAME_SIZE, + NOLACE_TDSHAPE3_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + &state->af4_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af4_kernel, + 
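+            /* af4: 2-in/1-out adaptive convolution; its single-channel output in x_buffer1 feeds the de-emphasis below */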
&hNoLACE->layers.nolace_af4_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF4_IN_CHANNELS, + NOLACE_AF4_OUT_CHANNELS, + NOLACE_AF4_KERNEL_SIZE, + NOLACE_AF4_LEFT_PADDING, + NOLACE_AF4_FILTER_GAIN_A, + NOLACE_AF4_FILTER_GAIN_B, + NOLACE_AF4_SHAPE_GAIN, + hNoLACE->window, + arch); + + } + + + /* de-emphasis */ + for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++) + { + x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem; + state->deemph_mem = x_out[i_sample]; + } +#ifdef DEBUG_NOLACE + fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph); +#endif +} + +#endif /* #ifndef DISABLE_NOLACE */ + +/* API */ + +void osce_reset(silk_OSCE_struct *hOSCE, int method) +{ + OSCEState *state = &hOSCE->state; + + OPUS_CLEAR(&hOSCE->features, 1); + + switch(method) + { + case OSCE_METHOD_NONE: + break; +#ifndef DISABLE_LACE + case OSCE_METHOD_LACE: + reset_lace_state(&state->lace); + break; +#endif +#ifndef DISABLE_NOLACE + case OSCE_METHOD_NOLACE: + reset_nolace_state(&state->nolace); + break; +#endif + default: + celt_assert(0 && "method not defined"); /* Question: return error code? */ + } + hOSCE->method = method; + hOSCE->features.reset = 2; +} + + +#if 0 +#include +static void print_float_array(FILE *fid, const char *name, const float *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %f\n", name, i, array[i]); + } +} + +static void print_int_array(FILE *fid, const char *name, const int *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %d\n", name, i, array[i]); + } +} + +static void print_int8_array(FILE *fid, const char *name, const opus_int8 *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %d\n", name, i, array[i]); + } +} + +static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer) +{ + int i, n_in, n_out, n_total; + char tmp[256]; + + n_in = layer->nb_inputs; + n_out = layer->nb_outputs; + n_total = n_in * n_out; + + fprintf(fid, "\nprinting layer %s...\n", name); + fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, n_in, name, n_out); + + if (layer->bias !=NULL){} + if (layer->subias !=NULL){} + if (layer->weights !=NULL){} + if (layer->float_weights !=NULL){} + + if (layer->bias != NULL) {sprintf(tmp, "%s.bias", name); print_float_array(fid, tmp, layer->bias, n_out);} + if (layer->subias != NULL) {sprintf(tmp, "%s.subias", name); print_float_array(fid, tmp, layer->subias, n_out);} + if (layer->weights != NULL) {sprintf(tmp, "%s.weights", name); print_int8_array(fid, tmp, layer->weights, n_total);} + if (layer->float_weights != NULL) {sprintf(tmp, "%s.float_weights", name); print_float_array(fid, tmp, layer->float_weights, n_total);} + //if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);} + if (layer->diag != NULL) {sprintf(tmp, "%s.diag", name); print_float_array(fid, tmp, layer->diag, n_in);} + if (layer->scale != NULL) {sprintf(tmp, "%s.scale", name); print_float_array(fid, tmp, layer->scale, n_out);} + +} +#endif + +int osce_load_models(OSCEModel *model, const unsigned char *data, int len) +{ + int ret = 0; + WeightArray *list; + + if (data != NULL && len) + { + /* init from buffer */ + parse_weights(&list, data, len); + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_lace(&model->lace, list);} +#endif + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_nolace(&model->nolace, list);} +#endif + + free(list); + } 
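+    /* no weight blob supplied: fall back to the built-in weight arrays, or fail when USE_WEIGHTS_FILE requires an external weight file */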
else + { +#ifdef USE_WEIGHTS_FILE + return -1; +#else +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);} +#endif + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);} +#endif + +#endif /* USE_WEIGHTS_FILE */ + } + + ret = ret ? -1 : 0; + return ret; +} + +void osce_enhance_frame( + OSCEModel *model, /* I OSCE model struct */ + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* I/O Decoded speech */ + opus_int32 num_bits, /* I Size of SILK payload in bits */ + int arch /* I Run-time architecture */ +) +{ + float in_buffer[320]; + float out_buffer[320]; + float features[4 * OSCE_FEATURE_DIM]; + float numbits[2]; + int periods[4]; + int i; + + /* enhancement only implemented for 20 ms frame at 16kHz */ + if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4) + { + osce_reset(&psDec->osce, psDec->osce.method); + return; + } + + osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits); + + /* scale input */ + for (i = 0; i < 320; i++) + { + in_buffer[i] = ((float) xq[i]) * (1.f/32768.f); + } + + switch(psDec->osce.method) + { + case OSCE_METHOD_NONE: + OPUS_COPY(out_buffer, in_buffer, 320); + break; +#ifndef DISABLE_LACE + case OSCE_METHOD_LACE: + lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch); + break; +#endif +#ifndef DISABLE_NOLACE + case OSCE_METHOD_NOLACE: + nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch); + break; +#endif + default: + celt_assert(0 && "method not defined"); + } + +#ifdef ENABLE_OSCE_TRAINING_DATA + int k; + + static FILE *flpc = NULL; + static FILE *fgain = NULL; + static FILE *fltp = NULL; + static FILE *fperiod = NULL; + static FILE *fnoisy16k = NULL; + static FILE* f_numbits = NULL; + static FILE* f_numbits_smooth = NULL; + + if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");} + if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");} + if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");} + if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");} + if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");} + if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");} + if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");} + + fwrite(&num_bits, sizeof(num_bits), 1, f_numbits); + fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth); + + for (k = 0; k < psDec->nb_subfr; k++) + { + float tmp; + int16_t itmp; + float lpc_buffer[16] = {0}; + opus_int16 *A_Q12, *B_Q14; + + (void) num_bits; + (void) arch; + + /* gain */ + tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16); + fwrite(&tmp, sizeof(tmp), 1, fgain); + + /* LPC */ + A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ]; + for (i = 0; i < psDec->LPC_order; i++) + { + lpc_buffer[i] = (float) A_Q12[i] / (1U << 12); + } + fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc); + + /* LTP */ + B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; + for (i = 0; i < 5; i++) + { + tmp = (float) B_Q14[i] / (1U << 14); + fwrite(&tmp, sizeof(tmp), 1, fltp); + } + + /* periods */ + itmp = psDec->indices.signalType == TYPE_VOICED ? 
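+        /* record the pitch lag only for voiced subframes; unvoiced subframes are stored as 0 */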
psDecCtrl->pitchL[ k ] : 0; + fwrite(&itmp, sizeof(itmp), 1, fperiod); + } + + fwrite(xq, psDec->nb_subfr * psDec->subfr_length, sizeof(xq[0]), fnoisy16k); +#endif + + if (psDec->osce.features.reset > 1) + { + OPUS_COPY(out_buffer, in_buffer, 320); + psDec->osce.features.reset --; + } + else if (psDec->osce.features.reset) + { + osce_cross_fade_10ms(out_buffer, in_buffer, 320); + psDec->osce.features.reset = 0; + } + + /* scale output */ + for (i = 0; i < 320; i++) + { + float tmp = 32768.f * out_buffer[i]; + if (tmp > 32767.f) tmp = 32767.f; + if (tmp < -32767.f) tmp = -32767.f; + xq[i] = float2int(tmp); + } + +} + + +#if 0 + +#include + +void lace_feature_net_compare( + const char * prefix, + int num_frames, + LACE* hLACE +) +{ + char in_feature_file[256]; + char out_feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char message[512]; + int i_frame, i_feature; + float mse; + float in_features[4 * LACE_NUM_FEATURES]; + float out_features[4 * LACE_COND_DIM]; + float out_features2[4 * LACE_COND_DIM]; + float numbits[2]; + int periods[4]; + + init_lace(hLACE); + + FILE *f_in_features, *f_out_features, *f_numbits, *f_periods; + + strcpy(in_feature_file, prefix); + strcat(in_feature_file, "_in_features.f32"); + f_in_features = fopen(in_feature_file, "rb"); + if (f_in_features == NULL) + { + sprintf(message, "could not open file %s", in_feature_file); + perror(message); + exit(1); + } + + strcpy(out_feature_file, prefix); + strcat(out_feature_file, "_out_features.f32"); + f_out_features = fopen(out_feature_file, "rb"); + if (f_out_features == NULL) + { + sprintf(message, "could not open file %s", out_feature_file); + perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from in_features\n", i_frame); + exit(1); + } + if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM) + { + fprintf(stderr, "could not read frame %d from out_features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + + lace_feature_net(hLACE, out_features2, in_features, numbits, periods); + + float mse = 0; + for (int i = 0; i < 4 * LACE_COND_DIM; i ++) + { + mse += pow(out_features[i] - out_features2[i], 2); + } + mse /= (4 * LACE_COND_DIM); + printf("rmse: %f\n", sqrt(mse)); + + } + + fclose(f_in_features); + fclose(f_out_features); + fclose(f_numbits); + fclose(f_periods); +} + + +void lace_demo( + char *prefix, + char *output +) +{ + char feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char x_in_file[256]; + char message[512]; + int i_frame; + float mse; + float features[4 * LACE_NUM_FEATURES]; + float numbits[2]; + int 
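+    /* lace_demo: offline test driver that reads feature/period/numbits test vectors, enhances 20 ms frames and writes 16-bit PCM */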
periods[4]; + float x_in[4 * LACE_FRAME_SIZE]; + int16_t x_out[4 * LACE_FRAME_SIZE]; + float buffer[4 * LACE_FRAME_SIZE]; + LACE hLACE; + int frame_counter = 0; + FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out; + + init_lace(&hLACE); + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "rb"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "rb"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + f_x_out = fopen(output, "wb"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", output); + perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + printf("processing %s\n", prefix); + + while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE) + { + printf("\rframe: %d", frame_counter++); + if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + lace_process_20ms_frame( + &hLACE, + buffer, + x_in, + features, + numbits, + periods + ); + + for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++) + { + float tmp = (1UL<<15) * buffer[n]; + tmp = CLIP(tmp, -32768, 32767); + x_out[n] = (int16_t) round(tmp); + } + + fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out); + } + printf("\ndone!\n"); + + fclose(f_features); + fclose(f_numbits); + fclose(f_periods); + fclose(f_x_in); + fclose(f_x_out); +} + +void nolace_demo( + char *prefix, + char *output +) +{ + char feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char x_in_file[256]; + char message[512]; + int i_frame; + float mse; + float features[4 * LACE_NUM_FEATURES]; + float numbits[2]; + int periods[4]; + float x_in[4 * LACE_FRAME_SIZE]; + int16_t x_out[4 * LACE_FRAME_SIZE]; + float buffer[4 * LACE_FRAME_SIZE]; + NoLACE hNoLACE; + int frame_counter = 0; + FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out; + + init_nolace(&hNoLACE); + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "rb"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "rb"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + f_x_out = fopen(output, "wb"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", output); + 
perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + printf("processing %s\n", prefix); + + while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE) + { + printf("\rframe: %d", frame_counter++); + if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + nolace_process_20ms_frame( + &hNoLACE, + buffer, + x_in, + features, + numbits, + periods + ); + + for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++) + { + float tmp = (1UL<<15) * buffer[n]; + tmp = CLIP(tmp, -32768, 32767); + x_out[n] = (int16_t) round(tmp); + } + + fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out); + } + printf("\ndone!\n"); + + fclose(f_features); + fclose(f_numbits); + fclose(f_periods); + fclose(f_x_in); + fclose(f_x_out); +} + + +int main() +{ +#if 0 + LACE hLACE; + + lace_feature_net_compare("testvec2/lace", 5, &hLACE); + + lace_demo("testdata/test9", "out_lace_c_9kbps.pcm"); + lace_demo("testdata/test6", "out_lace_c_6kbps.pcm"); +#endif + nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm"); + +} +#endif + +/*gcc -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/ diff --git a/dnn/osce.h b/dnn/osce.h new file mode 100644 index 00000000..3dd8b7c0 --- /dev/null +++ b/dnn/osce.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_H +#define OSCE_H + + +#include "opus_types.h" +/*#include "osce_config.h"*/ +#ifndef DISABLE_LACE +#include "lace_data.h" +#endif +#ifndef DISABLE_NOLACE +#include "nolace_data.h" +#endif +#include "nndsp.h" +#include "nnet.h" +#include "osce_structs.h" +#include "structs.h" + +#define OSCE_METHOD_NONE 0 +#ifndef DISABLE_LACE +#define OSCE_METHOD_LACE 1 +#endif +#ifndef DISABLE_NOLACE +#define OSCE_METHOD_NOLACE 2 +#endif + +#if !defined(DISABLE_NOLACE) +#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE +#elif !defined(DISABLE_LACE) +#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE +#else +#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE +#endif + + + + +/* API */ + + +void osce_enhance_frame( + OSCEModel *model, /* I OSCE model struct */ + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* I/O Decoded speech */ + opus_int32 num_bits, /* I Size of SILK payload in bits */ + int arch /* I Run-time architecture */ +); + + +int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len); +void osce_reset(silk_OSCE_struct *hOSCE, int method); + + +#endif diff --git a/dnn/osce_config.h b/dnn/osce_config.h new file mode 100644 index 00000000..de94fe2f --- /dev/null +++ b/dnn/osce_config.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_CONFIG +#define OSCE_CONFIG + +#define OSCE_MAX_RNN_NEURONS 256 + +#define OSCE_FEATURES_MAX_HISTORY 350 +#define OSCE_FEATURE_DIM 93 +#define OSCE_MAX_FEATURE_FRAMES 4 + +#define OSCE_CLEAN_SPEC_NUM_BANDS 64 +#define OSCE_NOISY_SPEC_NUM_BANDS 18 + +#define OSCE_NO_PITCH_VALUE 7 + +#define OSCE_PREEMPH 0.85f + +#define OSCE_PITCH_HANGOVER 8 + +#define OSCE_CLEAN_SPEC_START 0 +#define OSCE_CLEAN_SPEC_LENGTH 64 + +#define OSCE_NOISY_CEPSTRUM_START 64 +#define OSCE_NOISY_CEPSTRUM_LENGTH 18 + +#define OSCE_ACORR_START 82 +#define OSCE_ACORR_LENGTH 5 + +#define OSCE_LTP_START 87 +#define OSCE_LTP_LENGTH 5 + +#define OSCE_LOG_GAIN_START 92 +#define OSCE_LOG_GAIN_LENGTH 1 + + +#endif \ No newline at end of file diff --git a/dnn/osce_features.c b/dnn/osce_features.c new file mode 100644 index 00000000..0466f132 --- /dev/null +++ b/dnn/osce_features.c @@ -0,0 +1,454 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define OSCE_SPEC_WINDOW_SIZE 320 +#define OSCE_SPEC_NUM_FREQS 161 + + +/*DEBUG*/ +/*#define WRITE_FEATURES*/ +/*#define DEBUG_PRING*/ +/*******/ + +#include "stack_alloc.h" +#include "osce_features.h" +#include "kiss_fft.h" +#include "os_support.h" +#include "osce.h" +#include "freq.h" + + +#if defined(WRITE_FEATURES) || defined(DEBUG_PRING) +#include +#include +#endif + +static const int center_bins_clean[64] = { + 0, 2, 5, 8, 10, 12, 15, 18, + 20, 22, 25, 28, 30, 33, 35, 38, + 40, 42, 45, 48, 50, 52, 55, 58, + 60, 62, 65, 68, 70, 73, 75, 78, + 80, 82, 85, 88, 90, 92, 95, 98, + 100, 102, 105, 108, 110, 112, 115, 118, + 120, 122, 125, 128, 130, 132, 135, 138, + 140, 142, 145, 148, 150, 152, 155, 160 +}; + +static const int center_bins_noisy[18] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 40, 48, 56, 64, 80, 96, 112, + 136, 160 +}; + +static const float band_weights_clean[64] = { + 0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f +}; + +static const float band_weights_noisy[18] = { + 0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, + 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, + 0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f, + 0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f, + 0.041666666667f, 0.080000000000f +}; + +static float osce_window[OSCE_SPEC_WINDOW_SIZE] = { + 0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f, + 0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f, + 0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f, + 0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f, + 0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f, + 0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f, + 0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f, + 0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f, + 0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f, + 0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f, + 0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f, + 0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f, + 
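+    /* osce_window[n] = sin(M_PI * (n + 0.5) / 320): symmetric analysis window, also used for the 10 ms cross-fade in osce_cross_fade_10ms() */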
0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f, + 0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f, + 0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f, + 0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f, + 0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f, + 0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f, + 0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f, + 0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f, + 0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f, + 0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f, + 0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f, + 0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f, + 0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f, + 0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f, + 0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f, + 0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f, + 0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f, + 0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f, + 0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f, + 0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f, + 0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f, + 0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f, + 0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f, + 0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f, + 0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f, + 0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f, + 0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f, + 0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f, + 0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f, + 0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f, + 0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f, + 0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f, + 0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f, + 0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f, + 0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f, + 0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f, + 0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f, + 0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f, + 0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f, + 0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f, + 0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 
0.518306898929f, + 0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f, + 0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f, + 0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f, + 0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f, + 0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f, + 0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f, + 0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f, + 0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f, + 0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f, + 0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f, + 0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f +}; + +static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands) +{ + int b, i; + float frac; + + celt_assert(x_in != x_out) + + x_out[0] = 0; + for (b = 0; b < num_bands - 1; b++) + { + x_out[b+1] = 0; + for (i = center_bins[b]; i < center_bins[b+1]; i++) + { + frac = (float) (center_bins[b+1] - i) / (center_bins[b+1] - center_bins[b]); + x_out[b] += band_weights[b] * frac * x_in[i]; + x_out[b+1] += band_weights[b+1] * (1 - frac) * x_in[i]; + + } + } + x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]]; +#ifdef DEBUG_PRINT + for (b = 0; b < num_bands; b++) + { + printf("band[%d]: %f\n", b, x_out[b]); + } +#endif +} + + +static void mag_spec_320_onesided(float *out, float *in) +{ + celt_assert(OSCE_SPEC_WINDOW_SIZE == 320); + kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE]; + int k; + forward_transform(buffer, in); + + for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++) + { + out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i); +#ifdef DEBUG_PRINT + printf("magspec[%d]: %f\n", k, out[k]); +#endif + } +} + + +static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order) +{ + float buffer[OSCE_SPEC_WINDOW_SIZE] = {0}; + int i; + + /* zero expansion */ + buffer[0] = 1; + for (i = 0; i < lpc_order; i++) + { + buffer[i+1] = - (float)a_q12[i] / (1U << 12); + } + + /* calculate and invert magnitude spectrum */ + mag_spec_320_onesided(buffer, buffer); + + for (i = 0; i < OSCE_SPEC_NUM_FREQS; i++) + { + buffer[i] = 1.f / (buffer[i] + 1e-9f); + } + + /* apply filterbank */ + apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS); + + /* log and scaling */ + for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++) + { + spec[i] = 0.3f * log(spec[i] + 1e-9f); + } +} + +static void calculate_cepstrum(float *cepstrum, float *signal) +{ + float buffer[OSCE_SPEC_WINDOW_SIZE]; + float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3]; + int n; + + celt_assert(cepstrum != signal) + + for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++) + { + buffer[n] = osce_window[n] * signal[n]; + } + + /* calculate magnitude spectrum */ + mag_spec_320_onesided(buffer, buffer); + + /* accumulate bands */ + apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS); + + /* log domain conversion */ + for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++) + { + spec[n] = log(spec[n] + 1e-9f); +#ifdef DEBUG_PRINT + printf("logspec[%d]: %f\n", n, spec[n]); +#endif + } + + /* 
DCT-II (orthonormal) */ + celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS); + dct(cepstrum, spec); +} + +static void calculate_acorr(float *acorr, float *signal, int lag) +{ + int n, k; + celt_assert(acorr != signal) + + for (k = -2; k <= 2; k++) + { + acorr[k+2] = 0; + float xx = 0; + float xy = 0; + float yy = 0; + for (n = 0; n < 80; n++) + { + /* obviously wasteful -> fix later */ + xx += signal[n] * signal[n]; + yy += signal[n - lag + k] * signal[n - lag + k]; + xy += signal[n] * signal[n - lag + k]; + } + acorr[k+2] = xy / sqrt(xx * yy + 1e-9f); + } +} + +static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type) +{ + int new_lag; + +#ifdef OSCE_HANGOVER_BUGFIX +#define TESTBIT 1 +#else +#define TESTBIT 0 +#endif + + /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */ + if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT) + /* enter hangover */ + { + new_lag = OSCE_NO_PITCH_VALUE; + if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER) + { + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + } + } + else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT) + /* continue hangover */ + { + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + } + else if (type != TYPE_VOICED) + /* unvoiced frame after hangover */ + { + new_lag = OSCE_NO_PITCH_VALUE; + psFeatures->pitch_hangover_count = 0; + } + else + /* voiced frame: update last_lag */ + { + new_lag = lag; + psFeatures->last_lag = lag; + psFeatures->pitch_hangover_count = 0; + } + + /* buffer update */ + psFeatures->last_type = type; + + /* with the current setup this should never happen (but who knows...) 
*/ + celt_assert(new_lag) + + return new_lag; +} + +void osce_calculate_features( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + float *features, /* O input features */ + float *numbits, /* O numbits and smoothed numbits */ + int *periods, /* O pitch lags on subframe basis */ + const opus_int16 xq[], /* I Decoded speech */ + opus_int32 num_bits /* I Size of SILK payload in bits */ +) +{ + int num_subframes, num_samples; + float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80]; + float *frame, *pfeatures; + OSCEFeatureState *psFeatures; + int i, n, k; +#ifdef WRITE_FEATURES + static FILE *f_feat = NULL; + if (f_feat == NULL) + { + f_feat = fopen("assembled_features.f32", "wb"); + } +#endif + + /*OPUS_CLEAR(buffer, 1);*/ + memset(buffer, 0, sizeof(buffer)); + + num_subframes = psDec->nb_subfr; + num_samples = num_subframes * 80; + psFeatures = &psDec->osce.features; + + /* smooth bit count */ + psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits; + numbits[0] = num_bits; +#ifdef OSCE_NUMBITS_BUGFIX + numbits[1] = psFeatures->numbits_smooth; +#else + numbits[1] = num_bits; +#endif + + for (n = 0; n < num_samples; n++) + { + buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15); + } + OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY); + + for (k = 0; k < num_subframes; k++) + { + pfeatures = features + k * OSCE_FEATURE_DIM; + frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80]; + memset(pfeatures, 0, OSCE_FEATURE_DIM); /* precaution */ + + /* clean spectrum from lpcs (update every other frame) */ + if (k % 2 == 0) + { + calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order); + } + else + { + OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH); + } + + /* noisy cepstrum from signal (update every other frame) */ + if (k % 2 == 0) + { + calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160); + } + else + { + OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH); + } + + /* pitch hangover and zero value replacement */ + periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType); + + /* auto-correlation around pitch lag */ + calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]); + + /* ltp */ + celt_assert(OSCE_LTP_LENGTH == LTP_ORDER) + for (i = 0; i < OSCE_LTP_LENGTH; i++) + { + pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14); + } + + /* frame gain */ + pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f); + +#ifdef WRITE_FEATURES + fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat); +#endif + } + + /* buffer update */ + OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY); +} + + +void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length) +{ + int i; + celt_assert(length >= 160); + + for (i = 0; i < 160; i++) + { + x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i]; + } + + +} diff --git a/dnn/osce_features.h b/dnn/osce_features.h new file mode 100644 index 00000000..91e95f1e --- /dev/null +++ b/dnn/osce_features.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in 
source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef OSCE_FEATURES_H +#define OSCE_FEATURES_H + + +#include "structs.h" +#include "opus_types.h" + +#define OSCE_NUMBITS_BUGFIX + +void osce_calculate_features( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + float *features, /* O input features */ + float *numbits, /* O numbits and smoothed numbits */ + int *periods, /* O pitch lags on subframe basis */ + const opus_int16 xq[], /* I Decoded speech */ + opus_int32 num_bits /* I Size of SILK payload in bits */ +); + + +void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length); + +#endif \ No newline at end of file diff --git a/dnn/osce_structs.h b/dnn/osce_structs.h new file mode 100644 index 00000000..a4350be2 --- /dev/null +++ b/dnn/osce_structs.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_STRUCTS_H +#define OSCE_STRUCTS_H + +#include "opus_types.h" +#include "osce_config.h" +#ifndef DISABLE_LACE +#include "lace_data.h" +#endif +#ifndef DISABLE_NOLACE +#include "nolace_data.h" +#endif +#include "nndsp.h" +#include "nnet.h" + +/* feature calculation */ + +typedef struct { + float numbits_smooth; + int pitch_hangover_count; + int last_lag; + int last_type; + float signal_history[OSCE_FEATURES_MAX_HISTORY]; + int reset; +} OSCEFeatureState; + + +#ifndef DISABLE_LACE +/* LACE */ +typedef struct { + float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE]; + float feature_net_gru_state[LACE_COND_DIM]; + AdaCombState cf1_state; + AdaCombState cf2_state; + AdaConvState af1_state; + float preemph_mem; + float deemph_mem; +} LACEState; + +typedef struct +{ + LACELayers layers; + float window[LACE_OVERLAP_SIZE]; +} LACE; + +#endif /* #ifndef DISABLE_LACE */ + + +#ifndef DISABLE_NOLACE +/* NoLACE */ +typedef struct { + float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE]; + float feature_net_gru_state[NOLACE_COND_DIM]; + float post_cf1_state[NOLACE_COND_DIM]; + float post_cf2_state[NOLACE_COND_DIM]; + float post_af1_state[NOLACE_COND_DIM]; + float post_af2_state[NOLACE_COND_DIM]; + float post_af3_state[NOLACE_COND_DIM]; + AdaCombState cf1_state; + AdaCombState cf2_state; + AdaConvState af1_state; + AdaConvState af2_state; + AdaConvState af3_state; + AdaConvState af4_state; + AdaShapeState tdshape1_state; + AdaShapeState tdshape2_state; + AdaShapeState tdshape3_state; + float preemph_mem; + float deemph_mem; +} NoLACEState; + +typedef struct { + NOLACELayers layers; + float window[LACE_OVERLAP_SIZE]; +} NoLACE; + +#endif /* #ifndef DISABLE_NOLACE */ + +/* OSCEModel */ +typedef struct { +#ifndef DISABLE_LACE + LACE lace; +#endif +#ifndef DISABLE_NOLACE + NoLACE nolace; +#endif +} OSCEModel; + +typedef union { +#ifndef DISABLE_LACE + LACEState lace; +#endif +#ifndef DISABLE_NOLACE + NoLACEState nolace; +#endif +} OSCEState; + +#endif \ No newline at end of file diff --git a/dnn/torch/osce/create_testvectors.py b/dnn/torch/osce/create_testvectors.py new file mode 100644 index 00000000..a037d0db --- /dev/null +++ b/dnn/torch/osce/create_testvectors.py @@ -0,0 +1,165 @@ +""" +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +""" + +import os +import argparse + +import torch +import numpy as np + +from models import model_dict +from utils import endoscopy + +parser = argparse.ArgumentParser() + +parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and nolace_checkpoint.pth"') +parser.add_argument('output_folder', type=str, help='output folder for testvectors') +parser.add_argument('--debug', action='store_true', help='add debug output to output folder') + + +def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False): + feature_dim = adaconv.feature_dim + in_channels = adaconv.in_channels + out_channels = adaconv.out_channels + frame_size = adaconv.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, in_channels, num_frames * frame_size)) + + x_out = adaconv(x_in, features, debug=debug) + + features = features[0].detach().numpy() + x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy() + x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + x_out.tofile(prefix + '_x_out.f32') + +def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False): + feature_dim = adacomb.feature_dim + in_channels = 1 + frame_size = adacomb.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, in_channels, num_frames * frame_size)) + p_in = torch.randint(adacomb.kernel_size, 250, (1, num_frames)) + + x_out = adacomb(x_in, features, p_in, debug=debug) + + features = features[0].detach().numpy() + x_in = x_in[0].permute(1, 0).detach().numpy() + p_in = p_in[0].detach().numpy().astype(np.int32) + x_out = x_out[0].permute(1, 0).detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + p_in.tofile(prefix + '_p_in.s32') + x_out.tofile(prefix + '_x_out.f32') + +def create_adashape_testvector(prefix, adashape, num_frames): + feature_dim = adashape.feature_dim + frame_size = adashape.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, 1, num_frames * frame_size)) + + x_out = adashape(x_in, features) + + features = features[0].detach().numpy() + x_in = x_in.flatten().detach().numpy() + x_out = x_out.flatten().detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + x_out.tofile(prefix + '_x_out.f32') + +def create_feature_net_testvector(prefix, model, num_frames): + num_features = model.num_features + num_subframes = 4 * num_frames + + input_features = torch.randn((1, num_subframes, num_features)) + periods = torch.randint(32, 300, (1, num_subframes)) + numbits = model.numbits_range[0] + torch.rand((1, num_frames, 2)) * (model.numbits_range[1] - model.numbits_range[0]) + + + pembed = model.pitch_embedding(periods) + nembed = 
torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1) + full_features = torch.cat((input_features, pembed, nembed), dim=-1) + + cf = model.feature_net(full_features) + + input_features.float().numpy().tofile(prefix + "_in_features.f32") + periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32") + numbits.float().numpy().tofile(prefix + "_numbits.f32") + full_features.detach().numpy().tofile(prefix + "_full_features.f32") + cf.detach().numpy().tofile(prefix + "_out_features.f32") + + + +if __name__ == "__main__": + args = parser.parse_args() + + os.makedirs(args.output_folder, exist_ok=True) + + lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu') + nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu') + + lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs']) + nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs']) + + lace.load_state_dict(lace_checkpoint['state_dict']) + nolace.load_state_dict(nolace_checkpoint['state_dict']) + + if args.debug: + endoscopy.init(args.output_folder) + + # lace af1, 1 input channel, 1 output channel + create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug) + + # nolace af1, 1 input channel, 2 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug) + + # nolace af4, 2 input channel, 1 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug) + + # nolace af2, 2 input channel, 2 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug) + + # lace cf1 + create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug) + + # nolace tdshape1 + create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5) + + # lace feature net + create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5) + + if args.debug: + endoscopy.close() diff --git a/dnn/torch/osce/data/silk_enhancement_set.py b/dnn/torch/osce/data/silk_enhancement_set.py index 65e97508..fd18c4de 100644 --- a/dnn/torch/osce/data/silk_enhancement_set.py +++ b/dnn/torch/osce/data/silk_enhancement_set.py @@ -49,7 +49,6 @@ class SilkEnhancementSet(Dataset): num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False, ): @@ -73,7 +72,6 @@ class SilkEnhancementSet(Dataset): self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32) self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32) self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32) - self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32) self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16) self.clean_signal = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16) @@ -86,7 +84,6 @@ class SilkEnhancementSet(Dataset): num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, - add_offset, add_double_lag_acorr) self.history_len = 700 if add_double_lag_acorr else 350 @@ -120,8 +117,7 @@ class SilkEnhancementSet(Dataset): self.lpcs[frame_start : frame_stop], self.gains[frame_start : frame_stop], 
self.ltps[frame_start : frame_stop], - self.periods[frame_start : frame_stop], - self.offsets[frame_start : frame_stop] + self.periods[frame_start : frame_stop] ) if self.preemph > 0: diff --git a/dnn/torch/osce/export_model_weights.py b/dnn/torch/osce/export_model_weights.py index 8b95aca9..f94431d3 100644 --- a/dnn/torch/osce/export_model_weights.py +++ b/dnn/torch/osce/export_model_weights.py @@ -40,10 +40,53 @@ import wexchange.torch from wexchange.torch import dump_torch_weights from models import model_dict +from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d +from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d +from utils.layers.td_shaper import TDShaper +from wexchange.torch import dump_torch_weights + + + parser = argparse.ArgumentParser() parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint') parser.add_argument('output_dir', type=str, help='output folder') +parser.add_argument('--quantize', action="store_true", help='quantization according to schedule') + + +schedules = { + 'nolace': [ + ('pitch_embedding', dict()), + ('feature_net.conv1', dict()), + ('feature_net.conv2', dict(quantize=True, scale=None)), + ('feature_net.tconv', dict(quantize=True, scale=None)), + ('feature_net.gru', dict()), + ('cf1', dict(quantize=True, scale=None)), + ('cf2', dict(quantize=True, scale=None)), + ('af1', dict(quantize=True, scale=None)), + ('tdshape1', dict()), + ('tdshape2', dict()), + ('tdshape3', dict()), + ('af2', dict(quantize=True, scale=None)), + ('af3', dict(quantize=True, scale=None)), + ('af4', dict(quantize=True, scale=None)), + ('post_cf1', dict(quantize=True, scale=None)), + ('post_cf2', dict(quantize=True, scale=None)), + ('post_af1', dict(quantize=True, scale=None)), + ('post_af2', dict(quantize=True, scale=None)), + ('post_af3', dict(quantize=True, scale=None)) + ], + 'lace' : [ + ('pitch_embedding', dict()), + ('feature_net.conv1', dict()), + ('feature_net.conv2', dict(quantize=True, scale=None)), + ('feature_net.tconv', dict(quantize=True, scale=None)), + ('feature_net.gru', dict()), + ('cf1', dict(quantize=True, scale=None)), + ('cf2', dict(quantize=True, scale=None)), + ('af1', dict(quantize=True, scale=None)) + ] +} # auxiliary functions @@ -60,8 +103,28 @@ def sha1(filename): return sha1.hexdigest() +def osce_dump_generic(writer, name, module): + if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \ + or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding) \ + or isinstance(module, LimitedAdaptiveConv1d) or isinstance(module, LimitedAdaptiveComb1d) \ + or isinstance(module, TDShaper) or isinstance(module, torch.nn.GRU): + dump_torch_weights(writer, module, name=name, verbose=True) + else: + for child_name, child in module.named_children(): + osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child) + + def export_name(name): - return name.replace('.', '_') + name = name.replace('.', '_') + name = name.replace('feature_net', 'fnet') + return name + +def osce_scheduled_dump(writer, prefix, model, schedule): + if not prefix.endswith('_'): + prefix += '_' + + for name, kwargs in schedule: + dump_torch_weights(writer, model.get_submodule(name), prefix + export_name(name), **kwargs, verbose=True) if __name__ == "__main__": args = parser.parse_args() @@ -76,22 +139,34 @@ if __name__ == "__main__": # create model and load weights checkpoint = torch.load(checkpoint_path, map_location='cpu') model = 
model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs']) + model.load_state_dict(checkpoint['state_dict']) # CWriter model_name = checkpoint['setup']['model']['name'] - cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper()) - - # dump numbits_embedding parameters by hand - numbits_embedding = model.get_submodule('numbits_embedding') - weights = next(iter(numbits_embedding.parameters())) - for i, c in enumerate(weights): - cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f") - cwriter.header.write("\n\n") + cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True) + + # Add custom includes and global parameters + cwriter.header.write(f''' +#define {model_name.upper()}_PREEMPH {model.preemph}f +#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE} +#define {model_name.upper()}_OVERLAP_SIZE 40 +#define {model_name.upper()}_NUM_FEATURES {model.num_features} +#define {model_name.upper()}_PITCH_MAX {model.pitch_max} +#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim} +#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]} +#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]} +#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim} +#define {model_name.upper()}_COND_DIM {model.cond_dim} +#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim} +''') + + for i, s in enumerate(model.numbits_embedding.scale_factors): + cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n") # dump layers - for name, module in model.named_modules(): - if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \ - or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding): - dump_torch_weights(cwriter, module, name=export_name(name), verbose=True) + if model_name in schedules and args.quantize: + osce_scheduled_dump(cwriter, model_name, model, schedules[model_name]) + else: + osce_dump_generic(cwriter, model_name, model) cwriter.close() diff --git a/dnn/torch/osce/models/lace.py b/dnn/torch/osce/models/lace.py index a11dfc41..58293de4 100644 --- a/dnn/torch/osce/models/lace.py +++ b/dnn/torch/osce/models/lace.py @@ -96,7 +96,7 @@ class LACE(NNSBase): self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) # spectral shaping - self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) + self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) def flop_count(self, rate=16000, verbose=False): diff --git a/dnn/torch/osce/models/no_lace.py b/dnn/torch/osce/models/no_lace.py index 2709274c..0e0fb1b3 100644 --- a/dnn/torch/osce/models/no_lace.py +++ b/dnn/torch/osce/models/no_lace.py @@ -96,8 +96,8 @@ class NoLACE(NNSBase): # comb filters left_pad = self.kernel_size // 2 right_pad = 
self.kernel_size - 1 - left_pad - self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) - self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) + self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) + self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) # spectral shaping self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py index b146240e..3bb6fa07 100644 --- a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py +++ b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py @@ -41,13 +41,13 @@ class LimitedAdaptiveComb1d(nn.Module): feature_dim, frame_size=160, overlap_size=40, - use_bias=True, padding=None, max_lag=256, name=None, gain_limit_db=10, global_gain_limits_db=[-6, 6], - norm_p=2): + norm_p=2, + **kwargs): """ Parameters: @@ -87,7 +87,6 @@ class LimitedAdaptiveComb1d(nn.Module): self.kernel_size = kernel_size self.frame_size = frame_size self.overlap_size = overlap_size - self.use_bias = use_bias self.max_lag = max_lag self.limit_db = gain_limit_db self.norm_p = norm_p @@ -101,8 +100,6 @@ class LimitedAdaptiveComb1d(nn.Module): # network for generating convolution weights self.conv_kernel = nn.Linear(feature_dim, kernel_size) - if self.use_bias: - self.conv_bias = nn.Linear(feature_dim,1) # comb filter gain self.filter_gain = nn.Linear(feature_dim, 1) @@ -154,9 +151,6 @@ class LimitedAdaptiveComb1d(nn.Module): conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size)) conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True)) - if self.use_bias: - conv_biases = self.conv_bias(features).permute(0, 2, 1) - conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit) # calculate gains global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b) @@ -190,10 +184,6 @@ class LimitedAdaptiveComb1d(nn.Module): new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1) - - if self.use_bias: - new_chunk = new_chunk + conv_biases[:, :, i : i + 1] - offset = self.max_lag + self.padding[0] new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + 
overlap_size]) @@ -223,10 +213,6 @@ class LimitedAdaptiveComb1d(nn.Module): count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate) count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels - # bias computation - if self.use_bias: - count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead) - # a0 computation count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py index 073ea1b1..a17b0e9b 100644 --- a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py +++ b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py @@ -46,12 +46,12 @@ class LimitedAdaptiveConv1d(nn.Module): feature_dim, frame_size=160, overlap_size=40, - use_bias=True, padding=None, name=None, gain_limits_db=[-6, 6], shape_gain_db=0, - norm_p=2): + norm_p=2, + **kwargs): """ Parameters: @@ -90,7 +90,6 @@ class LimitedAdaptiveConv1d(nn.Module): self.kernel_size = kernel_size self.frame_size = frame_size self.overlap_size = overlap_size - self.use_bias = use_bias self.gain_limits_db = gain_limits_db self.shape_gain_db = shape_gain_db self.norm_p = norm_p @@ -104,9 +103,6 @@ class LimitedAdaptiveConv1d(nn.Module): # network for generating convolution weights self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size) - if self.use_bias: - self.conv_bias = nn.Linear(feature_dim, out_channels) - self.shape_gain = min(1, 10**(shape_gain_db / 20)) self.filter_gain = nn.Linear(feature_dim, out_channels) @@ -133,10 +129,6 @@ class LimitedAdaptiveConv1d(nn.Module): count += 2 * (frame_rate * self.feature_dim * self.kernel_size) count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate) - # bias computation - if self.use_bias: - count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead) - # gain computation count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels @@ -183,9 +175,6 @@ class LimitedAdaptiveConv1d(nn.Module): conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels - if self.use_bias: - conv_biases = self.conv_bias(features).permute(0, 2, 1) - # calculate gains conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b) if debug and batch_size == 1: diff --git a/dnn/torch/osce/utils/silk_features.py b/dnn/torch/osce/utils/silk_features.py index 2997ef5f..8c5dbf05 100644 --- a/dnn/torch/osce/utils/silk_features.py +++ b/dnn/torch/osce/utils/silk_features.py @@ -33,6 +33,7 @@ import numpy as np import torch import scipy +import scipy.signal from utils.pitch import hangover, calculate_acorr_window from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc @@ -59,7 +60,6 @@ def silk_feature_factory(no_pitch_value=256, num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False ): @@ -67,7 +67,7 @@ def silk_feature_factory(no_pitch_value=256, fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True) fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True) - def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets): + def 
create_features(noisy, noisy_history, lpcs, gains, ltps, periods): periods = periods.copy() @@ -89,10 +89,7 @@ def silk_feature_factory(no_pitch_value=256, acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr) - if add_offset: - features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32) - else: - features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32) + features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32) return features, periods.astype(np.int64) @@ -110,7 +107,6 @@ def load_inference_data(path, num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False, **kwargs): @@ -122,13 +118,12 @@ def load_inference_data(path, periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16) num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1) num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1) - offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32) # load signal, add back delay and pre-emphasize signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15) signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32) - create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr) + create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr) num_frames = min((len(signal) // 320) * 4, len(lpcs)) signal = signal[: num_frames * 80] @@ -138,11 +133,10 @@ def load_inference_data(path, periods = periods[: num_frames] num_bits = num_bits[: num_frames // 4] num_bits_smooth = num_bits[: num_frames // 4] - offsets = offsets[: num_frames] numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0) - features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets) + features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods) if preemph > 0: signal[1:] -= preemph * signal[:-1] diff --git a/dnn/torch/osce/utils/spec.py b/dnn/torch/osce/utils/spec.py index 01b923ae..59f53538 100644 --- a/dnn/torch/osce/utils/spec.py +++ b/dnn/torch/osce/utils/spec.py @@ -30,6 +30,7 @@ import math as m import numpy as np import scipy +import scipy.fftpack import torch def erb(f): diff --git a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py index 36050881..2745f337 100644 --- a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py +++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py @@ -38,7 +38,8 @@ class CWriter: create_state_struct=False, enable_binary_blob=True, model_struct_name="Model", - nnet_header="nnet.h"): + nnet_header="nnet.h", + add_typedef=False): """ Writer class for creating souce and header files for weight exports to C @@ -73,6 +74,7 @@ class CWriter: self.enable_binary_blob = 
enable_binary_blob self.create_state_struct = create_state_struct self.model_struct_name = model_struct_name + self.add_typedef = add_typedef # for binary blob format, format is key=, value=(, ) self.layer_dict = OrderedDict() @@ -119,11 +121,17 @@ f""" # create model type if self.enable_binary_blob: - self.header.write(f"\nstruct {self.model_struct_name} {{") + if self.add_typedef: + self.header.write(f"\ntypedef struct {{") + else: + self.header.write(f"\nstruct {self.model_struct_name} {{") for name, data in self.layer_dict.items(): layer_type = data[0] self.header.write(f"\n {layer_type} {name};") - self.header.write(f"\n}};\n") + if self.add_typedef: + self.header.write(f"\n}} {self.model_struct_name};\n") + else: + self.header.write(f"\n}};\n") init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)" self.header.write(f"\n{init_prototype};\n") diff --git a/dnn/torch/weight-exchange/wexchange/torch/__init__.py b/dnn/torch/weight-exchange/wexchange/torch/__init__.py index 98c96fad..8245566d 100644 --- a/dnn/torch/weight-exchange/wexchange/torch/__init__.py +++ b/dnn/torch/weight-exchange/wexchange/torch/__init__.py @@ -34,3 +34,4 @@ from .torch import dump_torch_gru_weights, load_torch_gru_weights from .torch import dump_torch_grucell_weights from .torch import dump_torch_embedding_weights, load_torch_embedding_weights from .torch import dump_torch_weights, load_torch_weights +from .torch import dump_torch_adaptive_conv1d_weights \ No newline at end of file diff --git a/dnn/torch/weight-exchange/wexchange/torch/torch.py b/dnn/torch/weight-exchange/wexchange/torch/torch.py index 281d9be3..f7e16032 100644 --- a/dnn/torch/weight-exchange/wexchange/torch/torch.py +++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py @@ -28,12 +28,154 @@ """ import os +import sys import torch import numpy as np +sys.path.append(sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))) +try: + import utils.layers as osce_layers + from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d + from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d + from utils.layers.td_shaper import TDShaper + has_osce=True +except: + has_osce=False + from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer +def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False): + + + w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy() + b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy() + w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy() + b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy() + + if isinstance(where, CWriter): + # pad kernel for quantization + left_padding = adaconv.padding[0] + kernel_size = adaconv.kernel_size + in_channels = adaconv.in_channels + out_channels = adaconv.out_channels + feature_dim = adaconv.feature_dim + + if quantize and kernel_size % 8: + kernel_padding = 8 - (kernel_size % 8) + w_kernel = np.concatenate( + (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)), + dtype=w_kernel.dtype, + axis=2).reshape(-1, feature_dim) + b_kernel = np.concatenate( + (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)), + dtype=b_kernel.dtype, + axis=2).reshape(-1) + left_padding += kernel_padding 
+ kernel_size += kernel_padding + + # write relevant scalar parameters to header file + where.header.write(f""" +#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f +#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f +#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f +#define {name.upper()}_KERNEL_SIZE {kernel_size} +#define {name.upper()}_FRAME_SIZE {adaconv.frame_size} +#define {name.upper()}_LEFT_PADDING {left_padding} +#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size} +#define {name.upper()}_IN_CHANNELS {adaconv.in_channels} +#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels} +#define {name.upper()}_NORM_P {adaconv.norm_p} +#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim} +""" + ) + + print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize) + print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False) + + + else: + np.save(where, 'weight_kernel.npy', w_kernel) + np.save(where, 'bias_kernel.npy', b_kernel) + np.save(where, 'weight_gain.npy', w_gain) + np.save(where, 'bias_gain.npy', b_gain) + + +def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False): + + + w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy() + b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy() + w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy() + b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy() + w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy() + b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy() + + + if isinstance(where, CWriter): + # pad kernel for quantization + left_padding = adaconv.padding[0] + kernel_size = adaconv.kernel_size + + if quantize and w_kernel.shape[0] % 8: + kernel_padding = 8 - (w_kernel.shape[0] % 8) + w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype) + b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype) + left_padding += kernel_padding + kernel_size += kernel_padding + # write relevant scalar parameters to header file + where.header.write(f""" +#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f +#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f +#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f +#define {name.upper()}_KERNEL_SIZE {kernel_size} +#define {name.upper()}_LEFT_PADDING {left_padding} +#define {name.upper()}_FRAME_SIZE {adaconv.frame_size} +#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size} +#define {name.upper()}_IN_CHANNELS {adaconv.in_channels} +#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels} +#define {name.upper()}_NORM_P {adaconv.norm_p} +#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim} +#define {name.upper()}_MAX_LAG {adaconv.max_lag} +""" + ) + + print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize) + print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False) + print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False) + + + else: + np.save(where, 'weight_kernel.npy', w_kernel) + np.save(where, 'bias_kernel.npy', b_kernel) + 
np.save(where, 'weight_gain.npy', w_gain) + np.save(where, 'bias_gain.npy', b_gain) + np.save(where, 'weight_global_gain.npy', w_global_gain) + np.save(where, 'bias_global_gain.npy', b_global_gain) + +def dump_torch_tdshaper(where, shaper, name='tdshaper'): + + if isinstance(where, CWriter): + where.header.write(f""" +#define {name.upper()}_FEATURE_DIM {shaper.feature_dim} +#define {name.upper()}_FRAME_SIZE {shaper.frame_size} +#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k} +#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0} +#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0} +""" + ) + + dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1") + dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2") + + if shaper.innovate: + dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b") + dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c") + dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b") + dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c") + + + def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128): assert gru.num_layers == 1 @@ -221,7 +363,6 @@ def load_torch_conv2d_weights(where, conv): def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False): - print("quantize = ", quantize) w = embed.weight.detach().cpu().numpy().copy().transpose() b = np.zeros(w.shape[0], dtype=w.dtype) @@ -257,11 +398,21 @@ def dump_torch_weights(where, module, name=None, verbose=False, **kwargs): elif isinstance(module, torch.nn.Conv2d): return dump_torch_conv2d_weights(where, module, name, **kwargs) elif isinstance(module, torch.nn.Embedding): - return dump_torch_embedding_weights(where, module) + return dump_torch_embedding_weights(where, module, name, **kwargs) elif isinstance(module, torch.nn.ConvTranspose1d): return dump_torch_tconv1d_weights(where, module, name, **kwargs) else: - raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') + if has_osce: + if isinstance(module, LimitedAdaptiveConv1d): + dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs) + elif isinstance(module, LimitedAdaptiveComb1d): + dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs) + elif isinstance(module, TDShaper): + dump_torch_tdshaper(where, module, name, **kwargs) + else: + raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') + else: + raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') def load_torch_weights(where, module): """ generic function for loading weights of some torch.nn.Module """ diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c index 722a373d..395590f4 100644 --- a/dnn/write_lpcnet_weights.c +++ b/dnn/write_lpcnet_weights.c @@ -46,6 +46,10 @@ #include "plc_data.c" #include "dred_rdovae_enc_data.c" #include "dred_rdovae_dec_data.c" +#ifdef ENABLE_OSCE +#include "lace_data.c" +#include "nolace_data.c" +#endif void write_weights(const WeightArray *list, FILE *fout) { @@ -53,6 +57,9 @@ void write_weights(const WeightArray *list, FILE *fout) unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0}; while (list[i].name != NULL) { WeightHead h; + if (strlen(list[i].name) >= sizeof(h.name) - 1) { + printf("[write_weights] warning: name %s too long\n", list[i].name); + } memcpy(h.head, "DNNw", 
4); h.version = WEIGHT_BLOB_VERSION; h.type = list[i].type; @@ -77,6 +84,14 @@ int main(void) write_weights(lpcnet_plc_arrays, fout); write_weights(rdovaeenc_arrays, fout); write_weights(rdovaedec_arrays, fout); +#ifdef ENABLE_OSCE +#ifndef DISABLE_LACE + write_weights(lacelayers_arrays, fout); +#endif +#ifndef DISABLE_NOLACE + write_weights(nolacelayers_arrays, fout); +#endif +#endif fclose(fout); return 0; } diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk index da610ca1..ce74d954 100644 --- a/lpcnet_headers.mk +++ b/lpcnet_headers.mk @@ -29,3 +29,12 @@ dnn/dred_rdovae_enc_data.h \ dnn/dred_rdovae_dec.h \ dnn/dred_rdovae_dec_data.h \ dnn/dred_rdovae_stats_data.h + +OSCE_HEAD= \ +dnn/osce.h \ +dnn/osce_config.h \ +dnn/osce_structs.h \ +dnn/osce_features.h \ +dnn/nndsp.h \ +dnn/lace_data.h \ +dnn/nolace_data.h diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk index 9b8863ad..17f04756 100644 --- a/lpcnet_sources.mk +++ b/lpcnet_sources.mk @@ -23,6 +23,13 @@ silk/dred_encoder.c \ silk/dred_coding.c \ silk/dred_decoder.c +OSCE_SOURCES = \ +dnn/osce.c \ +dnn/osce_features.c \ +dnn/nndsp.c \ +dnn/lace_data.c \ +dnn/nolace_data.c + DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c diff --git a/meson.build b/meson.build index 289c5917..f468bad4 100644 --- a/meson.build +++ b/meson.build @@ -148,6 +148,7 @@ opts = [ [ 'float-approx', 'FLOAT_APPROX' ], [ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ], [ 'enable-dred', 'ENABLE_DRED' ], + [ 'enable-osce', 'ENABLE_OSCE' ], [ 'assertions', 'ENABLE_ASSERTIONS' ], [ 'hardening', 'ENABLE_HARDENING' ], [ 'fuzzing', 'FUZZING' ], diff --git a/meson_options.txt b/meson_options.txt index d5b69eea..46099276 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -9,6 +9,7 @@ option('intrinsics', type : 'feature', value : 'auto', description : 'Intrinsics option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)') option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)') +option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)') option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights') option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 
44.1 kHz & 2^n frames') diff --git a/silk/API.h b/silk/API.h index 6e623b84..878965c7 100644 --- a/silk/API.h +++ b/silk/API.h @@ -92,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co /* Decoder functions */ /****************************************/ + +/***********************************************/ +/* Load OSCE models from external data pointer */ +/***********************************************/ +opus_int silk_LoadOSCEModels( + void *decState, /* O I/O State */ + const unsigned char *data, /* I pointer to binary blob */ + int len /* I length of binary blob data */ +); + /***********************************************/ /* Get size in bytes of the Silk decoder state */ /***********************************************/ @@ -100,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co ); /*************************/ -/* Init or Reset decoder */ +/* Init and Reset decoder */ /*************************/ +opus_int silk_ResetDecoder( /* O Returns error code */ + void *decState /* I/O State */ +); + opus_int silk_InitDecoder( /* O Returns error code */ void *decState /* I/O State */ ); diff --git a/silk/control.h b/silk/control.h index d30d114c..f5633e62 100644 --- a/silk/control.h +++ b/silk/control.h @@ -147,6 +147,11 @@ typedef struct { /* I: Enable Deep PLC */ opus_int enable_deep_plc; + +#ifdef ENABLE_OSCE + /* I: OSCE method */ + opus_int osce_method; +#endif } silk_DecControlStruct; #ifdef __cplusplus diff --git a/silk/dec_API.c b/silk/dec_API.c index a29ecc73..e4ae8343 100644 --- a/silk/dec_API.c +++ b/silk/dec_API.c @@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "os_support.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#include "osce_structs.h" +#endif + /************************/ /* Decoder Super Struct */ /************************/ @@ -42,12 +47,33 @@ typedef struct { opus_int nChannelsAPI; opus_int nChannelsInternal; opus_int prev_decode_only_middle; +#ifdef ENABLE_OSCE + OSCEModel osce_model; +#endif } silk_decoder; /*********************/ /* Decoder functions */ /*********************/ + + +opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len) +{ +#ifdef ENABLE_OSCE + opus_int ret = SILK_NO_ERROR; + + ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len); + + return ret; +#else + (void) decState; + (void) data; + (void) len; + return SILK_NO_ERROR; +#endif +} + opus_int silk_Get_Decoder_Size( /* O Returns error code */ opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ ) @@ -60,6 +86,24 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co } /* Reset decoder state */ +opus_int silk_ResetDecoder( /* O Returns error code */ + void *decState /* I/O State */ +) +{ + opus_int n, ret = SILK_NO_ERROR; + silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; + + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { + ret = silk_reset_decoder( &channel_state[ n ] ); + } + silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); + /* Not strictly needed, but it's cleaner that way */ + ((silk_decoder *)decState)->prev_decode_only_middle = 0; + + return ret; +} + + opus_int silk_InitDecoder( /* O Returns error code */ void *decState /* I/O State */ ) @@ -67,6 +111,11 @@ opus_int silk_InitDecoder( /* O Returns error co opus_int n, ret = SILK_NO_ERROR; silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; +#ifndef USE_WEIGHTS_FILE + /* load osce models */ + silk_LoadOSCEModels(decState, NULL, 0); 
+#endif + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { ret = silk_init_decoder( &channel_state[ n ] ); } @@ -301,9 +350,17 @@ opus_int silk_Decode( /* O Returns error co } else { condCoding = CODE_CONDITIONALLY; } +#ifdef ENABLE_OSCE + if ( channel_state[n].osce.method != decControl->osce_method ) { + osce_reset( &channel_state[n].osce, decControl->osce_method ); + } +#endif ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, #ifdef ENABLE_DEEP_PLC n == 0 ? lpcnet : NULL, +#endif +#ifdef ENABLE_OSCE + &psDec->osce_model, #endif arch); } else { diff --git a/silk/decode_frame.c b/silk/decode_frame.c index b393952c..48f74aef 100644 --- a/silk/decode_frame.c +++ b/silk/decode_frame.c @@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "PLC.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + /****************/ /* Decode frame */ /****************/ @@ -45,17 +49,26 @@ opus_int silk_decode_frame( opus_int condCoding, /* I The type of conditional coding to use */ #ifdef ENABLE_DEEP_PLC LPCNetPLCState *lpcnet, +#endif +#ifdef ENABLE_OSCE + OSCEModel *osce_model, #endif int arch /* I Run-time architecture */ ) { VARDECL( silk_decoder_control, psDecCtrl ); opus_int L, mv_len, ret = 0; +#ifdef ENABLE_OSCE + opus_int32 ec_start; +#endif SAVE_STACK; L = psDec->frame_length; ALLOC( psDecCtrl, 1, silk_decoder_control ); psDecCtrl->LTP_scale_Q14 = 0; +#ifdef ENABLE_OSCE + ec_start = ec_tell(psRangeDec); +#endif /* Safety checks */ celt_assert( L > 0 && L <= MAX_FRAME_LENGTH ); @@ -87,6 +100,21 @@ opus_int silk_decode_frame( /********************************************************/ silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); + /*************************/ + /* Update output buffer. */ + /*************************/ + celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); + mv_len = psDec->ltp_mem_length - psDec->frame_length; + silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); + silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + +#ifdef ENABLE_OSCE + /********************************************************/ + /* Run SILK enhancer */ + /********************************************************/ + osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch ); +#endif + /********************************************************/ /* Update PLC state */ /********************************************************/ @@ -109,15 +137,18 @@ opus_int silk_decode_frame( lpcnet, #endif arch ); - } - /*************************/ - /* Update output buffer. */ - /*************************/ - celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); - mv_len = psDec->ltp_mem_length - psDec->frame_length; - silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); - silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); +#ifdef ENABLE_OSCE + osce_reset( &psDec->osce, psDec->osce.method ); +#endif + /*************************/ + /* Update output buffer. 
*/ + /*************************/ + celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); + mv_len = psDec->ltp_mem_length - psDec->frame_length; + silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); + silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + } /************************************************/ /* Comfort noise generation / estimation */ diff --git a/silk/init_decoder.c b/silk/init_decoder.c index 16c03dcd..01bc4b7a 100644 --- a/silk/init_decoder.c +++ b/silk/init_decoder.c @@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + +#include "structs.h" + /************************/ -/* Init Decoder State */ +/* Reset Decoder State */ /************************/ -opus_int silk_init_decoder( +opus_int silk_reset_decoder( silk_decoder_state *psDec /* I/O Decoder state pointer */ ) { /* Clear the entire encoder state, except anything copied */ - silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); + silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) ); /* Used to deactivate LSF interpolation */ psDec->first_frame_after_reset = 1; @@ -52,6 +58,27 @@ opus_int silk_init_decoder( /* Reset PLC state */ silk_PLC_Reset( psDec ); +#ifdef ENABLE_OSCE + /* Reset OSCE state and method */ + osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD); +#endif + + return 0; +} + + +/************************/ +/* Init Decoder State */ +/************************/ +opus_int silk_init_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +) +{ + /* Clear the entire encoder state, except anything copied */ + silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); + + silk_reset_decoder( psDec ); + return(0); } diff --git a/silk/main.h b/silk/main.h index c67775ef..cd576d8c 100644 --- a/silk/main.h +++ b/silk/main.h @@ -389,6 +389,10 @@ void silk_NLSF_decode( /****************************************************/ /* Decoder Functions */ /****************************************************/ +opus_int silk_reset_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +); + opus_int silk_init_decoder( silk_decoder_state *psDec /* I/O Decoder state pointer */ ); @@ -412,6 +416,9 @@ opus_int silk_decode_frame( opus_int condCoding, /* I The type of conditional coding to use */ #ifdef ENABLE_DEEP_PLC LPCNetPLCState *lpcnet, +#endif +#ifdef ENABLE_OSCE + OSCEModel *osce_model, #endif int arch /* I Run-time architecture */ ); diff --git a/silk/structs.h b/silk/structs.h index 709d3557..38243be1 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -44,6 +44,11 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "dred_decoder.h" #endif +#ifdef ENABLE_OSCE +#include "osce_config.h" +#include "osce_structs.h" +#endif + #ifdef __cplusplus extern "C" { @@ -238,6 +243,14 @@ typedef struct { } silk_encoder_state; +#ifdef ENABLE_OSCE +typedef struct { + OSCEFeatureState features; + OSCEState state; + int method; +} silk_OSCE_struct; +#endif + /* Struct for Packet Loss Concealment */ typedef struct { opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */ @@ -270,6 +283,10 @@ typedef struct { /* Decoder state */ /********************************/ typedef struct { +#ifdef ENABLE_OSCE + silk_OSCE_struct osce; +#endif +#define SILK_DECODER_STATE_RESET_START prev_gain_Q16 opus_int32 prev_gain_Q16; opus_int32 exc_Q14[ MAX_FRAME_LENGTH ]; opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ]; diff --git a/silk_sources.mk b/silk_sources.mk index 27c07129..3780b164 100644 --- a/silk_sources.mk +++ b/silk_sources.mk @@ -161,4 +161,4 @@ silk/float/schur_FLP.c \ silk/float/sort_FLP.c SILK_SOURCES_FLOAT_AVX2 = \ -silk/float/x86/inner_product_FLP_avx2.c +silk/float/x86/inner_product_FLP_avx2.c \ No newline at end of file diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 596c2dd0..dd95aefc 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -57,6 +57,10 @@ #include "dred_rdovae_dec.h" #endif +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + struct OpusDecoder { int celt_dec_offset; int silk_dec_offset; @@ -383,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) - silk_InitDecoder( silk_dec ); + silk_ResetDecoder( silk_dec ); /* The SILK PLC cannot produce frames of less than 10 ms */ st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); @@ -408,6 +412,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } st->DecControl.enable_deep_plc = st->complexity >= 5; +#ifdef ENABLE_OSCE + st->DecControl.osce_method = OSCE_METHOD_NONE; +#ifndef DISABLE_LACE + if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;} +#endif +#ifndef DISABLE_NOLACE + if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;} +#endif +#endif lost_flag = data == NULL ? 1 : 2 * !!decode_fec; decoded_samples = 0; @@ -953,7 +966,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) ((char*)&st->OPUS_DECODER_RESET_START - (char*)st)); celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - silk_InitDecoder( silk_dec ); + silk_ResetDecoder( silk_dec ); st->stream_channels = st->channels; st->frame_size = st->Fs/400; #ifdef ENABLE_DEEP_PLC @@ -1044,6 +1057,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) 
goto bad_arg; } ret = lpcnet_plc_load_model(&st->lpcnet, data, len); + ret = silk_LoadOSCEModels(silk_dec, data, len) || ret; } break; #endif diff --git a/src/opus_demo.c b/src/opus_demo.c index c5f6250f..bccdf976 100644 --- a/src/opus_demo.c +++ b/src/opus_demo.c @@ -70,6 +70,10 @@ unsigned char *load_blob(const char *filename, int *len) { FILE *file; unsigned char *data; file = fopen(filename, "r"); + if (file == NULL) + { + perror("could not open blob file\n"); + } fseek(file, 0L, SEEK_END); *len = ftell(file); fseek(file, 0L, SEEK_SET); @@ -254,6 +258,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err } #endif + +#ifdef ENABLE_OSCE_TRAINING_DATA +#define COMPLEXITY_MIN 0 +#define COMPLEXITY_MAX 10 + +#define PACKET_LOSS_PERC_MIN 0 +#define PACKET_LOSS_PERC_MAX 50 +#define PACKET_LOSS_PERC_STEP 5 + +#define CBR_BITRATE_LIMIT 8000 + +#define NUM_BITRATES 102 +static int bitrates[NUM_BITRATES] = { + 6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480, + 6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816, + 6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171, + 7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541, + 7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802, + 7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179, + 8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980, + 9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851, + 9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378, + 11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795, + 14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468, + 22000, 32000, 64000 +}; + +static int randint(int min, int max, int step) +{ + double r = ((double) rand())/ (RAND_MAX + 1.); + int d; + + d = ((int) ((max + 1 - min) * r / step) * step) + min; + + return d; +} + +static void new_random_setting(OpusEncoder *enc) +{ + int bitrate_bps; + int complexity; + int packet_loss_perc; + int use_vbr; + + bitrate_bps = bitrates[randint(0, NUM_BITRATES - 1, 1)]; + complexity = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1); + packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP); + use_vbr = bitrate_bps < CBR_BITRATE_LIMIT ? 
1 : randint(0, 1, 1); + + if (1) + { + printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr); + } + + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps)); + opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity)); + opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc)); + opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr)); +} + +#endif + int main(int argc, char *argv[]) { int err; @@ -316,6 +382,10 @@ int main(int argc, char *argv[]) int lost_count=0; FILE *packet_loss_file=NULL; int dred_duration=0; +#ifdef ENABLE_OSCE_TRAINING_DATA + int silk_random_switching = 0; + int silk_frame_counter = 0; +#endif #ifdef USE_WEIGHTS_FILE int blob_len; unsigned char *blob_data; @@ -546,6 +616,12 @@ int main(int argc, char *argv[]) mode_list = celt_hq_test; nb_modes_in_list = 4; args++; +#ifdef ENABLE_OSCE_TRAINING_DATA + } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){ + silk_random_switching = atoi( argv[ args + 1 ] ); + printf("switching encoding parameters every %dth frame\n", silk_random_switching); + args += 2; +#endif } else { printf( "Error: unrecognized setting: %s\n\n", argv[ args ] ); print_usage( argv ); @@ -759,6 +835,15 @@ int main(int argc, char *argv[]) opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3])); frame_size = mode_list[curr_mode][2]; } +#ifdef ENABLE_OSCE_TRAINING_DATA + if (silk_random_switching) + { + silk_frame_counter += 1; + if (silk_frame_counter % silk_random_switching == 0) { + new_random_setting(enc); + } + } +#endif num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin); curr_read = (int)num_read; tot_in += curr_read; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 53c899a0..21dfe4ff 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -50,6 +50,9 @@ #else #include "float/structs_FLP.h" #endif +#ifdef ENABLE_OSCE_TRAINING_DATA +#include +#endif #define MAX_ENCODER_BUFFER 480 @@ -1693,6 +1696,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->application == OPUS_APPLICATION_VOIP) { hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch); + +#ifdef ENABLE_OSCE_TRAINING_DATA + /* write out high pass filtered clean signal*/ + static FILE *fout =NULL; + if (fout == NULL) + { + fout = fopen("clean_hp.s16", "wb"); + } + + { + int idx; + opus_int16 tmp; + for (idx = 0; idx < frame_size; idx++) + { + tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f); + fwrite(&tmp, sizeof(tmp), 1, fout); + } + } +#endif } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } @@ -2909,7 +2931,9 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
{ goto bad_arg; } +#ifdef ENABLE_DRED ret = dred_encoder_load_model(&st->dred_encoder, data, len); +#endif } break; #endif diff --git a/tests/test_opus_api.c b/tests/test_opus_api.c index b6d67572..9500d407 100644 --- a/tests/test_opus_api.c +++ b/tests/test_opus_api.c @@ -103,7 +103,7 @@ opus_int32 test_dec_api(void) for(c=0;c<4;c++) { i=opus_decoder_get_size(c); - if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed(); + if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed(); fprintf(stdout," opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"...."); cfgs++; } @@ -367,7 +367,7 @@ opus_int32 test_msdec_api(void) for(b=-1;b<4;b++) { i=opus_multistream_decoder_get_size(a,b); - if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed(); + if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed(); fprintf(stdout," opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... "); cfgs++; } -- cgit v1.2.3
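Note on the decoder-side behaviour added by this patch: when Opus is built with --enable-osce (or -DOPUS_OSCE=ON), opus_decode_frame() maps the decoder complexity setting to an OSCE method, OSCE_METHOD_NONE below 6, OSCE_METHOD_LACE at 6 and OSCE_METHOD_NOLACE at 7 and above, and silk_decode_frame() runs the enhancer right after silk_decode_core(), passing the number of range-coder bits spent on the frame (the ec_tell() delta) along with the decoder state. A minimal usage sketch, assuming the standard opus_decoder_* API, decoder-side OPUS_SET_COMPLEXITY support, and a build with the weights compiled in:

    #include <opus.h>

    /* Decode one packet with NoLACE enhancement enabled (sketch).
     * Returns the number of decoded samples or a negative error code. */
    int decode_with_osce(const unsigned char *packet, opus_int32 packet_len,
                         opus_int16 *pcm, int max_frame_size)
    {
        int err, ret;
        OpusDecoder *dec = opus_decoder_create(16000, 1, &err);
        if (err != OPUS_OK) return err;
        /* complexity >= 7 selects NoLACE, 6 selects LACE, < 6 disables OSCE */
        opus_decoder_ctl(dec, OPUS_SET_COMPLEXITY(7));
        ret = opus_decode(dec, packet, packet_len, pcm, max_frame_size, 0);
        opus_decoder_destroy(dec);
        return ret;
    }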
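Note on weight loading: write_lpcnet_weights now appends the LACE and NoLACE weight arrays (lacelayers_arrays, nolacelayers_arrays) to the binary blob, and silk_LoadOSCEModels() is the new SILK-level entry point that feeds such a blob to osce_load_models(); for builds without USE_WEIGHTS_FILE, silk_InitDecoder() calls it with a NULL pointer, i.e. without an external blob. The sketch below shows an application-side loading path for a USE_WEIGHTS_FILE build, modelled on load_blob() in opus_demo.c; OPUS_SET_DNN_BLOB is assumed (it is not part of this diff) to be the public ctl whose handler, shown above in opus_decoder_ctl(), now calls silk_LoadOSCEModels() in addition to lpcnet_plc_load_model().

    #include <stdio.h>
    #include <stdlib.h>
    #include <opus.h>

    /* Load a weights blob written by write_lpcnet_weights and hand it to the
     * decoder (sketch). Returns the buffer on success; keep it allocated while
     * the decoder is in use (opus_demo only frees it after decoding finishes). */
    static unsigned char *load_dnn_blob(OpusDecoder *dec, const char *path,
                                        opus_int32 *len)
    {
        FILE *f = fopen(path, "rb");
        unsigned char *data;
        if (f == NULL) return NULL;
        fseek(f, 0L, SEEK_END);
        *len = (opus_int32)ftell(f);
        fseek(f, 0L, SEEK_SET);
        data = (unsigned char *)malloc(*len);
        if (data == NULL || fread(data, 1, *len, f) != (size_t)*len) {
            if (data) free(data);
            fclose(f);
            return NULL;
        }
        fclose(f);
        /* OPUS_SET_DNN_BLOB is an assumption here, not introduced by this patch. */
        if (opus_decoder_ctl(dec, OPUS_SET_DNN_BLOB(data, *len)) != OPUS_OK) {
            free(data);
            return NULL;
        }
        return data;
    }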