gitlab.xiph.org/xiph/opus.git

author     Jan Buethe <jbuethe@amazon.de>        2023-11-08 16:03:39 +0300
committer  Jean-Marc Valin <jmvalin@amazon.com>  2023-12-20 11:42:44 +0300
commit     7d328f5bfaa321d823ff4d11b62d5357c99e0693 (patch)
tree       873593e93c87a7b9b1de7f710696502737f1922b
parent     591c8bad70d8aa414729d1a243a6d930f64d6316 (diff)

Merge LACE/NoLACE under OSCE framework  (ref: opus-ng-lace-integration5)

-rw-r--r--   .github/workflows/autotools.yml | 8
-rw-r--r--   .github/workflows/dred.yml | 2
-rw-r--r--   .gitlab-ci.yml | 8
-rw-r--r--   CMakeLists.txt | 47
-rw-r--r--   Makefile.am | 6
-rwxr-xr-x   autogen.sh | 2
-rw-r--r--   cmake/OpusSources.cmake | 2
-rw-r--r--   configure.ac | 29
-rw-r--r--   dnn/adaconvtest.c | 449
-rw-r--r--   dnn/meson.build | 5
-rw-r--r--   dnn/nndsp.c | 412
-rw-r--r--   dnn/nndsp.h | 141
-rw-r--r--   dnn/nnet.c | 9
-rw-r--r--   dnn/nnet_arch.h | 16
-rw-r--r--   dnn/osce.c | 1411
-rw-r--r--   dnn/osce.h | 81
-rw-r--r--   dnn/osce_config.h | 62
-rw-r--r--   dnn/osce_features.c | 454
-rw-r--r--   dnn/osce_features.h | 50
-rw-r--r--   dnn/osce_structs.h | 124
-rw-r--r--   dnn/torch/osce/create_testvectors.py | 165
-rw-r--r--   dnn/torch/osce/data/silk_enhancement_set.py | 6
-rw-r--r--   dnn/torch/osce/export_model_weights.py | 101
-rw-r--r--   dnn/torch/osce/models/lace.py | 2
-rw-r--r--   dnn/torch/osce/models/no_lace.py | 4
-rw-r--r--   dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py | 18
-rw-r--r--   dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py | 15
-rw-r--r--   dnn/torch/osce/utils/silk_features.py | 16
-rw-r--r--   dnn/torch/osce/utils/spec.py | 1
-rw-r--r--   dnn/torch/weight-exchange/wexchange/c_export/c_writer.py | 14
-rw-r--r--   dnn/torch/weight-exchange/wexchange/torch/__init__.py | 1
-rw-r--r--   dnn/torch/weight-exchange/wexchange/torch/torch.py | 157
-rw-r--r--   dnn/write_lpcnet_weights.c | 15
-rw-r--r--   lpcnet_headers.mk | 9
-rw-r--r--   lpcnet_sources.mk | 7
-rw-r--r--   meson.build | 1
-rw-r--r--   meson_options.txt | 1
-rw-r--r--   silk/API.h | 16
-rw-r--r--   silk/control.h | 5
-rw-r--r--   silk/dec_API.c | 57
-rw-r--r--   silk/decode_frame.c | 47
-rw-r--r--   silk/init_decoder.c | 33
-rw-r--r--   silk/main.h | 7
-rw-r--r--   silk/structs.h | 17
-rw-r--r--   silk_sources.mk | 2
-rw-r--r--   src/opus_decoder.c | 18
-rw-r--r--   src/opus_demo.c | 85
-rw-r--r--   src/opus_encoder.c | 24
-rw-r--r--   tests/test_opus_api.c | 4

49 files changed, 4062 insertions(+), 104 deletions(-)

diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml
index 91d332bf..bb66d5b0 100644
--- a/.github/workflows/autotools.yml
+++ b/.github/workflows/autotools.yml
@@ -29,6 +29,12 @@ jobs:
compiler: gcc,
buildconfig: --enable-assertions --enable-custom-modes
}
+ - {
+ name: "Linux/GCC/EnableDNN",
+ os: ubuntu-latest,
+ compiler: gcc,
+ buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce
+ }
steps:
- uses: actions/checkout@v3
# No AutoMake on Mac so let's install it
@@ -42,4 +48,4 @@ jobs:
- name: Build
run: make -j 2
- name: Test
- run: make check -j 2 \ No newline at end of file
+ run: make check -j 2
diff --git a/.github/workflows/dred.yml b/.github/workflows/dred.yml
index 52ac2571..ac703dd1 100644
--- a/.github/workflows/dred.yml
+++ b/.github/workflows/dred.yml
@@ -74,7 +74,7 @@ jobs:
run: mkdir build
- name: Configure
working-directory: ./build
- run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON
+ run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON
- name: Build
working-directory: ./build
run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0117c46e..92f578bc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -64,9 +64,9 @@ autoconf:
- !reference [.snippets, git_prep]
script:
- ./autogen.sh
- - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx
+ - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
- make -j16
- - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
+ - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
cache:
paths:
- "src/*.o"
@@ -87,7 +87,7 @@ cmake:
script:
- ./autogen.sh
- mkdir build
- - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON
+ - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON
- cmake --build build
- cd build && ctest --output-on-failure -j 16
@@ -101,7 +101,7 @@ cmake:
script:
- ./autogen.sh
- mkdir builddir
- - meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
+ - meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
- meson compile -C builddir
- meson test -C builddir
#- meson dist --no-tests -C builddir
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 073d7de8..06e9b675 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,6 +87,10 @@ set(OPUS_DRED_HELP_STR "enable DRED.")
option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
+set(OPUS_OSCE_HELP_STR "enable OSCE.")
+option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
+add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
+
if(APPLE)
set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
@@ -364,8 +368,6 @@ endif()
add_sources_group(opus silk ${silk_headers} ${silk_sources})
add_sources_group(opus celt ${celt_headers} ${celt_sources})
-add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
-add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
if(OPUS_FIXED_POINT)
add_sources_group(opus silk ${silk_sources_fixed})
@@ -380,11 +382,26 @@ if(NOT OPUS_ENABLE_FLOAT_API)
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
endif()
+if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
+ add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
+ set(OPUS_DNN TRUE)
+else()
+ set(OPUS_DNN FALSE)
+endif()
+
+if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
+ target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
+endif()
+
if (OPUS_DRED)
+ add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
target_compile_definitions(opus PRIVATE ENABLE_DRED)
- if(NOT OPUS_DEEP_PLC)
- target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
- endif()
+endif()
+
+if (OPUS_OSCE)
+ add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
+ target_compile_definitions(opus PRIVATE ENABLE_OSCE)
endif()
if(NOT OPUS_DISABLE_INTRINSICS)
@@ -405,7 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
endif()
add_sources_group(opus celt ${celt_sources_x86_rtcd})
add_sources_group(opus silk ${silk_sources_x86_rtcd})
- add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
+ if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
+ endif()
endif()
if(SSE1_SUPPORTED)
@@ -427,7 +446,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(SSE2_SUPPORTED)
if(OPUS_X86_MAY_HAVE_SSE2)
add_sources_group(opus celt ${celt_sources_sse2})
- add_sources_group(opus lpcnet ${dnn_sources_sse2})
+ if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${dnn_sources_sse2})
+ endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
@@ -445,7 +466,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(OPUS_X86_MAY_HAVE_SSE4_1)
add_sources_group(opus celt ${celt_sources_sse4_1})
add_sources_group(opus silk ${silk_sources_sse4_1})
- add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
+ if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
+ endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
@@ -471,7 +494,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_avx2})
add_sources_group(opus silk ${silk_sources_avx2})
add_sources_group(opus silk ${silk_sources_float_avx2})
- add_sources_group(opus lpcnet ${dnn_sources_avx2})
+ if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${dnn_sources_avx2})
+ endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
if(MSVC)
set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
@@ -524,7 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
- add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
+ if (OPUS_DNN)
+ add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
+ endif()
# silk arm neon depends on main_Fix.h
target_include_directories(opus PRIVATE silk/fixed)
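
The CMake changes above map the new build options onto compile definitions: OPUS_DEEP_PLC, OPUS_DRED and OPUS_OSCE all pull in the shared lpcnet/deep-PLC sources (via the internal OPUS_DNN flag) and define ENABLE_DEEP_PLC, while OPUS_DRED and OPUS_OSCE additionally add their own source groups and define ENABLE_DRED and ENABLE_OSCE respectively. A minimal sketch (illustrative only, not part of the patch) of how a translation unit compiled with the same definitions can report which features a given build enables:

    /* Sketch: prints the DNN features selected by the build options above.
       Compile with the corresponding -DENABLE_* definitions to try it out. */
    #include <stdio.h>

    int main(void)
    {
    #ifdef ENABLE_DEEP_PLC
        printf("deep PLC: enabled\n");
    #else
        printf("deep PLC: disabled\n");
    #endif
    #ifdef ENABLE_DRED
        printf("DRED:     enabled\n");
    #else
        printf("DRED:     disabled\n");
    #endif
    #ifdef ENABLE_OSCE
        printf("OSCE:     enabled\n");
    #else
        printf("OSCE:     disabled\n");
    #endif
        return 0;
    }

Configuring with -DOPUS_OSCE=ON (CMake), --enable-osce (autotools) or -Denable-osce=true (meson) is expected to result in ENABLE_OSCE being defined for the opus target, as the hunks in this commit show.
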
diff --git a/Makefile.am b/Makefile.am
index d09c1771..4fd821a5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -25,6 +25,9 @@ endif
if ENABLE_DRED
LPCNET_SOURCES += $(DRED_SOURCES)
endif
+if ENABLE_OSCE
+LPCNET_SOURCES += $(OSCE_SOURCES)
+endif
if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
@@ -132,6 +135,9 @@ endif
if ENABLE_DRED
LPCNET_HEAD += $(DRED_HEAD)
endif
+if ENABLE_OSCE
+LPCNET_HEAD += $(OSCE_HEAD)
+endif
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
diff --git a/autogen.sh b/autogen.sh
index b7482c2d..1987e38b 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,7 +9,7 @@ set -e
srcdir=`dirname $0`
test -n "$srcdir" && cd "$srcdir"
-dnn/download_model.sh df63771
+dnn/download_model.sh 591c8ba
echo "Updating build configuration files, please wait...."
diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake
index 74e4eaed..0cf24557 100644
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -42,8 +42,10 @@ get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)
get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
+get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
+get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
diff --git a/configure.ac b/configure.ac
index b4c5f2a5..84ce651d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc],
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
[enable_deep_plc=no])
-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
])
-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
has_float_approx=no
case "$host_cpu" in
@@ -904,6 +904,31 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
])
+AC_ARG_ENABLE([osce-training-data],
+ AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+ [enable_osce_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
+AC_ARG_ENABLE([osce],
+ AS_HELP_STRING([--enable-osce], [enable Opus Speech Coding Enhancement (OSCE)]),,
+ [enable_osce=no]
+)
+
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
+])
+
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
+
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
AC_ARG_ENABLE([extra-programs],
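
The configure changes also add --enable-osce-training-data, which defines ENABLE_OSCE_TRAINING_DATA (and implies ENABLE_OSCE and ENABLE_DEEP_PLC) so that builds can dump features for training the enhancement models. A minimal sketch of that gating pattern follows; the helper and file name are hypothetical and only illustrate compiling a raw float32 feature dump in or out with the macro, they are not the code used in dnn/osce.c:

    /* Hypothetical illustration of ENABLE_OSCE_TRAINING_DATA gating: appends
       one frame of float32 features to a file when the macro is defined,
       otherwise compiles to a no-op. */
    #include <stdio.h>

    static void maybe_dump_features(const float *features, int dim)
    {
    #ifdef ENABLE_OSCE_TRAINING_DATA
        FILE *f = fopen("osce_training_features.f32", "ab");
        if (f != NULL) {
            fwrite(features, sizeof(float), (size_t)dim, f);
            fclose(f);
        }
    #else
        (void)features;
        (void)dim;
    #endif
    }

    int main(void)
    {
        float features[4] = {0.f, 0.f, 0.f, 0.f};
        maybe_dump_features(features, 4);
        return 0;
    }
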
diff --git a/dnn/adaconvtest.c b/dnn/adaconvtest.c
new file mode 100644
index 00000000..722e4aff
--- /dev/null
+++ b/dnn/adaconvtest.c
@@ -0,0 +1,449 @@
+#include "lace_data.h"
+#include "nolace_data.h"
+#include "osce.h"
+#include "nndsp.h"
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+void adaconv_compare(
+ const char * prefix,
+ int num_frames,
+ AdaConvState* hAdaConv,
+ LinearLayer *kernel_layer,
+ LinearLayer *gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain
+)
+{
+ char feature_file[256];
+ char x_in_file[256];
+ char x_out_file[256];
+ char message[512];
+ int i_frame, i_sample;
+ float mse;
+ float features[512];
+ float x_in[512];
+ float x_out_ref[512];
+ float x_out[512];
+ float window[40];
+
+ init_adaconv_state(hAdaConv);
+ compute_overlap_window(window, 40);
+
+ FILE *f_features, *f_x_in, *f_x_out;
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "r");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "r");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_out_file, prefix);
+ strcat(x_out_file, "_x_out.f32");
+ f_x_out = fopen(x_out_file, "r");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", x_out_file);
+ perror(message);
+ exit(1);
+ }
+
+ for (i_frame = 0; i_frame < num_frames; i_frame ++)
+ {
+ if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+ exit(1);
+ }
+
+ if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != frame_size * in_channels)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+ exit(1);
+ }
+
+ if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != frame_size * out_channels)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+ exit(1);
+ }
+
+ adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
+ frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
+ filter_gain_a, filter_gain_b, shape_gain, window, 0);
+
+ mse = 0;
+ for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
+ {
+ mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+ }
+ mse = sqrt(mse / (frame_size * out_channels));
+ printf("rmse[%d] %f\n", i_frame, mse);
+
+ }
+}
+
+
+void adacomb_compare(
+ const char * prefix,
+ int num_frames,
+ AdaCombState* hAdaComb,
+ LinearLayer *kernel_layer,
+ LinearLayer *gain_layer,
+ LinearLayer *global_gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float log_gain_limit
+)
+{
+ char feature_file[256];
+ char x_in_file[256];
+ char p_in_file[256];
+ char x_out_file[256];
+ char message[512];
+ int i_frame, i_sample;
+ float mse;
+ float features[512];
+ float x_in[512];
+ float x_out_ref[512];
+ float x_out[512];
+ int pitch_lag;
+ float window[40];
+
+ init_adacomb_state(hAdaComb);
+ compute_overlap_window(window, 40);
+
+ FILE *f_features, *f_x_in, *f_p_in, *f_x_out;
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "r");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "r");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(p_in_file, prefix);
+ strcat(p_in_file, "_p_in.s32");
+ f_p_in = fopen(p_in_file, "r");
+ if (f_p_in == NULL)
+ {
+ sprintf(message, "could not open file %s", p_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_out_file, prefix);
+ strcat(x_out_file, "_x_out.f32");
+ f_x_out = fopen(x_out_file, "r");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", x_out_file);
+ perror(message);
+ exit(1);
+ }
+
+ for (i_frame = 0; i_frame < num_frames; i_frame ++)
+ {
+ if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+ exit(1);
+ }
+
+ if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+ exit(1);
+ }
+
+ if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
+ exit(1);
+ }
+
+ if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+ exit(1);
+ }
+
+ adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
+ pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
+
+
+ mse = 0;
+ for (i_sample = 0; i_sample < frame_size; i_sample ++)
+ {
+ mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+ }
+ mse = sqrt(mse / (frame_size));
+ printf("rmse[%d] %f\n", i_frame, mse);
+
+ }
+}
+
+void adashape_compare(
+ const char * prefix,
+ int num_frames,
+ AdaShapeState* hAdaShape,
+ LinearLayer *alpha1,
+ LinearLayer *alpha2,
+ int feature_dim,
+ int frame_size,
+ int avg_pool_k
+)
+{
+ char feature_file[256];
+ char x_in_file[256];
+ char x_out_file[256];
+ char message[512];
+ int i_frame, i_sample;
+ float mse;
+ float features[512];
+ float x_in[512];
+ float x_out_ref[512];
+ float x_out[512];
+
+ init_adashape_state(hAdaShape);
+
+ FILE *f_features, *f_x_in, *f_x_out;
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "r");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "r");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_out_file, prefix);
+ strcat(x_out_file, "_x_out.f32");
+ f_x_out = fopen(x_out_file, "r");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", x_out_file);
+ perror(message);
+ exit(1);
+ }
+
+ for (i_frame = 0; i_frame < num_frames; i_frame ++)
+ {
+ if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+ exit(1);
+ }
+
+ if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+ exit(1);
+ }
+
+ if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
+ {
+ fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+ exit(1);
+ }
+
+ adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
+ frame_size, avg_pool_k, 0);
+
+ mse = 0;
+ for (i_sample = 0; i_sample < frame_size; i_sample ++)
+ {
+ mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+ }
+ mse = sqrt(mse / (frame_size));
+ printf("rmse[%d] %f\n", i_frame, mse);
+
+ }
+}
+
+
+int main()
+{
+ LACELayers hLACE;
+ NOLACELayers hNoLACE;
+
+ AdaConvState hAdaConv;
+ AdaCombState hAdaComb;
+ AdaShapeState hAdaShape;
+
+ init_adaconv_state(&hAdaConv);
+
+ init_lacelayers(&hLACE, lacelayers_arrays);
+ init_nolacelayers(&hNoLACE, nolacelayers_arrays);
+
+ printf("\ntesting lace.af1 (1 in, 1 out)...\n");
+ adaconv_compare(
+ "testvectors/lace_af1",
+ 5,
+ &hAdaConv,
+ &hLACE.lace_af1_kernel,
+ &hLACE.lace_af1_gain,
+ LACE_AF1_FEATURE_DIM,
+ LACE_AF1_FRAME_SIZE,
+ LACE_AF1_OVERLAP_SIZE,
+ LACE_AF1_IN_CHANNELS,
+ LACE_AF1_OUT_CHANNELS,
+ LACE_AF1_KERNEL_SIZE,
+ LACE_AF1_LEFT_PADDING,
+ LACE_AF1_FILTER_GAIN_A,
+ LACE_AF1_FILTER_GAIN_B,
+ LACE_AF1_SHAPE_GAIN
+ );
+
+
+ printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
+ adaconv_compare(
+ "testvectors/nolace_af1",
+ 5,
+ &hAdaConv,
+ &hNoLACE.nolace_af1_kernel,
+ &hNoLACE.nolace_af1_gain,
+ NOLACE_AF1_FEATURE_DIM,
+ NOLACE_AF1_FRAME_SIZE,
+ NOLACE_AF1_OVERLAP_SIZE,
+ NOLACE_AF1_IN_CHANNELS,
+ NOLACE_AF1_OUT_CHANNELS,
+ NOLACE_AF1_KERNEL_SIZE,
+ NOLACE_AF1_LEFT_PADDING,
+ NOLACE_AF1_FILTER_GAIN_A,
+ NOLACE_AF1_FILTER_GAIN_B,
+ NOLACE_AF1_SHAPE_GAIN
+ );
+
+
+ printf("testing nolace.af4 (2 in, 1 out)...\n");
+ adaconv_compare(
+ "testvectors/nolace_af4",
+ 5,
+ &hAdaConv,
+ &hNoLACE.nolace_af4_kernel,
+ &hNoLACE.nolace_af4_gain,
+ NOLACE_AF4_FEATURE_DIM,
+ NOLACE_AF4_FRAME_SIZE,
+ NOLACE_AF4_OVERLAP_SIZE,
+ NOLACE_AF4_IN_CHANNELS,
+ NOLACE_AF4_OUT_CHANNELS,
+ NOLACE_AF4_KERNEL_SIZE,
+ NOLACE_AF4_LEFT_PADDING,
+ NOLACE_AF4_FILTER_GAIN_A,
+ NOLACE_AF4_FILTER_GAIN_B,
+ NOLACE_AF4_SHAPE_GAIN
+ );
+
+ printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
+ adaconv_compare(
+ "testvectors/nolace_af2",
+ 5,
+ &hAdaConv,
+ &hNoLACE.nolace_af2_kernel,
+ &hNoLACE.nolace_af2_gain,
+ NOLACE_AF2_FEATURE_DIM,
+ NOLACE_AF2_FRAME_SIZE,
+ NOLACE_AF2_OVERLAP_SIZE,
+ NOLACE_AF2_IN_CHANNELS,
+ NOLACE_AF2_OUT_CHANNELS,
+ NOLACE_AF2_KERNEL_SIZE,
+ NOLACE_AF2_LEFT_PADDING,
+ NOLACE_AF2_FILTER_GAIN_A,
+ NOLACE_AF2_FILTER_GAIN_B,
+ NOLACE_AF2_SHAPE_GAIN
+ );
+
+ printf("\ntesting lace.cf1...\n");
+ adacomb_compare(
+ "testvectors/lace_cf1",
+ 5,
+ &hAdaComb,
+ &hLACE.lace_cf1_kernel,
+ &hLACE.lace_cf1_gain,
+ &hLACE.lace_cf1_global_gain,
+ LACE_CF1_FEATURE_DIM,
+ LACE_CF1_FRAME_SIZE,
+ LACE_CF1_OVERLAP_SIZE,
+ LACE_CF1_KERNEL_SIZE,
+ LACE_CF1_LEFT_PADDING,
+ LACE_CF1_FILTER_GAIN_A,
+ LACE_CF1_FILTER_GAIN_B,
+ LACE_CF1_LOG_GAIN_LIMIT
+ );
+
+ printf("\ntesting nolace.tdshape1...\n");
+ adashape_compare(
+ "testvectors/nolace_tdshape1",
+ 5,
+ &hAdaShape,
+ &hNoLACE.nolace_tdshape1_alpha1,
+ &hNoLACE.nolace_tdshape1_alpha2,
+ NOLACE_TDSHAPE1_FEATURE_DIM,
+ NOLACE_TDSHAPE1_FRAME_SIZE,
+ NOLACE_TDSHAPE1_AVG_POOL_K
+ );
+
+ return 0;
+}
+
+/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */ \ No newline at end of file
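
The harness above reads raw float32 frames (and int32 pitch lags) from files named <prefix>_features.f32, <prefix>_x_in.f32, <prefix>_p_in.s32 and <prefix>_x_out.f32; the reference vectors are generated by dnn/torch/osce/create_testvectors.py, which this commit also adds. A sketch of the on-disk layout, using a made-up prefix and made-up sizes purely to illustrate the format expected by adaconv_compare():

    /* Sketch only: writes dummy vectors in the flat float32 layout read by
       adaconv_compare() (feature_dim floats per frame in *_features.f32,
       frame_size * in_channels floats per frame in *_x_in.f32). The "demo"
       prefix and the sizes are illustrative, not taken from the models. */
    #include <stdio.h>

    int main(void)
    {
        const int num_frames = 5, feature_dim = 64, frame_size = 80, in_channels = 1;
        float zeros[80] = {0};
        FILE *f_feat = fopen("demo_features.f32", "wb");
        FILE *f_in = fopen("demo_x_in.f32", "wb");
        int i;
        if (f_feat == NULL || f_in == NULL) return 1;
        for (i = 0; i < num_frames; i++) {
            fwrite(zeros, sizeof(float), feature_dim, f_feat);
            fwrite(zeros, sizeof(float), frame_size * in_channels, f_in);
        }
        fclose(f_feat);
        fclose(f_in);
        return 0;
    }
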
diff --git a/dnn/meson.build b/dnn/meson.build
index 6e520fbc..737d4a02 100644
--- a/dnn/meson.build
+++ b/dnn/meson.build
@@ -5,6 +5,11 @@ if opt_enable_dred
dnn_sources += dred_sources
endif
+osce_sources = sources['OSCE_SOURCES']
+if opt_enable_osce
+ dnn_sources += osce_sources
+endif
+
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
diff --git a/dnn/nndsp.c b/dnn/nndsp.c
new file mode 100644
index 00000000..bfbf5735
--- /dev/null
+++ b/dnn/nndsp.c
@@ -0,0 +1,412 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "nndsp.h"
+#include "arch.h"
+#include "nnet.h"
+#include "os_support.h"
+#include "pitch.h"
+
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI 3.141592653589793f
+#endif
+
+#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
+
+void init_adaconv_state(AdaConvState *hAdaConv)
+{
+ OPUS_CLEAR(hAdaConv, 1);
+}
+
+void init_adacomb_state(AdaCombState *hAdaComb)
+{
+ OPUS_CLEAR(hAdaComb, 1);
+}
+
+void init_adashape_state(AdaShapeState *hAdaShape)
+{
+ OPUS_CLEAR(hAdaShape, 1);
+}
+
+void compute_overlap_window(float *window, int overlap_size)
+{
+ int i_sample;
+ for (i_sample=0; i_sample < overlap_size; i_sample++)
+ {
+ window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
+ }
+}
+
+#ifdef DEBUG_NNDSP
+void print_float_vector(const char* name, const float *vec, int length)
+{
+ for (int i = 0; i < length; i ++)
+ {
+ printf("%s[%d]: %f\n", name, i, vec[i]);
+ }
+}
+#endif
+
+static void scale_kernel(
+ float *kernel,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ float *gain
+)
+/* normalizes (p-norm) kernel over input channel and kernel dimension */
+{
+ float norm;
+ int i_in_channels, i_out_channels, i_kernel;
+
+ for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+ {
+ norm = 0;
+ for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++)
+ {
+ for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
+ {
+ norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
+ }
+ }
+#ifdef DEBUG_NNDSP
+ printf("kernel norm: %f, %f\n", norm, sqrt(norm));
+#endif
+ norm = 1.f / (1e-6f + sqrt(norm));
+ for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+ {
+ for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
+ {
+
+ kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels];
+ }
+ }
+ }
+}
+
+static void transform_gains(
+ float *gains,
+ int num_gains,
+ float filter_gain_a,
+ float filter_gain_b
+)
+{
+ int i;
+ for (i = 0; i < num_gains; i++)
+ {
+ gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
+ }
+}
+
+void adaconv_process_frame(
+ AdaConvState* hAdaConv,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain,
+ float *window,
+ int arch
+)
+{
+ float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
+ float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
+ float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
+ float kernel0[ADACONV_MAX_KERNEL_SIZE];
+ float kernel1[ADACONV_MAX_KERNEL_SIZE];
+ float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
+ float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
+ float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
+ float *p_input;
+ int i_in_channels, i_out_channels, i_sample;
+
+ (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+ celt_assert(shape_gain == 1);
+ celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
+ celt_assert(kernel_size < frame_size);
+
+ OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
+ OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
+ OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("x_in", x_in, in_channels * frame_size);
+#endif
+
+ /* prepare input */
+ for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+ {
+ OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
+ OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
+ }
+ p_input = input_buffer + kernel_size;
+
+
+ /* calculate new kernel and new gain */
+ compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+ compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+ print_float_vector("features", features, feature_dim);
+ print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
+ print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
+#endif
+ transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
+ scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
+ print_float_vector("adaconv_gain", gain_buffer, out_channels);
+#endif
+
+ /* calculate overlapping part using kernel from last frame */
+
+ for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+ {
+ for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+ {
+ OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
+ OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
+
+ OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+ OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+ celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
+ celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
+ for (i_sample = 0; i_sample < overlap_size; i_sample++)
+ {
+ output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
+ output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
+ }
+ for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+ {
+ output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
+ }
+ }
+ }
+
+ OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("x_out", x_out, out_channels * frame_size);
+#endif
+
+ /* buffer update */
+ for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+ {
+ OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
+ }
+ OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
+}
+
+void adacomb_process_frame(
+ AdaCombState* hAdaComb,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ const LinearLayer *global_gain_layer,
+ int pitch_lag,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float log_gain_limit,
+ float *window,
+ int arch
+)
+{
+ float output_buffer[ADACOMB_MAX_FRAME_SIZE];
+ float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
+ float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
+ float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
+ float gain, global_gain;
+ float *p_input;
+ int i_sample;
+ float kernel[16];
+ float last_kernel[16];
+
+ (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+ OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
+ OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
+ OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
+
+ OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
+ OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
+ p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
+
+ /* calculate new kernel and new gain */
+ compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+ compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
+ compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+ print_float_vector("features", features, feature_dim);
+ print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
+ print_float_vector("adacomb_gain_raw", &gain, 1);
+ print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
+#endif
+ gain = exp(log_gain_limit - gain);
+ global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
+ scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
+ print_float_vector("adacomb_gain", &gain, 1);
+#endif
+
+ OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
+ OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
+ OPUS_COPY(kernel, kernel_buffer, kernel_size);
+ OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
+
+ celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
+
+ celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
+ for (i_sample = 0; i_sample < overlap_size; i_sample++)
+ {
+ output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
+ }
+
+ for (i_sample = 0; i_sample < overlap_size; i_sample++)
+ {
+ output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
+ }
+
+ for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+ {
+ output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
+ }
+ OPUS_COPY(x_out, output_buffer, frame_size);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("x_out", x_out, frame_size);
+#endif
+
+ /* buffer update */
+ OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
+ OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
+ hAdaComb->last_pitch_lag = pitch_lag;
+ hAdaComb->last_global_gain = global_gain;
+}
+
+
+void adashape_process_frame(
+ AdaShapeState *hAdaShape,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *alpha1,
+ const LinearLayer *alpha2,
+ int feature_dim,
+ int frame_size,
+ int avg_pool_k,
+ int arch
+)
+{
+ float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
+ float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
+ int i, k;
+ int tenv_size;
+ float mean;
+ float *tenv;
+
+ celt_assert(frame_size % avg_pool_k == 0);
+ celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
+
+ tenv_size = frame_size / avg_pool_k;
+ tenv = in_buffer + feature_dim;
+ OPUS_CLEAR(tenv, tenv_size + 1);
+
+ OPUS_COPY(in_buffer, features, feature_dim);
+
+ /* calculate temporal envelope */
+ mean = 0;
+ for (i = 0; i < tenv_size; i++)
+ {
+ for (k = 0; k < avg_pool_k; k++)
+ {
+ tenv[i] += fabs(x_in[i * avg_pool_k + k]);
+ }
+ tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
+ mean += tenv[i];
+ }
+ mean /= tenv_size;
+ for (i = 0; i < tenv_size; i++)
+ {
+ tenv[i] -= mean;
+ }
+ tenv[tenv_size] = mean;
+#ifdef DEBUG_NNDSP
+ print_float_vector("tenv", tenv, tenv_size + 1);
+#endif
+
+ /* calculate temporal weights */
+#ifdef DEBUG_NNDSP
+ print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
+#endif
+ compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, feature_dim + tenv_size + 1, ACTIVATION_LINEAR, arch);
+#ifdef DEBUG_NNDSP
+ print_float_vector("alpha1_out", out_buffer, frame_size);
+#endif
+ /* compute leaky ReLU by hand. ToDo: try tanh activation */
+ for (i = 0; i < frame_size; i ++)
+ {
+ in_buffer[i] = out_buffer[i] >= 0 ? out_buffer[i] : 0.2f * out_buffer[i];
+ }
+#ifdef DEBUG_NNDSP
+ print_float_vector("post_alpha1", in_buffer, frame_size);
+#endif
+ compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
+
+ /* shape signal */
+ for (i = 0; i < frame_size; i ++)
+ {
+ x_out[i] = exp(out_buffer[i]) * x_in[i];
+ }
+
+}
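
Read directly from the code above, the per-frame processing can be summarized as follows (the star denotes the adaptive FIR filtering carried out with celt_pitch_xcorr()). transform_gains() and scale_kernel() turn the raw network outputs into an L2-normalized kernel scaled by a learned gain,

    g_c = \exp(a \hat{g}_c + b), \qquad
    k_{c,i} \leftarrow \frac{g_c}{10^{-6} + \lVert k_c \rVert_2}\, k_{c,i},

and adaconv_process_frame() cross-fades the output of the previous frame's kernel into that of the new one over the first N_ov samples, using the raised-cosine window

    w[n] = \tfrac{1}{2} + \tfrac{1}{2}\cos\!\left(\pi \, \frac{n + 0.5}{N_{\mathrm{ov}}}\right),

    y_c[n] = \sum_i \begin{cases}
      w[n]\,(k^{\mathrm{prev}}_{c,i} \star x_i)[n] + (1 - w[n])\,(k^{\mathrm{new}}_{c,i} \star x_i)[n], & 0 \le n < N_{\mathrm{ov}},\\
      (k^{\mathrm{new}}_{c,i} \star x_i)[n], & N_{\mathrm{ov}} \le n < N_{\mathrm{frame}}.
    \end{cases}

The comb stage (adacomb_process_frame()) applies the same structure to the pitch-delayed signal, with a per-kernel gain \exp(g_{\mathrm{lim}} - \hat{g}), a separate global gain \exp(a \hat{g}_{\mathrm{glob}} + b), and the input signal fed through and added to the filtered output.
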
diff --git a/dnn/nndsp.h b/dnn/nndsp.h
new file mode 100644
index 00000000..f00094b6
--- /dev/null
+++ b/dnn/nndsp.h
@@ -0,0 +1,141 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNDSP_H
+#define NNDSP_H
+
+#include "opus_types.h"
+#include "nnet.h"
+#include <string.h>
+
+
+#define ADACONV_MAX_KERNEL_SIZE 16
+#define ADACONV_MAX_INPUT_CHANNELS 2
+#define ADACONV_MAX_OUTPUT_CHANNELS 2
+#define ADACONV_MAX_FRAME_SIZE 80
+#define ADACONV_MAX_OVERLAP_SIZE 40
+
+#define ADACOMB_MAX_LAG 300
+#define ADACOMB_MAX_KERNEL_SIZE 16
+#define ADACOMB_MAX_FRAME_SIZE 80
+#define ADACOMB_MAX_OVERLAP_SIZE 40
+
+#define ADASHAPE_MAX_INPUT_DIM 512
+#define ADASHAPE_MAX_FRAME_SIZE 160
+
+/*#define DEBUG_NNDSP*/
+#ifdef DEBUG_NNDSP
+#include <stdio.h>
+#endif
+
+
+void print_float_vector(const char* name, const float *vec, int length);
+
+typedef struct {
+ float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
+ float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
+ float last_gain;
+} AdaConvState;
+
+
+typedef struct {
+ float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
+ float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
+ float last_global_gain;
+ int last_pitch_lag;
+} AdaCombState;
+
+
+typedef struct {
+ float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM];
+ float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
+} AdaShapeState;
+
+void init_adaconv_state(AdaConvState *hAdaConv);
+
+void init_adacomb_state(AdaCombState *hAdaComb);
+
+void init_adashape_state(AdaShapeState *hAdaShape);
+
+void compute_overlap_window(float *window, int overlap_size);
+
+void adaconv_process_frame(
+ AdaConvState* hAdaConv,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ int feature_dim, /* not strictly necessary */
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain,
+ float *window,
+ int arch
+);
+
+void adacomb_process_frame(
+ AdaCombState* hAdaComb,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ const LinearLayer *global_gain_layer,
+ int pitch_lag,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float log_gain_limit,
+ float *window,
+ int arch
+);
+
+void adashape_process_frame(
+ AdaShapeState *hAdaShape,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *alpha1,
+ const LinearLayer *alpha2,
+ int feature_dim,
+ int frame_size,
+ int avg_pool_k,
+ int arch
+);
+
+#endif
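
The three modules declared above share the same life cycle: the caller zero-initializes the state once (init_ada*_state()), computes the shared overlap window once with compute_overlap_window() (used by the conv and comb modules), and then calls the corresponding *_process_frame() once per subframe. A standalone sketch, re-implementing the same raised-cosine definition used in nndsp.c purely for illustration, that prints the cross-fade weight pairs applied to the old and new filter outputs:

    /* Standalone sketch: reproduces the overlap window from
       compute_overlap_window() (assuming the definition in nndsp.c) and
       prints w[n] (weight of the previous kernel's output) and 1 - w[n]
       (weight of the new kernel's output) over a 40-sample overlap,
       matching ADACONV_MAX_OVERLAP_SIZE. Compile with -lm. */
    #include <math.h>
    #include <stdio.h>

    #ifndef M_PI
    #define M_PI 3.141592653589793
    #endif

    #define OVERLAP_SIZE 40

    int main(void)
    {
        float window[OVERLAP_SIZE];
        int n;
        for (n = 0; n < OVERLAP_SIZE; n++)
            window[n] = 0.5f + 0.5f * (float)cos(M_PI * (n + 0.5) / OVERLAP_SIZE);
        for (n = 0; n < OVERLAP_SIZE; n++)
            printf("w[%2d] = %.4f   1 - w[%2d] = %.4f\n", n, window[n], n, 1.f - window[n]);
        return 0;
    }
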
diff --git a/dnn/nnet.c b/dnn/nnet.c
index e794e450..7ba623ca 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -41,6 +41,10 @@
#include "os_support.h"
#include "vec.h"
+#ifdef ENABLE_OSCE
+#include "osce_config.h"
+#endif
+
#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
@@ -59,8 +63,11 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float
compute_activation(output, output, layer->nb_outputs, activation, arch);
}
+#ifdef ENABLE_OSCE
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
+#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
-
+#endif
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
diff --git a/dnn/nnet_arch.h b/dnn/nnet_arch.h
index 12a467e5..694a3608 100644
--- a/dnn/nnet_arch.h
+++ b/dnn/nnet_arch.h
@@ -64,13 +64,29 @@ static OPUS_INLINE float relu(float x)
return x < 0 ? 0 : x;
}
+/*#define HIGH_ACCURACY */
+
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
{
int i;
if (activation == ACTIVATION_SIGMOID) {
+#ifdef HIGH_ACCURACY
+ for (int n=0; n<N; n++)
+ {
+ output[n] = 1.f / (1 + exp(-input[n]));
+ }
+#else
vec_sigmoid(output, input, N);
+#endif
} else if (activation == ACTIVATION_TANH) {
+#ifdef HIGH_ACCURACY
+ for (int n=0; n<N; n++)
+ {
+ output[n] = tanh(input[n]);
+ }
+#else
vec_tanh(output, input, N);
+#endif
} else if (activation == ACTIVATION_SWISH) {
vec_swish(output, input, N);
} else if (activation == ACTIVATION_RELU) {
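
The new (disabled by default) HIGH_ACCURACY path in compute_activation_() replaces the vectorized approximations vec_sigmoid() and vec_tanh() with the exact activations,

    \sigma(x) = \frac{1}{1 + e^{-x}}, \qquad
    \tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} = 2\,\sigma(2x) - 1,

presumably to allow closer comparison against floating-point reference output (for example the test vectors used by dnn/adaconvtest.c), at the cost of the speed of the SIMD approximations.
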
diff --git a/dnn/osce.c b/dnn/osce.c
new file mode 100644
index 00000000..2a78a6ea
--- /dev/null
+++ b/dnn/osce.c
@@ -0,0 +1,1411 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <math.h>
+#include "osce.h"
+#include "osce_features.h"
+#include "os_support.h"
+#include "nndsp.h"
+#include "float_cast.h"
+#include "arch.h"
+
+#ifdef OSCE_DEBUG
+#include <stdio.h>
+/*#define WRITE_FEATURES*/
+/*#define DEBUG_LACE*/
+/*#define DEBUG_NOLACE*/
+#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0)
+#endif
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
+
+#define CLIP(a, min, max) (((a) < (min) ? (min) : (a)) > (max) ? (max) : (a))
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+/* LACE */
+
+#ifndef DISABLE_LACE
+
+static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+ float x;
+ (void) dim;
+
+ numbits = logscale ? log(numbits) : numbits;
+ x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+
+ emb[0] = sin(x * LACE_NUMBITS_SCALE_0 - 0.5f);
+ emb[1] = sin(x * LACE_NUMBITS_SCALE_1 - 0.5f);
+ emb[2] = sin(x * LACE_NUMBITS_SCALE_2 - 0.5f);
+ emb[3] = sin(x * LACE_NUMBITS_SCALE_3 - 0.5f);
+ emb[4] = sin(x * LACE_NUMBITS_SCALE_4 - 0.5f);
+ emb[5] = sin(x * LACE_NUMBITS_SCALE_5 - 0.5f);
+ emb[6] = sin(x * LACE_NUMBITS_SCALE_6 - 0.5f);
+ emb[7] = sin(x * LACE_NUMBITS_SCALE_7 - 0.5f);
+}
+
+
+static int init_lace(LACE *hLACE, const WeightArray *weights)
+{
+ int ret = 0;
+ OPUS_CLEAR(hLACE, 1);
+ celt_assert(weights != NULL);
+
+ ret = init_lacelayers(&hLACE->layers, weights);
+
+ compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE);
+
+ return ret;
+}
+
+static void reset_lace_state(LACEState *state)
+{
+ OPUS_CLEAR(state, 1);
+
+ init_adacomb_state(&state->cf1_state);
+ init_adacomb_state(&state->cf2_state);
+ init_adaconv_state(&state->af1_state);
+}
+
+static void lace_feature_net(
+ LACE *hLACE,
+ LACEState *state,
+ float *output,
+ const float *features,
+ const float *numbits,
+ const int *periods,
+ int arch
+)
+{
+ float input_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
+ float output_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
+ float numbits_embedded[2 * LACE_NUMBITS_EMBEDDING_DIM];
+ int i_subframe;
+
+ compute_lace_numbits_embedding(numbits_embedded, numbits[0], LACE_NUMBITS_EMBEDDING_DIM,
+ log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
+ compute_lace_numbits_embedding(numbits_embedded + LACE_NUMBITS_EMBEDDING_DIM, numbits[1], LACE_NUMBITS_EMBEDDING_DIM,
+ log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
+
+ /* scaling and dimensionality reduction */
+ for (i_subframe = 0; i_subframe < 4; i_subframe ++)
+ {
+ OPUS_COPY(input_buffer, features + i_subframe * LACE_NUM_FEATURES, LACE_NUM_FEATURES);
+ OPUS_COPY(input_buffer + LACE_NUM_FEATURES, hLACE->layers.lace_pitch_embedding.float_weights + periods[i_subframe] * LACE_PITCH_EMBEDDING_DIM, LACE_PITCH_EMBEDDING_DIM);
+ OPUS_COPY(input_buffer + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * LACE_NUMBITS_EMBEDDING_DIM);
+
+ compute_generic_conv1d(
+ &hLACE->layers.lace_fnet_conv1,
+ output_buffer + i_subframe * LACE_HIDDEN_FEATURE_DIM,
+ NULL,
+ input_buffer,
+ LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* subframe accumulation */
+ OPUS_COPY(input_buffer, output_buffer, 4 * LACE_HIDDEN_FEATURE_DIM);
+ compute_generic_conv1d(
+ &hLACE->layers.lace_fnet_conv2,
+ output_buffer,
+ state->feature_net_conv2_state,
+ input_buffer,
+ 4 * LACE_HIDDEN_FEATURE_DIM,
+ ACTIVATION_TANH,
+ arch
+ );
+
+ /* tconv upsampling */
+ OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+ compute_generic_dense(
+ &hLACE->layers.lace_fnet_tconv,
+ output_buffer,
+ input_buffer,
+ ACTIVATION_LINEAR,
+ arch
+ );
+
+ /* GRU */
+ OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ compute_generic_gru(
+ &hLACE->layers.lace_fnet_gru_input,
+ &hLACE->layers.lace_fnet_gru_recurrent,
+ state->feature_net_gru_state,
+ input_buffer + i_subframe * LACE_COND_DIM,
+ arch
+ );
+ OPUS_COPY(output + i_subframe * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM);
+ }
+}
+
+
+static void lace_process_20ms_frame(
+ LACE* hLACE,
+ LACEState *state,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const float *numbits,
+ const int *periods,
+ int arch
+)
+{
+ float feature_buffer[4 * LACE_COND_DIM];
+ float output_buffer[4 * LACE_FRAME_SIZE];
+ int i_subframe, i_sample;
+
+#ifdef DEBUG_LACE
+ static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
+ static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods;
+
+
+ FINIT(f_features, "debug/c_features.f32", "wb");
+ FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
+ FINIT(f_xin, "debug/c_x_in.f32", "wb");
+ FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
+ FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
+ FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
+ FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
+ FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
+ FINIT(f_numbits, "debug/c_numbits.f32", "wb");
+ FINIT(f_periods, "debug/c_periods.s32", "wb");
+
+ fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin);
+ fwrite(numbits, sizeof(*numbits), 2, f_numbits);
+ fwrite(periods, sizeof(*periods), 4, f_periods);
+#endif
+
+ /* pre-emphasis */
+ for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++)
+ {
+ output_buffer[i_sample] = x_in[i_sample] - LACE_PREEMPH * state->preemph_mem;
+ state->preemph_mem = x_in[i_sample];
+ }
+
+ /* run feature encoder */
+ lace_feature_net(hLACE, state, feature_buffer, features, numbits, periods, arch);
+#ifdef DEBUG_LACE
+ fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features);
+ fwrite(feature_buffer, sizeof(*feature_buffer), 4 * LACE_COND_DIM, f_encfeatures);
+ fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph);
+#endif
+
+ /* 1st comb filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ adacomb_process_frame(
+ &state->cf1_state,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ feature_buffer + i_subframe * LACE_COND_DIM,
+ &hLACE->layers.lace_cf1_kernel,
+ &hLACE->layers.lace_cf1_gain,
+ &hLACE->layers.lace_cf1_global_gain,
+ periods[i_subframe],
+ LACE_COND_DIM,
+ LACE_FRAME_SIZE,
+ LACE_OVERLAP_SIZE,
+ LACE_CF1_KERNEL_SIZE,
+ LACE_CF1_LEFT_PADDING,
+ LACE_CF1_FILTER_GAIN_A,
+ LACE_CF1_FILTER_GAIN_B,
+ LACE_CF1_LOG_GAIN_LIMIT,
+ hLACE->window,
+ arch);
+ }
+
+#ifdef DEBUG_LACE
+ fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1);
+#endif
+
+ /* 2nd comb filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ adacomb_process_frame(
+ &state->cf2_state,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ feature_buffer + i_subframe * LACE_COND_DIM,
+ &hLACE->layers.lace_cf2_kernel,
+ &hLACE->layers.lace_cf2_gain,
+ &hLACE->layers.lace_cf2_global_gain,
+ periods[i_subframe],
+ LACE_COND_DIM,
+ LACE_FRAME_SIZE,
+ LACE_OVERLAP_SIZE,
+ LACE_CF2_KERNEL_SIZE,
+ LACE_CF2_LEFT_PADDING,
+ LACE_CF2_FILTER_GAIN_A,
+ LACE_CF2_FILTER_GAIN_B,
+ LACE_CF2_LOG_GAIN_LIMIT,
+ hLACE->window,
+ arch);
+ }
+#ifdef DEBUG_LACE
+ fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2);
+#endif
+
+ /* final adaptive filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ adaconv_process_frame(
+ &state->af1_state,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ output_buffer + i_subframe * LACE_FRAME_SIZE,
+ feature_buffer + i_subframe * LACE_COND_DIM,
+ &hLACE->layers.lace_af1_kernel,
+ &hLACE->layers.lace_af1_gain,
+ LACE_COND_DIM,
+ LACE_FRAME_SIZE,
+ LACE_OVERLAP_SIZE,
+ LACE_AF1_IN_CHANNELS,
+ LACE_AF1_OUT_CHANNELS,
+ LACE_AF1_KERNEL_SIZE,
+ LACE_AF1_LEFT_PADDING,
+ LACE_AF1_FILTER_GAIN_A,
+ LACE_AF1_FILTER_GAIN_B,
+ LACE_AF1_SHAPE_GAIN,
+ hLACE->window,
+ arch);
+ }
+#ifdef DEBUG_LACE
+ fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1);
+#endif
+
+ /* de-emphasis */
+ for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++)
+ {
+ x_out[i_sample] = output_buffer[i_sample] + LACE_PREEMPH * state->deemph_mem;
+ state->deemph_mem = x_out[i_sample];
+ }
+#ifdef DEBUG_LACE
+ fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph);
+#endif
+}
+
+#endif /* #ifndef DISABLE_LACE */
+
+
+/* NoLACE */
+#ifndef DISABLE_NOLACE
+
+static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+ float x;
+ (void) dim;
+
+ numbits = logscale ? log(numbits) : numbits;
+ x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+
+ emb[0] = sin(x * NOLACE_NUMBITS_SCALE_0 - 0.5f);
+ emb[1] = sin(x * NOLACE_NUMBITS_SCALE_1 - 0.5f);
+ emb[2] = sin(x * NOLACE_NUMBITS_SCALE_2 - 0.5f);
+ emb[3] = sin(x * NOLACE_NUMBITS_SCALE_3 - 0.5f);
+ emb[4] = sin(x * NOLACE_NUMBITS_SCALE_4 - 0.5f);
+ emb[5] = sin(x * NOLACE_NUMBITS_SCALE_5 - 0.5f);
+ emb[6] = sin(x * NOLACE_NUMBITS_SCALE_6 - 0.5f);
+ emb[7] = sin(x * NOLACE_NUMBITS_SCALE_7 - 0.5f);
+}
+
+static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights)
+{
+ int ret = 0;
+ OPUS_CLEAR(hNoLACE, 1);
+ celt_assert(weights != NULL);
+
+ ret = init_nolacelayers(&hNoLACE->layers, weights);
+
+ compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE);
+
+ return ret;
+}
+
+static void reset_nolace_state(NoLACEState *state)
+{
+ OPUS_CLEAR(state, 1);
+
+ init_adacomb_state(&state->cf1_state);
+ init_adacomb_state(&state->cf2_state);
+ init_adaconv_state(&state->af1_state);
+ init_adaconv_state(&state->af2_state);
+ init_adaconv_state(&state->af3_state);
+ init_adaconv_state(&state->af4_state);
+ init_adashape_state(&state->tdshape1_state);
+ init_adashape_state(&state->tdshape2_state);
+ init_adashape_state(&state->tdshape3_state);
+}
+
+static void nolace_feature_net(
+ NoLACE *hNoLACE,
+ NoLACEState *state,
+ float *output,
+ const float *features,
+ const float *numbits,
+ const int *periods,
+ int arch
+)
+{
+ float input_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
+ float output_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
+ float numbits_embedded[2 * NOLACE_NUMBITS_EMBEDDING_DIM];
+ int i_subframe;
+
+ compute_nolace_numbits_embedding(numbits_embedded, numbits[0], NOLACE_NUMBITS_EMBEDDING_DIM,
+ log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1);
+ compute_nolace_numbits_embedding(numbits_embedded + NOLACE_NUMBITS_EMBEDDING_DIM, numbits[1], NOLACE_NUMBITS_EMBEDDING_DIM,
+ log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1);
+
+ /* scaling and dimensionality reduction */
+ for (i_subframe = 0; i_subframe < 4; i_subframe ++)
+ {
+ OPUS_COPY(input_buffer, features + i_subframe * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES);
+ OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[i_subframe] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM);
+ OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * NOLACE_NUMBITS_EMBEDDING_DIM);
+
+ compute_generic_conv1d(
+ &hNoLACE->layers.nolace_fnet_conv1,
+ output_buffer + i_subframe * NOLACE_HIDDEN_FEATURE_DIM,
+ NULL,
+ input_buffer,
+ NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * NOLACE_NUMBITS_EMBEDDING_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* subframe accumulation */
+ OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_HIDDEN_FEATURE_DIM);
+ compute_generic_conv1d(
+ &hNoLACE->layers.nolace_fnet_conv2,
+ output_buffer,
+ state->feature_net_conv2_state,
+ input_buffer,
+ 4 * NOLACE_HIDDEN_FEATURE_DIM,
+ ACTIVATION_TANH,
+ arch
+ );
+
+ /* tconv upsampling */
+ OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM);
+ compute_generic_dense(
+ &hNoLACE->layers.nolace_fnet_tconv,
+ output_buffer,
+ input_buffer,
+ ACTIVATION_LINEAR,
+ arch
+ );
+
+ /* GRU */
+ OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM);
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ compute_generic_gru(
+ &hNoLACE->layers.nolace_fnet_gru_input,
+ &hNoLACE->layers.nolace_fnet_gru_recurrent,
+ state->feature_net_gru_state,
+ input_buffer + i_subframe * NOLACE_COND_DIM,
+ arch
+ );
+ OPUS_COPY(output + i_subframe * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM);
+ }
+}
+
+
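+/* NoLACE 20-ms frame processing: pre-emphasis, feature encoding, two adaptive comb
+   filtering stages, adaptive convolutions af1..af4 interleaved with three temporal
+   shaping (adashape) rounds, and de-emphasis; the conditioning features are refined
+   by small post-convolutions between stages */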
+static void nolace_process_20ms_frame(
+ NoLACE* hNoLACE,
+ NoLACEState *state,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const float *numbits,
+ const int *periods,
+ int arch
+)
+{
+ float feature_buffer[4 * NOLACE_COND_DIM];
+ float feature_transform_buffer[4 * NOLACE_COND_DIM];
+ float x_buffer1[8 * NOLACE_FRAME_SIZE];
+ float x_buffer2[8 * NOLACE_FRAME_SIZE];
+ int i_subframe, i_sample;
+ NOLACELayers *layers = &hNoLACE->layers;
+
+#ifdef DEBUG_NOLACE
+ static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
+    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_postaf2=NULL, *f_xdeemph=NULL, *f_numbits=NULL, *f_periods=NULL;
+
+ FINIT(f_features, "debug/c_features.f32", "wb");
+ FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
+ FINIT(f_xin, "debug/c_x_in.f32", "wb");
+ FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
+ FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
+ FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
+ FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
+    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
+    FINIT(f_postaf2, "debug/c_post_af2.f32", "wb");
+ FINIT(f_numbits, "debug/c_numbits.f32", "wb");
+ FINIT(f_periods, "debug/c_periods.s32", "wb");
+
+ fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin);
+ fwrite(numbits, sizeof(*numbits), 2, f_numbits);
+ fwrite(periods, sizeof(*periods), 4, f_periods);
+#endif
+
+ /* pre-emphasis */
+ for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
+ {
+ x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem;
+ state->preemph_mem = x_in[i_sample];
+ }
+
+ /* run feature encoder */
+ nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch);
+#ifdef DEBUG_NOLACE
+ fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features);
+ fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures);
+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph);
+#endif
+
+ /* 1st comb filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ /* modifies signal in place */
+ adacomb_process_frame(
+ &state->cf1_state,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_cf1_kernel,
+ &hNoLACE->layers.nolace_cf1_gain,
+ &hNoLACE->layers.nolace_cf1_global_gain,
+ periods[i_subframe],
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_CF1_KERNEL_SIZE,
+ NOLACE_CF1_LEFT_PADDING,
+ NOLACE_CF1_FILTER_GAIN_A,
+ NOLACE_CF1_FILTER_GAIN_B,
+ NOLACE_CF1_LOG_GAIN_LIMIT,
+ hNoLACE->window,
+ arch);
+
+ compute_generic_conv1d(
+ &layers->nolace_post_cf1,
+ feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+ state->post_cf1_state,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ NOLACE_COND_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* update feature buffer */
+ OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+ fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1);
+#endif
+
+ /* 2nd comb filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ /* modifies signal in place */
+ adacomb_process_frame(
+ &state->cf2_state,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_cf2_kernel,
+ &hNoLACE->layers.nolace_cf2_gain,
+ &hNoLACE->layers.nolace_cf2_global_gain,
+ periods[i_subframe],
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_CF2_KERNEL_SIZE,
+ NOLACE_CF2_LEFT_PADDING,
+ NOLACE_CF2_FILTER_GAIN_A,
+ NOLACE_CF2_FILTER_GAIN_B,
+ NOLACE_CF2_LOG_GAIN_LIMIT,
+ hNoLACE->window,
+ arch);
+
+ compute_generic_conv1d(
+ &layers->nolace_post_cf2,
+ feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+ state->post_cf2_state,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ NOLACE_COND_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* update feature buffer */
+ OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+ fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2);
+#endif
+
+ /* final adaptive filtering stage */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ adaconv_process_frame(
+ &state->af1_state,
+ x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_af1_kernel,
+ &hNoLACE->layers.nolace_af1_gain,
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_AF1_IN_CHANNELS,
+ NOLACE_AF1_OUT_CHANNELS,
+ NOLACE_AF1_KERNEL_SIZE,
+ NOLACE_AF1_LEFT_PADDING,
+ NOLACE_AF1_FILTER_GAIN_A,
+ NOLACE_AF1_FILTER_GAIN_B,
+ NOLACE_AF1_SHAPE_GAIN,
+ hNoLACE->window,
+ arch);
+
+ compute_generic_conv1d(
+ &layers->nolace_post_af1,
+ feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+ state->post_af1_state,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ NOLACE_COND_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* update feature buffer */
+ OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+ fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1);
+#endif
+
+ /* first shape-mix round */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ celt_assert(NOLACE_AF1_OUT_CHANNELS == 2);
+ /* modifies second channel in place */
+ adashape_process_frame(
+ &state->tdshape1_state,
+ x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &layers->nolace_tdshape1_alpha1,
+ &layers->nolace_tdshape1_alpha2,
+ NOLACE_TDSHAPE1_FEATURE_DIM,
+ NOLACE_TDSHAPE1_FRAME_SIZE,
+ NOLACE_TDSHAPE1_AVG_POOL_K,
+ arch
+ );
+
+ adaconv_process_frame(
+ &state->af2_state,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS,
+ x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_af2_kernel,
+ &hNoLACE->layers.nolace_af2_gain,
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_AF2_IN_CHANNELS,
+ NOLACE_AF2_OUT_CHANNELS,
+ NOLACE_AF2_KERNEL_SIZE,
+ NOLACE_AF2_LEFT_PADDING,
+ NOLACE_AF2_FILTER_GAIN_A,
+ NOLACE_AF2_FILTER_GAIN_B,
+ NOLACE_AF2_SHAPE_GAIN,
+ hNoLACE->window,
+ arch);
+
+ compute_generic_conv1d(
+ &layers->nolace_post_af2,
+ feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+ state->post_af2_state,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ NOLACE_COND_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* update feature buffer */
+ OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+ fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2);
+#endif
+
+ /* second shape-mix round */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ celt_assert(NOLACE_AF2_OUT_CHANNELS == 2);
+ /* modifies second channel in place */
+ adashape_process_frame(
+ &state->tdshape2_state,
+ x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &layers->nolace_tdshape2_alpha1,
+ &layers->nolace_tdshape2_alpha2,
+ NOLACE_TDSHAPE2_FEATURE_DIM,
+ NOLACE_TDSHAPE2_FRAME_SIZE,
+ NOLACE_TDSHAPE2_AVG_POOL_K,
+ arch
+ );
+
+ adaconv_process_frame(
+ &state->af3_state,
+ x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_af3_kernel,
+ &hNoLACE->layers.nolace_af3_gain,
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_AF3_IN_CHANNELS,
+ NOLACE_AF3_OUT_CHANNELS,
+ NOLACE_AF3_KERNEL_SIZE,
+ NOLACE_AF3_LEFT_PADDING,
+ NOLACE_AF3_FILTER_GAIN_A,
+ NOLACE_AF3_FILTER_GAIN_B,
+ NOLACE_AF3_SHAPE_GAIN,
+ hNoLACE->window,
+ arch);
+
+ compute_generic_conv1d(
+ &layers->nolace_post_af3,
+ feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+ state->post_af3_state,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ NOLACE_COND_DIM,
+ ACTIVATION_TANH,
+ arch);
+ }
+
+ /* update feature buffer */
+ OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+ /* third shape-mix round */
+ for (i_subframe = 0; i_subframe < 4; i_subframe++)
+ {
+ celt_assert(NOLACE_AF3_OUT_CHANNELS == 2);
+ /* modifies second channel in place */
+ adashape_process_frame(
+ &state->tdshape3_state,
+ x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &layers->nolace_tdshape3_alpha1,
+ &layers->nolace_tdshape3_alpha2,
+ NOLACE_TDSHAPE3_FEATURE_DIM,
+ NOLACE_TDSHAPE3_FRAME_SIZE,
+ NOLACE_TDSHAPE3_AVG_POOL_K,
+ arch
+ );
+
+ adaconv_process_frame(
+ &state->af4_state,
+ x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS,
+ x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS,
+ feature_buffer + i_subframe * NOLACE_COND_DIM,
+ &hNoLACE->layers.nolace_af4_kernel,
+ &hNoLACE->layers.nolace_af4_gain,
+ NOLACE_COND_DIM,
+ NOLACE_FRAME_SIZE,
+ NOLACE_OVERLAP_SIZE,
+ NOLACE_AF4_IN_CHANNELS,
+ NOLACE_AF4_OUT_CHANNELS,
+ NOLACE_AF4_KERNEL_SIZE,
+ NOLACE_AF4_LEFT_PADDING,
+ NOLACE_AF4_FILTER_GAIN_A,
+ NOLACE_AF4_FILTER_GAIN_B,
+ NOLACE_AF4_SHAPE_GAIN,
+ hNoLACE->window,
+ arch);
+    }
+
+ /* de-emphasis */
+ for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
+ {
+ x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem;
+ state->deemph_mem = x_out[i_sample];
+ }
+#ifdef DEBUG_NOLACE
+ fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph);
+#endif
+}
+
+#endif /* #ifndef DISABLE_NOLACE */
+
+/* API */
+
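+/* resets the feature state and the state of the selected enhancement method;
+   features.reset = 2 requests a two-frame bypass/cross-fade after the reset */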
+void osce_reset(silk_OSCE_struct *hOSCE, int method)
+{
+ OSCEState *state = &hOSCE->state;
+
+ OPUS_CLEAR(&hOSCE->features, 1);
+
+ switch(method)
+ {
+ case OSCE_METHOD_NONE:
+ break;
+#ifndef DISABLE_LACE
+ case OSCE_METHOD_LACE:
+ reset_lace_state(&state->lace);
+ break;
+#endif
+#ifndef DISABLE_NOLACE
+ case OSCE_METHOD_NOLACE:
+ reset_nolace_state(&state->nolace);
+ break;
+#endif
+ default:
+ celt_assert(0 && "method not defined"); /* Question: return error code? */
+ }
+ hOSCE->method = method;
+ hOSCE->features.reset = 2;
+}
+
+
+#if 0
+#include <stdio.h>
+static void print_float_array(FILE *fid, const char *name, const float *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %f\n", name, i, array[i]);
+ }
+}
+
+static void print_int_array(FILE *fid, const char *name, const int *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %d\n", name, i, array[i]);
+ }
+}
+
+static void print_int8_array(FILE *fid, const char *name, const opus_int8 *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %d\n", name, i, array[i]);
+ }
+}
+
+static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer)
+{
+ int i, n_in, n_out, n_total;
+ char tmp[256];
+
+ n_in = layer->nb_inputs;
+ n_out = layer->nb_outputs;
+ n_total = n_in * n_out;
+
+ fprintf(fid, "\nprinting layer %s...\n", name);
+ fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, n_in, name, n_out);
+
+
+ if (layer->bias != NULL) {sprintf(tmp, "%s.bias", name); print_float_array(fid, tmp, layer->bias, n_out);}
+ if (layer->subias != NULL) {sprintf(tmp, "%s.subias", name); print_float_array(fid, tmp, layer->subias, n_out);}
+ if (layer->weights != NULL) {sprintf(tmp, "%s.weights", name); print_int8_array(fid, tmp, layer->weights, n_total);}
+ if (layer->float_weights != NULL) {sprintf(tmp, "%s.float_weights", name); print_float_array(fid, tmp, layer->float_weights, n_total);}
+    /*if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);}*/
+ if (layer->diag != NULL) {sprintf(tmp, "%s.diag", name); print_float_array(fid, tmp, layer->diag, n_in);}
+ if (layer->scale != NULL) {sprintf(tmp, "%s.scale", name); print_float_array(fid, tmp, layer->scale, n_out);}
+
+}
+#endif
+
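+/* loads LACE/NoLACE weights either from a binary blob (data != NULL) or from the
+   built-in weight arrays; returns 0 on success and -1 on failure */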
+int osce_load_models(OSCEModel *model, const unsigned char *data, int len)
+{
+ int ret = 0;
+ WeightArray *list;
+
+ if (data != NULL && len)
+ {
+ /* init from buffer */
+ parse_weights(&list, data, len);
+
+#ifndef DISABLE_LACE
+ if (ret == 0) {ret = init_lace(&model->lace, list);}
+#endif
+
+#ifndef DISABLE_NOLACE
+ if (ret == 0) {ret = init_nolace(&model->nolace, list);}
+#endif
+
+ free(list);
+ } else
+ {
+#ifdef USE_WEIGHTS_FILE
+ return -1;
+#else
+#ifndef DISABLE_LACE
+ if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);}
+#endif
+
+#ifndef DISABLE_NOLACE
+ if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);}
+#endif
+
+#endif /* USE_WEIGHTS_FILE */
+ }
+
+ ret = ret ? -1 : 0;
+ return ret;
+}
+
+void osce_enhance_frame(
+ OSCEModel *model, /* I OSCE model struct */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ opus_int16 xq[], /* I/O Decoded speech */
+ opus_int32 num_bits, /* I Size of SILK payload in bits */
+ int arch /* I Run-time architecture */
+)
+{
+ float in_buffer[320];
+ float out_buffer[320];
+ float features[4 * OSCE_FEATURE_DIM];
+ float numbits[2];
+ int periods[4];
+ int i;
+
+ /* enhancement only implemented for 20 ms frame at 16kHz */
+ if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4)
+ {
+ osce_reset(&psDec->osce, psDec->osce.method);
+ return;
+ }
+
+ osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits);
+
+ /* scale input */
+ for (i = 0; i < 320; i++)
+ {
+ in_buffer[i] = ((float) xq[i]) * (1.f/32768.f);
+ }
+
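+    /* run the selected enhancement method on the scaled input signal */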
+ switch(psDec->osce.method)
+ {
+ case OSCE_METHOD_NONE:
+ OPUS_COPY(out_buffer, in_buffer, 320);
+ break;
+#ifndef DISABLE_LACE
+ case OSCE_METHOD_LACE:
+ lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch);
+ break;
+#endif
+#ifndef DISABLE_NOLACE
+ case OSCE_METHOD_NOLACE:
+ nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch);
+ break;
+#endif
+ default:
+ celt_assert(0 && "method not defined");
+ }
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ int k;
+
+ static FILE *flpc = NULL;
+ static FILE *fgain = NULL;
+ static FILE *fltp = NULL;
+ static FILE *fperiod = NULL;
+ static FILE *fnoisy16k = NULL;
+ static FILE* f_numbits = NULL;
+ static FILE* f_numbits_smooth = NULL;
+
+ if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
+ if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
+ if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");}
+ if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");}
+ if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");}
+ if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
+ if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
+
+ fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
+ fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);
+
+ for (k = 0; k < psDec->nb_subfr; k++)
+ {
+ float tmp;
+ int16_t itmp;
+ float lpc_buffer[16] = {0};
+ opus_int16 *A_Q12, *B_Q14;
+
+ (void) num_bits;
+ (void) arch;
+
+ /* gain */
+ tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16);
+ fwrite(&tmp, sizeof(tmp), 1, fgain);
+
+ /* LPC */
+ A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+ for (i = 0; i < psDec->LPC_order; i++)
+ {
+ lpc_buffer[i] = (float) A_Q12[i] / (1U << 12);
+ }
+ fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc);
+
+ /* LTP */
+ B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+ for (i = 0; i < 5; i++)
+ {
+ tmp = (float) B_Q14[i] / (1U << 14);
+ fwrite(&tmp, sizeof(tmp), 1, fltp);
+ }
+
+ /* periods */
+ itmp = psDec->indices.signalType == TYPE_VOICED ? psDecCtrl->pitchL[ k ] : 0;
+ fwrite(&itmp, sizeof(itmp), 1, fperiod);
+ }
+
+    fwrite(xq, sizeof(xq[0]), psDec->nb_subfr * psDec->subfr_length, fnoisy16k);
+#endif
+
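+    /* during the two-frame reset hangover, bypass (reset > 1) or cross-fade (reset == 1)
+       the enhanced signal to avoid discontinuities after a state reset */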
+ if (psDec->osce.features.reset > 1)
+ {
+ OPUS_COPY(out_buffer, in_buffer, 320);
+ psDec->osce.features.reset --;
+ }
+ else if (psDec->osce.features.reset)
+ {
+ osce_cross_fade_10ms(out_buffer, in_buffer, 320);
+ psDec->osce.features.reset = 0;
+ }
+
+ /* scale output */
+ for (i = 0; i < 320; i++)
+ {
+ float tmp = 32768.f * out_buffer[i];
+ if (tmp > 32767.f) tmp = 32767.f;
+ if (tmp < -32767.f) tmp = -32767.f;
+ xq[i] = float2int(tmp);
+ }
+
+}
+
+
+#if 0
+
+#include <stdio.h>
+
+void lace_feature_net_compare(
+ const char * prefix,
+ int num_frames,
+ LACE* hLACE
+)
+{
+ char in_feature_file[256];
+ char out_feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char message[512];
+ int i_frame, i_feature;
+ float mse;
+ float in_features[4 * LACE_NUM_FEATURES];
+ float out_features[4 * LACE_COND_DIM];
+ float out_features2[4 * LACE_COND_DIM];
+ float numbits[2];
+ int periods[4];
+
+ init_lace(hLACE);
+
+ FILE *f_in_features, *f_out_features, *f_numbits, *f_periods;
+
+ strcpy(in_feature_file, prefix);
+ strcat(in_feature_file, "_in_features.f32");
+ f_in_features = fopen(in_feature_file, "rb");
+ if (f_in_features == NULL)
+ {
+ sprintf(message, "could not open file %s", in_feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(out_feature_file, prefix);
+ strcat(out_feature_file, "_out_features.f32");
+ f_out_features = fopen(out_feature_file, "rb");
+ if (f_out_features == NULL)
+ {
+ sprintf(message, "could not open file %s", out_feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ for (i_frame = 0; i_frame < num_frames; i_frame ++)
+ {
+ if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES)
+ {
+ fprintf(stderr, "could not read frame %d from in_features\n", i_frame);
+ exit(1);
+ }
+ if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM)
+ {
+ fprintf(stderr, "could not read frame %d from out_features\n", i_frame);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+ fprintf(stderr, "could not read frame %d from periods\n", i_frame);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+ fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
+ exit(1);
+ }
+
+
+ lace_feature_net(hLACE, out_features2, in_features, numbits, periods);
+
+        mse = 0;
+ for (int i = 0; i < 4 * LACE_COND_DIM; i ++)
+ {
+ mse += pow(out_features[i] - out_features2[i], 2);
+ }
+ mse /= (4 * LACE_COND_DIM);
+ printf("rmse: %f\n", sqrt(mse));
+
+ }
+
+ fclose(f_in_features);
+ fclose(f_out_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+}
+
+
+void lace_demo(
+ char *prefix,
+ char *output
+)
+{
+ char feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char x_in_file[256];
+ char message[512];
+ float features[4 * LACE_NUM_FEATURES];
+ float numbits[2];
+ int periods[4];
+ float x_in[4 * LACE_FRAME_SIZE];
+ int16_t x_out[4 * LACE_FRAME_SIZE];
+ float buffer[4 * LACE_FRAME_SIZE];
+ LACE hLACE;
+ int frame_counter = 0;
+ FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+
+ init_lace(&hLACE);
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "rb");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "rb");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ f_x_out = fopen(output, "wb");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", output);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ printf("processing %s\n", prefix);
+
+ while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
+ {
+ printf("\rframe: %d", frame_counter++);
+ if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
+ {
+            fprintf(stderr, "could not read frame %d from features\n", frame_counter);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+            fprintf(stderr, "could not read frame %d from periods\n", frame_counter);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+            fprintf(stderr, "could not read frame %d from numbits\n", frame_counter);
+ exit(1);
+ }
+
+ lace_process_20ms_frame(
+ &hLACE,
+ buffer,
+ x_in,
+ features,
+ numbits,
+ periods
+ );
+
+ for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
+ {
+ float tmp = (1UL<<15) * buffer[n];
+ tmp = CLIP(tmp, -32768, 32767);
+ x_out[n] = (int16_t) round(tmp);
+ }
+
+ fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
+ }
+ printf("\ndone!\n");
+
+ fclose(f_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+ fclose(f_x_in);
+ fclose(f_x_out);
+}
+
+void nolace_demo(
+ char *prefix,
+ char *output
+)
+{
+ char feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char x_in_file[256];
+ char message[512];
+ float features[4 * LACE_NUM_FEATURES];
+ float numbits[2];
+ int periods[4];
+ float x_in[4 * LACE_FRAME_SIZE];
+ int16_t x_out[4 * LACE_FRAME_SIZE];
+ float buffer[4 * LACE_FRAME_SIZE];
+ NoLACE hNoLACE;
+ int frame_counter = 0;
+ FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+
+ init_nolace(&hNoLACE);
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "rb");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "rb");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ f_x_out = fopen(output, "wb");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", output);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ printf("processing %s\n", prefix);
+
+ while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
+ {
+ printf("\rframe: %d", frame_counter++);
+ if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
+ {
+            fprintf(stderr, "could not read frame %d from features\n", frame_counter);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+            fprintf(stderr, "could not read frame %d from periods\n", frame_counter);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+            fprintf(stderr, "could not read frame %d from numbits\n", frame_counter);
+ exit(1);
+ }
+
+ nolace_process_20ms_frame(
+ &hNoLACE,
+ buffer,
+ x_in,
+ features,
+ numbits,
+ periods
+ );
+
+ for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
+ {
+ float tmp = (1UL<<15) * buffer[n];
+ tmp = CLIP(tmp, -32768, 32767);
+ x_out[n] = (int16_t) round(tmp);
+ }
+
+ fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
+ }
+ printf("\ndone!\n");
+
+ fclose(f_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+ fclose(f_x_in);
+ fclose(f_x_out);
+}
+
+
+int main()
+{
+#if 0
+ LACE hLACE;
+
+ lace_feature_net_compare("testvec2/lace", 5, &hLACE);
+
+ lace_demo("testdata/test9", "out_lace_c_9kbps.pcm");
+ lace_demo("testdata/test6", "out_lace_c_6kbps.pcm");
+#endif
+ nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm");
+
+}
+#endif
+
+/*gcc -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/
diff --git a/dnn/osce.h b/dnn/osce.h
new file mode 100644
index 00000000..3dd8b7c0
--- /dev/null
+++ b/dnn/osce.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_H
+#define OSCE_H
+
+
+#include "opus_types.h"
+/*#include "osce_config.h"*/
+#ifndef DISABLE_LACE
+#include "lace_data.h"
+#endif
+#ifndef DISABLE_NOLACE
+#include "nolace_data.h"
+#endif
+#include "nndsp.h"
+#include "nnet.h"
+#include "osce_structs.h"
+#include "structs.h"
+
+#define OSCE_METHOD_NONE 0
+#ifndef DISABLE_LACE
+#define OSCE_METHOD_LACE 1
+#endif
+#ifndef DISABLE_NOLACE
+#define OSCE_METHOD_NOLACE 2
+#endif
+
+#if !defined(DISABLE_NOLACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
+#elif !defined(DISABLE_LACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
+#else
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
+#endif
+
+/* API */
+
+
+void osce_enhance_frame(
+ OSCEModel *model, /* I OSCE model struct */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ opus_int16 xq[], /* I/O Decoded speech */
+ opus_int32 num_bits, /* I Size of SILK payload in bits */
+ int arch /* I Run-time architecture */
+);
+
+
+int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len);
+void osce_reset(silk_OSCE_struct *hOSCE, int method);
+
+
+#endif
diff --git a/dnn/osce_config.h b/dnn/osce_config.h
new file mode 100644
index 00000000..de94fe2f
--- /dev/null
+++ b/dnn/osce_config.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_CONFIG
+#define OSCE_CONFIG
+
+#define OSCE_MAX_RNN_NEURONS 256
+
+#define OSCE_FEATURES_MAX_HISTORY 350
+#define OSCE_FEATURE_DIM 93
+#define OSCE_MAX_FEATURE_FRAMES 4
+
+#define OSCE_CLEAN_SPEC_NUM_BANDS 64
+#define OSCE_NOISY_SPEC_NUM_BANDS 18
+
+#define OSCE_NO_PITCH_VALUE 7
+
+#define OSCE_PREEMPH 0.85f
+
+#define OSCE_PITCH_HANGOVER 8
+
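+/* feature vector layout (start index / length pairs); the lengths sum to OSCE_FEATURE_DIM = 93 */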
+#define OSCE_CLEAN_SPEC_START 0
+#define OSCE_CLEAN_SPEC_LENGTH 64
+
+#define OSCE_NOISY_CEPSTRUM_START 64
+#define OSCE_NOISY_CEPSTRUM_LENGTH 18
+
+#define OSCE_ACORR_START 82
+#define OSCE_ACORR_LENGTH 5
+
+#define OSCE_LTP_START 87
+#define OSCE_LTP_LENGTH 5
+
+#define OSCE_LOG_GAIN_START 92
+#define OSCE_LOG_GAIN_LENGTH 1
+
+
+#endif \ No newline at end of file
diff --git a/dnn/osce_features.c b/dnn/osce_features.c
new file mode 100644
index 00000000..0466f132
--- /dev/null
+++ b/dnn/osce_features.c
@@ -0,0 +1,454 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define OSCE_SPEC_WINDOW_SIZE 320
+#define OSCE_SPEC_NUM_FREQS 161
+
+
+/*DEBUG*/
+/*#define WRITE_FEATURES*/
+/*#define DEBUG_PRINT*/
+/*******/
+
+#include "stack_alloc.h"
+#include "osce_features.h"
+#include "kiss_fft.h"
+#include "os_support.h"
+#include "osce.h"
+#include "freq.h"
+
+
+#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+static const int center_bins_clean[64] = {
+ 0, 2, 5, 8, 10, 12, 15, 18,
+ 20, 22, 25, 28, 30, 33, 35, 38,
+ 40, 42, 45, 48, 50, 52, 55, 58,
+ 60, 62, 65, 68, 70, 73, 75, 78,
+ 80, 82, 85, 88, 90, 92, 95, 98,
+ 100, 102, 105, 108, 110, 112, 115, 118,
+ 120, 122, 125, 128, 130, 132, 135, 138,
+ 140, 142, 145, 148, 150, 152, 155, 160
+};
+
+static const int center_bins_noisy[18] = {
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 32, 40, 48, 56, 64, 80, 96, 112,
+ 136, 160
+};
+
+static const float band_weights_clean[64] = {
+ 0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
+};
+
+static const float band_weights_noisy[18] = {
+ 0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
+ 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
+ 0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
+ 0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
+ 0.041666666667f, 0.080000000000f
+};
+
+static float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
+ 0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
+ 0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
+ 0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
+ 0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
+ 0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
+ 0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
+ 0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
+ 0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
+ 0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
+ 0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
+ 0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
+ 0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
+ 0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
+ 0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
+ 0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
+ 0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
+ 0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
+ 0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
+ 0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
+ 0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
+ 0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
+ 0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
+ 0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
+ 0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
+ 0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
+ 0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
+ 0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
+ 0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
+ 0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
+ 0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
+ 0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
+ 0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
+ 0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
+ 0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
+ 0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
+ 0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
+ 0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
+ 0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
+ 0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
+ 0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
+ 0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
+ 0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
+ 0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
+ 0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
+ 0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
+ 0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
+ 0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
+ 0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
+ 0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
+ 0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
+ 0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
+ 0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
+ 0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
+ 0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
+ 0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
+ 0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
+ 0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
+ 0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
+ 0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
+ 0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
+ 0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
+ 0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
+ 0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
+ 0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
+};
+
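+/* triangular filterbank: each FFT bin is split linearly between the two adjacent band
+   centers and scaled by the per-band normalization weights */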
+static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
+{
+ int b, i;
+ float frac;
+
+    celt_assert(x_in != x_out);
+
+ x_out[0] = 0;
+ for (b = 0; b < num_bands - 1; b++)
+ {
+ x_out[b+1] = 0;
+ for (i = center_bins[b]; i < center_bins[b+1]; i++)
+ {
+ frac = (float) (center_bins[b+1] - i) / (center_bins[b+1] - center_bins[b]);
+ x_out[b] += band_weights[b] * frac * x_in[i];
+ x_out[b+1] += band_weights[b+1] * (1 - frac) * x_in[i];
+
+ }
+ }
+ x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]];
+#ifdef DEBUG_PRINT
+ for (b = 0; b < num_bands; b++)
+ {
+ printf("band[%d]: %f\n", b, x_out[b]);
+ }
+#endif
+}
+
+
+static void mag_spec_320_onesided(float *out, float *in)
+{
+    kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
+    int k;
+    celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
+ forward_transform(buffer, in);
+
+ for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
+ {
+ out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
+#ifdef DEBUG_PRINT
+ printf("magspec[%d]: %f\n", k, out[k]);
+#endif
+ }
+}
+
+
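+/* clean spectrum feature: log-compressed magnitude response of the LPC synthesis filter
+   1/A(z), accumulated on the 64-band clean filterbank */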
+static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
+{
+ float buffer[OSCE_SPEC_WINDOW_SIZE] = {0};
+ int i;
+
+ /* zero expansion */
+ buffer[0] = 1;
+ for (i = 0; i < lpc_order; i++)
+ {
+ buffer[i+1] = - (float)a_q12[i] / (1U << 12);
+ }
+
+ /* calculate and invert magnitude spectrum */
+ mag_spec_320_onesided(buffer, buffer);
+
+ for (i = 0; i < OSCE_SPEC_NUM_FREQS; i++)
+ {
+ buffer[i] = 1.f / (buffer[i] + 1e-9f);
+ }
+
+ /* apply filterbank */
+ apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);
+
+ /* log and scaling */
+ for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++)
+ {
+ spec[i] = 0.3f * log(spec[i] + 1e-9f);
+ }
+}
+
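+/* noisy cepstrum feature: windowed magnitude spectrum on the 18-band noisy filterbank,
+   converted to log domain and followed by an orthonormal DCT-II */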
+static void calculate_cepstrum(float *cepstrum, float *signal)
+{
+ float buffer[OSCE_SPEC_WINDOW_SIZE];
+ float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
+ int n;
+
+    celt_assert(cepstrum != signal);
+
+ for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
+ {
+ buffer[n] = osce_window[n] * signal[n];
+ }
+
+ /* calculate magnitude spectrum */
+ mag_spec_320_onesided(buffer, buffer);
+
+ /* accumulate bands */
+ apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
+
+ /* log domain conversion */
+ for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
+ {
+ spec[n] = log(spec[n] + 1e-9f);
+#ifdef DEBUG_PRINT
+ printf("logspec[%d]: %f\n", n, spec[n]);
+#endif
+ }
+
+ /* DCT-II (orthonormal) */
+ celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
+ dct(cepstrum, spec);
+}
+
+static void calculate_acorr(float *acorr, float *signal, int lag)
+{
+    int n, k;
+    float xx, xy, yy;
+    celt_assert(acorr != signal);
+
+    for (k = -2; k <= 2; k++)
+    {
+        xx = 0;
+        xy = 0;
+        yy = 0;
+ for (n = 0; n < 80; n++)
+ {
+ /* obviously wasteful -> fix later */
+ xx += signal[n] * signal[n];
+ yy += signal[n - lag + k] * signal[n - lag + k];
+ xy += signal[n] * signal[n - lag + k];
+ }
+ acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
+ }
+}
+
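+/* maps the decoder pitch lag to the feature-domain period: voiced frames pass the lag
+   through, unvoiced frames map to OSCE_NO_PITCH_VALUE (hangover currently compiled out,
+   see OSCE_HANGOVER_BUGFIX) */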
+static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
+{
+ int new_lag;
+
+#ifdef OSCE_HANGOVER_BUGFIX
+#define TESTBIT 1
+#else
+#define TESTBIT 0
+#endif
+
+ /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
+ if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT)
+ /* enter hangover */
+ {
+ new_lag = OSCE_NO_PITCH_VALUE;
+ if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
+ {
+ new_lag = psFeatures->last_lag;
+ psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+ }
+ }
+ else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT)
+ /* continue hangover */
+ {
+ new_lag = psFeatures->last_lag;
+ psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+ }
+ else if (type != TYPE_VOICED)
+ /* unvoiced frame after hangover */
+ {
+ new_lag = OSCE_NO_PITCH_VALUE;
+ psFeatures->pitch_hangover_count = 0;
+ }
+ else
+ /* voiced frame: update last_lag */
+ {
+ new_lag = lag;
+ psFeatures->last_lag = lag;
+ psFeatures->pitch_hangover_count = 0;
+ }
+
+ /* buffer update */
+ psFeatures->last_type = type;
+
+ /* with the current setup this should never happen (but who knows...) */
+    celt_assert(new_lag);
+
+ return new_lag;
+}
+
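+/* assembles the 93-dimensional feature vector per 5-ms subframe: clean spectrum (64),
+   noisy cepstrum (18), pitch autocorrelation (5), LTP coefficients (5) and log gain (1) */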
+void osce_calculate_features(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ float *features, /* O input features */
+ float *numbits, /* O numbits and smoothed numbits */
+ int *periods, /* O pitch lags on subframe basis */
+ const opus_int16 xq[], /* I Decoded speech */
+ opus_int32 num_bits /* I Size of SILK payload in bits */
+)
+{
+ int num_subframes, num_samples;
+ float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
+ float *frame, *pfeatures;
+ OSCEFeatureState *psFeatures;
+ int i, n, k;
+#ifdef WRITE_FEATURES
+ static FILE *f_feat = NULL;
+ if (f_feat == NULL)
+ {
+ f_feat = fopen("assembled_features.f32", "wb");
+ }
+#endif
+
+ /*OPUS_CLEAR(buffer, 1);*/
+ memset(buffer, 0, sizeof(buffer));
+
+ num_subframes = psDec->nb_subfr;
+ num_samples = num_subframes * 80;
+ psFeatures = &psDec->osce.features;
+
+ /* smooth bit count */
+ psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
+ numbits[0] = num_bits;
+#ifdef OSCE_NUMBITS_BUGFIX
+ numbits[1] = psFeatures->numbits_smooth;
+#else
+ numbits[1] = num_bits;
+#endif
+
+ for (n = 0; n < num_samples; n++)
+ {
+ buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
+ }
+ OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
+
+ for (k = 0; k < num_subframes; k++)
+ {
+ pfeatures = features + k * OSCE_FEATURE_DIM;
+ frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
+        memset(pfeatures, 0, OSCE_FEATURE_DIM * sizeof(*pfeatures)); /* precaution */
+
+ /* clean spectrum from lpcs (update every other frame) */
+ if (k % 2 == 0)
+ {
+ calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
+ }
+ else
+ {
+ OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
+ }
+
+ /* noisy cepstrum from signal (update every other frame) */
+ if (k % 2 == 0)
+ {
+ calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
+ }
+ else
+ {
+ OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
+ }
+
+ /* pitch hangover and zero value replacement */
+ periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
+
+ /* auto-correlation around pitch lag */
+ calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);
+
+ /* ltp */
+        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER);
+ for (i = 0; i < OSCE_LTP_LENGTH; i++)
+ {
+ pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
+ }
+
+ /* frame gain */
+ pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);
+
+#ifdef WRITE_FEATURES
+ fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat);
+#endif
+ }
+
+ /* buffer update */
+ OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
+}
+
+
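+/* cross-fades the first 10 ms (160 samples) from the unenhanced to the enhanced signal
+   using the rising half of the analysis window */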
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
+{
+ int i;
+ celt_assert(length >= 160);
+
+ for (i = 0; i < 160; i++)
+ {
+ x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i];
+ }
+}
diff --git a/dnn/osce_features.h b/dnn/osce_features.h
new file mode 100644
index 00000000..91e95f1e
--- /dev/null
+++ b/dnn/osce_features.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_FEATURES_H
+#define OSCE_FEATURES_H
+
+
+#include "structs.h"
+#include "opus_types.h"
+
+#define OSCE_NUMBITS_BUGFIX
+
+void osce_calculate_features(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ float *features, /* O input features */
+ float *numbits, /* O numbits and smoothed numbits */
+ int *periods, /* O pitch lags on subframe basis */
+ const opus_int16 xq[], /* I Decoded speech */
+ opus_int32 num_bits /* I Size of SILK payload in bits */
+);
+
+
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
+
+#endif \ No newline at end of file
diff --git a/dnn/osce_structs.h b/dnn/osce_structs.h
new file mode 100644
index 00000000..a4350be2
--- /dev/null
+++ b/dnn/osce_structs.h
@@ -0,0 +1,124 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_STRUCTS_H
+#define OSCE_STRUCTS_H
+
+#include "opus_types.h"
+#include "osce_config.h"
+#ifndef DISABLE_LACE
+#include "lace_data.h"
+#endif
+#ifndef DISABLE_NOLACE
+#include "nolace_data.h"
+#endif
+#include "nndsp.h"
+#include "nnet.h"
+
+/* feature calculation */
+
+typedef struct {
+ float numbits_smooth;
+ int pitch_hangover_count;
+ int last_lag;
+ int last_type;
+ float signal_history[OSCE_FEATURES_MAX_HISTORY];
+ int reset;
+} OSCEFeatureState;
+
+
+#ifndef DISABLE_LACE
+/* LACE */
+typedef struct {
+ float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
+ float feature_net_gru_state[LACE_COND_DIM];
+ AdaCombState cf1_state;
+ AdaCombState cf2_state;
+ AdaConvState af1_state;
+ float preemph_mem;
+ float deemph_mem;
+} LACEState;
+
+typedef struct
+{
+ LACELayers layers;
+ float window[LACE_OVERLAP_SIZE];
+} LACE;
+
+#endif /* #ifndef DISABLE_LACE */
+
+
+#ifndef DISABLE_NOLACE
+/* NoLACE */
+typedef struct {
+ float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
+ float feature_net_gru_state[NOLACE_COND_DIM];
+ float post_cf1_state[NOLACE_COND_DIM];
+ float post_cf2_state[NOLACE_COND_DIM];
+ float post_af1_state[NOLACE_COND_DIM];
+ float post_af2_state[NOLACE_COND_DIM];
+ float post_af3_state[NOLACE_COND_DIM];
+ AdaCombState cf1_state;
+ AdaCombState cf2_state;
+ AdaConvState af1_state;
+ AdaConvState af2_state;
+ AdaConvState af3_state;
+ AdaConvState af4_state;
+ AdaShapeState tdshape1_state;
+ AdaShapeState tdshape2_state;
+ AdaShapeState tdshape3_state;
+ float preemph_mem;
+ float deemph_mem;
+} NoLACEState;
+
+typedef struct {
+ NOLACELayers layers;
+    float window[NOLACE_OVERLAP_SIZE];
+} NoLACE;
+
+#endif /* #ifndef DISABLE_NOLACE */
+
+/* OSCEModel */
+typedef struct {
+#ifndef DISABLE_LACE
+ LACE lace;
+#endif
+#ifndef DISABLE_NOLACE
+ NoLACE nolace;
+#endif
+} OSCEModel;
+
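+/* per-method states are overlaid in a union since only one enhancement method is active at a time */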
+typedef union {
+#ifndef DISABLE_LACE
+ LACEState lace;
+#endif
+#ifndef DISABLE_NOLACE
+ NoLACEState nolace;
+#endif
+} OSCEState;
+
+#endif \ No newline at end of file
diff --git a/dnn/torch/osce/create_testvectors.py b/dnn/torch/osce/create_testvectors.py
new file mode 100644
index 00000000..a037d0db
--- /dev/null
+++ b/dnn/torch/osce/create_testvectors.py
@@ -0,0 +1,165 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import torch
+import numpy as np
+
+from models import model_dict
+from utils import endoscopy
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and nolace_checkpoint.pth"')
+parser.add_argument('output_folder', type=str, help='output folder for testvectors')
+parser.add_argument('--debug', action='store_true', help='add debug output to output folder')
+
+
+def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
+ feature_dim = adaconv.feature_dim
+ in_channels = adaconv.in_channels
+ out_channels = adaconv.out_channels
+ frame_size = adaconv.frame_size
+
+ features = torch.randn((1, num_frames, feature_dim))
+ x_in = torch.randn((1, in_channels, num_frames * frame_size))
+
+ x_out = adaconv(x_in, features, debug=debug)
+
+ features = features[0].detach().numpy()
+ x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()
+ x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()
+
+ features.tofile(prefix + '_features.f32')
+ x_in.tofile(prefix + '_x_in.f32')
+ x_out.tofile(prefix + '_x_out.f32')
+
+def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False):
+ feature_dim = adacomb.feature_dim
+ in_channels = 1
+ frame_size = adacomb.frame_size
+
+ features = torch.randn((1, num_frames, feature_dim))
+ x_in = torch.randn((1, in_channels, num_frames * frame_size))
+ p_in = torch.randint(adacomb.kernel_size, 250, (1, num_frames))
+
+ x_out = adacomb(x_in, features, p_in, debug=debug)
+
+ features = features[0].detach().numpy()
+ x_in = x_in[0].permute(1, 0).detach().numpy()
+ p_in = p_in[0].detach().numpy().astype(np.int32)
+ x_out = x_out[0].permute(1, 0).detach().numpy()
+
+ features.tofile(prefix + '_features.f32')
+ x_in.tofile(prefix + '_x_in.f32')
+ p_in.tofile(prefix + '_p_in.s32')
+ x_out.tofile(prefix + '_x_out.f32')
+
+def create_adashape_testvector(prefix, adashape, num_frames):
+ feature_dim = adashape.feature_dim
+ frame_size = adashape.frame_size
+
+ features = torch.randn((1, num_frames, feature_dim))
+ x_in = torch.randn((1, 1, num_frames * frame_size))
+
+ x_out = adashape(x_in, features)
+
+ features = features[0].detach().numpy()
+ x_in = x_in.flatten().detach().numpy()
+ x_out = x_out.flatten().detach().numpy()
+
+ features.tofile(prefix + '_features.f32')
+ x_in.tofile(prefix + '_x_in.f32')
+ x_out.tofile(prefix + '_x_out.f32')
+
+def create_feature_net_testvector(prefix, model, num_frames):
+ num_features = model.num_features
+ num_subframes = 4 * num_frames
+
+ input_features = torch.randn((1, num_subframes, num_features))
+ periods = torch.randint(32, 300, (1, num_subframes))
+ numbits = model.numbits_range[0] + torch.rand((1, num_frames, 2)) * (model.numbits_range[1] - model.numbits_range[0])
+
+
+ pembed = model.pitch_embedding(periods)
+ nembed = torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1)
+ full_features = torch.cat((input_features, pembed, nembed), dim=-1)
+
+ cf = model.feature_net(full_features)
+
+ input_features.float().numpy().tofile(prefix + "_in_features.f32")
+ periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32")
+ numbits.float().numpy().tofile(prefix + "_numbits.f32")
+ full_features.detach().numpy().tofile(prefix + "_full_features.f32")
+ cf.detach().numpy().tofile(prefix + "_out_features.f32")
+
+
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ os.makedirs(args.output_folder, exist_ok=True)
+
+ lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu')
+ nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu')
+
+ lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs'])
+ nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs'])
+
+ lace.load_state_dict(lace_checkpoint['state_dict'])
+ nolace.load_state_dict(nolace_checkpoint['state_dict'])
+
+ if args.debug:
+ endoscopy.init(args.output_folder)
+
+ # lace af1, 1 input channel, 1 output channel
+ create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug)
+
+ # nolace af1, 1 input channel, 2 output channels
+ create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug)
+
+# nolace af4, 2 input channels, 1 output channel
+ create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug)
+
+# nolace af2, 2 input channels, 2 output channels
+ create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug)
+
+ # lace cf1
+ create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug)
+
+ # nolace tdshape1
+ create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5)
+
+ # lace feature net
+ create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5)
+
+ if args.debug:
+ endoscopy.close()
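
For reference, the test vectors written above are flat binary dumps (float32 for signals and features, int32 for pitch lags) with the frame dimension first. A minimal sketch of reading an adaconv vector back for comparison against the C implementation; the prefix path and layer dimensions below are illustrative, not taken from the commit:

import numpy as np

num_frames, feature_dim, in_channels, frame_size = 5, 256, 1, 80   # assumed dimensions for lace.af1
prefix = "testvectors/lace_af1"                                     # hypothetical output location

features = np.fromfile(prefix + "_features.f32", dtype=np.float32).reshape(num_frames, feature_dim)
x_in  = np.fromfile(prefix + "_x_in.f32",  dtype=np.float32).reshape(num_frames, in_channels, frame_size)
x_out = np.fromfile(prefix + "_x_out.f32", dtype=np.float32).reshape(num_frames, -1, frame_size)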
diff --git a/dnn/torch/osce/data/silk_enhancement_set.py b/dnn/torch/osce/data/silk_enhancement_set.py
index 65e97508..fd18c4de 100644
--- a/dnn/torch/osce/data/silk_enhancement_set.py
+++ b/dnn/torch/osce/data/silk_enhancement_set.py
@@ -49,7 +49,6 @@ class SilkEnhancementSet(Dataset):
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
- add_offset=False,
add_double_lag_acorr=False,
):
@@ -73,7 +72,6 @@ class SilkEnhancementSet(Dataset):
self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)
self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32)
self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32)
- self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16)
self.clean_signal = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16)
@@ -86,7 +84,6 @@ class SilkEnhancementSet(Dataset):
num_bands_noisy_spec,
noisy_spec_scale,
noisy_apply_dct,
- add_offset,
add_double_lag_acorr)
self.history_len = 700 if add_double_lag_acorr else 350
@@ -120,8 +117,7 @@ class SilkEnhancementSet(Dataset):
self.lpcs[frame_start : frame_stop],
self.gains[frame_start : frame_stop],
self.ltps[frame_start : frame_stop],
- self.periods[frame_start : frame_stop],
- self.offsets[frame_start : frame_stop]
+ self.periods[frame_start : frame_stop]
)
if self.preemph > 0:
diff --git a/dnn/torch/osce/export_model_weights.py b/dnn/torch/osce/export_model_weights.py
index 8b95aca9..f94431d3 100644
--- a/dnn/torch/osce/export_model_weights.py
+++ b/dnn/torch/osce/export_model_weights.py
@@ -40,10 +40,53 @@ import wexchange.torch
from wexchange.torch import dump_torch_weights
from models import model_dict
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from wexchange.torch import dump_torch_weights
+
+
+
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint')
parser.add_argument('output_dir', type=str, help='output folder')
+parser.add_argument('--quantize', action="store_true", help='quantize weights according to the export schedule')
+
+
+schedules = {
+ 'nolace': [
+ ('pitch_embedding', dict()),
+ ('feature_net.conv1', dict()),
+ ('feature_net.conv2', dict(quantize=True, scale=None)),
+ ('feature_net.tconv', dict(quantize=True, scale=None)),
+ ('feature_net.gru', dict()),
+ ('cf1', dict(quantize=True, scale=None)),
+ ('cf2', dict(quantize=True, scale=None)),
+ ('af1', dict(quantize=True, scale=None)),
+ ('tdshape1', dict()),
+ ('tdshape2', dict()),
+ ('tdshape3', dict()),
+ ('af2', dict(quantize=True, scale=None)),
+ ('af3', dict(quantize=True, scale=None)),
+ ('af4', dict(quantize=True, scale=None)),
+ ('post_cf1', dict(quantize=True, scale=None)),
+ ('post_cf2', dict(quantize=True, scale=None)),
+ ('post_af1', dict(quantize=True, scale=None)),
+ ('post_af2', dict(quantize=True, scale=None)),
+ ('post_af3', dict(quantize=True, scale=None))
+ ],
+ 'lace' : [
+ ('pitch_embedding', dict()),
+ ('feature_net.conv1', dict()),
+ ('feature_net.conv2', dict(quantize=True, scale=None)),
+ ('feature_net.tconv', dict(quantize=True, scale=None)),
+ ('feature_net.gru', dict()),
+ ('cf1', dict(quantize=True, scale=None)),
+ ('cf2', dict(quantize=True, scale=None)),
+ ('af1', dict(quantize=True, scale=None))
+ ]
+}
# auxiliary functions
@@ -60,8 +103,28 @@ def sha1(filename):
return sha1.hexdigest()
+def osce_dump_generic(writer, name, module):
+ if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \
+ or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding) \
+ or isinstance(module, LimitedAdaptiveConv1d) or isinstance(module, LimitedAdaptiveComb1d) \
+ or isinstance(module, TDShaper) or isinstance(module, torch.nn.GRU):
+ dump_torch_weights(writer, module, name=name, verbose=True)
+ else:
+ for child_name, child in module.named_children():
+ osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child)
+
+
def export_name(name):
- return name.replace('.', '_')
+ name = name.replace('.', '_')
+ name = name.replace('feature_net', 'fnet')
+ return name
+
+def osce_scheduled_dump(writer, prefix, model, schedule):
+ if not prefix.endswith('_'):
+ prefix += '_'
+
+ for name, kwargs in schedule:
+ dump_torch_weights(writer, model.get_submodule(name), prefix + export_name(name), **kwargs, verbose=True)
if __name__ == "__main__":
args = parser.parse_args()
@@ -76,22 +139,34 @@ if __name__ == "__main__":
# create model and load weights
checkpoint = torch.load(checkpoint_path, map_location='cpu')
model = model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])
+ model.load_state_dict(checkpoint['state_dict'])
# CWriter
model_name = checkpoint['setup']['model']['name']
- cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper())
-
- # dump numbits_embedding parameters by hand
- numbits_embedding = model.get_submodule('numbits_embedding')
- weights = next(iter(numbits_embedding.parameters()))
- for i, c in enumerate(weights):
- cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f")
- cwriter.header.write("\n\n")
+ cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True)
+
+ # Add custom includes and global parameters
+ cwriter.header.write(f'''
+#define {model_name.upper()}_PREEMPH {model.preemph}f
+#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE}
+#define {model_name.upper()}_OVERLAP_SIZE 40
+#define {model_name.upper()}_NUM_FEATURES {model.num_features}
+#define {model_name.upper()}_PITCH_MAX {model.pitch_max}
+#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim}
+#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]}
+#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]}
+#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim}
+#define {model_name.upper()}_COND_DIM {model.cond_dim}
+#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim}
+''')
+
+ for i, s in enumerate(model.numbits_embedding.scale_factors):
+ cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n")
# dump layers
- for name, module in model.named_modules():
- if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \
- or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding):
- dump_torch_weights(cwriter, module, name=export_name(name), verbose=True)
+ if model_name in schedules and args.quantize:
+ osce_scheduled_dump(cwriter, model_name, model, schedules[model_name])
+ else:
+ osce_dump_generic(cwriter, model_name, model)
cwriter.close()
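
To make the naming convention concrete: a schedule entry is resolved to a submodule and a C symbol prefix exactly as osce_scheduled_dump does above. A minimal sketch, assuming model and cwriter are set up as in the __main__ block:

# One 'nolace' schedule entry, resolved by hand.
name, kwargs = ('feature_net.conv2', dict(quantize=True, scale=None))
submodule = model.get_submodule(name)          # the Conv1d inside the feature net
c_name = 'nolace_' + export_name(name)         # '.' -> '_', 'feature_net' -> 'fnet'  =>  'nolace_fnet_conv2'
dump_torch_weights(cwriter, submodule, c_name, **kwargs, verbose=True)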
diff --git a/dnn/torch/osce/models/lace.py b/dnn/torch/osce/models/lace.py
index a11dfc41..58293de4 100644
--- a/dnn/torch/osce/models/lace.py
+++ b/dnn/torch/osce/models/lace.py
@@ -96,7 +96,7 @@ class LACE(NNSBase):
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
# spectral shaping
- self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
+ self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
def flop_count(self, rate=16000, verbose=False):
diff --git a/dnn/torch/osce/models/no_lace.py b/dnn/torch/osce/models/no_lace.py
index 2709274c..0e0fb1b3 100644
--- a/dnn/torch/osce/models/no_lace.py
+++ b/dnn/torch/osce/models/no_lace.py
@@ -96,8 +96,8 @@ class NoLACE(NNSBase):
# comb filters
left_pad = self.kernel_size // 2
right_pad = self.kernel_size - 1 - left_pad
- self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
- self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
+ self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
+ self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
# spectral shaping
self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py
index b146240e..3bb6fa07 100644
--- a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py
+++ b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py
@@ -41,13 +41,13 @@ class LimitedAdaptiveComb1d(nn.Module):
feature_dim,
frame_size=160,
overlap_size=40,
- use_bias=True,
padding=None,
max_lag=256,
name=None,
gain_limit_db=10,
global_gain_limits_db=[-6, 6],
- norm_p=2):
+ norm_p=2,
+ **kwargs):
"""
Parameters:
@@ -87,7 +87,6 @@ class LimitedAdaptiveComb1d(nn.Module):
self.kernel_size = kernel_size
self.frame_size = frame_size
self.overlap_size = overlap_size
- self.use_bias = use_bias
self.max_lag = max_lag
self.limit_db = gain_limit_db
self.norm_p = norm_p
@@ -101,8 +100,6 @@ class LimitedAdaptiveComb1d(nn.Module):
# network for generating convolution weights
self.conv_kernel = nn.Linear(feature_dim, kernel_size)
- if self.use_bias:
- self.conv_bias = nn.Linear(feature_dim,1)
# comb filter gain
self.filter_gain = nn.Linear(feature_dim, 1)
@@ -154,9 +151,6 @@ class LimitedAdaptiveComb1d(nn.Module):
conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))
conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True))
- if self.use_bias:
- conv_biases = self.conv_bias(features).permute(0, 2, 1)
-
conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit)
# calculate gains
global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b)
@@ -190,10 +184,6 @@ class LimitedAdaptiveComb1d(nn.Module):
new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1)
-
- if self.use_bias:
- new_chunk = new_chunk + conv_biases[:, :, i : i + 1]
-
offset = self.max_lag + self.padding[0]
new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + overlap_size])
@@ -223,10 +213,6 @@ class LimitedAdaptiveComb1d(nn.Module):
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
- # bias computation
- if self.use_bias:
- count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
-
# a0 computation
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
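
Replacing the use_bias flag with **kwargs keeps older call sites and saved model configurations working: a stale use_bias argument is now silently absorbed instead of raising a TypeError (as in the unchanged cf2 construction in lace.py above, which still passes use_bias=False). A hedged sketch with illustrative constructor values:

# use_bias=False is swallowed by **kwargs and no longer has any effect.
cf = LimitedAdaptiveComb1d(15, 256, frame_size=80, overlap_size=40,
                           use_bias=False, max_lag=257)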
diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py
index 073ea1b1..a17b0e9b 100644
--- a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py
+++ b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py
@@ -46,12 +46,12 @@ class LimitedAdaptiveConv1d(nn.Module):
feature_dim,
frame_size=160,
overlap_size=40,
- use_bias=True,
padding=None,
name=None,
gain_limits_db=[-6, 6],
shape_gain_db=0,
- norm_p=2):
+ norm_p=2,
+ **kwargs):
"""
Parameters:
@@ -90,7 +90,6 @@ class LimitedAdaptiveConv1d(nn.Module):
self.kernel_size = kernel_size
self.frame_size = frame_size
self.overlap_size = overlap_size
- self.use_bias = use_bias
self.gain_limits_db = gain_limits_db
self.shape_gain_db = shape_gain_db
self.norm_p = norm_p
@@ -104,9 +103,6 @@ class LimitedAdaptiveConv1d(nn.Module):
# network for generating convolution weights
self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size)
- if self.use_bias:
- self.conv_bias = nn.Linear(feature_dim, out_channels)
-
self.shape_gain = min(1, 10**(shape_gain_db / 20))
self.filter_gain = nn.Linear(feature_dim, out_channels)
@@ -133,10 +129,6 @@ class LimitedAdaptiveConv1d(nn.Module):
count += 2 * (frame_rate * self.feature_dim * self.kernel_size)
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
- # bias computation
- if self.use_bias:
- count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
-
# gain computation
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
@@ -183,9 +175,6 @@ class LimitedAdaptiveConv1d(nn.Module):
conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels
- if self.use_bias:
- conv_biases = self.conv_bias(features).permute(0, 2, 1)
-
# calculate gains
conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b)
if debug and batch_size == 1:
diff --git a/dnn/torch/osce/utils/silk_features.py b/dnn/torch/osce/utils/silk_features.py
index 2997ef5f..8c5dbf05 100644
--- a/dnn/torch/osce/utils/silk_features.py
+++ b/dnn/torch/osce/utils/silk_features.py
@@ -33,6 +33,7 @@ import numpy as np
import torch
import scipy
+import scipy.signal
from utils.pitch import hangover, calculate_acorr_window
from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc
@@ -59,7 +60,6 @@ def silk_feature_factory(no_pitch_value=256,
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
- add_offset=False,
add_double_lag_acorr=False
):
@@ -67,7 +67,7 @@ def silk_feature_factory(no_pitch_value=256,
fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True)
fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True)
- def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets):
+ def create_features(noisy, noisy_history, lpcs, gains, ltps, periods):
periods = periods.copy()
@@ -89,10 +89,7 @@ def silk_feature_factory(no_pitch_value=256,
acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr)
- if add_offset:
- features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32)
- else:
- features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
+ features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
return features, periods.astype(np.int64)
@@ -110,7 +107,6 @@ def load_inference_data(path,
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
- add_offset=False,
add_double_lag_acorr=False,
**kwargs):
@@ -122,13 +118,12 @@ def load_inference_data(path,
periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)
num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1)
num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1)
- offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
# load signal, add back delay and pre-emphasize
signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15)
signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32)
- create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr)
+ create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr)
num_frames = min((len(signal) // 320) * 4, len(lpcs))
signal = signal[: num_frames * 80]
@@ -138,11 +133,10 @@ def load_inference_data(path,
periods = periods[: num_frames]
num_bits = num_bits[: num_frames // 4]
num_bits_smooth = num_bits[: num_frames // 4]
- offsets = offsets[: num_frames]
numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0)
- features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets)
+ features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods)
if preemph > 0:
signal[1:] -= preemph * signal[:-1]
diff --git a/dnn/torch/osce/utils/spec.py b/dnn/torch/osce/utils/spec.py
index 01b923ae..59f53538 100644
--- a/dnn/torch/osce/utils/spec.py
+++ b/dnn/torch/osce/utils/spec.py
@@ -30,6 +30,7 @@
import math as m
import numpy as np
import scipy
+import scipy.fftpack
import torch
def erb(f):
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
index 36050881..2745f337 100644
--- a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
+++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
@@ -38,7 +38,8 @@ class CWriter:
create_state_struct=False,
enable_binary_blob=True,
model_struct_name="Model",
- nnet_header="nnet.h"):
+ nnet_header="nnet.h",
+ add_typedef=False):
"""
        Writer class for creating source and header files for weight exports to C
@@ -73,6 +74,7 @@ class CWriter:
self.enable_binary_blob = enable_binary_blob
self.create_state_struct = create_state_struct
self.model_struct_name = model_struct_name
+ self.add_typedef = add_typedef
# for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)
self.layer_dict = OrderedDict()
@@ -119,11 +121,17 @@ f"""
# create model type
if self.enable_binary_blob:
- self.header.write(f"\nstruct {self.model_struct_name} {{")
+ if self.add_typedef:
+ self.header.write(f"\ntypedef struct {{")
+ else:
+ self.header.write(f"\nstruct {self.model_struct_name} {{")
for name, data in self.layer_dict.items():
layer_type = data[0]
self.header.write(f"\n {layer_type} {name};")
- self.header.write(f"\n}};\n")
+ if self.add_typedef:
+ self.header.write(f"\n}} {self.model_struct_name};\n")
+ else:
+ self.header.write(f"\n}};\n")
init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
self.header.write(f"\n{init_prototype};\n")
diff --git a/dnn/torch/weight-exchange/wexchange/torch/__init__.py b/dnn/torch/weight-exchange/wexchange/torch/__init__.py
index 98c96fad..8245566d 100644
--- a/dnn/torch/weight-exchange/wexchange/torch/__init__.py
+++ b/dnn/torch/weight-exchange/wexchange/torch/__init__.py
@@ -34,3 +34,4 @@ from .torch import dump_torch_gru_weights, load_torch_gru_weights
from .torch import dump_torch_grucell_weights
from .torch import dump_torch_embedding_weights, load_torch_embedding_weights
from .torch import dump_torch_weights, load_torch_weights
+from .torch import dump_torch_adaptive_conv1d_weights
\ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/torch/torch.py b/dnn/torch/weight-exchange/wexchange/torch/torch.py
index 281d9be3..f7e16032 100644
--- a/dnn/torch/weight-exchange/wexchange/torch/torch.py
+++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py
@@ -28,12 +28,154 @@
"""
import os
+import sys
import torch
import numpy as np
+sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))
+try:
+ import utils.layers as osce_layers
+ from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+ from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+ from utils.layers.td_shaper import TDShaper
+    has_osce = True
+except ImportError:
+    has_osce = False
+
from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer
+def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
+
+
+ w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
+ b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
+ w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
+ b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
+
+ if isinstance(where, CWriter):
+ # pad kernel for quantization
+ left_padding = adaconv.padding[0]
+ kernel_size = adaconv.kernel_size
+ in_channels = adaconv.in_channels
+ out_channels = adaconv.out_channels
+ feature_dim = adaconv.feature_dim
+
+ if quantize and kernel_size % 8:
+ kernel_padding = 8 - (kernel_size % 8)
+ w_kernel = np.concatenate(
+ (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),
+ dtype=w_kernel.dtype,
+ axis=2).reshape(-1, feature_dim)
+ b_kernel = np.concatenate(
+ (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),
+ dtype=b_kernel.dtype,
+ axis=2).reshape(-1)
+ left_padding += kernel_padding
+ kernel_size += kernel_padding
+
+ # write relevant scalar parameters to header file
+ where.header.write(f"""
+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
+#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f
+#define {name.upper()}_KERNEL_SIZE {kernel_size}
+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
+#define {name.upper()}_LEFT_PADDING {left_padding}
+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
+#define {name.upper()}_NORM_P {adaconv.norm_p}
+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
+"""
+ )
+
+ print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
+ print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+
+
+ else:
+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
+
+
+def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
+
+
+ w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
+ b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
+ w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
+ b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
+ w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()
+ b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()
+
+
+ if isinstance(where, CWriter):
+ # pad kernel for quantization
+ left_padding = adaconv.padding[0]
+ kernel_size = adaconv.kernel_size
+
+ if quantize and w_kernel.shape[0] % 8:
+ kernel_padding = 8 - (w_kernel.shape[0] % 8)
+ w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)
+ b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)
+ left_padding += kernel_padding
+ kernel_size += kernel_padding
+ # write relevant scalar parameters to header file
+ where.header.write(f"""
+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
+#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f
+#define {name.upper()}_KERNEL_SIZE {kernel_size}
+#define {name.upper()}_LEFT_PADDING {left_padding}
+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
+#define {name.upper()}_NORM_P {adaconv.norm_p}
+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
+#define {name.upper()}_MAX_LAG {adaconv.max_lag}
+"""
+ )
+
+ print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
+ print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+ print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+
+
+ else:
+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
+        np.save(os.path.join(where, 'weight_global_gain.npy'), w_global_gain)
+        np.save(os.path.join(where, 'bias_global_gain.npy'), b_global_gain)
+
+def dump_torch_tdshaper(where, shaper, name='tdshaper'):
+
+ if isinstance(where, CWriter):
+ where.header.write(f"""
+#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}
+#define {name.upper()}_FRAME_SIZE {shaper.frame_size}
+#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}
+#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}
+#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}
+"""
+ )
+
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")
+
+ if shaper.innovate:
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c")
+
+
+
def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
assert gru.num_layers == 1
@@ -221,7 +363,6 @@ def load_torch_conv2d_weights(where, conv):
def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):
- print("quantize = ", quantize)
w = embed.weight.detach().cpu().numpy().copy().transpose()
b = np.zeros(w.shape[0], dtype=w.dtype)
@@ -257,11 +398,21 @@ def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
elif isinstance(module, torch.nn.Conv2d):
return dump_torch_conv2d_weights(where, module, name, **kwargs)
elif isinstance(module, torch.nn.Embedding):
- return dump_torch_embedding_weights(where, module)
+ return dump_torch_embedding_weights(where, module, name, **kwargs)
elif isinstance(module, torch.nn.ConvTranspose1d):
return dump_torch_tconv1d_weights(where, module, name, **kwargs)
else:
- raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
+ if has_osce:
+ if isinstance(module, LimitedAdaptiveConv1d):
+ dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)
+ elif isinstance(module, LimitedAdaptiveComb1d):
+ dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)
+ elif isinstance(module, TDShaper):
+ dump_torch_tdshaper(where, module, name, **kwargs)
+ else:
+ raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
+ else:
+ raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
def load_torch_weights(where, module):
""" generic function for loading weights of some torch.nn.Module """
diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c
index 722a373d..395590f4 100644
--- a/dnn/write_lpcnet_weights.c
+++ b/dnn/write_lpcnet_weights.c
@@ -46,6 +46,10 @@
#include "plc_data.c"
#include "dred_rdovae_enc_data.c"
#include "dred_rdovae_dec_data.c"
+#ifdef ENABLE_OSCE
+#include "lace_data.c"
+#include "nolace_data.c"
+#endif
void write_weights(const WeightArray *list, FILE *fout)
{
@@ -53,6 +57,9 @@ void write_weights(const WeightArray *list, FILE *fout)
unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
while (list[i].name != NULL) {
WeightHead h;
+ if (strlen(list[i].name) >= sizeof(h.name) - 1) {
+ printf("[write_weights] warning: name %s too long\n", list[i].name);
+ }
memcpy(h.head, "DNNw", 4);
h.version = WEIGHT_BLOB_VERSION;
h.type = list[i].type;
@@ -77,6 +84,14 @@ int main(void)
write_weights(lpcnet_plc_arrays, fout);
write_weights(rdovaeenc_arrays, fout);
write_weights(rdovaedec_arrays, fout);
+#ifdef ENABLE_OSCE
+#ifndef DISABLE_LACE
+ write_weights(lacelayers_arrays, fout);
+#endif
+#ifndef DISABLE_NOLACE
+ write_weights(nolacelayers_arrays, fout);
+#endif
+#endif
fclose(fout);
return 0;
}
diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk
index da610ca1..ce74d954 100644
--- a/lpcnet_headers.mk
+++ b/lpcnet_headers.mk
@@ -29,3 +29,12 @@ dnn/dred_rdovae_enc_data.h \
dnn/dred_rdovae_dec.h \
dnn/dred_rdovae_dec_data.h \
dnn/dred_rdovae_stats_data.h
+
+OSCE_HEAD= \
+dnn/osce.h \
+dnn/osce_config.h \
+dnn/osce_structs.h \
+dnn/osce_features.h \
+dnn/nndsp.h \
+dnn/lace_data.h \
+dnn/nolace_data.h
diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk
index 9b8863ad..17f04756 100644
--- a/lpcnet_sources.mk
+++ b/lpcnet_sources.mk
@@ -23,6 +23,13 @@ silk/dred_encoder.c \
silk/dred_coding.c \
silk/dred_decoder.c
+OSCE_SOURCES = \
+dnn/osce.c \
+dnn/osce_features.c \
+dnn/nndsp.c \
+dnn/lace_data.c \
+dnn/nolace_data.c
+
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
diff --git a/meson.build b/meson.build
index 289c5917..f468bad4 100644
--- a/meson.build
+++ b/meson.build
@@ -148,6 +148,7 @@ opts = [
[ 'float-approx', 'FLOAT_APPROX' ],
[ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ],
[ 'enable-dred', 'ENABLE_DRED' ],
+ [ 'enable-osce', 'ENABLE_OSCE' ],
[ 'assertions', 'ENABLE_ASSERTIONS' ],
[ 'hardening', 'ENABLE_HARDENING' ],
[ 'fuzzing', 'FUZZING' ],
diff --git a/meson_options.txt b/meson_options.txt
index d5b69eea..46099276 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -9,6 +9,7 @@ option('intrinsics', type : 'feature', value : 'auto', description : 'Intrinsics
option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)')
option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)')
+option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)')
option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights')
option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames')
diff --git a/silk/API.h b/silk/API.h
index 6e623b84..878965c7 100644
--- a/silk/API.h
+++ b/silk/API.h
@@ -92,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co
/* Decoder functions */
/****************************************/
+
+/***********************************************/
+/* Load OSCE models from external data pointer */
+/***********************************************/
+opus_int silk_LoadOSCEModels(
+    void *decState, /* I/O State */
+ const unsigned char *data, /* I pointer to binary blob */
+ int len /* I length of binary blob data */
+);
+
/***********************************************/
/* Get size in bytes of the Silk decoder state */
/***********************************************/
@@ -100,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
);
/*************************/
-/* Init or Reset decoder */
+/* Init and Reset decoder */
/*************************/
+opus_int silk_ResetDecoder( /* O Returns error code */
+ void *decState /* I/O State */
+);
+
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
);
diff --git a/silk/control.h b/silk/control.h
index d30d114c..f5633e62 100644
--- a/silk/control.h
+++ b/silk/control.h
@@ -147,6 +147,11 @@ typedef struct {
/* I: Enable Deep PLC */
opus_int enable_deep_plc;
+
+#ifdef ENABLE_OSCE
+ /* I: OSCE method */
+ opus_int osce_method;
+#endif
} silk_DecControlStruct;
#ifdef __cplusplus
diff --git a/silk/dec_API.c b/silk/dec_API.c
index a29ecc73..e4ae8343 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "os_support.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#include "osce_structs.h"
+#endif
+
/************************/
/* Decoder Super Struct */
/************************/
@@ -42,12 +47,33 @@ typedef struct {
opus_int nChannelsAPI;
opus_int nChannelsInternal;
opus_int prev_decode_only_middle;
+#ifdef ENABLE_OSCE
+ OSCEModel osce_model;
+#endif
} silk_decoder;
/*********************/
/* Decoder functions */
/*********************/
+
+
+opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len)
+{
+#ifdef ENABLE_OSCE
+ opus_int ret = SILK_NO_ERROR;
+
+ ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len);
+
+ return ret;
+#else
+ (void) decState;
+ (void) data;
+ (void) len;
+ return SILK_NO_ERROR;
+#endif
+}
+
opus_int silk_Get_Decoder_Size( /* O Returns error code */
opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
)
@@ -60,6 +86,24 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
}
/* Reset decoder state */
+opus_int silk_ResetDecoder( /* O Returns error code */
+ void *decState /* I/O State */
+)
+{
+ opus_int n, ret = SILK_NO_ERROR;
+ silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
+
+ for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
+ ret = silk_reset_decoder( &channel_state[ n ] );
+ }
+ silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
+ /* Not strictly needed, but it's cleaner that way */
+ ((silk_decoder *)decState)->prev_decode_only_middle = 0;
+
+ return ret;
+}
+
+
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
)
@@ -67,6 +111,11 @@ opus_int silk_InitDecoder( /* O Returns error co
opus_int n, ret = SILK_NO_ERROR;
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
+#ifndef USE_WEIGHTS_FILE
+ /* load osce models */
+ silk_LoadOSCEModels(decState, NULL, 0);
+#endif
+
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
ret = silk_init_decoder( &channel_state[ n ] );
}
@@ -301,10 +350,18 @@ opus_int silk_Decode( /* O Returns error co
} else {
condCoding = CODE_CONDITIONALLY;
}
+#ifdef ENABLE_OSCE
+ if ( channel_state[n].osce.method != decControl->osce_method ) {
+ osce_reset( &channel_state[n].osce, decControl->osce_method );
+ }
+#endif
ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding,
#ifdef ENABLE_DEEP_PLC
n == 0 ? lpcnet : NULL,
#endif
+#ifdef ENABLE_OSCE
+ &psDec->osce_model,
+#endif
arch);
} else {
silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
diff --git a/silk/decode_frame.c b/silk/decode_frame.c
index b393952c..48f74aef 100644
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "PLC.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
/****************/
/* Decode frame */
/****************/
@@ -46,16 +50,25 @@ opus_int silk_decode_frame(
#ifdef ENABLE_DEEP_PLC
LPCNetPLCState *lpcnet,
#endif
+#ifdef ENABLE_OSCE
+ OSCEModel *osce_model,
+#endif
int arch /* I Run-time architecture */
)
{
VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
+#ifdef ENABLE_OSCE
+ opus_int32 ec_start;
+#endif
SAVE_STACK;
L = psDec->frame_length;
ALLOC( psDecCtrl, 1, silk_decoder_control );
psDecCtrl->LTP_scale_Q14 = 0;
+#ifdef ENABLE_OSCE
+ ec_start = ec_tell(psRangeDec);
+#endif
/* Safety checks */
celt_assert( L > 0 && L <= MAX_FRAME_LENGTH );
@@ -87,6 +100,21 @@ opus_int silk_decode_frame(
/********************************************************/
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
+ /*************************/
+ /* Update output buffer. */
+ /*************************/
+ celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
+ mv_len = psDec->ltp_mem_length - psDec->frame_length;
+ silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+ silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+#ifdef ENABLE_OSCE
+ /********************************************************/
+ /* Run SILK enhancer */
+ /********************************************************/
+ osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
+#endif
+
/********************************************************/
/* Update PLC state */
/********************************************************/
@@ -109,15 +137,18 @@ opus_int silk_decode_frame(
lpcnet,
#endif
arch );
- }
- /*************************/
- /* Update output buffer. */
- /*************************/
- celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
- mv_len = psDec->ltp_mem_length - psDec->frame_length;
- silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
- silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+#ifdef ENABLE_OSCE
+ osce_reset( &psDec->osce, psDec->osce.method );
+#endif
+ /*************************/
+ /* Update output buffer. */
+ /*************************/
+ celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
+ mv_len = psDec->ltp_mem_length - psDec->frame_length;
+ silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+ silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+ }
/************************************************/
/* Comfort noise generation / estimation */
diff --git a/silk/init_decoder.c b/silk/init_decoder.c
index 16c03dcd..01bc4b7a 100644
--- a/silk/init_decoder.c
+++ b/silk/init_decoder.c
@@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
+#include "structs.h"
+
/************************/
-/* Init Decoder State */
+/* Reset Decoder State */
/************************/
-opus_int silk_init_decoder(
+opus_int silk_reset_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
)
{
/* Clear the entire encoder state, except anything copied */
- silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+ silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) );
/* Used to deactivate LSF interpolation */
psDec->first_frame_after_reset = 1;
@@ -52,6 +58,27 @@ opus_int silk_init_decoder(
/* Reset PLC state */
silk_PLC_Reset( psDec );
+#ifdef ENABLE_OSCE
+ /* Reset OSCE state and method */
+ osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD);
+#endif
+
+ return 0;
+}
+
+
+/************************/
+/* Init Decoder State */
+/************************/
+opus_int silk_init_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+)
+{
+    /* Clear the entire decoder state, except anything copied */
+ silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+
+ silk_reset_decoder( psDec );
+
return(0);
}
diff --git a/silk/main.h b/silk/main.h
index c67775ef..cd576d8c 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -389,6 +389,10 @@ void silk_NLSF_decode(
/****************************************************/
/* Decoder Functions */
/****************************************************/
+opus_int silk_reset_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+);
+
opus_int silk_init_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
);
@@ -413,6 +417,9 @@ opus_int silk_decode_frame(
#ifdef ENABLE_DEEP_PLC
LPCNetPLCState *lpcnet,
#endif
+#ifdef ENABLE_OSCE
+ OSCEModel *osce_model,
+#endif
int arch /* I Run-time architecture */
);
diff --git a/silk/structs.h b/silk/structs.h
index 709d3557..38243be1 100644
--- a/silk/structs.h
+++ b/silk/structs.h
@@ -44,6 +44,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "dred_decoder.h"
#endif
+#ifdef ENABLE_OSCE
+#include "osce_config.h"
+#include "osce_structs.h"
+#endif
+
#ifdef __cplusplus
extern "C"
{
@@ -238,6 +243,14 @@ typedef struct {
} silk_encoder_state;
+#ifdef ENABLE_OSCE
+typedef struct {
+ OSCEFeatureState features;
+ OSCEState state;
+ int method;
+} silk_OSCE_struct;
+#endif
+
/* Struct for Packet Loss Concealment */
typedef struct {
opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */
@@ -270,6 +283,10 @@ typedef struct {
/* Decoder state */
/********************************/
typedef struct {
+#ifdef ENABLE_OSCE
+ silk_OSCE_struct osce;
+#endif
+#define SILK_DECODER_STATE_RESET_START prev_gain_Q16
opus_int32 prev_gain_Q16;
opus_int32 exc_Q14[ MAX_FRAME_LENGTH ];
opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ];
diff --git a/silk_sources.mk b/silk_sources.mk
index 27c07129..3780b164 100644
--- a/silk_sources.mk
+++ b/silk_sources.mk
@@ -161,4 +161,4 @@ silk/float/schur_FLP.c \
silk/float/sort_FLP.c
SILK_SOURCES_FLOAT_AVX2 = \
-silk/float/x86/inner_product_FLP_avx2.c
+silk/float/x86/inner_product_FLP_avx2.c
\ No newline at end of file
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index 596c2dd0..dd95aefc 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -57,6 +57,10 @@
#include "dred_rdovae_dec.h"
#endif
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
struct OpusDecoder {
int celt_dec_offset;
int silk_dec_offset;
@@ -383,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
pcm_ptr = pcm_silk;
if (st->prev_mode==MODE_CELT_ONLY)
- silk_InitDecoder( silk_dec );
+ silk_ResetDecoder( silk_dec );
/* The SILK PLC cannot produce frames of less than 10 ms */
st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
@@ -408,6 +412,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
}
}
st->DecControl.enable_deep_plc = st->complexity >= 5;
+#ifdef ENABLE_OSCE
+ st->DecControl.osce_method = OSCE_METHOD_NONE;
+#ifndef DISABLE_LACE
+ if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;}
+#endif
+#ifndef DISABLE_NOLACE
+ if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;}
+#endif
+#endif
lost_flag = data == NULL ? 1 : 2 * !!decode_fec;
decoded_samples = 0;
@@ -953,7 +966,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
- silk_InitDecoder( silk_dec );
+ silk_ResetDecoder( silk_dec );
st->stream_channels = st->channels;
st->frame_size = st->Fs/400;
#ifdef ENABLE_DEEP_PLC
@@ -1044,6 +1057,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
goto bad_arg;
}
ret = lpcnet_plc_load_model(&st->lpcnet, data, len);
+ ret = silk_LoadOSCEModels(silk_dec, data, len) || ret;
}
break;
#endif
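
Taken together with the enable_deep_plc line above, the decoder now derives the OSCE method from the complexity setting. A descriptive Python sketch of that selection, assuming both LACE and NoLACE are compiled in:

def osce_method_for_complexity(complexity):
    # mirrors the #ifdef'd block in opus_decode_frame
    if complexity >= 7:
        return "OSCE_METHOD_NOLACE"
    if complexity >= 6:
        return "OSCE_METHOD_LACE"
    return "OSCE_METHOD_NONE"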
diff --git a/src/opus_demo.c b/src/opus_demo.c
index c5f6250f..bccdf976 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -70,6 +70,10 @@ unsigned char *load_blob(const char *filename, int *len) {
FILE *file;
unsigned char *data;
file = fopen(filename, "r");
+    if (file == NULL)
+    {
+        perror("could not open blob file");
+        exit(1);
+    }
fseek(file, 0L, SEEK_END);
*len = ftell(file);
fseek(file, 0L, SEEK_SET);
@@ -254,6 +258,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err
}
#endif
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#define COMPLEXITY_MIN 0
+#define COMPLEXITY_MAX 10
+
+#define PACKET_LOSS_PERC_MIN 0
+#define PACKET_LOSS_PERC_MAX 50
+#define PACKET_LOSS_PERC_STEP 5
+
+#define CBR_BITRATE_LIMIT 8000
+
+#define NUM_BITRATES 102
+static int bitrates[NUM_BITRATES] = {
+ 6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480,
+ 6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816,
+ 6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171,
+ 7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541,
+ 7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802,
+ 7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179,
+ 8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980,
+ 9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851,
+ 9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,
+ 11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,
+ 14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,
+ 22000, 32000, 64000
+};
+
+static int randint(int min, int max, int step)
+{
+ double r = ((double) rand())/ (RAND_MAX + 1.);
+ int d;
+
+ d = ((int) ((max + 1 - min) * r / step) * step) + min;
+
+ return d;
+}
+
+static void new_random_setting(OpusEncoder *enc)
+{
+ int bitrate_bps;
+ int complexity;
+ int packet_loss_perc;
+ int use_vbr;
+
+ bitrate_bps = bitrates[randint(0, NUM_BITRATES - 1, 1)];
+ complexity = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);
+ packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);
+ use_vbr = bitrate_bps < CBR_BITRATE_LIMIT ? 1 : randint(0, 1, 1);
+
+    printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr);
+
+ opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+ opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+ opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
+ opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
+}
+
+#endif
+
int main(int argc, char *argv[])
{
int err;
@@ -316,6 +382,10 @@ int main(int argc, char *argv[])
int lost_count=0;
FILE *packet_loss_file=NULL;
int dred_duration=0;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ int silk_random_switching = 0;
+ int silk_frame_counter = 0;
+#endif
#ifdef USE_WEIGHTS_FILE
int blob_len;
unsigned char *blob_data;
@@ -546,6 +616,12 @@ int main(int argc, char *argv[])
mode_list = celt_hq_test;
nb_modes_in_list = 4;
args++;
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){
+ silk_random_switching = atoi( argv[ args + 1 ] );
+ printf("switching encoding parameters every %dth frame\n", silk_random_switching);
+ args += 2;
+#endif
} else {
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
print_usage( argv );
@@ -759,6 +835,15 @@ int main(int argc, char *argv[])
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
frame_size = mode_list[curr_mode][2];
}
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ if (silk_random_switching)
+ {
+ silk_frame_counter += 1;
+ if (silk_frame_counter % silk_random_switching == 0) {
+ new_random_setting(enc);
+ }
+ }
+#endif
num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
curr_read = (int)num_read;
tot_in += curr_read;
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 53c899a0..21dfe4ff 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -50,6 +50,9 @@
#else
#include "float/structs_FLP.h"
#endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
#define MAX_ENCODER_BUFFER 480
@@ -1693,6 +1696,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->application == OPUS_APPLICATION_VOIP)
{
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+        /* write out high-pass filtered clean signal */
+        static FILE *fout = NULL;
+ if (fout == NULL)
+ {
+ fout = fopen("clean_hp.s16", "wb");
+ }
+
+ {
+ int idx;
+ opus_int16 tmp;
+ for (idx = 0; idx < frame_size; idx++)
+ {
+ tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
+ fwrite(&tmp, sizeof(tmp), 1, fout);
+ }
+ }
+#endif
} else {
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
@@ -2909,7 +2931,9 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
{
goto bad_arg;
}
+#ifdef ENABLE_DRED
ret = dred_encoder_load_model(&st->dred_encoder, data, len);
+#endif
}
break;
#endif
diff --git a/tests/test_opus_api.c b/tests/test_opus_api.c
index b6d67572..9500d407 100644
--- a/tests/test_opus_api.c
+++ b/tests/test_opus_api.c
@@ -103,7 +103,7 @@ opus_int32 test_dec_api(void)
for(c=0;c<4;c++)
{
i=opus_decoder_get_size(c);
- if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed();
+ if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed();
fprintf(stdout," opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"....");
cfgs++;
}
@@ -367,7 +367,7 @@ opus_int32 test_msdec_api(void)
for(b=-1;b<4;b++)
{
i=opus_multistream_decoder_get_size(a,b);
- if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
+ if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
fprintf(stdout," opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... ");
cfgs++;
}