From 7d328f5bfaa321d823ff4d11b62d5357c99e0693 Mon Sep 17 00:00:00 2001 From: Jan Buethe Date: Wed, 8 Nov 2023 14:03:39 +0100 Subject: Merge LACE/NoLACE under OSCE framework --- .github/workflows/autotools.yml | 8 +- .github/workflows/dred.yml | 2 +- .gitlab-ci.yml | 8 +- CMakeLists.txt | 47 +- Makefile.am | 6 + autogen.sh | 2 +- cmake/OpusSources.cmake | 2 + configure.ac | 29 +- dnn/adaconvtest.c | 449 +++++++ dnn/meson.build | 5 + dnn/nndsp.c | 412 ++++++ dnn/nndsp.h | 141 ++ dnn/nnet.c | 9 +- dnn/nnet_arch.h | 16 + dnn/osce.c | 1411 ++++++++++++++++++++ dnn/osce.h | 81 ++ dnn/osce_config.h | 62 + dnn/osce_features.c | 454 +++++++ dnn/osce_features.h | 50 + dnn/osce_structs.h | 124 ++ dnn/torch/osce/create_testvectors.py | 165 +++ dnn/torch/osce/data/silk_enhancement_set.py | 6 +- dnn/torch/osce/export_model_weights.py | 101 +- dnn/torch/osce/models/lace.py | 2 +- dnn/torch/osce/models/no_lace.py | 4 +- .../osce/utils/layers/limited_adaptive_comb1d.py | 18 +- .../osce/utils/layers/limited_adaptive_conv1d.py | 15 +- dnn/torch/osce/utils/silk_features.py | 16 +- dnn/torch/osce/utils/spec.py | 1 + .../weight-exchange/wexchange/c_export/c_writer.py | 14 +- .../weight-exchange/wexchange/torch/__init__.py | 1 + dnn/torch/weight-exchange/wexchange/torch/torch.py | 157 ++- dnn/write_lpcnet_weights.c | 15 + lpcnet_headers.mk | 9 + lpcnet_sources.mk | 7 + meson.build | 1 + meson_options.txt | 1 + silk/API.h | 16 +- silk/control.h | 5 + silk/dec_API.c | 57 + silk/decode_frame.c | 47 +- silk/init_decoder.c | 33 +- silk/main.h | 7 + silk/structs.h | 17 + silk_sources.mk | 2 +- src/opus_decoder.c | 18 +- src/opus_demo.c | 85 ++ src/opus_encoder.c | 24 + tests/test_opus_api.c | 4 +- 49 files changed, 4062 insertions(+), 104 deletions(-) create mode 100644 dnn/adaconvtest.c create mode 100644 dnn/nndsp.c create mode 100644 dnn/nndsp.h create mode 100644 dnn/osce.c create mode 100644 dnn/osce.h create mode 100644 dnn/osce_config.h create mode 100644 dnn/osce_features.c create mode 100644 dnn/osce_features.h create mode 100644 dnn/osce_structs.h create mode 100644 dnn/torch/osce/create_testvectors.py diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml index 91d332bf..bb66d5b0 100644 --- a/.github/workflows/autotools.yml +++ b/.github/workflows/autotools.yml @@ -29,6 +29,12 @@ jobs: compiler: gcc, buildconfig: --enable-assertions --enable-custom-modes } + - { + name: "Linux/GCC/EnableDNN", + os: ubuntu-latest, + compiler: gcc, + buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce + } steps: - uses: actions/checkout@v3 # No AutoMake on Mac so let's install it @@ -42,4 +48,4 @@ jobs: - name: Build run: make -j 2 - name: Test - run: make check -j 2 \ No newline at end of file + run: make check -j 2 diff --git a/.github/workflows/dred.yml b/.github/workflows/dred.yml index 52ac2571..ac703dd1 100644 --- a/.github/workflows/dred.yml +++ b/.github/workflows/dred.yml @@ -74,7 +74,7 @@ jobs: run: mkdir build - name: Configure working-directory: ./build - run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON + run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON - name: Build working-directory: ./build run: cmake --build . 
-j 2 --config ${{ matrix.config.config }} --target package diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0117c46e..92f578bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -64,9 +64,9 @@ autoconf: - !reference [.snippets, git_prep] script: - ./autogen.sh - - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx + - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce - make -j16 - - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16 + - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16 cache: paths: - "src/*.o" @@ -87,7 +87,7 @@ cmake: script: - ./autogen.sh - mkdir build - - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON + - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON - cmake --build build - cd build && ctest --output-on-failure -j 16 @@ -101,7 +101,7 @@ cmake: script: - ./autogen.sh - mkdir builddir - - meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir + - meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir - meson compile -C builddir - meson test -C builddir #- meson dist --no-tests -C builddir diff --git a/CMakeLists.txt b/CMakeLists.txt index 073d7de8..06e9b675 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,10 @@ set(OPUS_DRED_HELP_STR "enable DRED.") option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF) add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR}) +set(OPUS_OSCE_HELP_STR "enable OSCE.") +option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF) +add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR}) + if(APPLE) set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.") option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF) @@ -364,8 +368,6 @@ endif() add_sources_group(opus silk ${silk_headers} ${silk_sources}) add_sources_group(opus celt ${celt_headers} ${celt_sources}) -add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) -add_sources_group(opus lpcnet ${dred_headers} ${dred_sources}) if(OPUS_FIXED_POINT) add_sources_group(opus silk ${silk_sources_fixed}) @@ -380,11 +382,26 @@ if(NOT OPUS_ENABLE_FLOAT_API) target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API) endif() +if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE) + add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) + set(OPUS_DNN TRUE) +else() + set(OPUS_DNN FALSE) +endif() + +if (OPUS_DNN) + add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources}) + target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC) +endif() + if (OPUS_DRED) + add_sources_group(opus lpcnet ${dred_headers} ${dred_sources}) target_compile_definitions(opus PRIVATE ENABLE_DRED) - if(NOT OPUS_DEEP_PLC) - target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC) - endif() +endif() + +if (OPUS_OSCE) + add_sources_group(opus lpcnet ${osce_headers} ${osce_sources}) + target_compile_definitions(opus PRIVATE ENABLE_OSCE) endif() if(NOT OPUS_DISABLE_INTRINSICS) @@ -405,7 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) endif() add_sources_group(opus celt 
${celt_sources_x86_rtcd}) add_sources_group(opus silk ${silk_sources_x86_rtcd}) - add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd}) + endif() endif() if(SSE1_SUPPORTED) @@ -427,7 +446,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) if(SSE2_SUPPORTED) if(OPUS_X86_MAY_HAVE_SSE2) add_sources_group(opus celt ${celt_sources_sse2}) - add_sources_group(opus lpcnet ${dnn_sources_sse2}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_sse2}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2) if(NOT MSVC) set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2) @@ -445,7 +466,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) if(OPUS_X86_MAY_HAVE_SSE4_1) add_sources_group(opus celt ${celt_sources_sse4_1}) add_sources_group(opus silk ${silk_sources_sse4_1}) - add_sources_group(opus lpcnet ${dnn_sources_sse4_1}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_sse4_1}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1) if(NOT MSVC) set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1) @@ -471,7 +494,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) add_sources_group(opus celt ${celt_sources_avx2}) add_sources_group(opus silk ${silk_sources_avx2}) add_sources_group(opus silk ${silk_sources_float_avx2}) - add_sources_group(opus lpcnet ${dnn_sources_avx2}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_avx2}) + endif() target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2) if(MSVC) set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2") @@ -524,7 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS) add_sources_group(opus celt ${celt_sources_arm_neon_intr}) add_sources_group(opus silk ${silk_sources_arm_neon_intr}) - add_sources_group(opus lpcnet ${dnn_sources_arm_neon}) + if (OPUS_DNN) + add_sources_group(opus lpcnet ${dnn_sources_arm_neon}) + endif() # silk arm neon depends on main_Fix.h target_include_directories(opus PRIVATE silk/fixed) diff --git a/Makefile.am b/Makefile.am index d09c1771..4fd821a5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,6 +25,9 @@ endif if ENABLE_DRED LPCNET_SOURCES += $(DRED_SOURCES) endif +if ENABLE_OSCE +LPCNET_SOURCES += $(OSCE_SOURCES) +endif if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) @@ -132,6 +135,9 @@ endif if ENABLE_DRED LPCNET_HEAD += $(DRED_HEAD) endif +if ENABLE_OSCE +LPCNET_HEAD += $(OSCE_HEAD) +endif libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES) libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@ diff --git a/autogen.sh b/autogen.sh index b7482c2d..1987e38b 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -dnn/download_model.sh df63771 +dnn/download_model.sh 591c8ba echo "Updating build configuration files, please wait...." 
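For reference, each of the three build systems touched by this patch gains an OSCE switch; a minimal sketch of enabling it from the command line (the flag names are the ones added in the hunks above and in configure.ac below, the rest of each command is illustrative):

    # autotools
    ./configure --enable-osce

    # CMake
    cmake .. -DOPUS_OSCE=ON

    # Meson
    meson setup -Denable-osce=true builddir
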
diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake
index 74e4eaed..0cf24557 100644
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -42,8 +42,10 @@ get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10) get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers) get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
+get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
 get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources) get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
+get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
 get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd) get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2) get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
diff --git a/configure.ac b/configure.ac
index b4c5f2a5..84ce651d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc], [AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],, [enable_deep_plc=no])
-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
 AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC]) ])
-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
 has_float_approx=no case "$host_cpu" in
@@ -904,6 +904,31 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [ AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float]) ])
+AC_ARG_ENABLE([osce-training-data],
+ AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
+ [enable_osce_training_data=no]
+)
+
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
+])
+
+AC_MSG_CHECKING([argument osce training data])
+AS_IF([test "$enable_osce_training_data" = "yes"], [
+ AC_MSG_RESULT([yes])
+], [AC_MSG_RESULT([no])])
+
+AC_ARG_ENABLE([osce],
+ AS_HELP_STRING([--enable-osce], [enable Opus Speech Coding Enhancement]),,
+ [enable_osce=no]
+)
+
+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
+ AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
+])
+
+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
+
 AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"]) AC_ARG_ENABLE([extra-programs],
diff --git a/dnn/adaconvtest.c b/dnn/adaconvtest.c
new file mode 100644
index 00000000..722e4aff
--- /dev/null
+++ b/dnn/adaconvtest.c
@@ -0,0 +1,449 @@
+#include "lace_data.h"
+#include "nolace_data.h"
+#include "osce.h"
+#include "nndsp.h"
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+void adaconv_compare(
+ const char * prefix,
+ int num_frames,
+ AdaConvState* hAdaConv,
+ LinearLayer *kernel_layer,
+ LinearLayer *gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain
+)
+{
+ char feature_file[256];
+ char x_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + float window[40]; + + init_adaconv_state(hAdaConv); + compute_overlap_window(window, 40); + + FILE *f_features, *f_x_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != frame_size * in_channels) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != frame_size * out_channels) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim, + frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding, + filter_gain_a, filter_gain_b, shape_gain, window, 0); + + mse = 0; + for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size * out_channels)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + + +void adacomb_compare( + const char * prefix, + int num_frames, + AdaCombState* hAdaComb, + LinearLayer *kernel_layer, + LinearLayer *gain_layer, + LinearLayer *global_gain_layer, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit +) +{ + char feature_file[256]; + char x_in_file[256]; + char p_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + int pitch_lag; + float window[40]; + + init_adacomb_state(hAdaComb); + compute_overlap_window(window, 40); + + FILE *f_features, *f_x_in, *f_p_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(p_in_file, prefix); + strcat(p_in_file, "_p_in.s32"); + f_p_in = fopen(p_in_file, "r"); + if (f_p_in == NULL) + { + sprintf(message, "could not open file %s", p_in_file); + perror(message); + exit(1); + } + + 
strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer, + pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0); + + + mse = 0; + for (i_sample = 0; i_sample < frame_size; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + +void adashape_compare( + const char * prefix, + int num_frames, + AdaShapeState* hAdaShape, + LinearLayer *alpha1, + LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k +) +{ + char feature_file[256]; + char x_in_file[256]; + char x_out_file[256]; + char message[512]; + int i_frame, i_sample; + float mse; + float features[512]; + float x_in[512]; + float x_out_ref[512]; + float x_out[512]; + + init_adashape_state(hAdaShape); + + FILE *f_features, *f_x_in, *f_x_out; + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "r"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "r"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + strcpy(x_out_file, prefix); + strcat(x_out_file, "_x_out.f32"); + f_x_out = fopen(x_out_file, "r"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", x_out_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file); + exit(1); + } + + if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file); + exit(1); + } + + if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size) + { + fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file); + exit(1); + } + + adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim, + frame_size, avg_pool_k, 0); + + mse = 0; + for (i_sample = 0; i_sample < frame_size; i_sample ++) + { + mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2); + } + mse = sqrt(mse / (frame_size)); + printf("rmse[%d] %f\n", i_frame, mse); + + } +} + + +int main() +{ + LACELayers hLACE; + NOLACELayers hNoLACE; + 
+ AdaConvState hAdaConv; + AdaCombState hAdaComb; + AdaShapeState hAdaShape; + + init_adaconv_state(&hAdaConv); + + init_lacelayers(&hLACE, lacelayers_arrays); + init_nolacelayers(&hNoLACE, nolacelayers_arrays); + + printf("\ntesting lace.af1 (1 in, 1 out)...\n"); + adaconv_compare( + "testvectors/lace_af1", + 5, + &hAdaConv, + &hLACE.lace_af1_kernel, + &hLACE.lace_af1_gain, + LACE_AF1_FEATURE_DIM, + LACE_AF1_FRAME_SIZE, + LACE_AF1_OVERLAP_SIZE, + LACE_AF1_IN_CHANNELS, + LACE_AF1_OUT_CHANNELS, + LACE_AF1_KERNEL_SIZE, + LACE_AF1_LEFT_PADDING, + LACE_AF1_FILTER_GAIN_A, + LACE_AF1_FILTER_GAIN_B, + LACE_AF1_SHAPE_GAIN + ); + + + printf("\ntesting nolace.af1 (1 in, 2 out)...\n"); + adaconv_compare( + "testvectors/nolace_af1", + 5, + &hAdaConv, + &hNoLACE.nolace_af1_kernel, + &hNoLACE.nolace_af1_gain, + NOLACE_AF1_FEATURE_DIM, + NOLACE_AF1_FRAME_SIZE, + NOLACE_AF1_OVERLAP_SIZE, + NOLACE_AF1_IN_CHANNELS, + NOLACE_AF1_OUT_CHANNELS, + NOLACE_AF1_KERNEL_SIZE, + NOLACE_AF1_LEFT_PADDING, + NOLACE_AF1_FILTER_GAIN_A, + NOLACE_AF1_FILTER_GAIN_B, + NOLACE_AF1_SHAPE_GAIN + ); + + + printf("testing nolace.af4 (2 in, 1 out)...\n"); + adaconv_compare( + "testvectors/nolace_af4", + 5, + &hAdaConv, + &hNoLACE.nolace_af4_kernel, + &hNoLACE.nolace_af4_gain, + NOLACE_AF4_FEATURE_DIM, + NOLACE_AF4_FRAME_SIZE, + NOLACE_AF4_OVERLAP_SIZE, + NOLACE_AF4_IN_CHANNELS, + NOLACE_AF4_OUT_CHANNELS, + NOLACE_AF4_KERNEL_SIZE, + NOLACE_AF4_LEFT_PADDING, + NOLACE_AF4_FILTER_GAIN_A, + NOLACE_AF4_FILTER_GAIN_B, + NOLACE_AF4_SHAPE_GAIN + ); + + printf("\ntesting nolace.af2 (2 in, 2 out)...\n"); + adaconv_compare( + "testvectors/nolace_af2", + 5, + &hAdaConv, + &hNoLACE.nolace_af2_kernel, + &hNoLACE.nolace_af2_gain, + NOLACE_AF2_FEATURE_DIM, + NOLACE_AF2_FRAME_SIZE, + NOLACE_AF2_OVERLAP_SIZE, + NOLACE_AF2_IN_CHANNELS, + NOLACE_AF2_OUT_CHANNELS, + NOLACE_AF2_KERNEL_SIZE, + NOLACE_AF2_LEFT_PADDING, + NOLACE_AF2_FILTER_GAIN_A, + NOLACE_AF2_FILTER_GAIN_B, + NOLACE_AF2_SHAPE_GAIN + ); + + printf("\ntesting lace.cf1...\n"); + adacomb_compare( + "testvectors/lace_cf1", + 5, + &hAdaComb, + &hLACE.lace_cf1_kernel, + &hLACE.lace_cf1_gain, + &hLACE.lace_cf1_global_gain, + LACE_CF1_FEATURE_DIM, + LACE_CF1_FRAME_SIZE, + LACE_CF1_OVERLAP_SIZE, + LACE_CF1_KERNEL_SIZE, + LACE_CF1_LEFT_PADDING, + LACE_CF1_FILTER_GAIN_A, + LACE_CF1_FILTER_GAIN_B, + LACE_CF1_LOG_GAIN_LIMIT + ); + + printf("\ntesting nolace.tdshape1...\n"); + adashape_compare( + "testvectors/nolace_tdshape1", + 5, + &hAdaShape, + &hNoLACE.nolace_tdshape1_alpha1, + &hNoLACE.nolace_tdshape1_alpha2, + NOLACE_TDSHAPE1_FEATURE_DIM, + NOLACE_TDSHAPE1_FRAME_SIZE, + NOLACE_TDSHAPE1_AVG_POOL_K + ); + + return 0; +} + +/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . 
-I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */ \ No newline at end of file diff --git a/dnn/meson.build b/dnn/meson.build index 6e520fbc..737d4a02 100644 --- a/dnn/meson.build +++ b/dnn/meson.build @@ -5,6 +5,11 @@ if opt_enable_dred dnn_sources += dred_sources endif +osce_sources = sources['OSCE_SOURCES'] +if opt_enable_osce + dnn_sources += osce_sources +endif + dnn_sources_sse2 = sources['DNN_SOURCES_SSE2'] dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1'] dnn_sources_avx2 = sources['DNN_SOURCES_AVX2'] diff --git a/dnn/nndsp.c b/dnn/nndsp.c new file mode 100644 index 00000000..bfbf5735 --- /dev/null +++ b/dnn/nndsp.c @@ -0,0 +1,412 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "nndsp.h" +#include "arch.h" +#include "nnet.h" +#include "os_support.h" +#include "pitch.h" + +#include + +#ifndef M_PI +#define M_PI 3.141592653589793f +#endif + +#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel)) + +void init_adaconv_state(AdaConvState *hAdaConv) +{ + OPUS_CLEAR(hAdaConv, 1); +} + +void init_adacomb_state(AdaCombState *hAdaComb) +{ + OPUS_CLEAR(hAdaComb, 1); +} + +void init_adashape_state(AdaShapeState *hAdaShape) +{ + OPUS_CLEAR(hAdaShape, 1); +} + +void compute_overlap_window(float *window, int overlap_size) +{ + int i_sample; + for (i_sample=0; i_sample < overlap_size; i_sample++) + { + window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size); + } +} + +#ifdef DEBUG_NNDSP +void print_float_vector(const char* name, const float *vec, int length) +{ + for (int i = 0; i < length; i ++) + { + printf("%s[%d]: %f\n", name, i, vec[i]); + } +} +#endif + +static void scale_kernel( + float *kernel, + int in_channels, + int out_channels, + int kernel_size, + float *gain +) +/* normalizes (p-norm) kernel over input channel and kernel dimension */ +{ + float norm; + int i_in_channels, i_out_channels, i_kernel; + + for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++) + { + norm = 0; + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++) + { + for (i_kernel = 0; i_kernel < kernel_size; i_kernel++) + { + norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)]; + } + } +#ifdef DEBUG_NNDSP + printf("kernel norm: %f, %f\n", norm, sqrt(norm)); +#endif + norm = 1.f / (1e-6f + sqrt(norm)); + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++) + { + for (i_kernel = 0; i_kernel < kernel_size; i_kernel++) + { + + kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels]; + } + } + } +} + +static void transform_gains( + float *gains, + int num_gains, + float filter_gain_a, + float filter_gain_b +) +{ + int i; + for (i = 0; i < num_gains; i++) + { + gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b); + } +} + +void adaconv_process_frame( + AdaConvState* hAdaConv, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + int feature_dim, + int frame_size, + int overlap_size, + int in_channels, + int out_channels, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float shape_gain, + float *window, + int arch +) +{ + float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS]; + float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; + float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)]; + float kernel0[ADACONV_MAX_KERNEL_SIZE]; + float kernel1[ADACONV_MAX_KERNEL_SIZE]; + float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE]; + float channel_buffer1[ADACONV_MAX_FRAME_SIZE]; + float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS]; + float *p_input; + int i_in_channels, i_out_channels, i_sample; + + (void) feature_dim; /* ToDo: figure out whether we might need this information */ + + celt_assert(shape_gain == 1); + celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. 
Non-causal version not difficult to implement but will require third loop */ + celt_assert(kernel_size < frame_size); + + OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS); + OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS); + OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)); + +#ifdef DEBUG_NNDSP + print_float_vector("x_in", x_in, in_channels * frame_size); +#endif + + /* prepare input */ + for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++) + { + OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size); + OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size); + } + p_input = input_buffer + kernel_size; + + + /* calculate new kernel and new gain */ + compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch); + compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch); +#ifdef DEBUG_NNDSP + print_float_vector("features", features, feature_dim); + print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size); + print_float_vector("adaconv_gain_raw", gain_buffer, out_channels); +#endif + transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b); + scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer); + +#ifdef DEBUG_NNDSP + print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size); + print_float_vector("adaconv_gain", gain_buffer, out_channels); +#endif + + /* calculate overlapping part using kernel from last frame */ + + for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++) + { + for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++) + { + OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE); + OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE); + + OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size); + OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size); + celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch); + celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch); + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample]; + output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample]; + } + for (i_sample = overlap_size; i_sample < frame_size; i_sample++) + { + output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample]; + } + } + } + + OPUS_COPY(x_out, output_buffer, out_channels * frame_size); + +#ifdef DEBUG_NNDSP + print_float_vector("x_out", x_out, out_channels * frame_size); +#endif + + /* buffer update */ + for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++) + { + OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size); + } + OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * 
out_channels); +} + +void adacomb_process_frame( + AdaCombState* hAdaComb, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + const LinearLayer *global_gain_layer, + int pitch_lag, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit, + float *window, + int arch +) +{ + float output_buffer[ADACOMB_MAX_FRAME_SIZE]; + float output_buffer_last[ADACOMB_MAX_FRAME_SIZE]; + float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE]; + float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE]; + float gain, global_gain; + float *p_input; + int i_sample; + float kernel[16]; + float last_kernel[16]; + + (void) feature_dim; /* ToDo: figure out whether we might need this information */ + + OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE); + OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE); + OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE); + + OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG); + OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size); + p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG; + + /* calculate new kernel and new gain */ + compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch); + compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch); + compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch); +#ifdef DEBUG_NNDSP + print_float_vector("features", features, feature_dim); + print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size); + print_float_vector("adacomb_gain_raw", &gain, 1); + print_float_vector("adacomb_global_gain_raw", &global_gain, 1); +#endif + gain = exp(log_gain_limit - gain); + global_gain = exp(filter_gain_a * global_gain + filter_gain_b); + scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain); + +#ifdef DEBUG_NNDSP + print_float_vector("adacomb_kernel", kernel_buffer, kernel_size); + print_float_vector("adacomb_gain", &gain, 1); +#endif + + OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE); + OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE); + OPUS_COPY(kernel, kernel_buffer, kernel_size); + OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size); + + celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch); + + celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch); + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample]; + } + + for (i_sample = 0; i_sample < overlap_size; i_sample++) + { + output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample]; + } + + for (i_sample = overlap_size; i_sample < frame_size; i_sample++) + { + output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]); + } + OPUS_COPY(x_out, output_buffer, frame_size); + +#ifdef DEBUG_NNDSP + print_float_vector("x_out", x_out, frame_size); +#endif + + /* buffer update */ + OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size); + OPUS_COPY(hAdaComb->history, p_input + 
frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG); + hAdaComb->last_pitch_lag = pitch_lag; + hAdaComb->last_global_gain = global_gain; +} + + +void adashape_process_frame( + AdaShapeState *hAdaShape, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *alpha1, + const LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k, + int arch +) +{ + float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE]; + float out_buffer[ADASHAPE_MAX_FRAME_SIZE]; + int i, k; + int tenv_size; + float mean; + float *tenv; + + celt_assert(frame_size % avg_pool_k == 0); + celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM); + + tenv_size = frame_size / avg_pool_k; + tenv = in_buffer + feature_dim; + OPUS_CLEAR(tenv, tenv_size + 1); + + OPUS_COPY(in_buffer, features, feature_dim); + + /* calculate temporal envelope */ + mean = 0; + for (i = 0; i < tenv_size; i++) + { + for (k = 0; k < avg_pool_k; k++) + { + tenv[i] += fabs(x_in[i * avg_pool_k + k]); + } + tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f); + mean += tenv[i]; + } + mean /= tenv_size; + for (i = 0; i < tenv_size; i++) + { + tenv[i] -= mean; + } + tenv[tenv_size] = mean; +#ifdef DEBUG_NNDSP + print_float_vector("tenv", tenv, tenv_size + 1); +#endif + + /* calculate temporal weights */ +#ifdef DEBUG_NNDSP + print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1); +#endif + compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, feature_dim + tenv_size + 1, ACTIVATION_LINEAR, arch); +#ifdef DEBUG_NNDSP + print_float_vector("alpha1_out", out_buffer, frame_size); +#endif + /* compute leaky ReLU by hand. ToDo: try tanh activation */ + for (i = 0; i < frame_size; i ++) + { + in_buffer[i] = out_buffer[i] >= 0 ? out_buffer[i] : 0.2f * out_buffer[i]; + } +#ifdef DEBUG_NNDSP + print_float_vector("post_alpha1", in_buffer, frame_size); +#endif + compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch); + + /* shape signal */ + for (i = 0; i < frame_size; i ++) + { + x_out[i] = exp(out_buffer[i]) * x_in[i]; + } + +} diff --git a/dnn/nndsp.h b/dnn/nndsp.h new file mode 100644 index 00000000..f00094b6 --- /dev/null +++ b/dnn/nndsp.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef NNDSP_H +#define NNDSP_H + +#include "opus_types.h" +#include "nnet.h" +#include + + +#define ADACONV_MAX_KERNEL_SIZE 16 +#define ADACONV_MAX_INPUT_CHANNELS 2 +#define ADACONV_MAX_OUTPUT_CHANNELS 2 +#define ADACONV_MAX_FRAME_SIZE 80 +#define ADACONV_MAX_OVERLAP_SIZE 40 + +#define ADACOMB_MAX_LAG 300 +#define ADACOMB_MAX_KERNEL_SIZE 16 +#define ADACOMB_MAX_FRAME_SIZE 80 +#define ADACOMB_MAX_OVERLAP_SIZE 40 + +#define ADASHAPE_MAX_INPUT_DIM 512 +#define ADASHAPE_MAX_FRAME_SIZE 160 + +/*#define DEBUG_NNDSP*/ +#ifdef DEBUG_NNDSP +#include +#endif + + +void print_float_vector(const char* name, const float *vec, int length); + +typedef struct { + float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS]; + float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; + float last_gain; +} AdaConvState; + + +typedef struct { + float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG]; + float last_kernel[ADACOMB_MAX_KERNEL_SIZE]; + float last_global_gain; + int last_pitch_lag; +} AdaCombState; + + +typedef struct { + float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM]; + float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE]; +} AdaShapeState; + +void init_adaconv_state(AdaConvState *hAdaConv); + +void init_adacomb_state(AdaCombState *hAdaComb); + +void init_adashape_state(AdaShapeState *hAdaShape); + +void compute_overlap_window(float *window, int overlap_size); + +void adaconv_process_frame( + AdaConvState* hAdaConv, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + int feature_dim, /* not strictly necessary */ + int frame_size, + int overlap_size, + int in_channels, + int out_channels, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float shape_gain, + float *window, + int arch +); + +void adacomb_process_frame( + AdaCombState* hAdaComb, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *kernel_layer, + const LinearLayer *gain_layer, + const LinearLayer *global_gain_layer, + int pitch_lag, + int feature_dim, + int frame_size, + int overlap_size, + int kernel_size, + int left_padding, + float filter_gain_a, + float filter_gain_b, + float log_gain_limit, + float *window, + int arch +); + +void adashape_process_frame( + AdaShapeState *hAdaShape, + float *x_out, + const float *x_in, + const float *features, + const LinearLayer *alpha1, + const LinearLayer *alpha2, + int feature_dim, + int frame_size, + int avg_pool_k, + int arch +); + +#endif diff --git a/dnn/nnet.c b/dnn/nnet.c index e794e450..7ba623ca 100644 --- a/dnn/nnet.c +++ b/dnn/nnet.c @@ -41,6 +41,10 @@ #include "os_support.h" #include "vec.h" +#ifdef ENABLE_OSCE +#include "osce_config.h" +#endif + #ifdef NO_OPTIMIZATIONS #if defined(_MSC_VER) #pragma message ("Compiling without any vectorization. 
This code will be very slow") @@ -59,8 +63,11 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float compute_activation(output, output, layer->nb_outputs, activation, arch); } +#ifdef ENABLE_OSCE +#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS) +#else #define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS) - +#endif void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch) { diff --git a/dnn/nnet_arch.h b/dnn/nnet_arch.h index 12a467e5..694a3608 100644 --- a/dnn/nnet_arch.h +++ b/dnn/nnet_arch.h @@ -64,13 +64,29 @@ static OPUS_INLINE float relu(float x) return x < 0 ? 0 : x; } +/*#define HIGH_ACCURACY */ + void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation) { int i; if (activation == ACTIVATION_SIGMOID) { +#ifdef HIGH_ACCURACY + for (int n=0; n +#include "osce.h" +#include "osce_features.h" +#include "os_support.h" +#include "nndsp.h" +#include "float_cast.h" +#include "arch.h" + +#ifdef OSCE_DEBUG +#include +/*#define WRITE_FEATURES*/ +/*#define DEBUG_LACE*/ +/*#define DEBUG_NOLACE*/ +#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0) +#endif + +#ifdef ENABLE_OSCE_TRAINING_DATA +#include +#endif + +#define CLIP(a, min, max) (((a) < (min) ? (min) : (a)) > (max) ? (max) : (a)) + +extern const WeightArray lacelayers_arrays[]; +extern const WeightArray nolacelayers_arrays[]; + +/* LACE */ + +#ifndef DISABLE_LACE + +static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale) +{ + float x; + (void) dim; + + numbits = logscale ? 
log(numbits) : numbits; + x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2; + + emb[0] = sin(x * LACE_NUMBITS_SCALE_0 - 0.5f); + emb[1] = sin(x * LACE_NUMBITS_SCALE_1 - 0.5f); + emb[2] = sin(x * LACE_NUMBITS_SCALE_2 - 0.5f); + emb[3] = sin(x * LACE_NUMBITS_SCALE_3 - 0.5f); + emb[4] = sin(x * LACE_NUMBITS_SCALE_4 - 0.5f); + emb[5] = sin(x * LACE_NUMBITS_SCALE_5 - 0.5f); + emb[6] = sin(x * LACE_NUMBITS_SCALE_6 - 0.5f); + emb[7] = sin(x * LACE_NUMBITS_SCALE_7 - 0.5f); +} + + +static int init_lace(LACE *hLACE, const WeightArray *weights) +{ + int ret = 0; + OPUS_CLEAR(hLACE, 1); + celt_assert(weights != NULL); + + ret = init_lacelayers(&hLACE->layers, weights); + + compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE); + + return ret; +} + +static void reset_lace_state(LACEState *state) +{ + OPUS_CLEAR(state, 1); + + init_adacomb_state(&state->cf1_state); + init_adacomb_state(&state->cf2_state); + init_adaconv_state(&state->af1_state); +} + +static void lace_feature_net( + LACE *hLACE, + LACEState *state, + float *output, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float input_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)]; + float output_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)]; + float numbits_embedded[2 * LACE_NUMBITS_EMBEDDING_DIM]; + int i_subframe; + + compute_lace_numbits_embedding(numbits_embedded, numbits[0], LACE_NUMBITS_EMBEDDING_DIM, + log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1); + compute_lace_numbits_embedding(numbits_embedded + LACE_NUMBITS_EMBEDDING_DIM, numbits[1], LACE_NUMBITS_EMBEDDING_DIM, + log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1); + + /* scaling and dimensionality reduction */ + for (i_subframe = 0; i_subframe < 4; i_subframe ++) + { + OPUS_COPY(input_buffer, features + i_subframe * LACE_NUM_FEATURES, LACE_NUM_FEATURES); + OPUS_COPY(input_buffer + LACE_NUM_FEATURES, hLACE->layers.lace_pitch_embedding.float_weights + periods[i_subframe] * LACE_PITCH_EMBEDDING_DIM, LACE_PITCH_EMBEDDING_DIM); + OPUS_COPY(input_buffer + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * LACE_NUMBITS_EMBEDDING_DIM); + + compute_generic_conv1d( + &hLACE->layers.lace_fnet_conv1, + output_buffer + i_subframe * LACE_HIDDEN_FEATURE_DIM, + NULL, + input_buffer, + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM, + ACTIVATION_TANH, + arch); + } + + /* subframe accumulation */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_HIDDEN_FEATURE_DIM); + compute_generic_conv1d( + &hLACE->layers.lace_fnet_conv2, + output_buffer, + state->feature_net_conv2_state, + input_buffer, + 4 * LACE_HIDDEN_FEATURE_DIM, + ACTIVATION_TANH, + arch + ); + + /* tconv upsampling */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM); + compute_generic_dense( + &hLACE->layers.lace_fnet_tconv, + output_buffer, + input_buffer, + ACTIVATION_LINEAR, + arch + ); + + /* GRU */ + OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM); + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + compute_generic_gru( + &hLACE->layers.lace_fnet_gru_input, + &hLACE->layers.lace_fnet_gru_recurrent, + state->feature_net_gru_state, + input_buffer + i_subframe * LACE_COND_DIM, + arch + ); + OPUS_COPY(output + i_subframe * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM); + } +} + + +static void lace_process_20ms_frame( + LACE* hLACE, + LACEState *state, + float *x_out, + const float *x_in, + const float *features, + const float *numbits, + const int 
*periods, + int arch +) +{ + float feature_buffer[4 * LACE_COND_DIM]; + float output_buffer[4 * LACE_FRAME_SIZE]; + int i_subframe, i_sample; + +#ifdef DEBUG_LACE + static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL; + static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods; + + + FINIT(f_features, "debug/c_features.f32", "wb"); + FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb"); + FINIT(f_xin, "debug/c_x_in.f32", "wb"); + FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb"); + FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb"); + FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb"); + FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb"); + FINIT(f_postaf1, "debug/c_post_af1.f32", "wb"); + FINIT(f_numbits, "debug/c_numbits.f32", "wb"); + FINIT(f_periods, "debug/c_periods.s32", "wb"); + + fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin); + fwrite(numbits, sizeof(*numbits), 2, f_numbits); + fwrite(periods, sizeof(*periods), 4, f_periods); +#endif + + /* pre-emphasis */ + for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++) + { + output_buffer[i_sample] = x_in[i_sample] - LACE_PREEMPH * state->preemph_mem; + state->preemph_mem = x_in[i_sample]; + } + + /* run feature encoder */ + lace_feature_net(hLACE, state, feature_buffer, features, numbits, periods, arch); +#ifdef DEBUG_LACE + fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features); + fwrite(feature_buffer, sizeof(*feature_buffer), 4 * LACE_COND_DIM, f_encfeatures); + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph); +#endif + + /* 1st comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adacomb_process_frame( + &state->cf1_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_cf1_kernel, + &hLACE->layers.lace_cf1_gain, + &hLACE->layers.lace_cf1_global_gain, + periods[i_subframe], + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_CF1_KERNEL_SIZE, + LACE_CF1_LEFT_PADDING, + LACE_CF1_FILTER_GAIN_A, + LACE_CF1_FILTER_GAIN_B, + LACE_CF1_LOG_GAIN_LIMIT, + hLACE->window, + arch); + } + +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1); +#endif + + /* 2nd comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adacomb_process_frame( + &state->cf2_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_cf2_kernel, + &hLACE->layers.lace_cf2_gain, + &hLACE->layers.lace_cf2_global_gain, + periods[i_subframe], + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_CF2_KERNEL_SIZE, + LACE_CF2_LEFT_PADDING, + LACE_CF2_FILTER_GAIN_A, + LACE_CF2_FILTER_GAIN_B, + LACE_CF2_LOG_GAIN_LIMIT, + hLACE->window, + arch); + } +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2); +#endif + + /* final adaptive filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adaconv_process_frame( + &state->af1_state, + output_buffer + i_subframe * LACE_FRAME_SIZE, + output_buffer + i_subframe * LACE_FRAME_SIZE, + feature_buffer + i_subframe * LACE_COND_DIM, + &hLACE->layers.lace_af1_kernel, + &hLACE->layers.lace_af1_gain, + LACE_COND_DIM, + LACE_FRAME_SIZE, + LACE_OVERLAP_SIZE, + LACE_AF1_IN_CHANNELS, + LACE_AF1_OUT_CHANNELS, + LACE_AF1_KERNEL_SIZE, + 
LACE_AF1_LEFT_PADDING, + LACE_AF1_FILTER_GAIN_A, + LACE_AF1_FILTER_GAIN_B, + LACE_AF1_SHAPE_GAIN, + hLACE->window, + arch); + } +#ifdef DEBUG_LACE + fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1); +#endif + + /* de-emphasis */ + for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++) + { + x_out[i_sample] = output_buffer[i_sample] + LACE_PREEMPH * state->deemph_mem; + state->deemph_mem = x_out[i_sample]; + } +#ifdef DEBUG_LACE + fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph); +#endif +} + +#endif /* #ifndef DISABLE_LACE */ + + +/* NoLACE */ +#ifndef DISABLE_NOLACE + +static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale) +{ + float x; + (void) dim; + + numbits = logscale ? log(numbits) : numbits; + x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2; + + emb[0] = sin(x * NOLACE_NUMBITS_SCALE_0 - 0.5f); + emb[1] = sin(x * NOLACE_NUMBITS_SCALE_1 - 0.5f); + emb[2] = sin(x * NOLACE_NUMBITS_SCALE_2 - 0.5f); + emb[3] = sin(x * NOLACE_NUMBITS_SCALE_3 - 0.5f); + emb[4] = sin(x * NOLACE_NUMBITS_SCALE_4 - 0.5f); + emb[5] = sin(x * NOLACE_NUMBITS_SCALE_5 - 0.5f); + emb[6] = sin(x * NOLACE_NUMBITS_SCALE_6 - 0.5f); + emb[7] = sin(x * NOLACE_NUMBITS_SCALE_7 - 0.5f); +} + +static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights) +{ + int ret = 0; + OPUS_CLEAR(hNoLACE, 1); + celt_assert(weights != NULL); + + ret = init_nolacelayers(&hNoLACE->layers, weights); + + compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE); + + return ret; +} + +static void reset_nolace_state(NoLACEState *state) +{ + OPUS_CLEAR(state, 1); + + init_adacomb_state(&state->cf1_state); + init_adacomb_state(&state->cf2_state); + init_adaconv_state(&state->af1_state); + init_adaconv_state(&state->af2_state); + init_adaconv_state(&state->af3_state); + init_adaconv_state(&state->af4_state); + init_adashape_state(&state->tdshape1_state); + init_adashape_state(&state->tdshape2_state); + init_adashape_state(&state->tdshape3_state); +} + +static void nolace_feature_net( + NoLACE *hNoLACE, + NoLACEState *state, + float *output, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float input_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)]; + float output_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)]; + float numbits_embedded[2 * NOLACE_NUMBITS_EMBEDDING_DIM]; + int i_subframe; + + compute_nolace_numbits_embedding(numbits_embedded, numbits[0], NOLACE_NUMBITS_EMBEDDING_DIM, + log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1); + compute_nolace_numbits_embedding(numbits_embedded + NOLACE_NUMBITS_EMBEDDING_DIM, numbits[1], NOLACE_NUMBITS_EMBEDDING_DIM, + log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1); + + /* scaling and dimensionality reduction */ + for (i_subframe = 0; i_subframe < 4; i_subframe ++) + { + OPUS_COPY(input_buffer, features + i_subframe * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES); + OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[i_subframe] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM); + OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * NOLACE_NUMBITS_EMBEDDING_DIM); + + compute_generic_conv1d( + &hNoLACE->layers.nolace_fnet_conv1, + output_buffer + i_subframe * NOLACE_HIDDEN_FEATURE_DIM, + NULL, + input_buffer, + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * 
NOLACE_NUMBITS_EMBEDDING_DIM, + ACTIVATION_TANH, + arch); + } + + /* subframe accumulation */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_HIDDEN_FEATURE_DIM); + compute_generic_conv1d( + &hNoLACE->layers.nolace_fnet_conv2, + output_buffer, + state->feature_net_conv2_state, + input_buffer, + 4 * NOLACE_HIDDEN_FEATURE_DIM, + ACTIVATION_TANH, + arch + ); + + /* tconv upsampling */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM); + compute_generic_dense( + &hNoLACE->layers.nolace_fnet_tconv, + output_buffer, + input_buffer, + ACTIVATION_LINEAR, + arch + ); + + /* GRU */ + OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM); + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + compute_generic_gru( + &hNoLACE->layers.nolace_fnet_gru_input, + &hNoLACE->layers.nolace_fnet_gru_recurrent, + state->feature_net_gru_state, + input_buffer + i_subframe * NOLACE_COND_DIM, + arch + ); + OPUS_COPY(output + i_subframe * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM); + } +} + + +static void nolace_process_20ms_frame( + NoLACE* hNoLACE, + NoLACEState *state, + float *x_out, + const float *x_in, + const float *features, + const float *numbits, + const int *periods, + int arch +) +{ + float feature_buffer[4 * NOLACE_COND_DIM]; + float feature_transform_buffer[4 * NOLACE_COND_DIM]; + float x_buffer1[8 * NOLACE_FRAME_SIZE]; + float x_buffer2[8 * NOLACE_FRAME_SIZE]; + int i_subframe, i_sample; + NOLACELayers *layers = &hNoLACE->layers; + +#ifdef DEBUG_NOLACE + static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL; + static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods; + static FILE *f_ffpostcf1, *f_fpostcf2, *f_fpostaf1; + + + FINIT(f_features, "debug/c_features.f32", "wb"); + FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb"); + FINIT(f_xin, "debug/c_x_in.f32", "wb"); + FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb"); + FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb"); + FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb"); + FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb"); + FINIT(f_postaf1, "debug/c_post_af1.f32", "wb"); + FINIT(f_numbits, "debug/c_numbits.f32", "wb"); + FINIT(f_periods, "debug/c_periods.s32", "wb"); + + fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin); + fwrite(numbits, sizeof(*numbits), 2, f_numbits); + fwrite(periods, sizeof(*periods), 4, f_periods); +#endif + + /* pre-emphasis */ + for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++) + { + x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem; + state->preemph_mem = x_in[i_sample]; + } + + /* run feature encoder */ + nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch); +#ifdef DEBUG_NOLACE + fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features); + fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures); + fwrite(output_buffer, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph); +#endif + + /* 1st comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + /* modifies signal in place */ + adacomb_process_frame( + &state->cf1_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_cf1_kernel, + &hNoLACE->layers.nolace_cf1_gain, + &hNoLACE->layers.nolace_cf1_global_gain, + periods[i_subframe], + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + 
NOLACE_CF1_KERNEL_SIZE, + NOLACE_CF1_LEFT_PADDING, + NOLACE_CF1_FILTER_GAIN_A, + NOLACE_CF1_FILTER_GAIN_B, + NOLACE_CF1_LOG_GAIN_LIMIT, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_cf1, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_cf1_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1); +#endif + + /* 2nd comb filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + /* modifies signal in place */ + adacomb_process_frame( + &state->cf2_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_cf2_kernel, + &hNoLACE->layers.nolace_cf2_gain, + &hNoLACE->layers.nolace_cf2_global_gain, + periods[i_subframe], + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_CF2_KERNEL_SIZE, + NOLACE_CF2_LEFT_PADDING, + NOLACE_CF2_FILTER_GAIN_A, + NOLACE_CF2_FILTER_GAIN_B, + NOLACE_CF2_LOG_GAIN_LIMIT, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_cf2, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_cf2_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2); +#endif + + /* final adaptive filtering stage */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + adaconv_process_frame( + &state->af1_state, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af1_kernel, + &hNoLACE->layers.nolace_af1_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF1_IN_CHANNELS, + NOLACE_AF1_OUT_CHANNELS, + NOLACE_AF1_KERNEL_SIZE, + NOLACE_AF1_LEFT_PADDING, + NOLACE_AF1_FILTER_GAIN_A, + NOLACE_AF1_FILTER_GAIN_B, + NOLACE_AF1_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af1, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af1_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1); +#endif + + /* first shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF1_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape1_state, + x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape1_alpha1, + &layers->nolace_tdshape1_alpha2, + NOLACE_TDSHAPE1_FEATURE_DIM, + NOLACE_TDSHAPE1_FRAME_SIZE, + NOLACE_TDSHAPE1_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + 
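+            /* af2: 2-in/2-out adaptive convolution mixing the shaped channel pair in x_buffer2 back into x_buffer1 */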
&state->af2_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af2_kernel, + &hNoLACE->layers.nolace_af2_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF2_IN_CHANNELS, + NOLACE_AF2_OUT_CHANNELS, + NOLACE_AF2_KERNEL_SIZE, + NOLACE_AF2_LEFT_PADDING, + NOLACE_AF2_FILTER_GAIN_A, + NOLACE_AF2_FILTER_GAIN_B, + NOLACE_AF2_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af2, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af2_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + +#ifdef DEBUG_NOLACE + fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2); +#endif + + /* second shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF2_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape2_state, + x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape2_alpha1, + &layers->nolace_tdshape2_alpha2, + NOLACE_TDSHAPE2_FEATURE_DIM, + NOLACE_TDSHAPE2_FRAME_SIZE, + NOLACE_TDSHAPE2_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + &state->af3_state, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af3_kernel, + &hNoLACE->layers.nolace_af3_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF3_IN_CHANNELS, + NOLACE_AF3_OUT_CHANNELS, + NOLACE_AF3_KERNEL_SIZE, + NOLACE_AF3_LEFT_PADDING, + NOLACE_AF3_FILTER_GAIN_A, + NOLACE_AF3_FILTER_GAIN_B, + NOLACE_AF3_SHAPE_GAIN, + hNoLACE->window, + arch); + + compute_generic_conv1d( + &layers->nolace_post_af3, + feature_transform_buffer + i_subframe * NOLACE_COND_DIM, + state->post_af3_state, + feature_buffer + i_subframe * NOLACE_COND_DIM, + NOLACE_COND_DIM, + ACTIVATION_TANH, + arch); + } + + /* update feature buffer */ + OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM); + + /* third shape-mix round */ + for (i_subframe = 0; i_subframe < 4; i_subframe++) + { + celt_assert(NOLACE_AF3_OUT_CHANNELS == 2); + /* modifies second channel in place */ + adashape_process_frame( + &state->tdshape3_state, + x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &layers->nolace_tdshape3_alpha1, + &layers->nolace_tdshape3_alpha2, + NOLACE_TDSHAPE3_FEATURE_DIM, + NOLACE_TDSHAPE3_FRAME_SIZE, + NOLACE_TDSHAPE3_AVG_POOL_K, + arch + ); + + adaconv_process_frame( + &state->af4_state, + x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS, + x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS, + feature_buffer + i_subframe * NOLACE_COND_DIM, + &hNoLACE->layers.nolace_af4_kernel, + 
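+            /* af4: 2-in/1-out adaptive convolution; its single-channel output in x_buffer1 feeds the de-emphasis below */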
&hNoLACE->layers.nolace_af4_gain, + NOLACE_COND_DIM, + NOLACE_FRAME_SIZE, + NOLACE_OVERLAP_SIZE, + NOLACE_AF4_IN_CHANNELS, + NOLACE_AF4_OUT_CHANNELS, + NOLACE_AF4_KERNEL_SIZE, + NOLACE_AF4_LEFT_PADDING, + NOLACE_AF4_FILTER_GAIN_A, + NOLACE_AF4_FILTER_GAIN_B, + NOLACE_AF4_SHAPE_GAIN, + hNoLACE->window, + arch); + + } + + + /* de-emphasis */ + for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++) + { + x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem; + state->deemph_mem = x_out[i_sample]; + } +#ifdef DEBUG_NOLACE + fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph); +#endif +} + +#endif /* #ifndef DISABLE_NOLACE */ + +/* API */ + +void osce_reset(silk_OSCE_struct *hOSCE, int method) +{ + OSCEState *state = &hOSCE->state; + + OPUS_CLEAR(&hOSCE->features, 1); + + switch(method) + { + case OSCE_METHOD_NONE: + break; +#ifndef DISABLE_LACE + case OSCE_METHOD_LACE: + reset_lace_state(&state->lace); + break; +#endif +#ifndef DISABLE_NOLACE + case OSCE_METHOD_NOLACE: + reset_nolace_state(&state->nolace); + break; +#endif + default: + celt_assert(0 && "method not defined"); /* Question: return error code? */ + } + hOSCE->method = method; + hOSCE->features.reset = 2; +} + + +#if 0 +#include +static void print_float_array(FILE *fid, const char *name, const float *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %f\n", name, i, array[i]); + } +} + +static void print_int_array(FILE *fid, const char *name, const int *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %d\n", name, i, array[i]); + } +} + +static void print_int8_array(FILE *fid, const char *name, const opus_int8 *array, int n) +{ + int i; + for (i = 0; i < n; i++) + { + fprintf(fid, "%s[%d]: %d\n", name, i, array[i]); + } +} + +static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer) +{ + int i, n_in, n_out, n_total; + char tmp[256]; + + n_in = layer->nb_inputs; + n_out = layer->nb_outputs; + n_total = n_in * n_out; + + fprintf(fid, "\nprinting layer %s...\n", name); + fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, n_in, name, n_out); + + if (layer->bias !=NULL){} + if (layer->subias !=NULL){} + if (layer->weights !=NULL){} + if (layer->float_weights !=NULL){} + + if (layer->bias != NULL) {sprintf(tmp, "%s.bias", name); print_float_array(fid, tmp, layer->bias, n_out);} + if (layer->subias != NULL) {sprintf(tmp, "%s.subias", name); print_float_array(fid, tmp, layer->subias, n_out);} + if (layer->weights != NULL) {sprintf(tmp, "%s.weights", name); print_int8_array(fid, tmp, layer->weights, n_total);} + if (layer->float_weights != NULL) {sprintf(tmp, "%s.float_weights", name); print_float_array(fid, tmp, layer->float_weights, n_total);} + //if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);} + if (layer->diag != NULL) {sprintf(tmp, "%s.diag", name); print_float_array(fid, tmp, layer->diag, n_in);} + if (layer->scale != NULL) {sprintf(tmp, "%s.scale", name); print_float_array(fid, tmp, layer->scale, n_out);} + +} +#endif + +int osce_load_models(OSCEModel *model, const unsigned char *data, int len) +{ + int ret = 0; + WeightArray *list; + + if (data != NULL && len) + { + /* init from buffer */ + parse_weights(&list, data, len); + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_lace(&model->lace, list);} +#endif + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_nolace(&model->nolace, list);} +#endif + + free(list); + } 
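+    /* no weight blob supplied: fall back to the built-in weight arrays, or fail when USE_WEIGHTS_FILE requires an external weight file */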
else + { +#ifdef USE_WEIGHTS_FILE + return -1; +#else +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);} +#endif + +#ifndef DISABLE_LACE + if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);} +#endif + +#endif /* USE_WEIGHTS_FILE */ + } + + ret = ret ? -1 : 0; + return ret; +} + +void osce_enhance_frame( + OSCEModel *model, /* I OSCE model struct */ + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* I/O Decoded speech */ + opus_int32 num_bits, /* I Size of SILK payload in bits */ + int arch /* I Run-time architecture */ +) +{ + float in_buffer[320]; + float out_buffer[320]; + float features[4 * OSCE_FEATURE_DIM]; + float numbits[2]; + int periods[4]; + int i; + + /* enhancement only implemented for 20 ms frame at 16kHz */ + if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4) + { + osce_reset(&psDec->osce, psDec->osce.method); + return; + } + + osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits); + + /* scale input */ + for (i = 0; i < 320; i++) + { + in_buffer[i] = ((float) xq[i]) * (1.f/32768.f); + } + + switch(psDec->osce.method) + { + case OSCE_METHOD_NONE: + OPUS_COPY(out_buffer, in_buffer, 320); + break; +#ifndef DISABLE_LACE + case OSCE_METHOD_LACE: + lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch); + break; +#endif +#ifndef DISABLE_NOLACE + case OSCE_METHOD_NOLACE: + nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch); + break; +#endif + default: + celt_assert(0 && "method not defined"); + } + +#ifdef ENABLE_OSCE_TRAINING_DATA + int k; + + static FILE *flpc = NULL; + static FILE *fgain = NULL; + static FILE *fltp = NULL; + static FILE *fperiod = NULL; + static FILE *fnoisy16k = NULL; + static FILE* f_numbits = NULL; + static FILE* f_numbits_smooth = NULL; + + if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");} + if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");} + if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");} + if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");} + if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");} + if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");} + if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");} + + fwrite(&num_bits, sizeof(num_bits), 1, f_numbits); + fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth); + + for (k = 0; k < psDec->nb_subfr; k++) + { + float tmp; + int16_t itmp; + float lpc_buffer[16] = {0}; + opus_int16 *A_Q12, *B_Q14; + + (void) num_bits; + (void) arch; + + /* gain */ + tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16); + fwrite(&tmp, sizeof(tmp), 1, fgain); + + /* LPC */ + A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ]; + for (i = 0; i < psDec->LPC_order; i++) + { + lpc_buffer[i] = (float) A_Q12[i] / (1U << 12); + } + fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc); + + /* LTP */ + B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; + for (i = 0; i < 5; i++) + { + tmp = (float) B_Q14[i] / (1U << 14); + fwrite(&tmp, sizeof(tmp), 1, fltp); + } + + /* periods */ + itmp = psDec->indices.signalType == TYPE_VOICED ? 
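+        /* record the pitch lag only for voiced subframes; unvoiced subframes are stored as 0 */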
psDecCtrl->pitchL[ k ] : 0; + fwrite(&itmp, sizeof(itmp), 1, fperiod); + } + + fwrite(xq, psDec->nb_subfr * psDec->subfr_length, sizeof(xq[0]), fnoisy16k); +#endif + + if (psDec->osce.features.reset > 1) + { + OPUS_COPY(out_buffer, in_buffer, 320); + psDec->osce.features.reset --; + } + else if (psDec->osce.features.reset) + { + osce_cross_fade_10ms(out_buffer, in_buffer, 320); + psDec->osce.features.reset = 0; + } + + /* scale output */ + for (i = 0; i < 320; i++) + { + float tmp = 32768.f * out_buffer[i]; + if (tmp > 32767.f) tmp = 32767.f; + if (tmp < -32767.f) tmp = -32767.f; + xq[i] = float2int(tmp); + } + +} + + +#if 0 + +#include + +void lace_feature_net_compare( + const char * prefix, + int num_frames, + LACE* hLACE +) +{ + char in_feature_file[256]; + char out_feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char message[512]; + int i_frame, i_feature; + float mse; + float in_features[4 * LACE_NUM_FEATURES]; + float out_features[4 * LACE_COND_DIM]; + float out_features2[4 * LACE_COND_DIM]; + float numbits[2]; + int periods[4]; + + init_lace(hLACE); + + FILE *f_in_features, *f_out_features, *f_numbits, *f_periods; + + strcpy(in_feature_file, prefix); + strcat(in_feature_file, "_in_features.f32"); + f_in_features = fopen(in_feature_file, "rb"); + if (f_in_features == NULL) + { + sprintf(message, "could not open file %s", in_feature_file); + perror(message); + exit(1); + } + + strcpy(out_feature_file, prefix); + strcat(out_feature_file, "_out_features.f32"); + f_out_features = fopen(out_feature_file, "rb"); + if (f_out_features == NULL) + { + sprintf(message, "could not open file %s", out_feature_file); + perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + for (i_frame = 0; i_frame < num_frames; i_frame ++) + { + if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from in_features\n", i_frame); + exit(1); + } + if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM) + { + fprintf(stderr, "could not read frame %d from out_features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + + lace_feature_net(hLACE, out_features2, in_features, numbits, periods); + + float mse = 0; + for (int i = 0; i < 4 * LACE_COND_DIM; i ++) + { + mse += pow(out_features[i] - out_features2[i], 2); + } + mse /= (4 * LACE_COND_DIM); + printf("rmse: %f\n", sqrt(mse)); + + } + + fclose(f_in_features); + fclose(f_out_features); + fclose(f_numbits); + fclose(f_periods); +} + + +void lace_demo( + char *prefix, + char *output +) +{ + char feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char x_in_file[256]; + char message[512]; + int i_frame; + float mse; + float features[4 * LACE_NUM_FEATURES]; + float numbits[2]; + int 
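+    /* lace_demo: offline test driver that reads feature/period/numbits test vectors, enhances 20 ms frames and writes 16-bit PCM */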
periods[4]; + float x_in[4 * LACE_FRAME_SIZE]; + int16_t x_out[4 * LACE_FRAME_SIZE]; + float buffer[4 * LACE_FRAME_SIZE]; + LACE hLACE; + int frame_counter = 0; + FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out; + + init_lace(&hLACE); + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "rb"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "rb"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + f_x_out = fopen(output, "wb"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", output); + perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + printf("processing %s\n", prefix); + + while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE) + { + printf("\rframe: %d", frame_counter++); + if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + lace_process_20ms_frame( + &hLACE, + buffer, + x_in, + features, + numbits, + periods + ); + + for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++) + { + float tmp = (1UL<<15) * buffer[n]; + tmp = CLIP(tmp, -32768, 32767); + x_out[n] = (int16_t) round(tmp); + } + + fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out); + } + printf("\ndone!\n"); + + fclose(f_features); + fclose(f_numbits); + fclose(f_periods); + fclose(f_x_in); + fclose(f_x_out); +} + +void nolace_demo( + char *prefix, + char *output +) +{ + char feature_file[256]; + char numbits_file[256]; + char periods_file[256]; + char x_in_file[256]; + char message[512]; + int i_frame; + float mse; + float features[4 * LACE_NUM_FEATURES]; + float numbits[2]; + int periods[4]; + float x_in[4 * LACE_FRAME_SIZE]; + int16_t x_out[4 * LACE_FRAME_SIZE]; + float buffer[4 * LACE_FRAME_SIZE]; + NoLACE hNoLACE; + int frame_counter = 0; + FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out; + + init_nolace(&hNoLACE); + + strcpy(feature_file, prefix); + strcat(feature_file, "_features.f32"); + f_features = fopen(feature_file, "rb"); + if (f_features == NULL) + { + sprintf(message, "could not open file %s", feature_file); + perror(message); + exit(1); + } + + strcpy(x_in_file, prefix); + strcat(x_in_file, "_x_in.f32"); + f_x_in = fopen(x_in_file, "rb"); + if (f_x_in == NULL) + { + sprintf(message, "could not open file %s", x_in_file); + perror(message); + exit(1); + } + + f_x_out = fopen(output, "wb"); + if (f_x_out == NULL) + { + sprintf(message, "could not open file %s", output); + 
perror(message); + exit(1); + } + + strcpy(periods_file, prefix); + strcat(periods_file, "_periods.s32"); + f_periods = fopen(periods_file, "rb"); + if (f_periods == NULL) + { + sprintf(message, "could not open file %s", periods_file); + perror(message); + exit(1); + } + + strcpy(numbits_file, prefix); + strcat(numbits_file, "_numbits.f32"); + f_numbits = fopen(numbits_file, "rb"); + if (f_numbits == NULL) + { + sprintf(message, "could not open file %s", numbits_file); + perror(message); + exit(1); + } + + printf("processing %s\n", prefix); + + while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE) + { + printf("\rframe: %d", frame_counter++); + if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES) + { + fprintf(stderr, "could not read frame %d from features\n", i_frame); + exit(1); + } + if(fread(periods, sizeof(int), 4, f_periods) != 4) + { + fprintf(stderr, "could not read frame %d from periods\n", i_frame); + exit(1); + } + if(fread(numbits, sizeof(float), 2, f_numbits) != 2) + { + fprintf(stderr, "could not read frame %d from numbits\n", i_frame); + exit(1); + } + + nolace_process_20ms_frame( + &hNoLACE, + buffer, + x_in, + features, + numbits, + periods + ); + + for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++) + { + float tmp = (1UL<<15) * buffer[n]; + tmp = CLIP(tmp, -32768, 32767); + x_out[n] = (int16_t) round(tmp); + } + + fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out); + } + printf("\ndone!\n"); + + fclose(f_features); + fclose(f_numbits); + fclose(f_periods); + fclose(f_x_in); + fclose(f_x_out); +} + + +int main() +{ +#if 0 + LACE hLACE; + + lace_feature_net_compare("testvec2/lace", 5, &hLACE); + + lace_demo("testdata/test9", "out_lace_c_9kbps.pcm"); + lace_demo("testdata/test6", "out_lace_c_6kbps.pcm"); +#endif + nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm"); + +} +#endif + +/*gcc -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/ diff --git a/dnn/osce.h b/dnn/osce.h new file mode 100644 index 00000000..3dd8b7c0 --- /dev/null +++ b/dnn/osce.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_H +#define OSCE_H + + +#include "opus_types.h" +/*#include "osce_config.h"*/ +#ifndef DISABLE_LACE +#include "lace_data.h" +#endif +#ifndef DISABLE_NOLACE +#include "nolace_data.h" +#endif +#include "nndsp.h" +#include "nnet.h" +#include "osce_structs.h" +#include "structs.h" + +#define OSCE_METHOD_NONE 0 +#ifndef DISABLE_LACE +#define OSCE_METHOD_LACE 1 +#endif +#ifndef DISABLE_NOLACE +#define OSCE_METHOD_NOLACE 2 +#endif + +#if !defined(DISABLE_NOLACE) +#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE +#elif !defined(DISABLE_LACE) +#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE +#else +#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE +#endif + + + + +/* API */ + + +void osce_enhance_frame( + OSCEModel *model, /* I OSCE model struct */ + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* I/O Decoded speech */ + opus_int32 num_bits, /* I Size of SILK payload in bits */ + int arch /* I Run-time architecture */ +); + + +int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len); +void osce_reset(silk_OSCE_struct *hOSCE, int method); + + +#endif diff --git a/dnn/osce_config.h b/dnn/osce_config.h new file mode 100644 index 00000000..de94fe2f --- /dev/null +++ b/dnn/osce_config.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_CONFIG +#define OSCE_CONFIG + +#define OSCE_MAX_RNN_NEURONS 256 + +#define OSCE_FEATURES_MAX_HISTORY 350 +#define OSCE_FEATURE_DIM 93 +#define OSCE_MAX_FEATURE_FRAMES 4 + +#define OSCE_CLEAN_SPEC_NUM_BANDS 64 +#define OSCE_NOISY_SPEC_NUM_BANDS 18 + +#define OSCE_NO_PITCH_VALUE 7 + +#define OSCE_PREEMPH 0.85f + +#define OSCE_PITCH_HANGOVER 8 + +#define OSCE_CLEAN_SPEC_START 0 +#define OSCE_CLEAN_SPEC_LENGTH 64 + +#define OSCE_NOISY_CEPSTRUM_START 64 +#define OSCE_NOISY_CEPSTRUM_LENGTH 18 + +#define OSCE_ACORR_START 82 +#define OSCE_ACORR_LENGTH 5 + +#define OSCE_LTP_START 87 +#define OSCE_LTP_LENGTH 5 + +#define OSCE_LOG_GAIN_START 92 +#define OSCE_LOG_GAIN_LENGTH 1 + + +#endif \ No newline at end of file diff --git a/dnn/osce_features.c b/dnn/osce_features.c new file mode 100644 index 00000000..0466f132 --- /dev/null +++ b/dnn/osce_features.c @@ -0,0 +1,454 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define OSCE_SPEC_WINDOW_SIZE 320 +#define OSCE_SPEC_NUM_FREQS 161 + + +/*DEBUG*/ +/*#define WRITE_FEATURES*/ +/*#define DEBUG_PRING*/ +/*******/ + +#include "stack_alloc.h" +#include "osce_features.h" +#include "kiss_fft.h" +#include "os_support.h" +#include "osce.h" +#include "freq.h" + + +#if defined(WRITE_FEATURES) || defined(DEBUG_PRING) +#include +#include +#endif + +static const int center_bins_clean[64] = { + 0, 2, 5, 8, 10, 12, 15, 18, + 20, 22, 25, 28, 30, 33, 35, 38, + 40, 42, 45, 48, 50, 52, 55, 58, + 60, 62, 65, 68, 70, 73, 75, 78, + 80, 82, 85, 88, 90, 92, 95, 98, + 100, 102, 105, 108, 110, 112, 115, 118, + 120, 122, 125, 128, 130, 132, 135, 138, + 140, 142, 145, 148, 150, 152, 155, 160 +}; + +static const int center_bins_noisy[18] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 40, 48, 56, 64, 80, 96, 112, + 136, 160 +}; + +static const float band_weights_clean[64] = { + 0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f, + 0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f +}; + +static const float band_weights_noisy[18] = { + 0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, + 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, + 0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f, + 0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f, + 0.041666666667f, 0.080000000000f +}; + +static float osce_window[OSCE_SPEC_WINDOW_SIZE] = { + 0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f, + 0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f, + 0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f, + 0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f, + 0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f, + 0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f, + 0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f, + 0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f, + 0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f, + 0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f, + 0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f, + 0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f, + 
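+    /* osce_window[n] = sin(M_PI * (n + 0.5) / 320): symmetric analysis window, also used for the 10 ms cross-fade in osce_cross_fade_10ms() */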
0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f, + 0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f, + 0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f, + 0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f, + 0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f, + 0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f, + 0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f, + 0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f, + 0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f, + 0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f, + 0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f, + 0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f, + 0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f, + 0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f, + 0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f, + 0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f, + 0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f, + 0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f, + 0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f, + 0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f, + 0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f, + 0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f, + 0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f, + 0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f, + 0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f, + 0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f, + 0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f, + 0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f, + 0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f, + 0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f, + 0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f, + 0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f, + 0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f, + 0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f, + 0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f, + 0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f, + 0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f, + 0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f, + 0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f, + 0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f, + 0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 
0.518306898929f, + 0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f, + 0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f, + 0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f, + 0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f, + 0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f, + 0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f, + 0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f, + 0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f, + 0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f, + 0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f, + 0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f +}; + +static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands) +{ + int b, i; + float frac; + + celt_assert(x_in != x_out) + + x_out[0] = 0; + for (b = 0; b < num_bands - 1; b++) + { + x_out[b+1] = 0; + for (i = center_bins[b]; i < center_bins[b+1]; i++) + { + frac = (float) (center_bins[b+1] - i) / (center_bins[b+1] - center_bins[b]); + x_out[b] += band_weights[b] * frac * x_in[i]; + x_out[b+1] += band_weights[b+1] * (1 - frac) * x_in[i]; + + } + } + x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]]; +#ifdef DEBUG_PRINT + for (b = 0; b < num_bands; b++) + { + printf("band[%d]: %f\n", b, x_out[b]); + } +#endif +} + + +static void mag_spec_320_onesided(float *out, float *in) +{ + celt_assert(OSCE_SPEC_WINDOW_SIZE == 320); + kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE]; + int k; + forward_transform(buffer, in); + + for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++) + { + out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i); +#ifdef DEBUG_PRINT + printf("magspec[%d]: %f\n", k, out[k]); +#endif + } +} + + +static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order) +{ + float buffer[OSCE_SPEC_WINDOW_SIZE] = {0}; + int i; + + /* zero expansion */ + buffer[0] = 1; + for (i = 0; i < lpc_order; i++) + { + buffer[i+1] = - (float)a_q12[i] / (1U << 12); + } + + /* calculate and invert magnitude spectrum */ + mag_spec_320_onesided(buffer, buffer); + + for (i = 0; i < OSCE_SPEC_NUM_FREQS; i++) + { + buffer[i] = 1.f / (buffer[i] + 1e-9f); + } + + /* apply filterbank */ + apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS); + + /* log and scaling */ + for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++) + { + spec[i] = 0.3f * log(spec[i] + 1e-9f); + } +} + +static void calculate_cepstrum(float *cepstrum, float *signal) +{ + float buffer[OSCE_SPEC_WINDOW_SIZE]; + float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3]; + int n; + + celt_assert(cepstrum != signal) + + for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++) + { + buffer[n] = osce_window[n] * signal[n]; + } + + /* calculate magnitude spectrum */ + mag_spec_320_onesided(buffer, buffer); + + /* accumulate bands */ + apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS); + + /* log domain conversion */ + for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++) + { + spec[n] = log(spec[n] + 1e-9f); +#ifdef DEBUG_PRINT + printf("logspec[%d]: %f\n", n, spec[n]); +#endif + } + + /* 
DCT-II (orthonormal) */ + celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS); + dct(cepstrum, spec); +} + +static void calculate_acorr(float *acorr, float *signal, int lag) +{ + int n, k; + celt_assert(acorr != signal) + + for (k = -2; k <= 2; k++) + { + acorr[k+2] = 0; + float xx = 0; + float xy = 0; + float yy = 0; + for (n = 0; n < 80; n++) + { + /* obviously wasteful -> fix later */ + xx += signal[n] * signal[n]; + yy += signal[n - lag + k] * signal[n - lag + k]; + xy += signal[n] * signal[n - lag + k]; + } + acorr[k+2] = xy / sqrt(xx * yy + 1e-9f); + } +} + +static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type) +{ + int new_lag; + +#ifdef OSCE_HANGOVER_BUGFIX +#define TESTBIT 1 +#else +#define TESTBIT 0 +#endif + + /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */ + if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT) + /* enter hangover */ + { + new_lag = OSCE_NO_PITCH_VALUE; + if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER) + { + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + } + } + else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT) + /* continue hangover */ + { + new_lag = psFeatures->last_lag; + psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER; + } + else if (type != TYPE_VOICED) + /* unvoiced frame after hangover */ + { + new_lag = OSCE_NO_PITCH_VALUE; + psFeatures->pitch_hangover_count = 0; + } + else + /* voiced frame: update last_lag */ + { + new_lag = lag; + psFeatures->last_lag = lag; + psFeatures->pitch_hangover_count = 0; + } + + /* buffer update */ + psFeatures->last_type = type; + + /* with the current setup this should never happen (but who knows...) 
*/ + celt_assert(new_lag) + + return new_lag; +} + +void osce_calculate_features( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + float *features, /* O input features */ + float *numbits, /* O numbits and smoothed numbits */ + int *periods, /* O pitch lags on subframe basis */ + const opus_int16 xq[], /* I Decoded speech */ + opus_int32 num_bits /* I Size of SILK payload in bits */ +) +{ + int num_subframes, num_samples; + float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80]; + float *frame, *pfeatures; + OSCEFeatureState *psFeatures; + int i, n, k; +#ifdef WRITE_FEATURES + static FILE *f_feat = NULL; + if (f_feat == NULL) + { + f_feat = fopen("assembled_features.f32", "wb"); + } +#endif + + /*OPUS_CLEAR(buffer, 1);*/ + memset(buffer, 0, sizeof(buffer)); + + num_subframes = psDec->nb_subfr; + num_samples = num_subframes * 80; + psFeatures = &psDec->osce.features; + + /* smooth bit count */ + psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits; + numbits[0] = num_bits; +#ifdef OSCE_NUMBITS_BUGFIX + numbits[1] = psFeatures->numbits_smooth; +#else + numbits[1] = num_bits; +#endif + + for (n = 0; n < num_samples; n++) + { + buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15); + } + OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY); + + for (k = 0; k < num_subframes; k++) + { + pfeatures = features + k * OSCE_FEATURE_DIM; + frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80]; + memset(pfeatures, 0, OSCE_FEATURE_DIM); /* precaution */ + + /* clean spectrum from lpcs (update every other frame) */ + if (k % 2 == 0) + { + calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order); + } + else + { + OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH); + } + + /* noisy cepstrum from signal (update every other frame) */ + if (k % 2 == 0) + { + calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160); + } + else + { + OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH); + } + + /* pitch hangover and zero value replacement */ + periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType); + + /* auto-correlation around pitch lag */ + calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]); + + /* ltp */ + celt_assert(OSCE_LTP_LENGTH == LTP_ORDER) + for (i = 0; i < OSCE_LTP_LENGTH; i++) + { + pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14); + } + + /* frame gain */ + pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f); + +#ifdef WRITE_FEATURES + fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat); +#endif + } + + /* buffer update */ + OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY); +} + + +void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length) +{ + int i; + celt_assert(length >= 160); + + for (i = 0; i < 160; i++) + { + x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i]; + } + + +} diff --git a/dnn/osce_features.h b/dnn/osce_features.h new file mode 100644 index 00000000..91e95f1e --- /dev/null +++ b/dnn/osce_features.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in 
source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef OSCE_FEATURES_H +#define OSCE_FEATURES_H + + +#include "structs.h" +#include "opus_types.h" + +#define OSCE_NUMBITS_BUGFIX + +void osce_calculate_features( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + float *features, /* O input features */ + float *numbits, /* O numbits and smoothed numbits */ + int *periods, /* O pitch lags on subframe basis */ + const opus_int16 xq[], /* I Decoded speech */ + opus_int32 num_bits /* I Size of SILK payload in bits */ +); + + +void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length); + +#endif \ No newline at end of file diff --git a/dnn/osce_structs.h b/dnn/osce_structs.h new file mode 100644 index 00000000..a4350be2 --- /dev/null +++ b/dnn/osce_structs.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OSCE_STRUCTS_H +#define OSCE_STRUCTS_H + +#include "opus_types.h" +#include "osce_config.h" +#ifndef DISABLE_LACE +#include "lace_data.h" +#endif +#ifndef DISABLE_NOLACE +#include "nolace_data.h" +#endif +#include "nndsp.h" +#include "nnet.h" + +/* feature calculation */ + +typedef struct { + float numbits_smooth; + int pitch_hangover_count; + int last_lag; + int last_type; + float signal_history[OSCE_FEATURES_MAX_HISTORY]; + int reset; +} OSCEFeatureState; + + +#ifndef DISABLE_LACE +/* LACE */ +typedef struct { + float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE]; + float feature_net_gru_state[LACE_COND_DIM]; + AdaCombState cf1_state; + AdaCombState cf2_state; + AdaConvState af1_state; + float preemph_mem; + float deemph_mem; +} LACEState; + +typedef struct +{ + LACELayers layers; + float window[LACE_OVERLAP_SIZE]; +} LACE; + +#endif /* #ifndef DISABLE_LACE */ + + +#ifndef DISABLE_NOLACE +/* NoLACE */ +typedef struct { + float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE]; + float feature_net_gru_state[NOLACE_COND_DIM]; + float post_cf1_state[NOLACE_COND_DIM]; + float post_cf2_state[NOLACE_COND_DIM]; + float post_af1_state[NOLACE_COND_DIM]; + float post_af2_state[NOLACE_COND_DIM]; + float post_af3_state[NOLACE_COND_DIM]; + AdaCombState cf1_state; + AdaCombState cf2_state; + AdaConvState af1_state; + AdaConvState af2_state; + AdaConvState af3_state; + AdaConvState af4_state; + AdaShapeState tdshape1_state; + AdaShapeState tdshape2_state; + AdaShapeState tdshape3_state; + float preemph_mem; + float deemph_mem; +} NoLACEState; + +typedef struct { + NOLACELayers layers; + float window[LACE_OVERLAP_SIZE]; +} NoLACE; + +#endif /* #ifndef DISABLE_NOLACE */ + +/* OSCEModel */ +typedef struct { +#ifndef DISABLE_LACE + LACE lace; +#endif +#ifndef DISABLE_NOLACE + NoLACE nolace; +#endif +} OSCEModel; + +typedef union { +#ifndef DISABLE_LACE + LACEState lace; +#endif +#ifndef DISABLE_NOLACE + NoLACEState nolace; +#endif +} OSCEState; + +#endif \ No newline at end of file diff --git a/dnn/torch/osce/create_testvectors.py b/dnn/torch/osce/create_testvectors.py new file mode 100644 index 00000000..a037d0db --- /dev/null +++ b/dnn/torch/osce/create_testvectors.py @@ -0,0 +1,165 @@ +""" +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +""" + +import os +import argparse + +import torch +import numpy as np + +from models import model_dict +from utils import endoscopy + +parser = argparse.ArgumentParser() + +parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and nolace_checkpoint.pth"') +parser.add_argument('output_folder', type=str, help='output folder for testvectors') +parser.add_argument('--debug', action='store_true', help='add debug output to output folder') + + +def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False): + feature_dim = adaconv.feature_dim + in_channels = adaconv.in_channels + out_channels = adaconv.out_channels + frame_size = adaconv.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, in_channels, num_frames * frame_size)) + + x_out = adaconv(x_in, features, debug=debug) + + features = features[0].detach().numpy() + x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy() + x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + x_out.tofile(prefix + '_x_out.f32') + +def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False): + feature_dim = adacomb.feature_dim + in_channels = 1 + frame_size = adacomb.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, in_channels, num_frames * frame_size)) + p_in = torch.randint(adacomb.kernel_size, 250, (1, num_frames)) + + x_out = adacomb(x_in, features, p_in, debug=debug) + + features = features[0].detach().numpy() + x_in = x_in[0].permute(1, 0).detach().numpy() + p_in = p_in[0].detach().numpy().astype(np.int32) + x_out = x_out[0].permute(1, 0).detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + p_in.tofile(prefix + '_p_in.s32') + x_out.tofile(prefix + '_x_out.f32') + +def create_adashape_testvector(prefix, adashape, num_frames): + feature_dim = adashape.feature_dim + frame_size = adashape.frame_size + + features = torch.randn((1, num_frames, feature_dim)) + x_in = torch.randn((1, 1, num_frames * frame_size)) + + x_out = adashape(x_in, features) + + features = features[0].detach().numpy() + x_in = x_in.flatten().detach().numpy() + x_out = x_out.flatten().detach().numpy() + + features.tofile(prefix + '_features.f32') + x_in.tofile(prefix + '_x_in.f32') + x_out.tofile(prefix + '_x_out.f32') + +def create_feature_net_testvector(prefix, model, num_frames): + num_features = model.num_features + num_subframes = 4 * num_frames + + input_features = torch.randn((1, num_subframes, num_features)) + periods = torch.randint(32, 300, (1, num_subframes)) + numbits = model.numbits_range[0] + torch.rand((1, num_frames, 2)) * (model.numbits_range[1] - model.numbits_range[0]) + + + pembed = model.pitch_embedding(periods) + nembed = 
torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1) + full_features = torch.cat((input_features, pembed, nembed), dim=-1) + + cf = model.feature_net(full_features) + + input_features.float().numpy().tofile(prefix + "_in_features.f32") + periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32") + numbits.float().numpy().tofile(prefix + "_numbits.f32") + full_features.detach().numpy().tofile(prefix + "_full_features.f32") + cf.detach().numpy().tofile(prefix + "_out_features.f32") + + + +if __name__ == "__main__": + args = parser.parse_args() + + os.makedirs(args.output_folder, exist_ok=True) + + lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu') + nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu') + + lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs']) + nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs']) + + lace.load_state_dict(lace_checkpoint['state_dict']) + nolace.load_state_dict(nolace_checkpoint['state_dict']) + + if args.debug: + endoscopy.init(args.output_folder) + + # lace af1, 1 input channel, 1 output channel + create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug) + + # nolace af1, 1 input channel, 2 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug) + + # nolace af4, 2 input channel, 1 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug) + + # nolace af2, 2 input channel, 2 output channels + create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug) + + # lace cf1 + create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug) + + # nolace tdshape1 + create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5) + + # lace feature net + create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5) + + if args.debug: + endoscopy.close() diff --git a/dnn/torch/osce/data/silk_enhancement_set.py b/dnn/torch/osce/data/silk_enhancement_set.py index 65e97508..fd18c4de 100644 --- a/dnn/torch/osce/data/silk_enhancement_set.py +++ b/dnn/torch/osce/data/silk_enhancement_set.py @@ -49,7 +49,6 @@ class SilkEnhancementSet(Dataset): num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False, ): @@ -73,7 +72,6 @@ class SilkEnhancementSet(Dataset): self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32) self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32) self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32) - self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32) self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16) self.clean_signal = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16) @@ -86,7 +84,6 @@ class SilkEnhancementSet(Dataset): num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, - add_offset, add_double_lag_acorr) self.history_len = 700 if add_double_lag_acorr else 350 @@ -120,8 +117,7 @@ class SilkEnhancementSet(Dataset): self.lpcs[frame_start : frame_stop], self.gains[frame_start : frame_stop], 
self.ltps[frame_start : frame_stop], - self.periods[frame_start : frame_stop], - self.offsets[frame_start : frame_stop] + self.periods[frame_start : frame_stop] ) if self.preemph > 0: diff --git a/dnn/torch/osce/export_model_weights.py b/dnn/torch/osce/export_model_weights.py index 8b95aca9..f94431d3 100644 --- a/dnn/torch/osce/export_model_weights.py +++ b/dnn/torch/osce/export_model_weights.py @@ -40,10 +40,53 @@ import wexchange.torch from wexchange.torch import dump_torch_weights from models import model_dict +from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d +from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d +from utils.layers.td_shaper import TDShaper +from wexchange.torch import dump_torch_weights + + + parser = argparse.ArgumentParser() parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint') parser.add_argument('output_dir', type=str, help='output folder') +parser.add_argument('--quantize', action="store_true", help='quantization according to schedule') + + +schedules = { + 'nolace': [ + ('pitch_embedding', dict()), + ('feature_net.conv1', dict()), + ('feature_net.conv2', dict(quantize=True, scale=None)), + ('feature_net.tconv', dict(quantize=True, scale=None)), + ('feature_net.gru', dict()), + ('cf1', dict(quantize=True, scale=None)), + ('cf2', dict(quantize=True, scale=None)), + ('af1', dict(quantize=True, scale=None)), + ('tdshape1', dict()), + ('tdshape2', dict()), + ('tdshape3', dict()), + ('af2', dict(quantize=True, scale=None)), + ('af3', dict(quantize=True, scale=None)), + ('af4', dict(quantize=True, scale=None)), + ('post_cf1', dict(quantize=True, scale=None)), + ('post_cf2', dict(quantize=True, scale=None)), + ('post_af1', dict(quantize=True, scale=None)), + ('post_af2', dict(quantize=True, scale=None)), + ('post_af3', dict(quantize=True, scale=None)) + ], + 'lace' : [ + ('pitch_embedding', dict()), + ('feature_net.conv1', dict()), + ('feature_net.conv2', dict(quantize=True, scale=None)), + ('feature_net.tconv', dict(quantize=True, scale=None)), + ('feature_net.gru', dict()), + ('cf1', dict(quantize=True, scale=None)), + ('cf2', dict(quantize=True, scale=None)), + ('af1', dict(quantize=True, scale=None)) + ] +} # auxiliary functions @@ -60,8 +103,28 @@ def sha1(filename): return sha1.hexdigest() +def osce_dump_generic(writer, name, module): + if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \ + or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding) \ + or isinstance(module, LimitedAdaptiveConv1d) or isinstance(module, LimitedAdaptiveComb1d) \ + or isinstance(module, TDShaper) or isinstance(module, torch.nn.GRU): + dump_torch_weights(writer, module, name=name, verbose=True) + else: + for child_name, child in module.named_children(): + osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child) + + def export_name(name): - return name.replace('.', '_') + name = name.replace('.', '_') + name = name.replace('feature_net', 'fnet') + return name + +def osce_scheduled_dump(writer, prefix, model, schedule): + if not prefix.endswith('_'): + prefix += '_' + + for name, kwargs in schedule: + dump_torch_weights(writer, model.get_submodule(name), prefix + export_name(name), **kwargs, verbose=True) if __name__ == "__main__": args = parser.parse_args() @@ -76,22 +139,34 @@ if __name__ == "__main__": # create model and load weights checkpoint = torch.load(checkpoint_path, map_location='cpu') model = 
model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs']) + model.load_state_dict(checkpoint['state_dict']) # CWriter model_name = checkpoint['setup']['model']['name'] - cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper()) - - # dump numbits_embedding parameters by hand - numbits_embedding = model.get_submodule('numbits_embedding') - weights = next(iter(numbits_embedding.parameters())) - for i, c in enumerate(weights): - cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f") - cwriter.header.write("\n\n") + cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True) + + # Add custom includes and global parameters + cwriter.header.write(f''' +#define {model_name.upper()}_PREEMPH {model.preemph}f +#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE} +#define {model_name.upper()}_OVERLAP_SIZE 40 +#define {model_name.upper()}_NUM_FEATURES {model.num_features} +#define {model_name.upper()}_PITCH_MAX {model.pitch_max} +#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim} +#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]} +#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]} +#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim} +#define {model_name.upper()}_COND_DIM {model.cond_dim} +#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim} +''') + + for i, s in enumerate(model.numbits_embedding.scale_factors): + cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n") # dump layers - for name, module in model.named_modules(): - if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \ - or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding): - dump_torch_weights(cwriter, module, name=export_name(name), verbose=True) + if model_name in schedules and args.quantize: + osce_scheduled_dump(cwriter, model_name, model, schedules[model_name]) + else: + osce_dump_generic(cwriter, model_name, model) cwriter.close() diff --git a/dnn/torch/osce/models/lace.py b/dnn/torch/osce/models/lace.py index a11dfc41..58293de4 100644 --- a/dnn/torch/osce/models/lace.py +++ b/dnn/torch/osce/models/lace.py @@ -96,7 +96,7 @@ class LACE(NNSBase): self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) # spectral shaping - self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) + self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) def flop_count(self, rate=16000, verbose=False): diff --git a/dnn/torch/osce/models/no_lace.py b/dnn/torch/osce/models/no_lace.py index 2709274c..0e0fb1b3 100644 --- a/dnn/torch/osce/models/no_lace.py +++ b/dnn/torch/osce/models/no_lace.py @@ -96,8 +96,8 @@ class NoLACE(NNSBase): # comb filters left_pad = self.kernel_size // 2 right_pad = 
self.kernel_size - 1 - left_pad - self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) - self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) + self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) + self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p) # spectral shaping self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p) diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py index b146240e..3bb6fa07 100644 --- a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py +++ b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py @@ -41,13 +41,13 @@ class LimitedAdaptiveComb1d(nn.Module): feature_dim, frame_size=160, overlap_size=40, - use_bias=True, padding=None, max_lag=256, name=None, gain_limit_db=10, global_gain_limits_db=[-6, 6], - norm_p=2): + norm_p=2, + **kwargs): """ Parameters: @@ -87,7 +87,6 @@ class LimitedAdaptiveComb1d(nn.Module): self.kernel_size = kernel_size self.frame_size = frame_size self.overlap_size = overlap_size - self.use_bias = use_bias self.max_lag = max_lag self.limit_db = gain_limit_db self.norm_p = norm_p @@ -101,8 +100,6 @@ class LimitedAdaptiveComb1d(nn.Module): # network for generating convolution weights self.conv_kernel = nn.Linear(feature_dim, kernel_size) - if self.use_bias: - self.conv_bias = nn.Linear(feature_dim,1) # comb filter gain self.filter_gain = nn.Linear(feature_dim, 1) @@ -154,9 +151,6 @@ class LimitedAdaptiveComb1d(nn.Module): conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size)) conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True)) - if self.use_bias: - conv_biases = self.conv_bias(features).permute(0, 2, 1) - conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit) # calculate gains global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b) @@ -190,10 +184,6 @@ class LimitedAdaptiveComb1d(nn.Module): new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1) - - if self.use_bias: - new_chunk = new_chunk + conv_biases[:, :, i : i + 1] - offset = self.max_lag + self.padding[0] new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + 
overlap_size]) @@ -223,10 +213,6 @@ class LimitedAdaptiveComb1d(nn.Module): count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate) count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels - # bias computation - if self.use_bias: - count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead) - # a0 computation count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py index 073ea1b1..a17b0e9b 100644 --- a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py +++ b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py @@ -46,12 +46,12 @@ class LimitedAdaptiveConv1d(nn.Module): feature_dim, frame_size=160, overlap_size=40, - use_bias=True, padding=None, name=None, gain_limits_db=[-6, 6], shape_gain_db=0, - norm_p=2): + norm_p=2, + **kwargs): """ Parameters: @@ -90,7 +90,6 @@ class LimitedAdaptiveConv1d(nn.Module): self.kernel_size = kernel_size self.frame_size = frame_size self.overlap_size = overlap_size - self.use_bias = use_bias self.gain_limits_db = gain_limits_db self.shape_gain_db = shape_gain_db self.norm_p = norm_p @@ -104,9 +103,6 @@ class LimitedAdaptiveConv1d(nn.Module): # network for generating convolution weights self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size) - if self.use_bias: - self.conv_bias = nn.Linear(feature_dim, out_channels) - self.shape_gain = min(1, 10**(shape_gain_db / 20)) self.filter_gain = nn.Linear(feature_dim, out_channels) @@ -133,10 +129,6 @@ class LimitedAdaptiveConv1d(nn.Module): count += 2 * (frame_rate * self.feature_dim * self.kernel_size) count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate) - # bias computation - if self.use_bias: - count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead) - # gain computation count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels @@ -183,9 +175,6 @@ class LimitedAdaptiveConv1d(nn.Module): conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels - if self.use_bias: - conv_biases = self.conv_bias(features).permute(0, 2, 1) - # calculate gains conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b) if debug and batch_size == 1: diff --git a/dnn/torch/osce/utils/silk_features.py b/dnn/torch/osce/utils/silk_features.py index 2997ef5f..8c5dbf05 100644 --- a/dnn/torch/osce/utils/silk_features.py +++ b/dnn/torch/osce/utils/silk_features.py @@ -33,6 +33,7 @@ import numpy as np import torch import scipy +import scipy.signal from utils.pitch import hangover, calculate_acorr_window from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc @@ -59,7 +60,6 @@ def silk_feature_factory(no_pitch_value=256, num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False ): @@ -67,7 +67,7 @@ def silk_feature_factory(no_pitch_value=256, fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True) fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True) - def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets): + def 
create_features(noisy, noisy_history, lpcs, gains, ltps, periods): periods = periods.copy() @@ -89,10 +89,7 @@ def silk_feature_factory(no_pitch_value=256, acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr) - if add_offset: - features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32) - else: - features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32) + features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32) return features, periods.astype(np.int64) @@ -110,7 +107,6 @@ def load_inference_data(path, num_bands_noisy_spec=18, noisy_spec_scale='opus', noisy_apply_dct=True, - add_offset=False, add_double_lag_acorr=False, **kwargs): @@ -122,13 +118,12 @@ def load_inference_data(path, periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16) num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1) num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1) - offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32) # load signal, add back delay and pre-emphasize signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15) signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32) - create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr) + create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr) num_frames = min((len(signal) // 320) * 4, len(lpcs)) signal = signal[: num_frames * 80] @@ -138,11 +133,10 @@ def load_inference_data(path, periods = periods[: num_frames] num_bits = num_bits[: num_frames // 4] num_bits_smooth = num_bits[: num_frames // 4] - offsets = offsets[: num_frames] numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0) - features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets) + features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods) if preemph > 0: signal[1:] -= preemph * signal[:-1] diff --git a/dnn/torch/osce/utils/spec.py b/dnn/torch/osce/utils/spec.py index 01b923ae..59f53538 100644 --- a/dnn/torch/osce/utils/spec.py +++ b/dnn/torch/osce/utils/spec.py @@ -30,6 +30,7 @@ import math as m import numpy as np import scipy +import scipy.fftpack import torch def erb(f): diff --git a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py index 36050881..2745f337 100644 --- a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py +++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py @@ -38,7 +38,8 @@ class CWriter: create_state_struct=False, enable_binary_blob=True, model_struct_name="Model", - nnet_header="nnet.h"): + nnet_header="nnet.h", + add_typedef=False): """ Writer class for creating souce and header files for weight exports to C @@ -73,6 +74,7 @@ class CWriter: self.enable_binary_blob = 
enable_binary_blob self.create_state_struct = create_state_struct self.model_struct_name = model_struct_name + self.add_typedef = add_typedef # for binary blob format, format is key=, value=(, ) self.layer_dict = OrderedDict() @@ -119,11 +121,17 @@ f""" # create model type if self.enable_binary_blob: - self.header.write(f"\nstruct {self.model_struct_name} {{") + if self.add_typedef: + self.header.write(f"\ntypedef struct {{") + else: + self.header.write(f"\nstruct {self.model_struct_name} {{") for name, data in self.layer_dict.items(): layer_type = data[0] self.header.write(f"\n {layer_type} {name};") - self.header.write(f"\n}};\n") + if self.add_typedef: + self.header.write(f"\n}} {self.model_struct_name};\n") + else: + self.header.write(f"\n}};\n") init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)" self.header.write(f"\n{init_prototype};\n") diff --git a/dnn/torch/weight-exchange/wexchange/torch/__init__.py b/dnn/torch/weight-exchange/wexchange/torch/__init__.py index 98c96fad..8245566d 100644 --- a/dnn/torch/weight-exchange/wexchange/torch/__init__.py +++ b/dnn/torch/weight-exchange/wexchange/torch/__init__.py @@ -34,3 +34,4 @@ from .torch import dump_torch_gru_weights, load_torch_gru_weights from .torch import dump_torch_grucell_weights from .torch import dump_torch_embedding_weights, load_torch_embedding_weights from .torch import dump_torch_weights, load_torch_weights +from .torch import dump_torch_adaptive_conv1d_weights \ No newline at end of file diff --git a/dnn/torch/weight-exchange/wexchange/torch/torch.py b/dnn/torch/weight-exchange/wexchange/torch/torch.py index 281d9be3..f7e16032 100644 --- a/dnn/torch/weight-exchange/wexchange/torch/torch.py +++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py @@ -28,12 +28,154 @@ """ import os +import sys import torch import numpy as np +sys.path.append(sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))) +try: + import utils.layers as osce_layers + from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d + from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d + from utils.layers.td_shaper import TDShaper + has_osce=True +except: + has_osce=False + from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer +def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False): + + + w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy() + b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy() + w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy() + b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy() + + if isinstance(where, CWriter): + # pad kernel for quantization + left_padding = adaconv.padding[0] + kernel_size = adaconv.kernel_size + in_channels = adaconv.in_channels + out_channels = adaconv.out_channels + feature_dim = adaconv.feature_dim + + if quantize and kernel_size % 8: + kernel_padding = 8 - (kernel_size % 8) + w_kernel = np.concatenate( + (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)), + dtype=w_kernel.dtype, + axis=2).reshape(-1, feature_dim) + b_kernel = np.concatenate( + (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)), + dtype=b_kernel.dtype, + axis=2).reshape(-1) + left_padding += kernel_padding 
+ kernel_size += kernel_padding + + # write relevant scalar parameters to header file + where.header.write(f""" +#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f +#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f +#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f +#define {name.upper()}_KERNEL_SIZE {kernel_size} +#define {name.upper()}_FRAME_SIZE {adaconv.frame_size} +#define {name.upper()}_LEFT_PADDING {left_padding} +#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size} +#define {name.upper()}_IN_CHANNELS {adaconv.in_channels} +#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels} +#define {name.upper()}_NORM_P {adaconv.norm_p} +#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim} +""" + ) + + print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize) + print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False) + + + else: + np.save(where, 'weight_kernel.npy', w_kernel) + np.save(where, 'bias_kernel.npy', b_kernel) + np.save(where, 'weight_gain.npy', w_gain) + np.save(where, 'bias_gain.npy', b_gain) + + +def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False): + + + w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy() + b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy() + w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy() + b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy() + w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy() + b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy() + + + if isinstance(where, CWriter): + # pad kernel for quantization + left_padding = adaconv.padding[0] + kernel_size = adaconv.kernel_size + + if quantize and w_kernel.shape[0] % 8: + kernel_padding = 8 - (w_kernel.shape[0] % 8) + w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype) + b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype) + left_padding += kernel_padding + kernel_size += kernel_padding + # write relevant scalar parameters to header file + where.header.write(f""" +#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f +#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f +#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f +#define {name.upper()}_KERNEL_SIZE {kernel_size} +#define {name.upper()}_LEFT_PADDING {left_padding} +#define {name.upper()}_FRAME_SIZE {adaconv.frame_size} +#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size} +#define {name.upper()}_IN_CHANNELS {adaconv.in_channels} +#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels} +#define {name.upper()}_NORM_P {adaconv.norm_p} +#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim} +#define {name.upper()}_MAX_LAG {adaconv.max_lag} +""" + ) + + print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize) + print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False) + print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False) + + + else: + np.save(where, 'weight_kernel.npy', w_kernel) + np.save(where, 'bias_kernel.npy', b_kernel) + 
np.save(where, 'weight_gain.npy', w_gain) + np.save(where, 'bias_gain.npy', b_gain) + np.save(where, 'weight_global_gain.npy', w_global_gain) + np.save(where, 'bias_global_gain.npy', b_global_gain) + +def dump_torch_tdshaper(where, shaper, name='tdshaper'): + + if isinstance(where, CWriter): + where.header.write(f""" +#define {name.upper()}_FEATURE_DIM {shaper.feature_dim} +#define {name.upper()}_FRAME_SIZE {shaper.frame_size} +#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k} +#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0} +#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0} +""" + ) + + dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1") + dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2") + + if shaper.innovate: + dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b") + dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c") + dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b") + dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c") + + + def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128): assert gru.num_layers == 1 @@ -221,7 +363,6 @@ def load_torch_conv2d_weights(where, conv): def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False): - print("quantize = ", quantize) w = embed.weight.detach().cpu().numpy().copy().transpose() b = np.zeros(w.shape[0], dtype=w.dtype) @@ -257,11 +398,21 @@ def dump_torch_weights(where, module, name=None, verbose=False, **kwargs): elif isinstance(module, torch.nn.Conv2d): return dump_torch_conv2d_weights(where, module, name, **kwargs) elif isinstance(module, torch.nn.Embedding): - return dump_torch_embedding_weights(where, module) + return dump_torch_embedding_weights(where, module, name, **kwargs) elif isinstance(module, torch.nn.ConvTranspose1d): return dump_torch_tconv1d_weights(where, module, name, **kwargs) else: - raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') + if has_osce: + if isinstance(module, LimitedAdaptiveConv1d): + dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs) + elif isinstance(module, LimitedAdaptiveComb1d): + dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs) + elif isinstance(module, TDShaper): + dump_torch_tdshaper(where, module, name, **kwargs) + else: + raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') + else: + raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported') def load_torch_weights(where, module): """ generic function for loading weights of some torch.nn.Module """ diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c index 722a373d..395590f4 100644 --- a/dnn/write_lpcnet_weights.c +++ b/dnn/write_lpcnet_weights.c @@ -46,6 +46,10 @@ #include "plc_data.c" #include "dred_rdovae_enc_data.c" #include "dred_rdovae_dec_data.c" +#ifdef ENABLE_OSCE +#include "lace_data.c" +#include "nolace_data.c" +#endif void write_weights(const WeightArray *list, FILE *fout) { @@ -53,6 +57,9 @@ void write_weights(const WeightArray *list, FILE *fout) unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0}; while (list[i].name != NULL) { WeightHead h; + if (strlen(list[i].name) >= sizeof(h.name) - 1) { + printf("[write_weights] warning: name %s too long\n", list[i].name); + } memcpy(h.head, "DNNw", 
4); h.version = WEIGHT_BLOB_VERSION; h.type = list[i].type; @@ -77,6 +84,14 @@ int main(void) write_weights(lpcnet_plc_arrays, fout); write_weights(rdovaeenc_arrays, fout); write_weights(rdovaedec_arrays, fout); +#ifdef ENABLE_OSCE +#ifndef DISABLE_LACE + write_weights(lacelayers_arrays, fout); +#endif +#ifndef DISABLE_NOLACE + write_weights(nolacelayers_arrays, fout); +#endif +#endif fclose(fout); return 0; } diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk index da610ca1..ce74d954 100644 --- a/lpcnet_headers.mk +++ b/lpcnet_headers.mk @@ -29,3 +29,12 @@ dnn/dred_rdovae_enc_data.h \ dnn/dred_rdovae_dec.h \ dnn/dred_rdovae_dec_data.h \ dnn/dred_rdovae_stats_data.h + +OSCE_HEAD= \ +dnn/osce.h \ +dnn/osce_config.h \ +dnn/osce_structs.h \ +dnn/osce_features.h \ +dnn/nndsp.h \ +dnn/lace_data.h \ +dnn/nolace_data.h diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk index 9b8863ad..17f04756 100644 --- a/lpcnet_sources.mk +++ b/lpcnet_sources.mk @@ -23,6 +23,13 @@ silk/dred_encoder.c \ silk/dred_coding.c \ silk/dred_decoder.c +OSCE_SOURCES = \ +dnn/osce.c \ +dnn/osce_features.c \ +dnn/nndsp.c \ +dnn/lace_data.c \ +dnn/nolace_data.c + DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c diff --git a/meson.build b/meson.build index 289c5917..f468bad4 100644 --- a/meson.build +++ b/meson.build @@ -148,6 +148,7 @@ opts = [ [ 'float-approx', 'FLOAT_APPROX' ], [ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ], [ 'enable-dred', 'ENABLE_DRED' ], + [ 'enable-osce', 'ENABLE_OSCE' ], [ 'assertions', 'ENABLE_ASSERTIONS' ], [ 'hardening', 'ENABLE_HARDENING' ], [ 'fuzzing', 'FUZZING' ], diff --git a/meson_options.txt b/meson_options.txt index d5b69eea..46099276 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -9,6 +9,7 @@ option('intrinsics', type : 'feature', value : 'auto', description : 'Intrinsics option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)') option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)') +option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)') option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights') option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 
44.1 kHz & 2^n frames') diff --git a/silk/API.h b/silk/API.h index 6e623b84..878965c7 100644 --- a/silk/API.h +++ b/silk/API.h @@ -92,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co /* Decoder functions */ /****************************************/ + +/***********************************************/ +/* Load OSCE models from external data pointer */ +/***********************************************/ +opus_int silk_LoadOSCEModels( + void *decState, /* O I/O State */ + const unsigned char *data, /* I pointer to binary blob */ + int len /* I length of binary blob data */ +); + /***********************************************/ /* Get size in bytes of the Silk decoder state */ /***********************************************/ @@ -100,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co ); /*************************/ -/* Init or Reset decoder */ +/* Init and Reset decoder */ /*************************/ +opus_int silk_ResetDecoder( /* O Returns error code */ + void *decState /* I/O State */ +); + opus_int silk_InitDecoder( /* O Returns error code */ void *decState /* I/O State */ ); diff --git a/silk/control.h b/silk/control.h index d30d114c..f5633e62 100644 --- a/silk/control.h +++ b/silk/control.h @@ -147,6 +147,11 @@ typedef struct { /* I: Enable Deep PLC */ opus_int enable_deep_plc; + +#ifdef ENABLE_OSCE + /* I: OSCE method */ + opus_int osce_method; +#endif } silk_DecControlStruct; #ifdef __cplusplus diff --git a/silk/dec_API.c b/silk/dec_API.c index a29ecc73..e4ae8343 100644 --- a/silk/dec_API.c +++ b/silk/dec_API.c @@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "os_support.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#include "osce_structs.h" +#endif + /************************/ /* Decoder Super Struct */ /************************/ @@ -42,12 +47,33 @@ typedef struct { opus_int nChannelsAPI; opus_int nChannelsInternal; opus_int prev_decode_only_middle; +#ifdef ENABLE_OSCE + OSCEModel osce_model; +#endif } silk_decoder; /*********************/ /* Decoder functions */ /*********************/ + + +opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len) +{ +#ifdef ENABLE_OSCE + opus_int ret = SILK_NO_ERROR; + + ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len); + + return ret; +#else + (void) decState; + (void) data; + (void) len; + return SILK_NO_ERROR; +#endif +} + opus_int silk_Get_Decoder_Size( /* O Returns error code */ opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ ) @@ -60,6 +86,24 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co } /* Reset decoder state */ +opus_int silk_ResetDecoder( /* O Returns error code */ + void *decState /* I/O State */ +) +{ + opus_int n, ret = SILK_NO_ERROR; + silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; + + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { + ret = silk_reset_decoder( &channel_state[ n ] ); + } + silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); + /* Not strictly needed, but it's cleaner that way */ + ((silk_decoder *)decState)->prev_decode_only_middle = 0; + + return ret; +} + + opus_int silk_InitDecoder( /* O Returns error code */ void *decState /* I/O State */ ) @@ -67,6 +111,11 @@ opus_int silk_InitDecoder( /* O Returns error co opus_int n, ret = SILK_NO_ERROR; silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; +#ifndef USE_WEIGHTS_FILE + /* load osce models */ + silk_LoadOSCEModels(decState, NULL, 0); 
+#endif + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { ret = silk_init_decoder( &channel_state[ n ] ); } @@ -301,9 +350,17 @@ opus_int silk_Decode( /* O Returns error co } else { condCoding = CODE_CONDITIONALLY; } +#ifdef ENABLE_OSCE + if ( channel_state[n].osce.method != decControl->osce_method ) { + osce_reset( &channel_state[n].osce, decControl->osce_method ); + } +#endif ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, #ifdef ENABLE_DEEP_PLC n == 0 ? lpcnet : NULL, +#endif +#ifdef ENABLE_OSCE + &psDec->osce_model, #endif arch); } else { diff --git a/silk/decode_frame.c b/silk/decode_frame.c index b393952c..48f74aef 100644 --- a/silk/decode_frame.c +++ b/silk/decode_frame.c @@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "PLC.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + /****************/ /* Decode frame */ /****************/ @@ -45,17 +49,26 @@ opus_int silk_decode_frame( opus_int condCoding, /* I The type of conditional coding to use */ #ifdef ENABLE_DEEP_PLC LPCNetPLCState *lpcnet, +#endif +#ifdef ENABLE_OSCE + OSCEModel *osce_model, #endif int arch /* I Run-time architecture */ ) { VARDECL( silk_decoder_control, psDecCtrl ); opus_int L, mv_len, ret = 0; +#ifdef ENABLE_OSCE + opus_int32 ec_start; +#endif SAVE_STACK; L = psDec->frame_length; ALLOC( psDecCtrl, 1, silk_decoder_control ); psDecCtrl->LTP_scale_Q14 = 0; +#ifdef ENABLE_OSCE + ec_start = ec_tell(psRangeDec); +#endif /* Safety checks */ celt_assert( L > 0 && L <= MAX_FRAME_LENGTH ); @@ -87,6 +100,21 @@ opus_int silk_decode_frame( /********************************************************/ silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); + /*************************/ + /* Update output buffer. */ + /*************************/ + celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); + mv_len = psDec->ltp_mem_length - psDec->frame_length; + silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); + silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + +#ifdef ENABLE_OSCE + /********************************************************/ + /* Run SILK enhancer */ + /********************************************************/ + osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch ); +#endif + /********************************************************/ /* Update PLC state */ /********************************************************/ @@ -109,15 +137,18 @@ opus_int silk_decode_frame( lpcnet, #endif arch ); - } - /*************************/ - /* Update output buffer. */ - /*************************/ - celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); - mv_len = psDec->ltp_mem_length - psDec->frame_length; - silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); - silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); +#ifdef ENABLE_OSCE + osce_reset( &psDec->osce, psDec->osce.method ); +#endif + /*************************/ + /* Update output buffer. 
*/ + /*************************/ + celt_assert( psDec->ltp_mem_length >= psDec->frame_length ); + mv_len = psDec->ltp_mem_length - psDec->frame_length; + silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); + silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + } /************************************************/ /* Comfort noise generation / estimation */ diff --git a/silk/init_decoder.c b/silk/init_decoder.c index 16c03dcd..01bc4b7a 100644 --- a/silk/init_decoder.c +++ b/silk/init_decoder.c @@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + +#include "structs.h" + /************************/ -/* Init Decoder State */ +/* Reset Decoder State */ /************************/ -opus_int silk_init_decoder( +opus_int silk_reset_decoder( silk_decoder_state *psDec /* I/O Decoder state pointer */ ) { /* Clear the entire encoder state, except anything copied */ - silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); + silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) ); /* Used to deactivate LSF interpolation */ psDec->first_frame_after_reset = 1; @@ -52,6 +58,27 @@ opus_int silk_init_decoder( /* Reset PLC state */ silk_PLC_Reset( psDec ); +#ifdef ENABLE_OSCE + /* Reset OSCE state and method */ + osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD); +#endif + + return 0; +} + + +/************************/ +/* Init Decoder State */ +/************************/ +opus_int silk_init_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +) +{ + /* Clear the entire encoder state, except anything copied */ + silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); + + silk_reset_decoder( psDec ); + return(0); } diff --git a/silk/main.h b/silk/main.h index c67775ef..cd576d8c 100644 --- a/silk/main.h +++ b/silk/main.h @@ -389,6 +389,10 @@ void silk_NLSF_decode( /****************************************************/ /* Decoder Functions */ /****************************************************/ +opus_int silk_reset_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +); + opus_int silk_init_decoder( silk_decoder_state *psDec /* I/O Decoder state pointer */ ); @@ -412,6 +416,9 @@ opus_int silk_decode_frame( opus_int condCoding, /* I The type of conditional coding to use */ #ifdef ENABLE_DEEP_PLC LPCNetPLCState *lpcnet, +#endif +#ifdef ENABLE_OSCE + OSCEModel *osce_model, #endif int arch /* I Run-time architecture */ ); diff --git a/silk/structs.h b/silk/structs.h index 709d3557..38243be1 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -44,6 +44,11 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "dred_decoder.h" #endif +#ifdef ENABLE_OSCE +#include "osce_config.h" +#include "osce_structs.h" +#endif + #ifdef __cplusplus extern "C" { @@ -238,6 +243,14 @@ typedef struct { } silk_encoder_state; +#ifdef ENABLE_OSCE +typedef struct { + OSCEFeatureState features; + OSCEState state; + int method; +} silk_OSCE_struct; +#endif + /* Struct for Packet Loss Concealment */ typedef struct { opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */ @@ -270,6 +283,10 @@ typedef struct { /* Decoder state */ /********************************/ typedef struct { +#ifdef ENABLE_OSCE + silk_OSCE_struct osce; +#endif +#define SILK_DECODER_STATE_RESET_START prev_gain_Q16 opus_int32 prev_gain_Q16; opus_int32 exc_Q14[ MAX_FRAME_LENGTH ]; opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ]; diff --git a/silk_sources.mk b/silk_sources.mk index 27c07129..3780b164 100644 --- a/silk_sources.mk +++ b/silk_sources.mk @@ -161,4 +161,4 @@ silk/float/schur_FLP.c \ silk/float/sort_FLP.c SILK_SOURCES_FLOAT_AVX2 = \ -silk/float/x86/inner_product_FLP_avx2.c +silk/float/x86/inner_product_FLP_avx2.c \ No newline at end of file diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 596c2dd0..dd95aefc 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -57,6 +57,10 @@ #include "dred_rdovae_dec.h" #endif +#ifdef ENABLE_OSCE +#include "osce.h" +#endif + struct OpusDecoder { int celt_dec_offset; int silk_dec_offset; @@ -383,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) - silk_InitDecoder( silk_dec ); + silk_ResetDecoder( silk_dec ); /* The SILK PLC cannot produce frames of less than 10 ms */ st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); @@ -408,6 +412,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } st->DecControl.enable_deep_plc = st->complexity >= 5; +#ifdef ENABLE_OSCE + st->DecControl.osce_method = OSCE_METHOD_NONE; +#ifndef DISABLE_LACE + if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;} +#endif +#ifndef DISABLE_NOLACE + if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;} +#endif +#endif lost_flag = data == NULL ? 1 : 2 * !!decode_fec; decoded_samples = 0; @@ -953,7 +966,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) ((char*)&st->OPUS_DECODER_RESET_START - (char*)st)); celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - silk_InitDecoder( silk_dec ); + silk_ResetDecoder( silk_dec ); st->stream_channels = st->channels; st->frame_size = st->Fs/400; #ifdef ENABLE_DEEP_PLC @@ -1044,6 +1057,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) 
goto bad_arg; } ret = lpcnet_plc_load_model(&st->lpcnet, data, len); + ret = silk_LoadOSCEModels(silk_dec, data, len) || ret; } break; #endif diff --git a/src/opus_demo.c b/src/opus_demo.c index c5f6250f..bccdf976 100644 --- a/src/opus_demo.c +++ b/src/opus_demo.c @@ -70,6 +70,10 @@ unsigned char *load_blob(const char *filename, int *len) { FILE *file; unsigned char *data; file = fopen(filename, "r"); + if (file == NULL) + { + perror("could not open blob file\n"); + } fseek(file, 0L, SEEK_END); *len = ftell(file); fseek(file, 0L, SEEK_SET); @@ -254,6 +258,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err } #endif + +#ifdef ENABLE_OSCE_TRAINING_DATA +#define COMPLEXITY_MIN 0 +#define COMPLEXITY_MAX 10 + +#define PACKET_LOSS_PERC_MIN 0 +#define PACKET_LOSS_PERC_MAX 50 +#define PACKET_LOSS_PERC_STEP 5 + +#define CBR_BITRATE_LIMIT 8000 + +#define NUM_BITRATES 102 +static int bitrates[NUM_BITRATES] = { + 6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480, + 6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816, + 6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171, + 7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541, + 7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802, + 7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179, + 8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980, + 9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851, + 9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378, + 11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795, + 14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468, + 22000, 32000, 64000 +}; + +static int randint(int min, int max, int step) +{ + double r = ((double) rand())/ (RAND_MAX + 1.); + int d; + + d = ((int) ((max + 1 - min) * r / step) * step) + min; + + return d; +} + +static void new_random_setting(OpusEncoder *enc) +{ + int bitrate_bps; + int complexity; + int packet_loss_perc; + int use_vbr; + + bitrate_bps = bitrates[randint(0, NUM_BITRATES - 1, 1)]; + complexity = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1); + packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP); + use_vbr = bitrate_bps < CBR_BITRATE_LIMIT ? 
1 : randint(0, 1, 1); + + if (1) + { + printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr); + } + + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps)); + opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity)); + opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc)); + opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr)); +} + +#endif + int main(int argc, char *argv[]) { int err; @@ -316,6 +382,10 @@ int main(int argc, char *argv[]) int lost_count=0; FILE *packet_loss_file=NULL; int dred_duration=0; +#ifdef ENABLE_OSCE_TRAINING_DATA + int silk_random_switching = 0; + int silk_frame_counter = 0; +#endif #ifdef USE_WEIGHTS_FILE int blob_len; unsigned char *blob_data; @@ -546,6 +616,12 @@ int main(int argc, char *argv[]) mode_list = celt_hq_test; nb_modes_in_list = 4; args++; +#ifdef ENABLE_OSCE_TRAINING_DATA + } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){ + silk_random_switching = atoi( argv[ args + 1 ] ); + printf("switching encoding parameters every %dth frame\n", silk_random_switching); + args += 2; +#endif } else { printf( "Error: unrecognized setting: %s\n\n", argv[ args ] ); print_usage( argv ); @@ -759,6 +835,15 @@ int main(int argc, char *argv[]) opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3])); frame_size = mode_list[curr_mode][2]; } +#ifdef ENABLE_OSCE_TRAINING_DATA + if (silk_random_switching) + { + silk_frame_counter += 1; + if (silk_frame_counter % silk_random_switching == 0) { + new_random_setting(enc); + } + } +#endif num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin); curr_read = (int)num_read; tot_in += curr_read; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 53c899a0..21dfe4ff 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -50,6 +50,9 @@ #else #include "float/structs_FLP.h" #endif +#ifdef ENABLE_OSCE_TRAINING_DATA +#include +#endif #define MAX_ENCODER_BUFFER 480 @@ -1693,6 +1696,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->application == OPUS_APPLICATION_VOIP) { hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch); + +#ifdef ENABLE_OSCE_TRAINING_DATA + /* write out high pass filtered clean signal*/ + static FILE *fout =NULL; + if (fout == NULL) + { + fout = fopen("clean_hp.s16", "wb"); + } + + { + int idx; + opus_int16 tmp; + for (idx = 0; idx < frame_size; idx++) + { + tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f); + fwrite(&tmp, sizeof(tmp), 1, fout); + } + } +#endif } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } @@ -2909,7 +2931,9 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
{ goto bad_arg; } +#ifdef ENABLE_DRED ret = dred_encoder_load_model(&st->dred_encoder, data, len); +#endif } break; #endif diff --git a/tests/test_opus_api.c b/tests/test_opus_api.c index b6d67572..9500d407 100644 --- a/tests/test_opus_api.c +++ b/tests/test_opus_api.c @@ -103,7 +103,7 @@ opus_int32 test_dec_api(void) for(c=0;c<4;c++) { i=opus_decoder_get_size(c); - if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed(); + if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed(); fprintf(stdout," opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"...."); cfgs++; } @@ -367,7 +367,7 @@ opus_int32 test_msdec_api(void) for(b=-1;b<4;b++) { i=opus_multistream_decoder_get_size(a,b); - if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed(); + if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed(); fprintf(stdout," opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... "); cfgs++; } -- cgit v1.2.3
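Note on the decoder-side behaviour added by this patch: when Opus is built with --enable-osce (or -DOPUS_OSCE=ON), opus_decode_frame() maps the decoder complexity setting to an OSCE method, OSCE_METHOD_NONE below 6, OSCE_METHOD_LACE at 6 and OSCE_METHOD_NOLACE at 7 and above, and silk_decode_frame() runs the enhancer right after silk_decode_core(), passing the number of range-coder bits spent on the frame (the ec_tell() delta) along with the decoder state. A minimal usage sketch, assuming the standard opus_decoder_* API, decoder-side OPUS_SET_COMPLEXITY support, and a build with the weights compiled in:

    #include <opus.h>

    /* Decode one packet with NoLACE enhancement enabled (sketch).
     * Returns the number of decoded samples or a negative error code. */
    int decode_with_osce(const unsigned char *packet, opus_int32 packet_len,
                         opus_int16 *pcm, int max_frame_size)
    {
        int err, ret;
        OpusDecoder *dec = opus_decoder_create(16000, 1, &err);
        if (err != OPUS_OK) return err;
        /* complexity >= 7 selects NoLACE, 6 selects LACE, < 6 disables OSCE */
        opus_decoder_ctl(dec, OPUS_SET_COMPLEXITY(7));
        ret = opus_decode(dec, packet, packet_len, pcm, max_frame_size, 0);
        opus_decoder_destroy(dec);
        return ret;
    }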
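Note on weight loading: write_lpcnet_weights now appends the LACE and NoLACE weight arrays (lacelayers_arrays, nolacelayers_arrays) to the binary blob, and silk_LoadOSCEModels() is the new SILK-level entry point that feeds such a blob to osce_load_models(); for builds without USE_WEIGHTS_FILE, silk_InitDecoder() calls it with a NULL pointer, i.e. without an external blob. The sketch below shows an application-side loading path for a USE_WEIGHTS_FILE build, modelled on load_blob() in opus_demo.c; OPUS_SET_DNN_BLOB is assumed (it is not part of this diff) to be the public ctl whose handler, shown above in opus_decoder_ctl(), now calls silk_LoadOSCEModels() in addition to lpcnet_plc_load_model().

    #include <stdio.h>
    #include <stdlib.h>
    #include <opus.h>

    /* Load a weights blob written by write_lpcnet_weights and hand it to the
     * decoder (sketch). Returns the buffer on success; keep it allocated while
     * the decoder is in use (opus_demo only frees it after decoding finishes). */
    static unsigned char *load_dnn_blob(OpusDecoder *dec, const char *path,
                                        opus_int32 *len)
    {
        FILE *f = fopen(path, "rb");
        unsigned char *data;
        if (f == NULL) return NULL;
        fseek(f, 0L, SEEK_END);
        *len = (opus_int32)ftell(f);
        fseek(f, 0L, SEEK_SET);
        data = (unsigned char *)malloc(*len);
        if (data == NULL || fread(data, 1, *len, f) != (size_t)*len) {
            if (data) free(data);
            fclose(f);
            return NULL;
        }
        fclose(f);
        /* OPUS_SET_DNN_BLOB is an assumption here, not introduced by this patch. */
        if (opus_decoder_ctl(dec, OPUS_SET_DNN_BLOB(data, *len)) != OPUS_OK) {
            free(data);
            return NULL;
        }
        return data;
    }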