switched to channels first layout

author: Jan Buethe <jbuethe@amazon.de> 2023-12-05 15:36:25 +0300
committer: Jan Buethe <jbuethe@amazon.de> 2023-12-05 15:38:59 +0300
commit: 6ac3fd8d0774fe7c40c48cd40b9b4773d9291a8c (patch)
tree: 3bf7a5eed7e9a01856304ec3635bf63389a0c758
parent: d514e253deb545c4535583ec5e9b539fffcb895e (diff)
4 files changed, 78 insertions, 45 deletions
diff --git a/dnn/adaconvtest.c b/dnn/adaconvtest.c
index cab8ba5b..daa8d24b 100644
--- a/dnn/adaconvtest.c
+++ b/dnn/adaconvtest.c
@@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <math.h>
 
+
 void adaconv_compare(
     const char * prefix,
     int num_frames,
diff --git a/dnn/nndsp.c b/dnn/nndsp.c
index 91379e04..748f2c72 100644
--- a/dnn/nndsp.c
+++ b/dnn/nndsp.c
@@ -7,6 +7,7 @@
 #include "arch.h"
 #include "nnet.h"
 #include "os_support.h"
+#include "pitch.h"
 
 #include <math.h>
 
@@ -142,9 +143,14 @@ void adaconv_process_frame(
         window = &window_buffer[0];
     }
 
-    OPUS_COPY(input_buffer, hAdaConv->history, kernel_size * in_channels);
-    OPUS_COPY(input_buffer + kernel_size * in_channels, x_in, frame_size * in_channels);
-    p_input = input_buffer + kernel_size * in_channels;
+    /* prepare input */
+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+    {
+        OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
+        OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
+        p_input = input_buffer + kernel_size;
+    }
+
 
     /* calculate new kernel and new gain */
     compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
@@ -163,36 +169,54 @@ void adaconv_process_frame(
 #endif
 
     /* calculate overlapping part using kernel from last frame */
-    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+
+    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
     {
-        for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
         {
-            for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+            for (i_sample = 0; i_sample < overlap_size; i_sample++)
             {
                 for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
                 {
-                    output_buffer[i_sample * out_channels + i_out_channels] +=
-                        window[i_sample] * p_input[(i_sample + i_kernel - left_padding) * in_channels + i_in_channels] * hAdaConv->last_kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
-                    output_buffer[i_sample * out_channels + i_out_channels] +=
-                        (1 - window[i_sample]) * p_input[(i_sample + i_kernel - left_padding) * in_channels + i_in_channels] * kernel_buffer[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
+
+                    output_buffer[i_sample + i_out_channels * frame_size] +=
+                        window[i_sample] * p_input[(i_sample + i_kernel - left_padding) + i_in_channels * (frame_size + kernel_size)] * hAdaConv->last_kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
+                    output_buffer[i_sample + i_out_channels * frame_size] +=
+                        (1 - window[i_sample]) * p_input[(i_sample + i_kernel - left_padding) + i_in_channels * (frame_size + kernel_size)] * kernel_buffer[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
                 }
             }
         }
     }
 
+
     /* calculate non-overlapping part */
-    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+
+    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
     {
-        for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
         {
-            for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+#ifdef USE_XCORR
+            float channel_buffer[ADACONV_MAX_FRAME_SIZE];
+            float kernel[ADACONV_MAX_KERNEL_SIZE];
+            OPUS_CLEAR(kernel, 1);
+            OPUS_CLEAR(channel_buffer, 1);
+            OPUS_COPY(kernel, &kernel_buffer[KERNEL_INDEX(i_out_channels, i_in_channels, 0)], kernel_size);
+            celt_pitch_xcorr(kernel, p_input + i_in_channels * (frame_size + kernel_size) - left_padding + overlap_size, channel_buffer, ADACONV_MAX_KERNEL_SIZE,  frame_size - overlap_size, arch);
+            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+            {
+                output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer[i_sample - overlap_size];
+            }
+#else
+            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
             {
                 for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
                 {
-                    output_buffer[i_sample * out_channels + i_out_channels] +=
-                        p_input[(i_sample + i_kernel - left_padding) * in_channels + i_in_channels] * kernel_buffer[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
+
+                    output_buffer[i_sample + i_out_channels * frame_size] +=
+                        p_input[(i_sample + i_kernel - left_padding) + i_in_channels * (frame_size + kernel_size)] * kernel_buffer[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
                 }
             }
+#endif
         }
     }
 
@@ -203,7 +227,11 @@ void adaconv_process_frame(
 #endif
 
     /* buffer update */
-    OPUS_COPY(hAdaConv->history, &p_input[(frame_size - kernel_size) * in_channels], kernel_size * in_channels);
+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+    {
+        OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
+    }
+
     OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
 }
 
@@ -229,12 +257,15 @@ void adacomb_process_frame(
 )
 {
     float output_buffer[ADACOMB_MAX_FRAME_SIZE];
+    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
     float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
     float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
     float window_buffer[ADACOMB_MAX_OVERLAP_SIZE];
     float gain, global_gain;
     float *p_input;
-    int i_sample, i_kernel;
+    int i_sample;
+    float kernel[16];
+    float last_kernel[16];
 
     (void) feature_dim; /* ToDo: figure out whether we might need this information */
 
@@ -275,28 +306,28 @@ void adacomb_process_frame(
     print_float_vector("adacomb_gain", &gain, 1);
 #endif
 
-    /* calculate overlapping part using kernel and lag from last frame */
+    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_COPY(kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
+
+    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
+
+    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
     for (i_sample = 0; i_sample < overlap_size; i_sample++)
     {
-        for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
-        {
-            output_buffer[i_sample] += hAdaComb->last_global_gain * window[i_sample] * p_input[i_sample + i_kernel - left_padding - hAdaComb->last_pitch_lag] * hAdaComb->last_kernel[i_kernel];
-            output_buffer[i_sample] += global_gain * (1 - window[i_sample]) * p_input[i_sample + i_kernel - left_padding - pitch_lag] * kernel_buffer[i_kernel];
-        }
-        output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1 - window[i_sample]) * global_gain) * p_input[i_sample];
+      output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1 - window[i_sample]) * output_buffer[i_sample];
     }
 
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+      output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1 - window[i_sample]) * global_gain) * p_input[i_sample];
+    }
 
-    /* calculate non-overlapping part */
     for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
     {
-        for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
-        {
-            output_buffer[i_sample] += p_input[i_sample + i_kernel - left_padding - pitch_lag] * kernel_buffer[i_kernel];
-        }
-        output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
+      output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
     }
-
     OPUS_COPY(x_out, output_buffer, frame_size);
 
 #ifdef DEBUG_NNDSP
diff --git a/dnn/osce.c b/dnn/osce.c
index b0f31d58..b61b5409 100644
--- a/dnn/osce.c
+++ b/dnn/osce.c
@@ -578,18 +578,18 @@ void nolace_process_20ms_frame(
         /* modifies second channel in place */
         adashape_process_frame(
             &state->tdshape1_state,
-            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE,
-            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
             feature_buffer + i_subframe * NOLACE_COND_DIM,
             &layers->nolace_tdshape1_alpha1,
             &layers->nolace_tdshape1_alpha2,
             NOLACE_TDSHAPE1_FEATURE_DIM,
             NOLACE_TDSHAPE1_FRAME_SIZE,
             NOLACE_TDSHAPE1_AVG_POOL_K,
-            2,
             1,
-            2,
+            0,
             1,
+            0,
             arch
         );
 
@@ -637,18 +637,18 @@ void nolace_process_20ms_frame(
         /* modifies second channel in place */
         adashape_process_frame(
             &state->tdshape2_state,
-            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE,
-            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE,
+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
             feature_buffer + i_subframe * NOLACE_COND_DIM,
             &layers->nolace_tdshape2_alpha1,
             &layers->nolace_tdshape2_alpha2,
             NOLACE_TDSHAPE2_FEATURE_DIM,
             NOLACE_TDSHAPE2_FRAME_SIZE,
             NOLACE_TDSHAPE2_AVG_POOL_K,
-            2,
             1,
-            2,
+            0,
             1,
+            0,
             arch
         );
 
@@ -692,18 +692,18 @@ void nolace_process_20ms_frame(
         /* modifies second channel in place */
         adashape_process_frame(
             &state->tdshape3_state,
-            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE,
-            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
             feature_buffer + i_subframe * NOLACE_COND_DIM,
             &layers->nolace_tdshape3_alpha1,
             &layers->nolace_tdshape3_alpha2,
             NOLACE_TDSHAPE3_FEATURE_DIM,
             NOLACE_TDSHAPE3_FRAME_SIZE,
             NOLACE_TDSHAPE3_AVG_POOL_K,
-            2,
             1,
-            2,
+            0,
             1,
+            0,
             arch
         );
 
diff --git a/dnn/torch/osce/create_testvectors.py b/dnn/torch/osce/create_testvectors.py
index fd03d318..f8772eea 100644
--- a/dnn/torch/osce/create_testvectors.py
+++ b/dnn/torch/osce/create_testvectors.py
@@ -17,6 +17,7 @@ parser.add_argument('--debug', action='store_true', help='add debug output to ou
 def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
     feature_dim = adaconv.feature_dim
     in_channels = adaconv.in_channels
+    out_channels = adaconv.out_channels
     frame_size = adaconv.frame_size
 
     features = torch.randn((1, num_frames, feature_dim))
@@ -25,8 +26,8 @@ def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
     x_out = adaconv(x_in, features, debug=debug)
 
     features = features[0].detach().numpy()
-    x_in = x_in[0].permute(1, 0).detach().numpy()
-    x_out = x_out[0].permute(1, 0).detach().numpy()
+    x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()
+    x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()
 
     features.tofile(prefix + '_features.f32')
     x_in.tofile(prefix + '_x_in.f32')
author	Jan Buethe <jbuethe@amazon.de>	2023-12-05 15:36:25 +0300
committer	Jan Buethe <jbuethe@amazon.de>	2023-12-05 15:38:59 +0300
commit	6ac3fd8d0774fe7c40c48cd40b9b4773d9291a8c (patch)
tree	3bf7a5eed7e9a01856304ec3635bf63389a0c758
parent	d514e253deb545c4535583ec5e9b539fffcb895e (diff)