diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-29 06:33:47 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-29 06:33:47 +0300 |
commit | ddd5669e79a9e581e8420d2ed397e524da864337 (patch) | |
tree | 3c79bc4581fe7542bd9ca2138db60e5f1dc54f45 | |
parent | c99054dad9053ff7dc30cf0379d8aeb240b2e171 (diff) |
Pitch and FARGAN model updates
Removing one of the 2d conv layers for pitch estimation reduces
complexity without noticeable degradation. FARGAN model has more
adversarial training.
Also, no need for the double precision in the low-pass filter.
-rwxr-xr-x | autogen.sh | 2 | ||||
-rw-r--r-- | dnn/lpcnet_enc.c | 4 | ||||
-rw-r--r-- | dnn/pitchdnn.c | 3 | ||||
-rw-r--r-- | dnn/torch/neural-pitch/export_neuralpitch_weights.py | 3 | ||||
-rw-r--r-- | dnn/torch/neural-pitch/models.py | 7 |
5 files changed, 7 insertions, 12 deletions
@@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -dnn/download_model.sh 290be25 +dnn/download_model.sh c99054d echo "Updating build configuration files, please wait...." diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 6a4674eb..7133357f 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -81,8 +81,8 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const float xi, yi; xi = x[i]; yi = x[i] + mem[0]; - mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi); - mem[1] = (b[1]*(double)xi - a[1]*(double)yi); + mem[0] = mem[1] + (b[0]*xi - a[0]*yi); + mem[1] = (b[1]*xi - a[1]*yi); y[i] = yi; } } diff --git a/dnn/pitchdnn.c b/dnn/pitchdnn.c index 02c67444..84952721 100644 --- a/dnn/pitchdnn.c +++ b/dnn/pitchdnn.c @@ -33,8 +33,7 @@ float compute_pitchdnn( /* xcorr*/ OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES); compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH); - compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH); - compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH); + compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH); compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH); compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out); diff --git a/dnn/torch/neural-pitch/export_neuralpitch_weights.py b/dnn/torch/neural-pitch/export_neuralpitch_weights.py index 82b2d3d9..577ec882 100644 --- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py +++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py @@ -73,8 +73,7 @@ f""" conv_layers = [ ('conv.1', "conv2d_1"), - ('conv.4', "conv2d_2"), - 
('conv.7', "conv2d_3") + ('conv.4', "conv2d_2") ] diff --git a/dnn/torch/neural-pitch/models.py b/dnn/torch/neural-pitch/models.py index 34b418e8..ce4977fd 100644 --- a/dnn/torch/neural-pitch/models.py +++ b/dnn/torch/neural-pitch/models.py @@ -86,13 +86,10 @@ class PitchDNN(torch.nn.Module): self.conv = torch.nn.Sequential( torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(1, 8, 3, bias=True), + torch.nn.Conv2d(1, 4, 3, bias=True), self.activation, torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(8, 8, 3, bias=True), - self.activation, - torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(8, 1, 3, bias=True), + torch.nn.Conv2d(4, 1, 3, bias=True), self.activation, ) |