diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-29 06:33:47 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-29 06:33:47 +0300 |
commit | ddd5669e79a9e581e8420d2ed397e524da864337 (patch) | |
tree | 3c79bc4581fe7542bd9ca2138db60e5f1dc54f45 | |
parent | c99054dad9053ff7dc30cf0379d8aeb240b2e171 (diff) |
Pitch and FARGAN model updates
Removing one of the 2d conv layers for pitch estimation reduces
complexity without noticeable degradation. FARGAN model has more
adversarial training.
Also, no need for the double precision in the low-pass filter.
-rwxr-xr-x | autogen.sh | 2 | ||||
-rw-r--r-- | dnn/lpcnet_enc.c | 4 | ||||
-rw-r--r-- | dnn/pitchdnn.c | 3 | ||||
-rw-r--r-- | dnn/torch/neural-pitch/export_neuralpitch_weights.py | 3 | ||||
-rw-r--r-- | dnn/torch/neural-pitch/models.py | 7 |
5 files changed, 7 insertions, 12 deletions
@@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -dnn/download_model.sh 290be25 +dnn/download_model.sh c99054d echo "Updating build configuration files, please wait...." diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 6a4674eb..7133357f 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -81,8 +81,8 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const float xi, yi; xi = x[i]; yi = x[i] + mem[0]; - mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi); - mem[1] = (b[1]*(double)xi - a[1]*(double)yi); + mem[0] = mem[1] + (b[0]*xi - a[0]*yi); + mem[1] = (b[1]*xi - a[1]*yi); y[i] = yi; } } diff --git a/dnn/pitchdnn.c b/dnn/pitchdnn.c index 02c67444..84952721 100644 --- a/dnn/pitchdnn.c +++ b/dnn/pitchdnn.c @@ -33,8 +33,7 @@ float compute_pitchdnn( /* xcorr*/ OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES); compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH); - compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH); - compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH); + compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH); compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH); compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out); diff --git a/dnn/torch/neural-pitch/export_neuralpitch_weights.py b/dnn/torch/neural-pitch/export_neuralpitch_weights.py index 82b2d3d9..577ec882 100644 --- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py +++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py @@ -73,8 +73,7 @@ f""" conv_layers = [ ('conv.1', "conv2d_1"), - ('conv.4', "conv2d_2"), - 
('conv.7', "conv2d_3") + ('conv.4', "conv2d_2") ] diff --git a/dnn/torch/neural-pitch/models.py b/dnn/torch/neural-pitch/models.py index 34b418e8..ce4977fd 100644 --- a/dnn/torch/neural-pitch/models.py +++ b/dnn/torch/neural-pitch/models.py @@ -86,13 +86,10 @@ class PitchDNN(torch.nn.Module): self.conv = torch.nn.Sequential( torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(1, 8, 3, bias=True), + torch.nn.Conv2d(1, 4, 3, bias=True), self.activation, torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(8, 8, 3, bias=True), - self.activation, - torch.nn.ZeroPad2d((2,0,1,1)), - torch.nn.Conv2d(8, 1, 3, bias=True), + torch.nn.Conv2d(4, 1, 3, bias=True), self.activation, ) |