Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@amazon.com>2023-07-30 22:31:59 +0300
committerJean-Marc Valin <jmvalin@amazon.com>2023-08-02 00:57:04 +0300
commit904f1cc7fc1d31f038fb8f4c30bd099ebda48e75 (patch)
treea45f75a73e880e312d0e4ebfda7485a7dcfc40ac
parent3816b0e001f34250e78927735d58d1a39768e108 (diff)
C implementation of FWGAN
-rwxr-xr-xautogen.sh2
-rw-r--r--dnn/fwgan.c196
-rw-r--r--dnn/fwgan.h30
-rw-r--r--dnn/nnet.c8
-rw-r--r--dnn/nnet.h2
-rw-r--r--lpcnet_headers.mk1
-rw-r--r--lpcnet_sources.mk1
7 files changed, 230 insertions, 10 deletions
diff --git a/autogen.sh b/autogen.sh
index f87f4122..6131b9ab 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,7 +9,7 @@ set -e
srcdir=`dirname $0`
test -n "$srcdir" && cd "$srcdir"
-dnn/download_model.sh ad05730
+dnn/download_model.sh 3816b0e
echo "Updating build configuration files, please wait...."
diff --git a/dnn/fwgan.c b/dnn/fwgan.c
index bce3670a..ea4f0e07 100644
--- a/dnn/fwgan.c
+++ b/dnn/fwgan.c
@@ -31,21 +31,207 @@
#include "fwgan.h"
#include "os_support.h"
#include "freq.h"
+#include "fwgan_data.h"
+#include "lpcnet.h"
+#include "pitch.h"
+#include "nnet.h"
-void fwgan_init(FWGANState *st, const float *pcm)
+#define NB_SUBFRAMES 4
+#define SUBFRAME_SIZE 40
+#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
+#define CONT_PCM_INPUTS 320
+#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
+#define FWGAN_GAMMA 0.92f
+#define FWGAN_DEEMPHASIS 0.85f
+
+#define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2)
+
+#define FWGAN_FEATURES (NB_FEATURES-1)
+
+/* Fill pembed with per-sample pitch embeddings for one subframe:
+   pembed[0..SUBFRAME_SIZE) = sin(phase), pembed[SUBFRAME_SIZE..2*SUBFRAME_SIZE) = cos(phase),
+   advancing the running *phase by the pitch frequency w0 (radians/sample) each sample. */
+static void pitch_embeddings(float *pembed, double *phase, double w0) {
+ int i;
+ /* FIXME: This could be sped up by making phase a unit-norm complex value, rotating it
+ by exp(-i*w0) each sample, and renormalizing once in a while. */
+ for (i=0;i<SUBFRAME_SIZE;i++) {
+ *phase += w0;
+ pembed[i] = sin(*phase);
+ pembed[SUBFRAME_SIZE+i] = cos(*phase);
+ }
+}
+
+/* Upsample one frame's FWGAN_FEATURES-dimensional feature vector into the full
+   conditioning vector (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE wide, consumed as
+   NB_SUBFRAMES slices by the caller) with a single tanh dense layer. */
+static void run_fwgan_upsampler(FWGANState *st, float *cond, const float *features)
{
- OPUS_CLEAR(st, 1);
+ FWGAN *model;
+ model = &st->model;
+ celt_assert(FWGAN_FEATURES == model->bfcc_with_corr_upsampler_fc.nb_inputs);
+ celt_assert(BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE == model->bfcc_with_corr_upsampler_fc.nb_outputs);
+ compute_generic_dense(&model->bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
+}
+
+/* Prime the FWGAN synthesis state from CONT_PCM_INPUTS samples of past
+   ("continuation") PCM so synthesis can pick up seamlessly after existing audio.
+   NOTE(review): features0 is currently unused -- see the FIXME at the end. */
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
+{
+ int i;
+ float norm2, norm_1;
+ float cont_inputs[CONT_PCM_INPUTS+1];
+ float tmp1[MAX_CONT_SIZE];
+ float tmp2[MAX_CONT_SIZE];
+ FWGAN *model;
+ model = &st->model;
+ /* Energy-normalize the continuation PCM and prepend its log magnitude as input 0
+    (the 1e-8f/1e-7f terms guard against log/divide on silent input). */
+ norm2 = celt_inner_prod(pcm0, pcm0, CONT_PCM_INPUTS, st->arch);
+ norm_1 = 1.f/sqrt(1e-8f + norm2);
+ for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*pcm0[i];
+ cont_inputs[0] = log(sqrt(norm2) + 1e-7f);
+
+ /* Run the continuation MLP, ping-ponging between tmp1 and tmp2. The final
+    layer writes back into cont_inputs, which assumes
+    CONT_NET_10_OUT_SIZE <= CONT_PCM_INPUTS+1 -- TODO confirm. */
+ compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_2, tmp2, tmp1, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_4, tmp1, tmp2, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_6, tmp2, tmp1, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_8, tmp1, tmp2, ACTIVATION_TANH);
+ celt_assert(CONT_NET_10_OUT_SIZE == model->cont_net_10.nb_outputs);
+ compute_generic_dense(&model->cont_net_10, cont_inputs, tmp1, ACTIVATION_TANH);
+
+ /* Derive the initial GRU state and each FWC conv stage's initial memory
+    from the continuation embedding. */
+ celt_assert(RNN_GRU_STATE_SIZE == model->rnn_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->rnn_cont_fc_0, st->rnn_state, cont_inputs, ACTIVATION_TANH);
+
+ celt_assert(FWC1_STATE_SIZE == model->fwc1_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc1_cont_fc_0, st->fwc1_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC2_STATE_SIZE == model->fwc2_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc2_cont_fc_0, st->fwc2_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC3_STATE_SIZE == model->fwc3_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc3_cont_fc_0, st->fwc3_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC4_STATE_SIZE == model->fwc4_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc4_cont_fc_0, st->fwc4_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC5_STATE_SIZE == model->fwc5_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc5_cont_fc_0, st->fwc5_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC6_STATE_SIZE == model->fwc6_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc6_cont_fc_0, st->fwc6_state, cont_inputs, ACTIVATION_TANH);
+ celt_assert(FWC7_STATE_SIZE == model->fwc7_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, cont_inputs, ACTIVATION_TANH);
+
+ /* FIXME: Do we need to handle initial features? How? */
+}
+
+/* Scale one subframe by the energy gain derived from cepstral coefficient c0.
+   Note: the gain actually applied is *last_gain, i.e. the one computed on the
+   PREVIOUS call; the gain from the current c0 is only stored for the next
+   subframe, so gain takes effect with a one-subframe delay. */
+static void apply_gain(float *pcm, float c0, float *last_gain) {
+ int i;
+ float gain = pow(10.f, (0.5f*c0/sqrt(18.f)));
+ for (i=0;i<SUBFRAME_SIZE;i++) pcm[i] *= *last_gain;
+ *last_gain = gain;
+}
+
+/* In-place all-pole LPC synthesis filter over one subframe. Filters with the
+   coefficients from the previous call (last_lpc); mem holds the LPC_ORDER most
+   recent output samples (mem[0] newest). On return last_lpc is updated to lpc,
+   so new coefficients take effect one subframe late. */
+static void fwgan_lpc_syn(float *pcm, float *mem, const float *lpc, float last_lpc[LPC_ORDER]) {
+ int i;
+ for (i=0;i<SUBFRAME_SIZE;i++) {
+ int j;
+ for (j=0;j<LPC_ORDER;j++) pcm[i] -= mem[j]*last_lpc[j];
+ OPUS_MOVE(&mem[1], &mem[0], LPC_ORDER-1);
+ mem[0] = pcm[i];
+ }
+ OPUS_COPY(last_lpc, lpc, LPC_ORDER);
+}
+
+/* In-place first-order pre-emphasis over one subframe:
+   y[n] = x[n] - FWGAN_DEEMPHASIS*x[n-1]. *preemph_mem carries the last raw
+   input sample across calls. Inverse of fwgan_deemphasis(). */
+static void fwgan_preemphasis(float *pcm, float *preemph_mem) {
+ int i;
+ for (i=0;i<SUBFRAME_SIZE;i++) {
+ float tmp = pcm[i];
+ pcm[i] -= FWGAN_DEEMPHASIS * *preemph_mem;
+ *preemph_mem = tmp;
+ }
}
-static void run_fwgan(FWGANState *st, float *pcm, const float *input)
+/* In-place first-order de-emphasis over a whole frame:
+   y[n] = x[n] + FWGAN_DEEMPHASIS*y[n-1]. *deemph_mem carries the last output
+   sample across frames. Inverse of fwgan_preemphasis(). */
+static void fwgan_deemphasis(float *pcm, float *deemph_mem) {
+ int i;
+ for (i=0;i<FWGAN_FRAME_SIZE;i++) {
+ pcm[i] += FWGAN_DEEMPHASIS * *deemph_mem;
+ *deemph_mem = pcm[i];
+ }
+}
+
+/* Run the FWGAN network for one SUBFRAME_SIZE subframe: feature conv1d plus
+   gated non-linearity, GRU, then a chain of seven gated conv stages whose
+   final gate writes the subframe's samples directly into pcm. */
+static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, double w0)
{
+ float tmp1[FWC1_FC_0_OUT_SIZE];
+ float tmp2[IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE)];
+ float feat_in[FEAT_IN_SIZE];
+ float rnn_in[FEAT_IN_CONV1_CONV_OUT_SIZE];
+ float pembed[FWGAN_FRAME_SIZE/2];
+ FWGAN *model;
+ model = &st->model;
+
+ pitch_embeddings(pembed, &st->embed_phase, w0);
+ /* Build feat_in: pitch embeddings in the first FWGAN_FRAME_SIZE/2 entries,
+    this subframe's slice of the upsampled conditioning after them. The two
+    copies tile feat_in exactly only when
+    BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 == FWGAN_FRAME_SIZE/2 -- TODO confirm. */
+ OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4);
+ OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2);
+
+ compute_generic_conv1d(&model->feat_in_conv1_conv, rnn_in, st->cont_conv1_mem, feat_in, FEAT_IN_CONV1_CONV_IN_SIZE, ACTIVATION_LINEAR);
+ celt_assert(FEAT_IN_NL1_GATE_OUT_SIZE == model->feat_in_nl1_gate.nb_outputs);
+ /* The in-place calls below (output == input) rely on
+    compute_gated_activation() being aliasing-safe. */
+ compute_gated_activation(&model->feat_in_nl1_gate, rnn_in, rnn_in, ACTIVATION_TANH);
+
+
+ compute_generic_gru(&model->rnn_gru_input, &model->rnn_gru_recurrent, st->rnn_state, rnn_in);
+ celt_assert(IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE) >= model->rnn_nl_gate.nb_outputs);
+ compute_gated_activation(&model->rnn_nl_gate, tmp2, st->rnn_state, ACTIVATION_TANH);
+
+ /* FWC stack: each stage is a linear conv1d followed by a gated tanh,
+    alternating between tmp1 and tmp2 as scratch buffers. */
+ compute_generic_conv1d(&model->fwc1_fc_0, tmp1, st->fwc1_state, tmp2, RNN_GRU_STATE_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc1_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc2_fc_0, tmp2, st->fwc2_state, tmp1, FWC1_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc2_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc3_fc_0, tmp1, st->fwc3_state, tmp2, FWC2_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc3_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc4_fc_0, tmp2, st->fwc4_state, tmp1, FWC3_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc4_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+ compute_generic_conv1d(&model->fwc5_fc_0, tmp1, st->fwc5_state, tmp2, FWC4_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc5_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc6_fc_0, tmp2, st->fwc6_state, tmp1, FWC5_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc6_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc7_fc_0, tmp1, st->fwc7_state, tmp2, FWC6_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc7_fc_1_gate, pcm, tmp1, ACTIVATION_TANH);
+}
+
+
+
+/* Initialize the FWGAN state: zero everything, then load the built-in model
+   weights (fwgan_arrays); init failure is a programming error (assert). */
+void fwgan_init(FWGANState *st)
+{
+ int ret;
+ OPUS_CLEAR(st, 1);
+ ret = init_fwgan(&st->model, fwgan_arrays);
+ celt_assert(ret == 0);
+ /* FIXME: perform arch detection. */
}
void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
{
+ /* Synthesize FWGAN_FRAME_SIZE samples into pcm from one NB_FEATURES-wide
+    feature vector (cepstrum, then pitch at index NB_BANDS, then one more
+    feature -- presumably pitch correlation, confirm against the encoder). */
+ int subframe;
float lpc[LPC_ORDER];
+ float cond[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE];
+ double w0;
+ int period;
+ float lpc_weight;
+ float fwgan_features[NB_FEATURES-1];
+ int i;
+ /* Network features: keep the first NB_FEATURES-2 (cepstral) features, drop
+    the pitch feature, and append the last feature offset by +0.5. */
+ OPUS_COPY(fwgan_features, features, NB_FEATURES-2);
+ fwgan_features[NB_FEATURES-2] = features[NB_FEATURES-1]+.5;
+
+ /* Decode the pitch period (in samples) from the normalized pitch feature;
+    w0 is the corresponding frequency in radians/sample. */
+ period = (int)floor(.1 + 50*features[NB_BANDS]+100);
+ w0 = 2*M_PI/period;
lpc_from_cepstrum(lpc, features);
- run_fwgan(st, pcm, features);
- /* Run LPC filter. */
+ /* Bandwidth expansion: lpc[i] *= FWGAN_GAMMA^(i+1). */
+ lpc_weight = 1.f;
+ for (i=0;i<LPC_ORDER;i++) {
+ lpc_weight *= FWGAN_GAMMA;
+ lpc[i] *= lpc_weight;
+ }
+ run_fwgan_upsampler(st, cond, fwgan_features);
+ /* Per subframe: run the network on this subframe's conditioning slice, then
+    gain, pre-emphasis and LPC synthesis; de-emphasis runs over the whole frame. */
+ for (subframe=0;subframe<NB_SUBFRAMES;subframe++) {
+ float *sub_cond;
+ sub_cond = &cond[subframe*BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4];
+ run_fwgan_subframe(st, &pcm[subframe*SUBFRAME_SIZE], sub_cond, w0);
+ apply_gain(&pcm[subframe*SUBFRAME_SIZE], features[0], &st->last_gain);
+ fwgan_preemphasis(&pcm[subframe*SUBFRAME_SIZE], &st->preemph_mem);
+ fwgan_lpc_syn(&pcm[subframe*SUBFRAME_SIZE], st->syn_mem, lpc, st->last_lpc);
+ }
+ fwgan_deemphasis(pcm, &st->deemph_mem);
}
diff --git a/dnn/fwgan.h b/dnn/fwgan.h
index 84749176..7da11b8f 100644
--- a/dnn/fwgan.h
+++ b/dnn/fwgan.h
@@ -28,15 +28,43 @@
#define FWGAN_H
#include "freq.h"
+#include "fwgan_data.h"
#define FWGAN_CONT_SAMPLES 320
+/* FIXME: Derive those from the model rather than hardcoding. */
+#define FWC1_STATE_SIZE 512
+#define FWC2_STATE_SIZE 512
+#define FWC3_STATE_SIZE 256
+#define FWC4_STATE_SIZE 256
+#define FWC5_STATE_SIZE 128
+#define FWC6_STATE_SIZE 128
+#define FWC7_STATE_SIZE 80
+
typedef struct {
+ FWGAN model; /* network weights */
+ int arch; /* SIMD arch id (NOTE(review): not yet auto-detected -- see FIXME in fwgan_init) */
+ double embed_phase; /* running phase for the sin/cos pitch embeddings */
+ float last_gain; /* gain computed from the previous subframe's c0 (applied one subframe late) */
+ float last_lpc[LPC_ORDER]; /* LPC coefficients used for the previous subframe */
float syn_mem[LPC_ORDER]; /* LPC synthesis filter memory (most recent outputs) */
+ float preemph_mem; /* pre-emphasis state: last input sample */
+ float deemph_mem; /* de-emphasis state: last output sample */
+ float cont_conv1_mem[FEAT_IN_CONV1_CONV_STATE_SIZE]; /* feat_in conv1d history */
+ float cont[FEAT_IN_NL1_GATE_OUT_SIZE]; /* NOTE(review): not referenced in visible code -- confirm use */
+ float rnn_state[RNN_GRU_STATE_SIZE]; /* GRU hidden state */
+ float fwc1_state[FWC1_STATE_SIZE]; /* conv1d histories for the seven FWC stages */
+ float fwc2_state[FWC2_STATE_SIZE];
+ float fwc3_state[FWC3_STATE_SIZE];
+ float fwc4_state[FWC4_STATE_SIZE];
+ float fwc5_state[FWC5_STATE_SIZE];
+ float fwc6_state[FWC6_STATE_SIZE];
+ float fwc7_state[FWC7_STATE_SIZE];
} FWGANState;
-void fwgan_init(FWGANState *st, const float *pcm);
+void fwgan_init(FWGANState *st);
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0);
void fwgan_synthesize(FWGANState *st, float *pcm, const float *features);
diff --git a/dnn/nnet.c b/dnn/nnet.c
index 05b0ea90..0bb228fe 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -73,6 +73,7 @@ void compute_linear(const LinearLayer *linear, float *out, const float *in)
{
int i, M, N;
const float *bias;
+ celt_assert(in != out);
bias = linear->bias;
M = linear->nb_inputs;
N = linear->nb_outputs;
@@ -146,11 +147,12 @@ void compute_gated_activation(const LinearLayer *layer, float *output, const flo
{
int i;
float act1[MAX_INPUTS];
+ float act2[MAX_INPUTS];
celt_assert(layer->nb_inputs == layer->nb_outputs);
- compute_linear(layer, output, input);
- compute_activation(output, output, layer->nb_outputs, ACTIVATION_SIGMOID);
compute_activation(act1, input, layer->nb_outputs, activation);
- for (i=0;i<layer->nb_outputs;i++) output[i] *= act1[i];
+ /* Compute the sigmoid gate into scratch (act2) instead of output so the
+    function is safe when output aliases input (fwgan.c calls it in-place;
+    the old code clobbered input before reading it in that case). */
+ compute_linear(layer, act2, input);
+ compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID);
+ for (i=0;i<layer->nb_outputs;i++) output[i] = act1[i]*act2[i];
}
void compute_activation(float *output, const float *input, int N, int activation)
diff --git a/dnn/nnet.h b/dnn/nnet.h
index 2916b33f..2b43308a 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -135,6 +135,7 @@ void compute_linear(const LinearLayer *linear, float *out, const float *in);
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation);
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in);
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation);
+void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation);
void compute_activation(float *output, const float *input, int N, int activation);
@@ -163,6 +164,7 @@ extern const WeightArray lpcnet_arrays[];
extern const WeightArray lpcnet_plc_arrays[];
extern const WeightArray rdovaeenc_arrays[];
extern const WeightArray rdovaedec_arrays[];
+extern const WeightArray fwgan_arrays[];
int linear_init(LinearLayer *layer, const WeightArray *arrays,
const char *bias,
diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk
index 93ca74fb..fc3fc84c 100644
--- a/lpcnet_headers.mk
+++ b/lpcnet_headers.mk
@@ -9,6 +9,7 @@ dnn/burg.h \
dnn/common.h \
dnn/freq.h \
dnn/fwgan.h \
+dnn/fwgan_data.h \
dnn/kiss99.h \
dnn/lpcnet_private.h \
dnn/nnet_data.h \
diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk
index 4c6e73f3..61cbb1f1 100644
--- a/lpcnet_sources.mk
+++ b/lpcnet_sources.mk
@@ -2,6 +2,7 @@ LPCNET_SOURCES = \
dnn/burg.c \
dnn/freq.c \
dnn/fwgan.c \
+dnn/fwgan_data.c \
dnn/kiss99.c \
dnn/lpcnet.c \
dnn/lpcnet_enc.c \