diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2024-01-17 23:15:22 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2024-01-21 10:11:50 +0300 |
commit | 1ddfcfd48cb87f8dc29240d705a4da78bae0eb50 (patch) | |
tree | 9a918d05eb0c9005298b20b0d1dbdc29eb22d32d /dnn | |
parent | e699263660f4b476511be407a5bb6c1b93db59a4 (diff) |
Using PyTorch model (same architecture for now)
Diffstat (limited to 'dnn')
-rw-r--r-- | dnn/lpcnet_plc.c | 16 | ||||
-rw-r--r-- | dnn/lpcnet_private.h | 5 | ||||
-rw-r--r-- | dnn/nnet.c | 4 | ||||
-rw-r--r-- | dnn/nnet.h | 2 | ||||
-rw-r--r-- | dnn/torch/plc/plc_dataset.py | 4 | ||||
-rw-r--r-- | dnn/write_lpcnet_weights.c | 2 |
6 files changed, 21 insertions, 12 deletions
diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index 3be14abf..2aecb859 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -62,7 +62,7 @@ int lpcnet_plc_init(LPCNetPLCState *st) { lpcnet_encoder_init(&st->enc); st->loaded = 0; #ifndef USE_WEIGHTS_FILE - ret = init_plc_model(&st->model, lpcnet_plc_arrays); + ret = init_plcmodel(&st->model, plcmodel_arrays); if (ret == 0) st->loaded = 1; #else ret = 0; @@ -76,7 +76,7 @@ int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len WeightArray *list; int ret; parse_weights(&list, data, len); - ret = init_plc_model(&st->model, list); + ret = init_plcmodel(&st->model, list); opus_free(list); if (ret == 0) { ret = lpcnet_encoder_load_model(&st->enc, data, len); @@ -108,14 +108,14 @@ void lpcnet_plc_fec_clear(LPCNetPLCState *st) { static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) { - float zeros[3*PLC_MAX_RNN_NEURONS] = {0}; - float dense_out[PLC_DENSE1_OUT_SIZE]; + float tmp[PLC_DENSE_IN_OUT_SIZE]; + PLCModel *model = &st->model; PLCNetState *net = &st->plc_net; celt_assert(st->loaded); - _lpcnet_compute_dense(&st->model.plc_dense1, dense_out, in, st->arch); - compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out, st->arch); - compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state, st->arch); - _lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state, st->arch); + compute_generic_dense(&model->plc_dense_in, tmp, in, ACTIVATION_TANH, 0); + compute_generic_gru(&model->plc_gru1_input, &model->plc_gru1_recurrent, net->gru1_state, tmp, 0); + compute_generic_gru(&model->plc_gru2_input, &model->plc_gru2_recurrent, net->gru2_state, net->gru1_state, 0); + compute_generic_dense(&model->plc_dense_out, out, net->gru2_state, ACTIVATION_LINEAR, 0); } static int get_fec_or_pred(LPCNetPLCState *st, float *out) { diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 6ff3ddb2..4aa376b6 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h
@@ -41,6 +41,11 @@ struct LPCNetEncState{ float burg_cepstrum[2*NB_BANDS]; }; +typedef struct { + float gru1_state[PLC_GRU1_STATE_SIZE]; + float gru2_state[PLC_GRU2_STATE_SIZE]; +} PLCNetState; + #define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE) struct LPCNetPLCState { PLCModel model; @@ -64,9 +64,9 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float } #ifdef ENABLE_OSCE -#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS) +#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS) #else -#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS) +#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS) #endif void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch) @@ -142,7 +142,7 @@ int parse_weights(WeightArray **list, const unsigned char *data, int len); extern const WeightArray lpcnet_arrays[]; -extern const WeightArray lpcnet_plc_arrays[]; +extern const WeightArray plcmodel_arrays[]; extern const WeightArray rdovaeenc_arrays[]; extern const WeightArray rdovaedec_arrays[]; extern const WeightArray fwgan_arrays[]; diff --git a/dnn/torch/plc/plc_dataset.py b/dnn/torch/plc/plc_dataset.py index f5e4747f..2dfaaaf2 100644 --- a/dnn/torch/plc/plc_dataset.py +++ b/dnn/torch/plc/plc_dataset.py @@ -40,6 +40,10 @@ class PLCDataset(torch.utils.data.Dataset): lost_offset = np.random.randint(0, high=self.lost.shape[0]-self.sequence_length) lost = self.lost[lost_offset:lost_offset+self.sequence_length] + #randomly add a few 10-ms losses so that the model learns to handle them + lost = lost * (np.random.rand(lost.shape[-1]) > .02).astype('float32') + #randomly break long consecutive losses so we don't try too hard to predict them
+ lost = 1 - ((1-lost) * (np.random.rand(lost.shape[-1]) > .1).astype('float32')) lost = np.reshape(lost, (features.shape[0], 1)) lost_mask = np.tile(lost, (1,features.shape[-1])) in_features = features*lost_mask diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c index 395590f4..2f80b962 100644 --- a/dnn/write_lpcnet_weights.c +++ b/dnn/write_lpcnet_weights.c @@ -81,7 +81,7 @@ int main(void) FILE *fout = fopen("weights_blob.bin", "w"); write_weights(pitchdnn_arrays, fout); write_weights(fargan_arrays, fout); - write_weights(lpcnet_plc_arrays, fout); + write_weights(plcmodel_arrays, fout); write_weights(rdovaeenc_arrays, fout); write_weights(rdovaedec_arrays, fout); #ifdef ENABLE_OSCE