From c1b80a7ccffefde806a0674a19c756e2099db6b0 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 15 Jan 2024 18:24:15 -0500 Subject: Improving PLC Should handle the history in a more consistent way. Slightly increase the model size and re-enable biased band loss in training. --- autogen.sh | 2 +- dnn/lpcnet_plc.c | 17 +++++++++++++++-- dnn/lpcnet_private.h | 4 +++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/autogen.sh b/autogen.sh index 2cac2083..a961a9f8 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -dnn/download_model.sh caca188 +dnn/download_model.sh 26ddfd7 echo "Updating build configuration files, please wait...." diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index ec8ad7e9..5d397dd8 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -51,6 +51,8 @@ void lpcnet_plc_reset(LPCNetPLCState *st) { st->blend = 0; st->loss_count = 0; st->analysis_gap = 1; + st->analysis_pos = PLC_BUF_SIZE; + st->predict_pos = PLC_BUF_SIZE; } int lpcnet_plc_init(LPCNetPLCState *st) { @@ -58,7 +60,6 @@ int lpcnet_plc_init(LPCNetPLCState *st) { st->arch = opus_select_arch(); fargan_init(&st->fargan); lpcnet_encoder_init(&st->enc); - st->analysis_pos = PLC_BUF_SIZE; st->loaded = 0; #ifndef USE_WEIGHTS_FILE ret = init_plc_model(&st->model, lpcnet_plc_arrays); @@ -148,6 +149,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { int i; if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE; else st->analysis_gap = 1; + if (st->predict_pos - FRAME_SIZE >= 0) st->predict_pos -= FRAME_SIZE; OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE); for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i]; st->loss_count = 0; @@ -161,28 +163,38 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { celt_assert(st->loaded); if (st->blend == 0) { int count = 0; + st->plc_net = st->plc_bak[0]; while (st->analysis_pos + FRAME_SIZE <= 
PLC_BUF_SIZE) { float x[FRAME_SIZE]; float plc_features[2*NB_BANDS+NB_FEATURES+1]; + celt_assert(st->analysis_pos >= 0); for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i]; burg_cepstral_analysis(plc_features, x); lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch); - if ((st->analysis_gap && count > 0) || count > 1) { + if ((!st->analysis_gap || count>0) && st->analysis_pos + 0*FRAME_SIZE >= st->predict_pos) { queue_features(st, st->features); OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES); plc_features[2*NB_BANDS+NB_FEATURES] = 1; + st->plc_bak[0] = st->plc_bak[1]; + st->plc_bak[1] = st->plc_net; compute_plc_pred(st, st->features, plc_features); } st->analysis_pos += FRAME_SIZE; count++; } + st->plc_bak[0] = st->plc_bak[1]; + st->plc_bak[1] = st->plc_net; get_fec_or_pred(st, st->features); queue_features(st, st->features); + st->plc_bak[0] = st->plc_bak[1]; + st->plc_bak[1] = st->plc_net; get_fec_or_pred(st, st->features); queue_features(st, st->features); fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features); st->analysis_gap = 0; } + st->plc_bak[0] = st->plc_bak[1]; + st->plc_bak[1] = st->plc_net; if (get_fec_or_pred(st, st->features)) st->loss_count = 0; else st->loss_count++; if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9)); @@ -191,6 +203,7 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { queue_features(st, st->features); if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE; else st->analysis_gap = 1; + st->predict_pos = PLC_BUF_SIZE; OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE); for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i]; st->blend = 1; diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 7fb8123a..6ff3ddb2 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -41,7 +41,7 @@ struct LPCNetEncState{ float burg_cepstrum[2*NB_BANDS]; }; -#define 
PLC_BUF_SIZE (CONT_VECTORS*FRAME_SIZE) +#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE) struct LPCNetPLCState { PLCModel model; FARGANState fargan; @@ -56,12 +56,14 @@ struct LPCNetPLCState { int fec_fill_pos; int fec_skip; int analysis_pos; + int predict_pos; float pcm[PLC_BUF_SIZE]; int blend; float features[NB_TOTAL_FEATURES]; float cont_features[CONT_VECTORS*NB_FEATURES]; int loss_count; PLCNetState plc_net; + PLCNetState plc_bak[2]; }; void preemphasis(float *y, float *mem, const float *x, float coef, int N); -- cgit v1.2.3