diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-07-01 21:15:26 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-07-03 09:15:40 +0300 |
commit | 3510404ad5a5287148d89ee523aa1edb7ef8e257 (patch) | |
tree | 1dd9f7059041a218dd840ee08265353de331e403 | |
parent | 17bb81934ba2bf3123500c10b8aac6a8f2dfc11a (diff) |
Properly compute and use the DRED offset field.
Also, don't encode DRED data that is redundant with the main packet.
-rw-r--r-- | silk/dred_encoder.c | 30 |
-rw-r--r-- | silk/dred_encoder.h | 5 |
-rw-r--r-- | src/opus_decoder.c | 13 |
-rw-r--r-- | src/opus_encoder.c | 2 |
4 files changed, 34 insertions, 16 deletions
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index 0f77e96a..89e0f900 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -174,9 +174,13 @@ static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float } } -void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size) +void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay) { + int curr_offset16k; int frame_size16k = frame_size * 16000 / enc->Fs; + curr_offset16k = 40 + extra_delay*16000/enc->Fs - enc->input_buffer_fill; + enc->dred_offset = (int)floor((curr_offset16k+20.f)/40.f); + enc->latent_offset = 0; while (frame_size16k > 0) { int process_size16k; int process_size; @@ -186,9 +190,17 @@ void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size) enc->input_buffer_fill += process_size16k; if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE) { - dred_process_frame(enc); - enc->input_buffer_fill -= 2*DRED_FRAME_SIZE; - OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill); + curr_offset16k += 320; + dred_process_frame(enc); + enc->input_buffer_fill -= 2*DRED_FRAME_SIZE; + OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill); + /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. 
*/ + if (enc->dred_offset < 6) { + enc->dred_offset += 8; + OPUS_COPY(enc->initial_state, enc->state_buffer, 24); + } else { + enc->latent_offset++; + } } pcm += process_size; @@ -207,21 +219,19 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk int i; int offset; int ec_buffer_fill; - int dred_offset; int q0; int dQ; /* entropy coding of state and latents */ ec_enc_init(&ec_encoder, buf, max_bytes); - dred_offset = 8; /* 20 ms */ q0 = DRED_ENC_Q0; dQ = 3; - ec_enc_uint(&ec_encoder, dred_offset, 32); + ec_enc_uint(&ec_encoder, enc->dred_offset, 32); ec_enc_uint(&ec_encoder, q0, 16); ec_enc_uint(&ec_encoder, dQ, 8); - dred_encode_state(&ec_encoder, enc->state_buffer); + dred_encode_state(&ec_encoder, enc->initial_state); - for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-1); i += 2) + for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-enc->latent_offset-1); i += 2) { ec_enc ec_bak; ec_bak = ec_encoder; @@ -231,7 +241,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk dred_encode_latents( &ec_encoder, - enc->latents_buffer + i * DRED_LATENT_DIM, + enc->latents_buffer + (i+enc->latent_offset) * DRED_LATENT_DIM, quant_scales + offset, dead_zone + offset, r + offset, diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h index 4c7c8b67..8ed323d3 100644 --- a/silk/dred_encoder.h +++ b/silk/dred_encoder.h @@ -46,9 +46,12 @@ typedef struct { #define DREDENC_RESET_START input_buffer float input_buffer[2*DRED_DFRAME_SIZE]; int input_buffer_fill; + int dred_offset; + int latent_offset; float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM]; int latents_buffer_fill; float state_buffer[24]; + float initial_state[24]; float resample_mem[RESAMPLING_ORDER + 1]; LPCNetEncState lpcnet_enc_state; RDOVAEEncState rdovae_enc; @@ -60,7 +63,7 @@ void dred_encoder_reset(DREDEnc* enc); void dred_deinit_encoder(DREDEnc *enc); -void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size); 
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay); int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes); diff --git a/src/opus_decoder.c b/src/opus_decoder.c index c45facac..45978d2a 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -665,15 +665,20 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data, return OPUS_BAD_ARG; #ifdef ENABLE_NEURAL_FEC if (dred != NULL && dred->process_stage == 2) { + int F10; int features_per_frame; int needed_feature_frames; + int init_frames; lpcnet_plc_fec_clear(&st->lpcnet); - features_per_frame = IMAX(1, frame_size/(st->Fs/100)); - needed_feature_frames = features_per_frame; + F10 = st->Fs/100; /* if blend==0, the last PLC call was "update" and we need to feed two extra 10-ms frames. */ - if (st->lpcnet.blend == 0) needed_feature_frames+=2; + init_frames = (st->lpcnet.blend == 0) ? 2 : 0; + features_per_frame = IMAX(1, frame_size/F10); + needed_feature_frames = init_frames + features_per_frame; for (i=0;i<needed_feature_frames;i++) { - int feature_offset = (needed_feature_frames-i-1 + (dred_offset/(st->Fs/100)-1)); + int feature_offset; + /* We floor instead of rounding because 5-ms overlap compensates for the missing 0.5 rounding offset. 
*/ + feature_offset = init_frames - i - 2 + (int)floor(((float)dred_offset + dred->dred_offset*F10/4)/F10); if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) { lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES); } else { diff --git a/src/opus_encoder.c b/src/opus_encoder.c index b8e0f92a..2701488d 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1685,7 +1685,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef ENABLE_NEURAL_FEC if ( st->dred_duration > 0 ) { /* DRED Encoder */ - dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size ); + dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer ); } else { st->dred_encoder.latents_buffer_fill = 0; } |