Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@amazon.com> 2023-07-01 21:15:26 +0300
committer: Jean-Marc Valin <jmvalin@amazon.com> 2023-07-03 09:15:40 +0300
commit: 3510404ad5a5287148d89ee523aa1edb7ef8e257 (patch)
tree: 1dd9f7059041a218dd840ee08265353de331e403
parent: 17bb81934ba2bf3123500c10b8aac6a8f2dfc11a (diff)
Properly compute and use the DRED offset field
Also, don't code DRED that's redundant with the main packet
-rw-r--r-- silk/dred_encoder.c 30
-rw-r--r-- silk/dred_encoder.h 5
-rw-r--r-- src/opus_decoder.c 13
-rw-r--r-- src/opus_encoder.c 2
4 files changed, 34 insertions, 16 deletions
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c
index 0f77e96a..89e0f900 100644
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -174,9 +174,13 @@ static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float
}
}
-void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size)
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay)
{
+ int curr_offset16k;
int frame_size16k = frame_size * 16000 / enc->Fs;
+ curr_offset16k = 40 + extra_delay*16000/enc->Fs - enc->input_buffer_fill;
+ enc->dred_offset = (int)floor((curr_offset16k+20.f)/40.f);
+ enc->latent_offset = 0;
while (frame_size16k > 0) {
int process_size16k;
int process_size;
@@ -186,9 +190,17 @@ void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size)
enc->input_buffer_fill += process_size16k;
if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)
{
- dred_process_frame(enc);
- enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
- OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
+ curr_offset16k += 320;
+ dred_process_frame(enc);
+ enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
+ OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
+ /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */
+ if (enc->dred_offset < 6) {
+ enc->dred_offset += 8;
+ OPUS_COPY(enc->initial_state, enc->state_buffer, 24);
+ } else {
+ enc->latent_offset++;
+ }
}
pcm += process_size;
@@ -207,21 +219,19 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
int i;
int offset;
int ec_buffer_fill;
- int dred_offset;
int q0;
int dQ;
/* entropy coding of state and latents */
ec_enc_init(&ec_encoder, buf, max_bytes);
- dred_offset = 8; /* 20 ms */
q0 = DRED_ENC_Q0;
dQ = 3;
- ec_enc_uint(&ec_encoder, dred_offset, 32);
+ ec_enc_uint(&ec_encoder, enc->dred_offset, 32);
ec_enc_uint(&ec_encoder, q0, 16);
ec_enc_uint(&ec_encoder, dQ, 8);
- dred_encode_state(&ec_encoder, enc->state_buffer);
+ dred_encode_state(&ec_encoder, enc->initial_state);
- for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-1); i += 2)
+ for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-enc->latent_offset-1); i += 2)
{
ec_enc ec_bak;
ec_bak = ec_encoder;
@@ -231,7 +241,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
dred_encode_latents(
&ec_encoder,
- enc->latents_buffer + i * DRED_LATENT_DIM,
+ enc->latents_buffer + (i+enc->latent_offset) * DRED_LATENT_DIM,
quant_scales + offset,
dead_zone + offset,
r + offset,
diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h
index 4c7c8b67..8ed323d3 100644
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -46,9 +46,12 @@ typedef struct {
#define DREDENC_RESET_START input_buffer
float input_buffer[2*DRED_DFRAME_SIZE];
int input_buffer_fill;
+ int dred_offset;
+ int latent_offset;
float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
int latents_buffer_fill;
float state_buffer[24];
+ float initial_state[24];
float resample_mem[RESAMPLING_ORDER + 1];
LPCNetEncState lpcnet_enc_state;
RDOVAEEncState rdovae_enc;
@@ -60,7 +63,7 @@ void dred_encoder_reset(DREDEnc* enc);
void dred_deinit_encoder(DREDEnc *enc);
-void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size);
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay);
int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes);
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index c45facac..45978d2a 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -665,15 +665,20 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
return OPUS_BAD_ARG;
#ifdef ENABLE_NEURAL_FEC
if (dred != NULL && dred->process_stage == 2) {
+ int F10;
int features_per_frame;
int needed_feature_frames;
+ int init_frames;
lpcnet_plc_fec_clear(&st->lpcnet);
- features_per_frame = IMAX(1, frame_size/(st->Fs/100));
- needed_feature_frames = features_per_frame;
+ F10 = st->Fs/100;
/* if blend==0, the last PLC call was "update" and we need to feed two extra 10-ms frames. */
- if (st->lpcnet.blend == 0) needed_feature_frames+=2;
+ init_frames = (st->lpcnet.blend == 0) ? 2 : 0;
+ features_per_frame = IMAX(1, frame_size/F10);
+ needed_feature_frames = init_frames + features_per_frame;
for (i=0;i<needed_feature_frames;i++) {
- int feature_offset = (needed_feature_frames-i-1 + (dred_offset/(st->Fs/100)-1));
+ int feature_offset;
+ /* We floor instead of rounding because 5-ms overlap compensates for the missing 0.5 rounding offset. */
+ feature_offset = init_frames - i - 2 + (int)floor(((float)dred_offset + dred->dred_offset*F10/4)/F10);
if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) {
lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES);
} else {
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index b8e0f92a..2701488d 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1685,7 +1685,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifdef ENABLE_NEURAL_FEC
if ( st->dred_duration > 0 ) {
/* DRED Encoder */
- dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size );
+ dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer );
} else {
st->dred_encoder.latents_buffer_fill = 0;
}