diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-09-16 06:43:39 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-09-16 06:43:39 +0300 |
commit | ca4a1cbbc7bad362fff61577b8151c11d861976c (patch) | |
tree | 4b6c51c7ab39e4daff1329f74738625d29609088 | |
parent | 2b463a3499efba0c6a2ddfac761e0f4aa2707303 (diff) |
Replace pvq with rdovae for initial state
-rw-r--r-- | silk/dred_coding.c | 77 | ||||
-rw-r--r-- | silk/dred_config.h | 2 | ||||
-rw-r--r-- | silk/dred_decoder.c | 12 | ||||
-rw-r--r-- | silk/dred_encoder.c | 15 | ||||
-rw-r--r-- | silk/dred_encoder.h | 4 |
5 files changed, 25 insertions, 85 deletions
diff --git a/silk/dred_coding.c b/silk/dred_coding.c index 24e109ef..f8d2f070 100644 --- a/silk/dred_coding.c +++ b/silk/dred_coding.c @@ -33,16 +33,13 @@ #include <stdio.h> #include "celt/entenc.h" -#include "celt/vq.h" -#include "celt/cwrs.h" #include "celt/laplace.h" #include "os_support.h" #include "dred_config.h" #include "dred_coding.h" #define LATENT_DIM 80 -#define PVQ_DIM 24 -#define PVQ_K 82 +#define STATE_DIM 80 int compute_quantizer(int q0, int dQ, int i) { int quant; @@ -53,37 +50,6 @@ int compute_quantizer(int q0, int dQ, int i) { return (int) floor(0.5f + DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); } -static void encode_pvq(const int *iy, int N, int K, ec_enc *enc) { - int fits; - celt_assert(N==24 || N==12 || N==6); - fits = (N==24 && K<=9) || (N==12 && K<=16) || (N==6); - /*printf("encode(%d,%d), fits=%d\n", N, K, fits);*/ - if (fits) { - if (K > 0) - encode_pulses(iy, N, K, enc); - } - else { - int N2 = N/2; - int K0=0; - int i; - for (i=0;i<N2;i++) K0 += abs(iy[i]); - /* FIXME: Don't use uniform probability for K0. */ - ec_enc_uint(enc, K0, K+1); - /*printf("K0 = %d\n", K0);*/ - encode_pvq(iy, N2, K0, enc); - encode_pvq(&iy[N2], N2, K-K0, enc); - } -} - -void dred_encode_state(ec_enc *enc, const float *x) { - int iy[PVQ_DIM]; - float x0[PVQ_DIM]; - /* Copy state because the PVQ search will trash it. */ - OPUS_COPY(x0, x, PVQ_DIM); - op_pvq_search_c(x0, iy, PVQ_K, PVQ_DIM, 0); - encode_pvq(iy, PVQ_DIM, PVQ_K, enc); -} - void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint16 *scale, const opus_uint16 *dzone, const opus_uint16 *r, const opus_uint16 *p0) { int i; float eps = .1f; @@ -101,47 +67,6 @@ void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint16 *scale, } } - - -static void decode_pvq(int *iy, int N, int K, ec_dec *dec) { - int fits; - celt_assert(N==24 || N==12 || N==6); - fits = (N==24 && K<=9) || (N==12 && K<=16) || (N==6); - /*printf("encode(%d,%d), fits=%d\n", N, K, fits);*/ - if (fits) { - if (K > 0) - decode_pulses(iy, N, K, dec); - else - OPUS_CLEAR(iy, N); - } - else { - int N2 = N/2; - int K0; - /* FIXME: Don't use uniform probability for K0. */ - K0 = ec_dec_uint(dec, K+1); - /*printf("K0 = %d\n", K0);*/ - decode_pvq(iy, N2, K0, dec); - decode_pvq(&iy[N2], N2, K-K0, dec); - } -} - -void dred_decode_state(ec_enc *dec, float *x) { - int k; - int iy[PVQ_DIM]; - float norm = 0; - decode_pvq(iy, PVQ_DIM, PVQ_K, dec); - /*printf("tell: %d\n", ec_tell(dec)-tell1);*/ - for (k = 0; k < PVQ_DIM; k++) - { - norm += (float) iy[k] * iy[k]; - } - norm = 1.f / sqrt(norm); - for (k = 0; k < PVQ_DIM; k++) - { - x[k] = iy[k] * norm; - } -} - void dred_decode_latents(ec_dec *dec, float *x, const opus_uint16 *scale, const opus_uint16 *r, const opus_uint16 *p0) { int i; for (i=0;i<LATENT_DIM;i++) { diff --git a/silk/dred_config.h b/silk/dred_config.h index d8342f8e..b5573dac 100644 --- a/silk/dred_config.h +++ b/silk/dred_config.h @@ -41,7 +41,7 @@ /* these are inpart duplicates to the values defined in dred_rdovae_constants.h */ #define DRED_NUM_FEATURES 20 #define DRED_LATENT_DIM 80 -#define DRED_STATE_DIM 24 +#define DRED_STATE_DIM 80 #define DRED_SILK_ENCODER_DELAY (79+12-80) #define DRED_FRAME_SIZE 160 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE)) diff --git a/silk/dred_decoder.c b/silk/dred_decoder.c index 04ba1ef3..500f33b8 100644 --- a/silk/dred_decoder.c +++ b/silk/dred_decoder.c @@ -54,6 +54,7 @@ int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int mi int offset; int q0; int dQ; + int state_qoffset; /* since features are decoded in quadruples, it makes no sense to go with an uneven number of redundancy frames */ @@ -66,7 +67,14 @@ int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int mi dQ = ec_dec_uint(&ec, 8); /*printf("%d %d %d\n", dred_offset, q0, dQ);*/ - dred_decode_state(&ec, dec->state); + //dred_decode_state(&ec, dec->state); + state_qoffset = q0*(DRED_LATENT_DIM+DRED_STATE_DIM) + DRED_STATE_DIM; + dred_decode_latents( + &ec, + dec->state, + quant_scales + state_qoffset, + r + state_qoffset, + p0 + state_qoffset); /* decode newest to oldest and store oldest to newest */ for (i = 0; i < IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2); i += 2) @@ -75,7 +83,7 @@ int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int mi if (8*num_bytes - ec_tell(&ec) <= 7) break; q_level = compute_quantizer(q0, dQ, i/2); - offset = q_level * DRED_LATENT_DIM; + offset = q_level * (DRED_LATENT_DIM+DRED_STATE_DIM); dred_decode_latents( &ec, &dec->latents[(i/2)*DRED_LATENT_DIM], diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index 5bae39e9..7b34cefe 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -197,7 +197,7 @@ void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int ex /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */ if (enc->dred_offset < 6) { enc->dred_offset += 8; - OPUS_COPY(enc->initial_state, enc->state_buffer, 24); + OPUS_COPY(enc->initial_state, enc->state_buffer, DRED_STATE_DIM); } else { enc->latent_offset++; } @@ -221,6 +221,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk int ec_buffer_fill; int q0; int dQ; + int state_qoffset; /* entropy coding of state and latents */ ec_enc_init(&ec_encoder, buf, max_bytes); @@ -229,15 +230,21 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk ec_enc_uint(&ec_encoder, enc->dred_offset, 32); ec_enc_uint(&ec_encoder, q0, 16); ec_enc_uint(&ec_encoder, dQ, 8); - dred_encode_state(&ec_encoder, enc->initial_state); - + state_qoffset = q0*(DRED_LATENT_DIM+DRED_STATE_DIM) + DRED_STATE_DIM; + dred_encode_latents( + &ec_encoder, + enc->initial_state, + quant_scales + state_qoffset, + dead_zone + state_qoffset, + r + state_qoffset, + p0 + state_qoffset); for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-enc->latent_offset-1); i += 2) { ec_enc ec_bak; ec_bak = ec_encoder; q_level = compute_quantizer(q0, dQ, i/2); - offset = q_level * DRED_LATENT_DIM; + offset = q_level * (DRED_LATENT_DIM+DRED_STATE_DIM); dred_encode_latents( &ec_encoder, diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h index 8ed323d3..2b77d581 100644 --- a/silk/dred_encoder.h +++ b/silk/dred_encoder.h @@ -50,8 +50,8 @@ typedef struct { int latent_offset; float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM]; int latents_buffer_fill; - float state_buffer[24]; - float initial_state[24]; + float state_buffer[DRED_STATE_DIM]; + float initial_state[DRED_STATE_DIM]; float resample_mem[RESAMPLING_ORDER + 1]; LPCNetEncState lpcnet_enc_state; RDOVAEEncState rdovae_enc; |