diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-06-06 09:22:18 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-06-07 00:25:34 +0300 |
commit | 1d40c56ae2134059f31cc3df304a1c80fb7553ca (patch) | |
tree | 02ba2b989c15b824f6a6edc8b702c5e48fa41be9 | |
parent | 0bf276b92b4f9e7f62b99604aa62fe10bca0a751 (diff) |
Add resampling/downmix support to DRED encoder
The 8 kHz, 12 kHz and stereo code paths are mostly untested.
-rw-r--r-- | silk/dred_encoder.c | 91 | ||||
-rw-r--r-- | silk/dred_encoder.h | 5 |
2 files changed, 89 insertions, 7 deletions
```diff
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c
index a6c827d9..afec129d 100644
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -85,25 +85,104 @@ static void dred_process_frame(DREDEnc *enc)
     enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
 }
 
+void filter_df2t(const float *in, float *out, int len, float b0, const float *b, const float *a, int order, float *mem)
+{
+    int i;
+    for (i=0;i<len;i++) {
+        int j;
+        float xi, yi, nyi;
+        xi = in[i];
+        yi = xi*b0 + mem[0];
+        nyi = -yi;
+        for (j=0;j<order;j++)
+        {
+            mem[j] = mem[j+1] + b[j]*xi + a[j]*nyi;
+        }
+        out[i] = yi;
+        /*fprintf(stdout, "%f\n", out[i]);*/
+    }
+}
+
+#define MAX_DOWNMIX_BUFFER (960*2)
+static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float *out, int out_len)
+{
+    float downmix[MAX_DOWNMIX_BUFFER];
+    int i;
+    int up;
+    celt_assert(enc->channels*in_len <= MAX_DOWNMIX_BUFFER);
+    celt_assert(in_len * (opus_int32)16000 == out_len * enc->Fs);
+    switch(enc->Fs) {
+        case 8000:
+            up = 2;
+            break;
+        case 12000:
+            up = 4;
+            break;
+        case 16000:
+            up = 1;
+            break;
+        case 24000:
+            up = 2;
+            break;
+        case 48000:
+            up = 1;
+            break;
+        default:
+            celt_assert(0);
+    }
+    OPUS_CLEAR(downmix, up*in_len);
+    if (enc->channels == 1) {
+        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(up*in[i]);
+    } else {
+        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(.5*up*(in[2*i]+in[2*i+1]));
+    }
+    if (enc->Fs == 16000) {
+        OPUS_COPY(out, downmix, out_len);
+    } else if (enc->Fs == 48000 || enc->Fs == 24000) {
+        /* ellip(7, .2, 70, 7750/24000) */
+
+        static const float filter_b[8] = { 0.005873358047f, 0.012980854831f, 0.014531340042f, 0.014531340042f, 0.012980854831f, 0.005873358047f, 0.004523418224f, 0.f};
+        static const float filter_a[8] = {-3.878718597768f, 7.748834257468f, -9.653651699533f, 8.007342726666f, -4.379450178552f, 1.463182111810f, -0.231720677804f, 0.f};
+        float b0 = 0.004523418224f;
+        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
+    } else if (enc->Fs == 12000) {
+        /* ellip(7, .2, 70, 7750/24000) */
+        static const float filter_b[8] = {-0.001017101081f, 0.003673127243f, 0.001009165267f, 0.001009165267f, 0.003673127243f, -0.001017101081f, 0.002033596776f, 0.f};
+        static const float filter_a[8] = {-4.930414411612f, 11.291643096504f, -15.322037343815f, 13.216403930898f, -7.220409219553f, 2.310550142771f, -0.334338618782f, 0.f};
+        float b0 = 0.002033596776f;
+        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+        for (i=0;i<out_len;i++) out[i] = downmix[3*i];
+    } else if (enc->Fs == 8000) {
+        /* ellip(7, .2, 70, 3900/8000) */
+        static const float filter_b[8] = { 0.081670120929f, 0.180401598565f, 0.259391051971f, 0.259391051971f, 0.180401598565f, 0.081670120929f, 0.020109185709f, 0.f};
+        static const float filter_a[8] = {-1.393651933659f, 2.609789872676f, -2.403541968806f, 2.056814957331f, -1.148908574570f, 0.473001413788f, -0.110359852412f, 0.f};
+        float b0 = 0.020109185709f;
+        filter_df2t(downmix, out, out_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+    } else {
+        celt_assert(0);
+    }
+}
+
 void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size)
 {
     int frame_size16k = frame_size * 16000 / enc->Fs;
     while (frame_size16k > 0) {
-        int i;
         int process_size16k;
         int process_size;
-        process_size16k = IMIN(2*DRED_FRAME_SIZE - enc->input_buffer_fill, frame_size16k);
+        process_size16k = IMIN(2*DRED_FRAME_SIZE, frame_size16k);
         process_size = process_size16k * enc->Fs / 16000;
-        for (i=0;i<process_size16k;i++) enc->input_buffer[enc->input_buffer_fill+i] = FLOAT2INT16(pcm[i]);
+        dred_convert_to_16k(enc, pcm, process_size, &enc->input_buffer[enc->input_buffer_fill], process_size16k);
         enc->input_buffer_fill += process_size16k;
-        if (enc->input_buffer_fill == 2*DRED_FRAME_SIZE)
+        if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)
         {
             dred_process_frame(enc);
-            enc->input_buffer_fill = 0;
+            enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
+            OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
         }
 
         pcm += process_size;
-        frame_size16k -= process_size;
+        frame_size16k -= process_size16k;
     }
 }
 
diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h
index be96e3c0..30e639a9 100644
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -36,17 +36,20 @@
 #include "lpcnet/src/dred_rdovae_enc.h"
 #include "lpcnet/src/dred_rdovae_enc_data.h"
 
+#define RESAMPLING_ORDER 8
+
 typedef struct {
     RDOVAEEnc model;
     opus_int32 Fs;
     int channels;
 
 #define DREDENC_RESET_START input_buffer
-    float input_buffer[DRED_DFRAME_SIZE];
+    float input_buffer[2*DRED_DFRAME_SIZE];
     int input_buffer_fill;
     float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
     int latents_buffer_fill;
     float state_buffer[24];
+    float resample_mem[RESAMPLING_ORDER + 1];
     LPCNetEncState lpcnet_enc_state;
     RDOVAEEncState rdovae_enc;
 } DREDEnc;
```