diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2017-07-13 10:03:48 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2017-07-13 10:03:48 +0300 |
commit | d9a92a78f84d6e4f903062f430325b77a72afc03 (patch) | |
tree | 56e2ab36b746997449c0dcc64c17d1821c714e9c | |
parent | b90e3239b538bdcffa7243f6d5a4835b364a125b (diff) |
Add delayed decision (doesn't quite work yet)
-rw-r--r-- | celt/celt.h | 2 | ||||
-rw-r--r-- | src/analysis.c | 43 | ||||
-rw-r--r-- | src/opus_encoder.c | 8 |
3 files changed, 52 insertions, 1 deletion
diff --git a/celt/celt.h b/celt/celt.h index 02f58445..f73f29dd 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -59,6 +59,8 @@ typedef struct { float noisiness; float activity; float music_prob; + float music_prob_min; + float music_prob_max; int bandwidth; float activity_probability; /* Store as Q6 char to save space. */ diff --git a/src/analysis.c b/src/analysis.c index 178d018c..f501826f 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -50,6 +50,8 @@ #ifndef DISABLE_FLOAT_API +#define TRANSITION_PENALTY 5 + static const float dct_table[128] = { 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, @@ -234,6 +236,11 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int float tonality_avg; int tonality_count; int i; + int pos0; + float prob_avg; + float prob_count; + float prob_min, prob_max; + float vad_prob; pos = tonal->read_pos; curr_lookahead = tonal->write_pos-tonal->read_pos; @@ -251,6 +258,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int pos--; if (pos<0) pos = DETECT_SIZE-1; + pos0 = pos; OPUS_COPY(info_out, &tonal->info[pos], 1); tonality_max = tonality_avg = info_out->tonality; tonality_count = 1; @@ -267,6 +275,41 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int tonality_count++; } info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f); + + pos = pos0; + /* If we have enough look-ahead, discard the first 5 frames to compensate for the + delay in the features. 
*/ + if (curr_lookahead > 15) + { + pos += 5; + if (pos>=DETECT_SIZE) + pos -= DETECT_SIZE; + } + + info_out->music_prob = tonal->info[pos].music_prob; + prob_min = prob_max = prob_avg = tonal->info[pos].music_prob; + vad_prob = tonal->info[pos].activity_probability; + prob_count = MAX16(.1, vad_prob); + while (1) + { + float pos_vad; + pos++; + if (pos==DETECT_SIZE) + pos = 0; + if (pos == tonal->write_pos) + break; + pos_vad = tonal->info[pos].activity_probability; + prob_count += MAX16(.1, pos_vad); + prob_avg += (tonal->info[pos].music_prob-prob_avg)/prob_count; + prob_min = MIN16(prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_min); + prob_max = MAX16(prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_max); + } + prob_min = MAX16(prob_min, 0); + prob_max = MIN16(prob_max, 1); + info_out->music_prob_min = prob_min; + info_out->music_prob_max = prob_max; + + /*printf("%f %f %f\n", prob_min, prob_max, prob_count);*/ tonal->read_subframe += len/(tonal->Fs/400); while (tonal->read_subframe>=8) { diff --git a/src/opus_encoder.c b/src/opus_encoder.c index cd37fcdf..c2866d38 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1366,7 +1366,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); /* Interpolate based on speech/music probability */ - threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); + if (analysis_info.valid) + { + float prob = (st->prev_mode == MODE_CELT_ONLY) ? 
analysis_info.music_prob_max : analysis_info.music_prob_min; + threshold = prob*mode_music + (1-prob)*mode_voice; + } else + threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); /* Bias towards SILK for VoIP because of some useful features */ if (st->application == OPUS_APPLICATION_VOIP) threshold += 8000; @@ -1378,6 +1383,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ else if (st->prev_mode>0) threshold += 4000; + /*printf("%d\n", (equiv_rate >= threshold));*/ st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; /* When FEC is enabled and there's enough packet loss, use SILK */ |