Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@jmvalin.ca>, 2017-07-13 10:03:48 +0300
committer: Jean-Marc Valin <jmvalin@jmvalin.ca>, 2017-07-13 10:03:48 +0300
commit: d9a92a78f84d6e4f903062f430325b77a72afc03 (patch)
tree: 56e2ab36b746997449c0dcc64c17d1821c714e9c
parent: b90e3239b538bdcffa7243f6d5a4835b364a125b (diff)
Add delayed decision (doesn't quite work yet)
-rw-r--r-- celt/celt.h | 2
-rw-r--r-- src/analysis.c | 43
-rw-r--r-- src/opus_encoder.c | 8
3 files changed, 52 insertions, 1 deletions
diff --git a/celt/celt.h b/celt/celt.h
index 02f58445..f73f29dd 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -59,6 +59,8 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
+ float music_prob_min;
+ float music_prob_max;
int bandwidth;
float activity_probability;
/* Store as Q6 char to save space. */
diff --git a/src/analysis.c b/src/analysis.c
index 178d018c..f501826f 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -50,6 +50,8 @@
#ifndef DISABLE_FLOAT_API
+#define TRANSITION_PENALTY 5
+
static const float dct_table[128] = {
0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
@@ -234,6 +236,11 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
float tonality_avg;
int tonality_count;
int i;
+ int pos0;
+ float prob_avg;
+ float prob_count;
+ float prob_min, prob_max;
+ float vad_prob;
pos = tonal->read_pos;
curr_lookahead = tonal->write_pos-tonal->read_pos;
@@ -251,6 +258,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
pos--;
if (pos<0)
pos = DETECT_SIZE-1;
+ pos0 = pos;
OPUS_COPY(info_out, &tonal->info[pos], 1);
tonality_max = tonality_avg = info_out->tonality;
tonality_count = 1;
@@ -267,6 +275,41 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
tonality_count++;
}
info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
+
+ pos = pos0;
+ /* If we have enough look-ahead, discard the first 5 frames to compensate for the
+ delay in the features. */
+ if (curr_lookahead > 15)
+ {
+ pos += 5;
+ if (pos>=DETECT_SIZE)
+ pos -= DETECT_SIZE;
+ }
+
+ info_out->music_prob = tonal->info[pos].music_prob;
+ prob_min = prob_max = prob_avg = tonal->info[pos].music_prob;
+ vad_prob = tonal->info[pos].activity_probability;
+ prob_count = MAX16(.1, vad_prob);
+ while (1)
+ {
+ float pos_vad;
+ pos++;
+ if (pos==DETECT_SIZE)
+ pos = 0;
+ if (pos == tonal->write_pos)
+ break;
+ pos_vad = tonal->info[pos].activity_probability;
+ prob_count += MAX16(.1, pos_vad);
+ prob_avg += (tonal->info[pos].music_prob-prob_avg)/prob_count;
+ prob_min = MIN16(prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_min);
+ prob_max = MAX16(prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_max);
+ }
+ prob_min = MAX16(prob_min, 0);
+ prob_max = MIN16(prob_max, 1);
+ info_out->music_prob_min = prob_min;
+ info_out->music_prob_max = prob_max;
+
+ /*printf("%f %f %f\n", prob_min, prob_max, prob_count);*/
tonal->read_subframe += len/(tonal->Fs/400);
while (tonal->read_subframe>=8)
{
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index cd37fcdf..c2866d38 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1366,7 +1366,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1])
+ MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
/* Interpolate based on speech/music probability */
- threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
+ if (analysis_info.valid)
+ {
+ float prob = (st->prev_mode == MODE_CELT_ONLY) ? analysis_info.music_prob_max : analysis_info.music_prob_min;
+ threshold = prob*mode_music + (1-prob)*mode_voice;
+ } else
+ threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
/* Bias towards SILK for VoIP because of some useful features */
if (st->application == OPUS_APPLICATION_VOIP)
threshold += 8000;
@@ -1378,6 +1383,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
else if (st->prev_mode>0)
threshold += 4000;
+ /*printf("%d\n", (equiv_rate >= threshold));*/
st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
/* When FEC is enabled and there's enough packet loss, use SILK */