diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2018-02-13 02:44:36 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2018-02-13 22:11:07 +0300 |
commit | e04e86e0e7067bfc895dc71e0c74c00b312071c5 (patch) | |
tree | 5f983a5201b76d06c3f47067415c90f8f1c59736 | |
parent | 3b90ff8a33d8819b2d02c8b7ac733073007faf2f (diff) |
Add a simple masking model to the spreading decision
This improves cases where a whole region is dominated by a handful of tones.
-rw-r--r-- | celt/bands.c | 8 | ||||
-rw-r--r-- | celt/bands.h | 2 | ||||
-rw-r--r-- | celt/celt_encoder.c | 44 |
3 files changed, 46 insertions, 8 deletions
diff --git a/celt/bands.c b/celt/bands.c index 950b727c..807352fe 100644 --- a/celt/bands.c +++ b/celt/bands.c @@ -478,7 +478,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT /* Decide whether we should spread the pulses in the current frame */ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M) + int end, int C, int M, const int *spread_weight) { int i, c, N0; int sum = 0, nbBands=0; @@ -519,8 +519,8 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, if (i>m->nbEBands-4) hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); - sum += tmp*256; - nbBands++; + sum += tmp*spread_weight[i]; + nbBands+=spread_weight[i]; } } while (++c<C); @@ -544,7 +544,7 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ celt_assert(sum>=0); - sum = celt_udiv(sum, nbBands); + sum = celt_udiv((opus_int32)sum<<8, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; diff --git a/celt/bands.h b/celt/bands.h index 2488c185..422b32cf 100644 --- a/celt/bands.h +++ b/celt/bands.h @@ -72,7 +72,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M); + int end, int C, int M, const int *spread_weight); #ifdef MEASURE_NORM_MSE void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index aeb35432..106693da 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -965,7 +965,7 
@@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, - AnalysisInfo *analysis, int *importance) + AnalysisInfo *analysis, int *importance, int *spread_weight) { int i, c; opus_int32 tot_boost=0; @@ -991,6 +991,42 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 for (i=0;i<end;i++) maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); } while (++c<C); + { + /* Compute a really simple masking model to avoid taking into account completely masked + bands when computing the spreading decision. */ + VARDECL(opus_val16, mask); + VARDECL(opus_val16, sig); + ALLOC(mask, nbEBands, opus_val16); + ALLOC(sig, nbEBands, opus_val16); + for (i=0;i<end;i++) + mask[i] = bandLogE[i]-noise_floor[i]; + if (C==2) + { + for (i=0;i<end;i++) + mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]); + } + OPUS_COPY(sig, mask, end); + for (i=1;i<end;i++) + mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT)); + for (i=end-2;i>=0;i--) + mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT)); + for (i=0;i<end;i++) + { + /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/ + opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]); + /* Clamp SMR to make sure we're not shifting by something negative or too large. 
*/ + smr = MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)); +#ifdef FIXED_POINT + /* FIXME: Use PSHR16() instead */ + spread_weight[i] = IMAX(1, 32 >> -PSHR32(smr, DB_SHIFT)); +#else + spread_weight[i] = IMAX(1, 32 >> -(int)floor(.5f + smr)); +#endif + } + /*for (i=0;i<end;i++) + printf("%d ", spread_weight[i]); + printf("\n");*/ + } /* Make sure that dynamic allocation can't make us bust the budget */ if (effectiveBytes > 50 && LM>=1 && !lfe) { @@ -1378,6 +1414,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, VARDECL(int, cap); VARDECL(int, offsets); VARDECL(int, importance); + VARDECL(int, spread_weight); VARDECL(int, fine_priority); VARDECL(int, tf_res); VARDECL(unsigned char, collapse_masks); @@ -1826,10 +1863,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(offsets, nbEBands, int); ALLOC(importance, nbEBands, int); + ALLOC(spread_weight, nbEBands, int); maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance); + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight); ALLOC(tf_res, nbEBands, int); /* Disable variable tf resolution for hybrid and at very low bitrate */ @@ -1919,7 +1957,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { st->spread_decision = spreading_decision(mode, X, &st->tonal_average, st->spread_decision, &st->hf_average, - &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight); } /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ |