Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2019-09-24 22:56:53 +0300
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2019-09-24 22:56:53 +0300
commit cece1fd67fe665617dac2f23cb0a4f82c94b2270 (patch)
tree d9a0b54bc3bb0ac9d8ff4f244ace04a36376080e
parent cd529edb3363b0be6aebdefa09c4188629f45e2c (diff)
Adds OPUS_{GS}ET_WAVEFORM_MATCHING encoder ctl
This option disables psychoacoustic improvements that deliberately alter the waveform (e.g. changing the phase) to improve quality. Although it should never be used for normal mono or stereo recordings, it can be useful for cases where matching the waveform is important, e.g. direct ambisonics (family 2), where the phase changes the spatial location of sounds.
-rw-r--r-- celt/celt_decoder.c 2
-rw-r--r-- celt/celt_encoder.c 30
-rw-r--r-- celt/rate.c 10
-rw-r--r-- celt/rate.h 3
-rw-r--r-- include/opus_defines.h 27
-rw-r--r-- silk/control.h 3
-rw-r--r-- silk/enc_API.c 2
-rw-r--r-- silk/main.h 3
-rw-r--r-- silk/stereo_LR_to_MS.c 5
-rw-r--r-- src/opus_encoder.c 26
10 files changed, 100 insertions, 11 deletions
diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c
index e6efce93..534f0ba1 100644
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -1061,7 +1061,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
- fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+ fine_quant, fine_priority, C, LM, dec, 0, 0, 0, 0);
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 44cb0850..b5e5e152 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -75,6 +75,7 @@ struct OpusCustomEncoder {
int lsb_depth;
int lfe;
int disable_inv;
+ int waveform_matching;
int arch;
/* Everything beyond this point gets cleared on a reset */
@@ -1973,6 +1974,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
/*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
}
+ /* SPREAD_AGGRESSIVE can lead to bands being replaced by noise, so it is disabled for waveform_matching. */
+ if (st->waveform_matching && st->spread_decision == SPREAD_AGGRESSIVE)
+ st->spread_decision = SPREAD_NORMAL;
+
ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
}
@@ -2031,6 +2036,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
intensity_thresholds, intensity_histeresis, 21, st->intensity);
+ if (st->waveform_matching)
+ st->intensity = end;
st->intensity = IMIN(end,IMAX(start, st->intensity));
}
@@ -2193,7 +2200,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
signalBandwidth = 1;
codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
- fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
+ fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands,
+ signalBandwidth, st->waveform_matching);
if (st->lastCodedBands)
st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
else
@@ -2517,6 +2525,26 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
*value = st->disable_inv;
}
break;
+ case OPUS_SET_WAVEFORM_MATCHING_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->waveform_matching = value;
+ }
+ break;
+ case OPUS_GET_WAVEFORM_MATCHING_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->waveform_matching;
+ }
+ break;
case OPUS_RESET_STATE:
{
int i;
diff --git a/celt/rate.c b/celt/rate.c
index 465e1ba2..4003795b 100644
--- a/celt/rate.c
+++ b/celt/rate.c
@@ -248,7 +248,8 @@ void compute_pulse_cache(CELTMode *m, int LM)
static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
- int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+ int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth,
+ int waveform_matching)
{
opus_int32 psum;
int lo, hi;
@@ -358,7 +359,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
#ifdef FUZZING
if ((rand()&0x1) == 0)
#else
- if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
+ if (waveform_matching || codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
#endif
{
ec_enc_bit_logp(ec, 1, 1);
@@ -530,7 +531,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
}
int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
- opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+ opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth,
+ int waveform_matching)
{
int lo, hi, len, j;
int codedBands;
@@ -637,7 +639,7 @@ int clt_compute_allocation(const CELTMode *m, int start, int end, const int *off
}
codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
- pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
+ pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth, waveform_matching);
RESTORE_STACK;
return codedBands;
}
diff --git a/celt/rate.h b/celt/rate.h
index fad5e412..6d0e0d36 100644
--- a/celt/rate.h
+++ b/celt/rate.h
@@ -96,6 +96,7 @@ static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int puls
@return Total number of bits allocated
*/
int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
- opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
+ opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth,
+ int waveform_matching);
#endif
diff --git a/include/opus_defines.h b/include/opus_defines.h
index d141418b..d6f23c39 100644
--- a/include/opus_defines.h
+++ b/include/opus_defines.h
@@ -169,6 +169,8 @@ extern "C" {
#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
#define OPUS_GET_IN_DTX_REQUEST 4049
+#define OPUS_SET_WAVEFORM_MATCHING_REQUEST 4050
+#define OPUS_GET_WAVEFORM_MATCHING_REQUEST 4051
/** Defines for the presence of extended APIs. */
#define OPUS_HAVE_OPUS_PROJECTION_H
@@ -727,6 +729,31 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_IN_DTX(x) OPUS_GET_IN_DTX_REQUEST, __opus_check_int_ptr(x)
+/** If set to 1, attempt to better match the input waveform, at the cost
+ * of reducing the perceptual quality of the signal. This option
+ * disables the use of coding tools like folding, intensity stereo,
+ * and disables some encoder-side filtering. This option will *never* improve
+ * the quality of normal mono or stereo audio and is only meant to be used
+ * for signals where inaudible phase differences matter (e.g. family 2 ambisonics).
+ * @see OPUS_GET_WAVEFORM_MATCHING
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>0</dt><dd>Disable waveform matching (default).</dd>
+ * <dt>1</dt><dd>Enable waveform matching.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_WAVEFORM_MATCHING(x) OPUS_SET_WAVEFORM_MATCHING_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured waveform matching status.
+ * @see OPUS_SET_WAVEFORM_MATCHING
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>0</dt><dd>Waveform matching disabled (default).</dd>
+ * <dt>1</dt><dd>Waveform matching enabled.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_WAVEFORM_MATCHING(x) OPUS_GET_WAVEFORM_MATCHING_REQUEST, __opus_check_int_ptr(x)
+
+
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs
diff --git a/silk/control.h b/silk/control.h
index b76ec33c..de0dfba0 100644
--- a/silk/control.h
+++ b/silk/control.h
@@ -98,6 +98,9 @@ typedef struct {
/* I: Make frames as independent as possible (but still use LPC) */
opus_int reducedDependency;
+ /* I: Attempt to match the waveform as much as possible */
+ opus_int waveform_matching;
+
/* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */
opus_int32 internalSampleRate;
diff --git a/silk/enc_API.c b/silk/enc_API.c
index 55a33f37..7f0593ac 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -421,7 +421,7 @@ opus_int silk_Encode( /* O Returns error co
silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
- psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+ psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length, encControl->waveform_matching);
if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
/* Reset side channel encoder memory for first frame with side coding */
if( psEnc->prev_decode_only_middle == 1 ) {
diff --git a/silk/main.h b/silk/main.h
index 1a33eed5..c6cc62bc 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -58,7 +58,8 @@ void silk_stereo_LR_to_MS(
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
- opus_int frame_length /* I Number of samples */
+ opus_int frame_length, /* I Number of samples */
+ opus_int waveform_matching /* I Attempt to match the waveform as much as possible */
);
/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
diff --git a/silk/stereo_LR_to_MS.c b/silk/stereo_LR_to_MS.c
index c8226663..54e456e8 100644
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -44,7 +44,8 @@ void silk_stereo_LR_to_MS(
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
- opus_int frame_length /* I Number of samples */
+ opus_int frame_length, /* I Number of samples */
+ opus_int waveform_matching /* I Attempt to match the waveform as much as possible */
)
{
opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
@@ -115,7 +116,7 @@ void silk_stereo_LR_to_MS(
frac_3_Q16 = silk_MUL( 3, frac_Q16 );
mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
/* If Mid bitrate below minimum, reduce stereo width */
- if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
+ if( waveform_matching && mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
/* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index e98ac5b8..6ead35ef 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -86,6 +86,7 @@ struct OpusEncoder {
int encoder_buffer;
int lfe;
int arch;
+ int waveform_matching;
int use_dtx; /* general DTX for both SILK and CELT */
#ifndef DISABLE_FLOAT_API
TonalityAnalysisState analysis;
@@ -226,6 +227,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->silk_mode.useDTX = 0;
st->silk_mode.useCBR = 0;
st->silk_mode.reducedDependency = 0;
+ st->silk_mode.waveform_matching = 0;
/* Create CELT encoder */
/* Initialize CELT encoder */
@@ -1963,6 +1965,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
else
st->silk_mode.stereoWidth_Q14 = 16384 - 2048*(opus_int32)(32000-equiv_rate)/(equiv_rate-14000);
}
+ if (st->waveform_matching)
+ st->silk_mode.stereoWidth_Q14 = 16384;
if( !st->energy_masking && st->channels == 2 ) {
/* Apply stereo width reduction (at low bitrates) */
if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
@@ -2679,6 +2683,28 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value));
}
break;
+ case OPUS_SET_WAVEFORM_MATCHING_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->waveform_matching = value;
+ st->silk_mode.waveform_matching = value;
+ celt_encoder_ctl(celt_enc, OPUS_SET_WAVEFORM_MATCHING(value));
+ }
+ break;
+ case OPUS_GET_WAVEFORM_MATCHING_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->waveform_matching;
+ }
+ break;
case OPUS_RESET_STATE:
{
void *silk_enc;