SILK update

Simplifies mono/stereo switching in SILK. Fixes a quantization mismatch between encoder and decoder. Constrains the pitch lags in the same way in the encoder and decoder.
author: Koen Vos <koen.vos@skype.net> 2011-10-06 21:38:26 +0400
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2011-10-06 21:38:26 +0400
commit: 888756691836ca8ce419a870a768f910330fb9d1 (patch)
tree: 9cf2a47be95f56c4c71dc49cdf7da7fdd9cb6bb9
parent: 480ba7034810fec56625dcd9ceeeb01d75c8d755 (diff)
8 files changed, 33 insertions, 77 deletions
diff --git a/silk/decode_core.c b/silk/decode_core.c
index 6a07ee1a..b80a3895 100644
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -91,10 +91,10 @@ void silk_decode_core(
         /* Preload LPC coeficients to array on stack. Gives small performance gain */
         silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
         B_Q14        = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
-        Gain_Q10     = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
         signalType   = psDec->indices.signalType;
 
-        inv_gain_Q16 = silk_INVERSE32_varQ( Gain_Q10, 26 );
+        Gain_Q10     = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
+        inv_gain_Q16 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 32 );
         inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX );
 
         /* Calculate Gain adjustment factor */
diff --git a/silk/decode_pitch.c b/silk/decode_pitch.c
index 6aeb7ab2..bb21bc75 100644
--- a/silk/decode_pitch.c
+++ b/silk/decode_pitch.c
@@ -67,7 +67,7 @@ void silk_decode_pitch(
     }
 
     min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
-    max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
+    max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ) - 1;
     lag = min_lag + lagIndex;
 
     for( k = 0; k < nb_subfr; k++ ) {
diff --git a/silk/enc_API.c b/silk/enc_API.c
index 3fa2715d..0fe945b6 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -119,44 +119,6 @@ opus_int silk_QueryEncoder(
     return ret;
 }
 
-static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id)
-{
-   int i;
-   opus_int16                            delta, g1, g2;
-   const opus_int16                     *x1, *x2;
-
-   x1 = in+channel;
-   x2 = in+(1-channel);
-   g1 = to_mono ? 16384: 8192;
-   g2 = to_mono ? 0 : 8192;
-
-   /* We want to finish at 0.5 */
-   delta = (16384+(len>>1))/(len);
-   if (to_mono) {
-      delta = -delta;
-   }
-
-   i=0;
-   if (to_mono != 2)
-   {
-      if ( id==0 ) {
-         for ( ; i < len>>1; i++ ) {
-            out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 );
-            g1 += delta;
-            g2 -= delta;
-         }
-      }
-   }
-   if (to_mono) {
-      for ( ; i < len; i++ ) {
-         out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 );
-      }
-   } else {
-      for ( ; i < len; i++ ) {
-         out[ i ] = x1[ 2*i ];
-      }
-   }
-}
 
 /**************************/
 /* Encode frame with Silk */
@@ -268,18 +230,13 @@ opus_int silk_Encode(
         /* Resample and write to buffer */
         if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
             int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
-            if ( encControl->toMono > 0) {
-                stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, encControl->toMono, id );
-            } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) {
-                stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id );
-            } else {
-                for( n = 0; n < nSamplesFromInput; n++ ) {
-                    buf[ n ] = samplesIn[ 2 * n ];
-                }
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n ];
             }
             /* Making sure to start both resamplers from the same state when switching from mono to stereo */
-            if(psEnc->nPrevChannelsInternal == 1 && id==0)
+            if(psEnc->nPrevChannelsInternal == 1 && id==0) {
                silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+            }
 
             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -287,14 +244,8 @@ opus_int silk_Encode(
 
             nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
             nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
-            if ( encControl->toMono > 0) {
-                stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, encControl->toMono, id );
-            } else if( psEnc->nPrevChannelsInternal == 1  || encControl->toMono == -1) {
-                stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id );
-            } else {
-                for( n = 0; n < nSamplesFromInput; n++ ) {
-                    buf[ n ] = samplesIn[ 2 * n + 1 ];
-                }
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n + 1 ];
             }
             ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -394,9 +345,9 @@ opus_int silk_Encode(
             if( encControl->nChannelsInternal == 2 ) {
                 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
                     psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
-                    MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8,
+                    MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
                     psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
-                if (!prefillFlag) {
+                if( !prefillFlag ) {
                     silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                     silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                 }
@@ -437,8 +388,9 @@ opus_int silk_Encode(
                     flags  = silk_LSHIFT( flags, 1 );
                     flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
                 }
-                if (!prefillFlag)
+                if( !prefillFlag ) {
                     ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+                }
 
                 /* Return zero bytes if all channels DTXed */
                 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c
index f2615b5d..c9495915 100644
--- a/silk/float/pitch_analysis_core_FLP.c
+++ b/silk/float/pitch_analysis_core_FLP.c
@@ -467,17 +467,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
 
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
         }
         *lagIndex = (opus_int16)( lag_new - min_lag );
         *contourIndex = (opus_int8)CBimax;
-    } else {
+    } else {        /* Fs_kHz == 8 */
         /* Save Lags and correlation */
         silk_assert( CCmax >= 0.0f );
         *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
         }
-        *lagIndex = (opus_int16)( lag - min_lag );
+        *lagIndex = (opus_int16)( lag - min_lag_8kHz );
         *contourIndex = (opus_int8)CBimax;
     }
     silk_assert( *lagIndex >= 0 );
diff --git a/silk/main.h b/silk/main.h
index c30c63ff..d7ed22ce 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -54,6 +54,7 @@ void silk_stereo_LR_to_MS(
     opus_int32           mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
     opus_int32           total_rate_bps,                 /* I    Total bitrate                               */
     opus_int             prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int             toMono,                         /* I    Last frame before a stereo->mono transition */
     opus_int             fs_kHz,                         /* I    Sample rate (kHz)                           */
     opus_int             frame_length                    /* I    Number of samples                           */
 );
diff --git a/silk/pitch_analysis_core.c b/silk/pitch_analysis_core.c
index 70438474..eaa6209d 100644
--- a/silk/pitch_analysis_core.c
+++ b/silk/pitch_analysis_core.c
@@ -558,15 +558,17 @@ opus_int silk_pitch_analysis_core(        /* O    Voicing estimate: 0 voiced, 1
 
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
         }
         *lagIndex = (opus_int16)( lag_new - min_lag);
         *contourIndex = (opus_int8)CBimax;
-    } else {
+    } else {        /* Fs_kHz == 8 */
         /* Save Lags and correlation */
         CCmax = silk_max( CCmax, 0 );
         *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
         }
         *lagIndex = (opus_int16)( lag - min_lag_8kHz );
         *contourIndex = (opus_int8)CBimax;
diff --git a/silk/stereo_LR_to_MS.c b/silk/stereo_LR_to_MS.c
index 5f3eee57..9db12444 100644
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -41,6 +41,7 @@ void silk_stereo_LR_to_MS(
     opus_int32           mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
     opus_int32           total_rate_bps,                 /* I    Total bitrate                               */
     opus_int             prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int             toMono,                         /* I    Last frame before a stereo->mono transition */
     opus_int             fs_kHz,                         /* I    Sample rate (kHz)                           */
     opus_int             frame_length                    /* I    Number of samples                           */
 )
@@ -96,7 +97,7 @@ void silk_stereo_LR_to_MS(
 
     /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
     total_rate_bps -= is10msFrame ? 1200 : 600;      /* Subtract approximate bitrate for coding stereo parameters */
-    if (total_rate_bps < 1 ) {
+    if( total_rate_bps < 1 ) {
         total_rate_bps = 1;
     }
     min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
@@ -122,7 +123,13 @@ void silk_stereo_LR_to_MS(
 
     /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
     *mid_only_flag = 0;
-    if( state->width_prev_Q14 == 0 &&
+    if( toMono ) {
+        /* Last frame before stereo->mono transition; collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+        silk_stereo_quant_pred( pred_Q13, ix );
+    } else if( state->width_prev_Q14 == 0 &&
         ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
     {
         /* Code as panned-mono; previous frame already had zero width */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 964c5f8d..ad3279f8 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -486,21 +486,13 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     }
 #endif
 
-    if (st->silk_mode.toMono==1 && st->stream_channels==2)
-    {
-       /* In case the encoder changes its mind on stereo->mono transition */
-       st->silk_mode.toMono = -1;
-    } else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
+    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0) 
     {
        /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
-       st->silk_mode.toMono=1;
-       st->stream_channels = 2;
-    } else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==1)
-    {
-       st->silk_mode.toMono=2;
+       st->silk_mode.toMono = 1;
        st->stream_channels = 2;
     } else {
-       st->silk_mode.toMono=0;
+       st->silk_mode.toMono = 0;
     }
 
 #ifdef FUZZING
author	Koen Vos <koen.vos@skype.net>	2011-10-06 21:38:26 +0400
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2011-10-06 21:38:26 +0400
commit	888756691836ca8ce419a870a768f910330fb9d1 (patch)
tree	9cf2a47be95f56c4c71dc49cdf7da7fdd9cb6bb9
parent	480ba7034810fec56625dcd9ceeeb01d75c8d755 (diff)