Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimothy B. Terriberry <tterribe@xiph.org>2013-11-18 22:30:13 +0400
committerJean-Marc Valin <jmvalin@jmvalin.ca>2013-11-18 22:41:17 +0400
commit39386e0b85ec0f978aa104d312604badb9047d58 (patch)
treee1171628bb638ec1b770b049e39609d7a268c584 /silk/fixed
parent530198f955e49571b3f890b4da4d933a4cd5df4e (diff)
Adds Neon assembly for correlation/convolution
Optimizing celt_pitch_xcorr()/xcorr_kernel() which also speeds up FIRs, IIRs and auto-correlations Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'silk/fixed')
-rw-r--r--silk/fixed/autocorr_FIX.c5
-rw-r--r--silk/fixed/burg_modified_FIX.c5
-rw-r--r--silk/fixed/encode_frame_FIX.c4
-rw-r--r--silk/fixed/find_LPC_FIX.c4
-rw-r--r--silk/fixed/find_pitch_lags_FIX.c8
-rw-r--r--silk/fixed/main_FIX.h9
-rw-r--r--silk/fixed/noise_shape_analysis_FIX.c5
-rw-r--r--silk/fixed/pitch_analysis_core_FIX.c15
8 files changed, 33 insertions, 22 deletions
diff --git a/silk/fixed/autocorr_FIX.c b/silk/fixed/autocorr_FIX.c
index dec3cc0b..de95c986 100644
--- a/silk/fixed/autocorr_FIX.c
+++ b/silk/fixed/autocorr_FIX.c
@@ -38,10 +38,11 @@ void silk_autocorr(
opus_int *scale, /* O Scaling of the correlation vector */
const opus_int16 *inputData, /* I Input data to correlate */
const opus_int inputDataSize, /* I Length of input */
- const opus_int correlationCount /* I Number of correlation taps to compute */
+ const opus_int correlationCount, /* I Number of correlation taps to compute */
+ int arch /* I Run-time architecture */
)
{
opus_int corrCount;
corrCount = silk_min_int( inputDataSize, correlationCount );
- *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize);
+ *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch);
}
diff --git a/silk/fixed/burg_modified_FIX.c b/silk/fixed/burg_modified_FIX.c
index f5112b64..db348295 100644
--- a/silk/fixed/burg_modified_FIX.c
+++ b/silk/fixed/burg_modified_FIX.c
@@ -50,7 +50,8 @@ void silk_burg_modified(
const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
const opus_int nb_subfr, /* I Number of subframes stacked in x */
- const opus_int D /* I Order */
+ const opus_int D, /* I Order */
+ int arch /* I Run-time architecture */
)
{
opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
@@ -98,7 +99,7 @@ void silk_burg_modified(
int i;
opus_int32 d;
x_ptr = x + s * subfr_length;
- celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D );
+ celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
for( n = 1; n < D + 1; n++ ) {
for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c
index 9c57a10a..b490986b 100644
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -132,12 +132,12 @@ opus_int silk_encode_frame_FIX(
/*****************************************/
/* Find pitch lags, initial LPC analysis */
/*****************************************/
- silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame );
+ silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
/************************/
/* Noise shape analysis */
/************************/
- silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+ silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );
/***************************************************/
/* Find linear prediction coefficients (LPC + LTP) */
diff --git a/silk/fixed/find_LPC_FIX.c b/silk/fixed/find_LPC_FIX.c
index 70cefb6b..783d32e2 100644
--- a/silk/fixed/find_LPC_FIX.c
+++ b/silk/fixed/find_LPC_FIX.c
@@ -60,13 +60,13 @@ void silk_find_LPC_FIX(
psEncC->indices.NLSFInterpCoef_Q2 = 4;
/* Burg AR analysis for the full frame */
- silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+ silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );
if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
VARDECL( opus_int16, LPC_res );
/* Optimal solution for last 10 ms */
- silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder );
+ silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );
/* subtract residual energy here, as that's easier than adding it to the */
/* residual energy of the first 10 ms in each iteration of the search below */
diff --git a/silk/fixed/find_pitch_lags_FIX.c b/silk/fixed/find_pitch_lags_FIX.c
index f60b4364..620f8dcd 100644
--- a/silk/fixed/find_pitch_lags_FIX.c
+++ b/silk/fixed/find_pitch_lags_FIX.c
@@ -38,7 +38,8 @@ void silk_find_pitch_lags_FIX(
silk_encoder_state_FIX *psEnc, /* I/O encoder state */
silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
opus_int16 res[], /* O residual */
- const opus_int16 x[] /* I Speech signal */
+ const opus_int16 x[], /* I Speech signal */
+ int arch /* I Run-time architecture */
)
{
opus_int buf_len, i, scale;
@@ -86,7 +87,7 @@ void silk_find_pitch_lags_FIX(
silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
/* Calculate autocorrelation sequence */
- silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+ silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
/* Add white noise, as fraction of energy */
auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
@@ -127,7 +128,8 @@ void silk_find_pitch_lags_FIX(
/*****************************************/
if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
&psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
- (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
+ (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+ psEnc->sCmn.arch) == 0 )
{
psEnc->sCmn.indices.signalType = TYPE_VOICED;
} else {
diff --git a/silk/fixed/main_FIX.h b/silk/fixed/main_FIX.h
index d999b13f..a56ca07a 100644
--- a/silk/fixed/main_FIX.h
+++ b/silk/fixed/main_FIX.h
@@ -73,7 +73,8 @@ opus_int silk_encode_frame_FIX(
/* Initializes the Silk encoder state */
opus_int silk_init_encoder(
- silk_encoder_state_Fxx *psEnc /* I/O Pointer to Silk FIX encoder state */
+ silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ int arch /* I Run-time architecture */
);
/* Control the Silk encoder */
@@ -104,7 +105,8 @@ void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
- const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
);
/* Autocorrelations for a warped frequency axis */
@@ -132,7 +134,8 @@ void silk_find_pitch_lags_FIX(
silk_encoder_state_FIX *psEnc, /* I/O encoder state */
silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
opus_int16 res[], /* O residual */
- const opus_int16 x[] /* I Speech signal */
+ const opus_int16 x[], /* I Speech signal */
+ int arch /* I Run-time architecture */
);
/* Find LPC and LTP coefficients */
diff --git a/silk/fixed/noise_shape_analysis_FIX.c b/silk/fixed/noise_shape_analysis_FIX.c
index d381fa3d..e24d2e9d 100644
--- a/silk/fixed/noise_shape_analysis_FIX.c
+++ b/silk/fixed/noise_shape_analysis_FIX.c
@@ -145,7 +145,8 @@ void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
- const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
)
{
silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
@@ -281,7 +282,7 @@ void silk_noise_shape_analysis_FIX(
silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
} else {
/* Calculate regular auto correlation */
- silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
}
/* Add white noise, as a fraction of energy */
diff --git a/silk/fixed/pitch_analysis_core_FIX.c b/silk/fixed/pitch_analysis_core_FIX.c
index b2c6cbb4..1641a0fb 100644
--- a/silk/fixed/pitch_analysis_core_FIX.c
+++ b/silk/fixed/pitch_analysis_core_FIX.c
@@ -62,7 +62,8 @@ static void silk_P_Ana_calc_corr_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of a 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
- opus_int complexity /* I Complexity setting */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
);
static void silk_P_Ana_calc_energy_st3(
@@ -88,7 +89,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
const opus_int Fs_kHz, /* I Sample frequency (kHz) */
const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
- const opus_int nb_subfr /* I number of 5 ms subframes */
+ const opus_int nb_subfr, /* I number of 5 ms subframes */
+ int arch /* I Run-time architecture */
)
{
VARDECL( opus_int16, frame_8kHz );
@@ -189,7 +191,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( basis_ptr >= frame_4kHz );
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
- celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 );
+ celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
/* Calculate first vector products before loop */
cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
@@ -516,7 +518,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
/* Calculate the correlations and energies needed in stage 3 */
ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
- silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+ silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
lag_counter = 0;
@@ -597,7 +599,8 @@ static void silk_P_Ana_calc_corr_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of a 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
- opus_int complexity /* I Complexity setting */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
)
{
const opus_int16 *target_ptr;
@@ -634,7 +637,7 @@ static void silk_P_Ana_calc_corr_st3(
lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
- celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 );
+ celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
for( j = lag_low; j <= lag_high; j++ ) {
silk_assert( lag_counter < SCRATCH_SIZE );
scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];