From 71fb707875b95672f0cd1cb153c890eff4219720 Mon Sep 17 00:00:00 2001 From: "Timothy B. Terriberry" Date: Wed, 6 Jul 2022 15:21:16 -0700 Subject: Don't compile x86 cpu detection without RTCD. Also #error if RTCD is enabled without a detection method, like Arm. A number of SILK functions also still used the lookup tables, even when RTCD was disabled. Fix those, too. --- celt/cpu_support.h | 5 +++-- celt/x86/x86cpu.c | 9 ++++++--- silk/SigProc_FIX.h | 4 +++- silk/x86/SigProc_FIX_sse.h | 9 +++++++-- silk/x86/main_sse.h | 31 ++++++++++++++++--------------- silk/x86/x86_silk_map.c | 2 +- 6 files changed, 36 insertions(+), 24 deletions(-) diff --git a/celt/cpu_support.h b/celt/cpu_support.h index 68fc6067..7b5c56ca 100644 --- a/celt/cpu_support.h +++ b/celt/cpu_support.h @@ -43,10 +43,11 @@ */ #define OPUS_ARCHMASK 3 -#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ +#elif defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) + (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) #include "x86/x86cpu.h" /* We currently support 5 x86 variants: diff --git a/celt/x86/x86cpu.c b/celt/x86/x86cpu.c index 7cfc8db5..6a1914de 100644 --- a/celt/x86/x86cpu.c +++ b/celt/x86/x86cpu.c @@ -35,11 +35,11 @@ #include "pitch.h" #include "x86cpu.h" -#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ +#if defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) - + (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))) #if defined(_MSC_VER) @@ -91,6 +91,9 @@ static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) what we want on CPUs that don't support CPUID. */ CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0; } +#else +# error "Configured to use x86 RTCD, but no CPU detection method available. " \ + "Reconfigure with --disable-rtcd (or send patches)." #endif } diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h index 1d9bf2f1..fbdfa82e 100644 --- a/silk/SigProc_FIX.h +++ b/silk/SigProc_FIX.h @@ -609,10 +609,12 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) /* the following seems faster on x86 */ #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +#if !defined(OVERRIDE_silk_burg_modified) #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) +#endif +#if !defined(OVERRIDE_silk_inner_prod16) #define silk_inner_prod16(inVec1, inVec2, len, arch) \ ((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len)) #endif diff --git a/silk/x86/SigProc_FIX_sse.h b/silk/x86/SigProc_FIX_sse.h index 9bcaa805..89a5ec88 100644 --- a/silk/x86/SigProc_FIX_sse.h +++ b/silk/x86/SigProc_FIX_sse.h @@ -46,10 +46,12 @@ void silk_burg_modified_sse4_1( ); # if defined(OPUS_X86_PRESUME_SSE4_1) + +# define OVERRIDE_silk_burg_modified # define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) -# else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( opus_int32 *res_nrg, /* O Residual energy */ @@ -62,6 +64,7 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( const opus_int D, /* I Order */ int arch /* I Run-time architecture */); +# define OVERRIDE_silk_burg_modified # define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) @@ -76,16 +79,18 @@ opus_int64 silk_inner_prod16_sse4_1( # if defined(OPUS_X86_PRESUME_SSE4_1) +# define OVERRIDE_silk_inner_prod16 # define silk_inner_prod16(inVec1, inVec2, len, arch) \ ((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len)) -# else +# elif defined(OPUS_HAVE_RTCD) extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])( const opus_int16 *inVec1, const opus_int16 *inVec2, const opus_int len); +# define OVERRIDE_silk_inner_prod16 # define silk_inner_prod16(inVec1, inVec2, len, arch) \ ((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) diff --git a/silk/x86/main_sse.h b/silk/x86/main_sse.h index 9ed436bb..a01d7f6c 100644 --- a/silk/x86/main_sse.h +++ b/silk/x86/main_sse.h @@ -34,8 +34,6 @@ # if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# define OVERRIDE_silk_VQ_WMat_EC - void silk_VQ_WMat_EC_sse4_1( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *res_nrg_Q15, /* O best residual energy */ @@ -53,12 +51,13 @@ void silk_VQ_WMat_EC_sse4_1( # if defined OPUS_X86_PRESUME_SSE4_1 +# define OVERRIDE_silk_VQ_WMat_EC # define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ subfr_len, max_gain_Q7, L, arch) \ ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ subfr_len, max_gain_Q7, L)) -# else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( opus_int8 *ind, /* O index of best codebook vector */ @@ -75,6 +74,7 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( const opus_int L /* I number of vectors in codebook */ ); +# define OVERRIDE_silk_VQ_WMat_EC # define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ subfr_len, max_gain_Q7, L, arch) \ ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ @@ -82,8 +82,6 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( # endif -# define OVERRIDE_silk_NSQ - void silk_NSQ_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -104,12 +102,13 @@ void silk_NSQ_sse4_1( # if defined OPUS_X86_PRESUME_SSE4_1 +# define OVERRIDE_silk_NSQ # define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -# else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( const silk_encoder_state *psEncC, /* I Encoder State */ @@ -129,6 +128,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); +# define OVERRIDE_silk_NSQ # define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ @@ -136,8 +136,6 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( # endif -# define OVERRIDE_silk_NSQ_del_dec - void silk_NSQ_del_dec_sse4_1( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ @@ -158,12 +156,13 @@ void silk_NSQ_del_dec_sse4_1( # if defined OPUS_X86_PRESUME_SSE4_1 +# define OVERRIDE_silk_NSQ_del_dec # define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -# else +# elif defined(OPUS_HAVE_RTCD) extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( const silk_encoder_state *psEncC, /* I Encoder State */ @@ -183,6 +182,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( const opus_int LTP_scale_Q14 /* I LTP state scaling */ ); +# define OVERRIDE_silk_NSQ_del_dec # define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ @@ -221,25 +221,26 @@ void silk_VAD_GetNoiseLevels( silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ); -# define OVERRIDE_silk_VAD_GetSA_Q8 - opus_int silk_VAD_GetSA_Q8_sse4_1( silk_encoder_state *psEnC, const opus_int16 pIn[] ); # if defined(OPUS_X86_PRESUME_SSE4_1) + +# define OVERRIDE_silk_VAD_GetSA_Q8 # define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) -# else - -# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ - ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) +# elif defined(OPUS_HAVE_RTCD) extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( silk_encoder_state *psEnC, const opus_int16 pIn[]); +# define OVERRIDE_silk_VAD_GetSA_Q8 +# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ + ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) + # endif # endif diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c index ca13cde9..70f60078 100644 --- a/silk/x86/x86_silk_map.c +++ b/silk/x86/x86_silk_map.c @@ -35,7 +35,7 @@ #include "pitch.h" #include "main.h" -#if !defined(OPUS_X86_PRESUME_SSE4_1) +#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1) #if defined(FIXED_POINT) -- cgit v1.2.3