From 8cf872a186b96085b1bb3a547afd598354ebeb87 Mon Sep 17 00:00:00 2001
From: Zheng Lv
Date: Tue, 13 Sep 2022 14:40:52 +0800
Subject: Make CELT FFT twiddle complex type aligned

This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte) for
fixed-point builds. Tested with an armv6j+nofp development board, CELT
encoding becomes 1.4x as fast, and decoding over 2x. Performance gain is
mostly attributed to the proper alignment of the static const array
mdct_twiddles960.

Co-authored-by: David Gao
Signed-off-by: Felicia Lim
---
 celt/kiss_fft.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
index bffa2bfa..267f72f9 100644
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -52,6 +52,10 @@ extern "C" {
 
 # define kiss_fft_scalar opus_int32
 # define kiss_twiddle_scalar opus_int16
+/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
+ * access, and could benefit from additional alignment.
+ */
+# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
 
 #else
 # ifndef kiss_fft_scalar
@@ -62,6 +66,12 @@ extern "C" {
 # endif
 #endif
 
+#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
+#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
+#else
+#define KISS_TWIDDLE_CPX_ALIGNED
+#endif
+
 typedef struct {
 kiss_fft_scalar r;
 kiss_fft_scalar i;
@@ -70,7 +80,7 @@ typedef struct {
 typedef struct {
 kiss_twiddle_scalar r;
 kiss_twiddle_scalar i;
-}kiss_twiddle_cpx;
+} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
 
 #define MAXFACTORS 8
 /* e.g. an fft of length 128 has 4 factors
-- 
cgit v1.2.3