From ace42cf581f8c06872bfb58cf575d9e8bd398c0a Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 19 Sep 2022 05:53:01 +0200 Subject: x86/tx_float: add 15xN PFA FFT AVX SIMD ~4x faster than the C version. The shuffles in the 15pt dim1 are seriously expensive. Not happy with it, but I'm content. Can be easily converted to pure AVX by removing all vpermpd/vpermps instructions. --- tests/checkasm/av_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c index 1fa6da45ac..aa8fc6b4e9 100644 --- a/tests/checkasm/av_tx.c +++ b/tests/checkasm/av_tx.c @@ -24,7 +24,7 @@ #include -#define EPS 0.00005 +#define EPS 0.0005 #define SCALE_NOOP(x) (x) #define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12) @@ -40,7 +40,7 @@ } while (0) static const int check_lens[] = { - 2, 4, 8, 16, 32, 64, 1024, 16384, + 2, 4, 8, 16, 32, 64, 120, 960, 1024, 1920, 16384, }; static AVTXContext *tx_refs[AV_TX_NB][2 /* Direction */][FF_ARRAY_ELEMS(check_lens)] = { 0 }; -- cgit v1.2.3