From ace42cf581f8c06872bfb58cf575d9e8bd398c0a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 19 Sep 2022 05:53:01 +0200
Subject: x86/tx_float: add 15xN PFA FFT AVX SIMD

~4x faster than the C version.
The shuffles in the 15pt dim1 are seriously expensive. Not happy with it,
but I'm content.

Can be easily converted to pure AVX by removing all vpermpd/vpermps
instructions.
---
 tests/checkasm/av_tx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tests')

diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c
index 1fa6da45ac..aa8fc6b4e9 100644
--- a/tests/checkasm/av_tx.c
+++ b/tests/checkasm/av_tx.c
@@ -24,7 +24,7 @@
 
 #include <stdlib.h>
 
-#define EPS 0.00005
+#define EPS 0.0005
 
 #define SCALE_NOOP(x) (x)
 #define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
@@ -40,7 +40,7 @@
     } while (0)
 
 static const int check_lens[] = {
-    2, 4, 8, 16, 32, 64, 1024, 16384,
+    2, 4, 8, 16, 32, 64, 120, 960, 1024, 1920, 16384,
 };
 
 static AVTXContext *tx_refs[AV_TX_NB][2 /* Direction */][FF_ARRAY_ELEMS(check_lens)] = { 0 };
-- 
cgit v1.2.3