Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/ppc/fft_altivec_s.S')
-rw-r--r-- libavcodec/ppc/fft_altivec_s.S 117
1 files changed, 94 insertions, 23 deletions
diff --git a/libavcodec/ppc/fft_altivec_s.S b/libavcodec/ppc/fft_altivec_s.S
index e6af50f90f..d17d033bab 100644
--- a/libavcodec/ppc/fft_altivec_s.S
+++ b/libavcodec/ppc/fft_altivec_s.S
@@ -49,24 +49,6 @@
.endif
.endm
-#if ARCH_PPC64
-#define PTR .quad
-.macro LOAD_PTR ra, rbase, offset
- ld \ra,(\offset)*8(\rbase)
-.endm
-.macro STORE_PTR ra, rbase, offset
- std \ra,(\offset)*8(\rbase)
-.endm
-#else
-#define PTR .int
-.macro LOAD_PTR ra, rbase, offset
- lwz \ra,(\offset)*4(\rbase)
-.endm
-.macro STORE_PTR ra, rbase, offset
- stw \ra,(\offset)*4(\rbase)
-.endm
-#endif
-
.macro FFT4 a0, a1, a2, a3 // in:0-1 out:2-3
vperm \a2,\a0,\a1,v20 // vcprm(0,1,s2,s1) // {r0,i0,r3,i2}
vperm \a3,\a0,\a1,v21 // vcprm(2,3,s0,s3) // {r1,i1,r2,i3}
@@ -314,18 +296,105 @@ fft_pass\suffix\()_altivec:
blr
.endm
+#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
+
+#define WORD_0 0x00,0x01,0x02,0x03 /* byte indices of 32-bit word 0 of the first vperm source */
+#define WORD_1 0x04,0x05,0x06,0x07 /* word 1 of the first source */
+#define WORD_2 0x08,0x09,0x0a,0x0b /* word 2 of the first source */
+#define WORD_3 0x0c,0x0d,0x0e,0x0f /* word 3 of the first source */
+#define WORD_s0 0x10,0x11,0x12,0x13 /* indices 0x10-0x1f select from the second vperm source: word 0 */
+#define WORD_s1 0x14,0x15,0x16,0x17 /* word 1 of the second source */
+#define WORD_s2 0x18,0x19,0x1a,0x1b /* word 2 of the second source */
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f /* word 3 of the second source */
+
+#define vcprm(a, b, c, d) .byte WORD_##a, WORD_##b, WORD_##c, WORD_##d /* emit a 16-byte vperm control vector picking words a,b,c,d; each argument is 0-3 or s0-s3 */
+
+ .rodata
+ .align 4 // presumably 2^4 = 16-byte alignment, which the lvx loads of this table need - confirm for the target assembler
+fft_data: // 16 vectors of constants; fft_calc loads them in order into v14-v29
+ .float 0, 0, 0, 0 // v14: all zeros
+ .float 1, 0.92387953, M_SQRT1_2, 0.38268343 // v15: cos(0), cos(pi/8), cos(pi/4), cos(3pi/8)
+ .float 0, 0.38268343, M_SQRT1_2, 0.92387953 // v16: sin(0), sin(pi/8), sin(pi/4), sin(3pi/8)
+ .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2,-M_SQRT1_2 // v17: 1/sqrt(2) with signs -,+,+,-
+ .float M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2 // v18: 1/sqrt(2) in every lane
+ vcprm(s0,3,2,1) // v19
+ vcprm(0,1,s2,s1) // v20, the permute used by the first vperm in FFT4
+ vcprm(2,3,s0,s3) // v21, the permute used by the second vperm in FFT4
+ vcprm(2,s3,3,s2) // v22
+ vcprm(0,1,s0,s1) // v23
+ vcprm(2,3,s2,s3) // v24
+ vcprm(2,3,0,1) // v25
+ vcprm(1,2,s3,s0) // v26
+ vcprm(0,3,s2,s1) // v27
+ vcprm(0,2,s1,s3) // v28
+ vcprm(1,3,s0,s2) // v29
+
+.macro lvm b, r, regs:vararg // load-vector-multiple: fill r and every register in regs from consecutive 16-byte slots starting at the address in b
+ lvx \r, 0, \b // load one vector from the address held in b
+ addi \b, \b, 16 // side effect: b is advanced past the slot just read
+ .ifnb \regs // more registers left in the list?
+ lvm \b, \regs // recurse; the next name in regs becomes r
+ .endif
+.endm
+
+.macro stvm b, r, regs:vararg // store-vector-multiple: write r and every register in regs to consecutive 16-byte slots starting at the address in b
+ stvx \r, 0, \b // store one vector at the address held in b
+ addi \b, \b, 16 // side effect: b is advanced past the slot just written
+ .ifnb \regs // more registers left in the list?
+ stvm \b, \regs // recurse; the next name in regs becomes r
+ .endif
+.endm
+
+.macro fft_calc interleave // emits ff_fft_calc<interleave>_altivec: saves v20-v29 and VRSAVE, loads the constant vectors, then calls the fftN routine picked from the dispatch table
+extfunc ff_fft_calc\interleave\()_altivec // extfunc is defined outside this view; presumably declares the exported entry point - confirm
+ mflr r0 // r0 = return address
+ stp r0, 2*PS(r1) // save LR in the caller frame (stp and PS are defined elsewhere; presumably a pointer-sized store and the pointer size)
+ stpu r1, -(160+16*PS)(r1) // allocate the frame: 16*PS bytes of header area plus 160 bytes = 10 vectors x 16 bytes
+ addi r6, r1, 16*PS // r6 = start of the vector save area
+ stvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29 // save v20-v29; stvm advances r6 by 16 per register
+ mfvrsave r0 // r0 = VRSAVE as it was on entry
+ stw r0, 15*PS(r1) // keep it in the slot just below the vector save area
+ li r6, 0xfffffffc // mask with the top 30 bits set, i.e. v0-v29 marked in use; NOTE(review): li takes a signed 16-bit immediate, so some assemblers may want this written as -4 - confirm
+ mtvrsave r6
+
+ movrel r6, fft_data // r6 = address of the constant table (movrel is defined elsewhere)
+ lvm r6, v14, v15, v16, v17, v18, v19, v20, v21 // v14-v18 = the five float rows, v19-v21 = the first three permute vectors
+ lvm r6, v22, v23, v24, v25, v26, v27, v28, v29 // v22-v29 = the remaining eight permute vectors
+
+ li r9, 16 // r9 = 16; not used in this macro, presumably an index constant for the fftN bodies - confirm
+ movrel r12, X(ff_cos_tabs) // r12 = ff_cos_tabs; DECL_FFT reads a pointer from bits*PS(r12)
+
+ movrel r6, fft_dispatch_tab\interleave\()_altivec // r6 = base of this variant's dispatch table
+ lwz r3, 0(r3) // r3 = 32-bit word at offset 0 of the first argument (presumably the transform size in bits - confirm)
+ subi r3, r3, 2 // the table starts at fft4, so bias the index by 2
+ slwi r3, r3, 2+ARCH_PPC64 // scale the index to bytes: 4 per entry, 8 when ARCH_PPC64 is 1
+ lpx r3, r3, r6 // r3 = table entry (lpx is defined elsewhere; presumably a pointer-sized indexed load)
+ mtctr r3
+ mr r3, r4 // the second argument becomes the first argument of the fftN routine
+ bctrl // indirect call; this overwrites LR, hence the save above
+
+ addi r6, r1, 16*PS // r6 = start of the vector save area again
+ lvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29 // restore v20-v29
+ lwz r6, 15*PS(r1) // reload the saved VRSAVE
+ mtvrsave r6
+ lp r1, 0(r1) // pop the frame through the back chain written by stpu
+ lp r0, 2*PS(r1) // reload the saved LR from the caller frame
+ mtlr r0
+ blr
+.endm
+
.macro DECL_FFT suffix, bits, n, n2, n4
fft\n\suffix\()_altivec:
mflr r0
- STORE_PTR r0,r1,\bits-5
+ stp r0,PS*(\bits-3)(r1)
bl fft\n2\()_altivec
addi2 r3,\n*4
bl fft\n4\()_altivec
addi2 r3,\n*2
bl fft\n4\()_altivec
addi2 r3,\n*-6
- LOAD_PTR r0,r1,\bits-5
- LOAD_PTR r4,r12,\bits
+ lp r0,PS*(\bits-3)(r1)
+ lp r4,\bits*PS(r12)
mtlr r0
li r5,\n/16
b fft_pass\suffix\()_altivec
@@ -350,9 +419,11 @@ fft\n\suffix\()_altivec:
DECL_FFT \suffix,15,32768,16384, 8192
DECL_FFT \suffix,16,65536,32768,16384
+ fft_calc \suffix
+
.rodata
- .global EXTERN_ASM\()ff_fft_dispatch\suffix\()_altivec
-EXTERN_ASM\()ff_fft_dispatch\suffix\()_altivec:
+ .align 3
+fft_dispatch_tab\suffix\()_altivec:
PTR fft4\suffix\()_altivec
PTR fft8\suffix\()_altivec
PTR fft16\suffix\()_altivec