Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2019-02-12 00:29:51 +0300
committer: Janne Grunau <janne-vlc@jannau.net> 2019-02-14 01:32:37 +0300
commit: 97ab8290604d5b728113cc816da3c99455542841 (patch)
tree: 8fa64013b69227b3ccea457da89754eb526e6878 /src/arm/64/util.S
parent: 8abcf5dc6739c85a7458985fe566ef4caf0537f8 (diff)
arm64: mc: NEON implementation of warp8x8{,t}
Relative speedup vs C code:
                      Cortex A53    A72    A73
warp_8x8_8bpc_neon:         3.19   2.60   3.66
warp_8x8t_8bpc_neon:        3.09   2.50   3.58
Diffstat (limited to 'src/arm/64/util.S')
-rw-r--r-- src/arm/64/util.S 29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/arm/64/util.S b/src/arm/64/util.S
index 1181a6e..0dd4617 100644
--- a/src/arm/64/util.S
+++ b/src/arm/64/util.S
@@ -59,4 +59,33 @@
#endif
.endm
+.macro transpose_8x8b r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 /* In-place transpose of an 8x8 byte matrix. r0-r7: vector register names without arrangement suffix (the macro appends .8b/.4h/.2s), holding rows 0-7 on entry and columns 0-7 on exit. r8, r9: scratch registers, overwritten. */
+ trn1 \r8\().8b, \r0\().8b, \r1\().8b /* Pass 1 (8-bit lanes): r8 = 00 10 02 12 04 14 06 16, where RC is the input byte at row R, column C, lowest lane first */
+ trn2 \r9\().8b, \r0\().8b, \r1\().8b /* r9 = 01 11 03 13 05 15 07 17; rows 0 and 1 now live only in the scratch registers */
+ trn1 \r1\().8b, \r2\().8b, \r3\().8b /* r1 = 20 30 22 32 24 34 26 36 (r1 is free: its input row was consumed above) */
+ trn2 \r3\().8b, \r2\().8b, \r3\().8b /* r3 = 21 31 23 33 25 35 27 37 */
+ trn1 \r0\().8b, \r4\().8b, \r5\().8b /* r0 = 40 50 42 52 44 54 46 56 (r0 is free: its input row was consumed above) */
+ trn2 \r5\().8b, \r4\().8b, \r5\().8b /* r5 = 41 51 43 53 45 55 47 57 */
+ trn1 \r2\().8b, \r6\().8b, \r7\().8b /* r2 = 60 70 62 72 64 74 66 76 (r2 is free: its input row was consumed above) */
+ trn2 \r7\().8b, \r6\().8b, \r7\().8b /* r7 = 61 71 63 73 65 75 67 77 */
+ /* Pass 2 (16-bit lanes): interleave the byte pairs so that each register holds two runs of four consecutive rows. */
+ trn1 \r4\().4h, \r0\().4h, \r2\().4h /* r4 = 40 50 60 70 44 54 64 74 */
+ trn2 \r2\().4h, \r0\().4h, \r2\().4h /* r2 = 42 52 62 72 46 56 66 76 */
+ trn1 \r6\().4h, \r5\().4h, \r7\().4h /* r6 = 41 51 61 71 45 55 65 75 */
+ trn2 \r7\().4h, \r5\().4h, \r7\().4h /* r7 = 43 53 63 73 47 57 67 77 */
+ trn1 \r5\().4h, \r9\().4h, \r3\().4h /* r5 = 01 11 21 31 05 15 25 35 (old r5 was last read by the two instructions above) */
+ trn2 \r9\().4h, \r9\().4h, \r3\().4h /* r9 = 03 13 23 33 07 17 27 37 */
+ trn1 \r3\().4h, \r8\().4h, \r1\().4h /* r3 = 00 10 20 30 04 14 24 34 (old r3 was last read by the two instructions above) */
+ trn2 \r8\().4h, \r8\().4h, \r1\().4h /* r8 = 02 12 22 32 06 16 26 36 */
+ /* Pass 3 (32-bit lanes): join the top-four-row and bottom-four-row halves into complete columns. */
+ trn1 \r0\().2s, \r3\().2s, \r4\().2s /* r0 = 00 10 20 30 40 50 60 70 (column 0) */
+ trn2 \r4\().2s, \r3\().2s, \r4\().2s /* r4 = 04 14 24 34 44 54 64 74 (column 4) */
+ trn1 \r1\().2s, \r5\().2s, \r6\().2s /* r1 = 01 11 21 31 41 51 61 71 (column 1) */
+ trn2 \r5\().2s, \r5\().2s, \r6\().2s /* r5 = 05 15 25 35 45 55 65 75 (column 5); last read of r6 */
+ trn2 \r6\().2s, \r8\().2s, \r2\().2s /* r6 = 06 16 26 36 46 56 66 76 (column 6); r2 is still intact for the next instruction */
+ trn1 \r2\().2s, \r8\().2s, \r2\().2s /* r2 = 02 12 22 32 42 52 62 72 (column 2) */
+ trn1 \r3\().2s, \r9\().2s, \r7\().2s /* r3 = 03 13 23 33 43 53 63 73 (column 3) */
+ trn2 \r7\().2s, \r9\().2s, \r7\().2s /* r7 = 07 17 27 37 47 57 67 77 (column 7); r8 and r9 are left holding pass 2 intermediates */
+.endm
+
#endif /* DAVID_SRC_ARM_64_UTIL_S */