Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2019-10-04 13:53:49 +0300
committer: Jean-Baptiste Kempf <jb@videolan.org> 2019-10-08 08:33:21 +0300
commit: fa6a0924d7aef7fbbdb02c7a8df0714d00e40408 (patch)
tree: fc9fd8000b8d8ea6315854c07ec676b0eed66bf3
parent: 1f83575018b39d12410407dc08bdc9c445504406 (diff)
arm64: cdef: Calculate two initial parameters in the same vector
As there are only two individual parameters, we can insert them into the same vector. This reduces the number of actual calculation instructions, at the cost of a few extra instructions to dup the results into the final vectors.
-rw-r--r-- src/arm/64/cdef.S 19
1 file changed, 9 insertions, 10 deletions
diff --git a/src/arm/64/cdef.S b/src/arm/64/cdef.S
index e54f782..122c18d 100644
--- a/src/arm/64/cdef.S
+++ b/src/arm/64/cdef.S
@@ -323,19 +323,18 @@ function cdef_filter\w\()_neon, export=1
add x8, x8, w9, uxtw #1
movrel x9, directions\w
add x5, x9, w5, uxtw #1
- movi v30.8h, #15
- dup v28.8h, w6 // damping
+ movi v30.4h, #15
+ dup v28.4h, w6 // damping
dup v25.8h, w3 // threshold
dup v27.8h, w4 // threshold
- clz v24.8h, v25.8h // clz(threshold)
- clz v26.8h, v27.8h // clz(threshold)
- sub v24.8h, v30.8h, v24.8h // ulog2(threshold)
- sub v26.8h, v30.8h, v26.8h // ulog2(threshold)
- uqsub v24.8h, v28.8h, v24.8h // shift = imax(0, damping - ulog2(threshold))
- uqsub v26.8h, v28.8h, v26.8h // shift = imax(0, damping - ulog2(threshold))
- neg v24.8h, v24.8h // -shift
- neg v26.8h, v26.8h // -shift
+ trn1 v24.4h, v25.4h, v27.4h
+ clz v24.4h, v24.4h // clz(threshold)
+ sub v24.4h, v30.4h, v24.4h // ulog2(threshold)
+ uqsub v24.4h, v28.4h, v24.4h // shift = imax(0, damping - ulog2(threshold))
+ neg v24.4h, v24.4h // -shift
+ dup v26.8h, v24.h[1]
+ dup v24.8h, v24.h[0]
1:
.if \w == 8