diff options
author | Martin Storsjö <martin@martin.st> | 2019-10-04 13:53:49 +0300 |
---|---|---|
committer | Jean-Baptiste Kempf <jb@videolan.org> | 2019-10-08 08:33:21 +0300 |
commit | fa6a0924d7aef7fbbdb02c7a8df0714d00e40408 (patch) | |
tree | fc9fd8000b8d8ea6315854c07ec676b0eed66bf3 | |
parent | 1f83575018b39d12410407dc08bdc9c445504406 (diff) |
arm64: cdef: Calculate two initial parameters in the same vector
As there are only two individual parameters, we can insert them into
the same vector, reducing the number of actual calculation instructions,
at the cost of a few extra instructions to dup the results into the
final vectors.
-rw-r--r-- | src/arm/64/cdef.S | 19 |
1 files changed, 9 insertions, 10 deletions
diff --git a/src/arm/64/cdef.S b/src/arm/64/cdef.S index e54f782..122c18d 100644 --- a/src/arm/64/cdef.S +++ b/src/arm/64/cdef.S @@ -323,19 +323,18 @@ function cdef_filter\w\()_neon, export=1 add x8, x8, w9, uxtw #1 movrel x9, directions\w add x5, x9, w5, uxtw #1 - movi v30.8h, #15 - dup v28.8h, w6 // damping + movi v30.4h, #15 + dup v28.4h, w6 // damping dup v25.8h, w3 // threshold dup v27.8h, w4 // threshold - clz v24.8h, v25.8h // clz(threshold) - clz v26.8h, v27.8h // clz(threshold) - sub v24.8h, v30.8h, v24.8h // ulog2(threshold) - sub v26.8h, v30.8h, v26.8h // ulog2(threshold) - uqsub v24.8h, v28.8h, v24.8h // shift = imax(0, damping - ulog2(threshold)) - uqsub v26.8h, v28.8h, v26.8h // shift = imax(0, damping - ulog2(threshold)) - neg v24.8h, v24.8h // -shift - neg v26.8h, v26.8h // -shift + trn1 v24.4h, v25.4h, v27.4h + clz v24.4h, v24.4h // clz(threshold) + sub v24.4h, v30.4h, v24.4h // ulog2(threshold) + uqsub v24.4h, v28.4h, v24.4h // shift = imax(0, damping - ulog2(threshold)) + neg v24.4h, v24.4h // -shift + dup v26.8h, v24.h[1] + dup v24.8h, v24.h[0] 1: .if \w == 8 |