Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2019-03-05 12:32:05 +0300
committer: Martin Storsjö <martin@martin.st> 2019-03-05 22:10:25 +0300
commit: 4f5261a0ed399dcec88c87f34d1095b0152b9ae1 (patch)
tree: 094447ae8576c73d77971b84474132315ccadb5e /src/arm/64/cdef.S
parent: dc2ae517648accc0fe4ac0737f9ee850accda278 (diff)
arm64: cdef: Do saturating subtractions to avoid max operations with 0
Before:                     Cortex A53     A72     A73
cdef_filter_4x4_8bpc_neon:       677.4   433.9   452.9
cdef_filter_4x8_8bpc_neon:      1255.0   815.2   841.8
cdef_filter_8x8_8bpc_neon:      2278.5  1440.0  1505.0
After:
cdef_filter_4x4_8bpc_neon:       645.5   401.9   422.5
cdef_filter_4x8_8bpc_neon:      1193.7   756.6   782.4
cdef_filter_8x8_8bpc_neon:      2162.4  1361.9  1375.6
Diffstat (limited to 'src/arm/64/cdef.S')
-rw-r--r-- src/arm/64/cdef.S 13
1 files changed, 4 insertions, 9 deletions
diff --git a/src/arm/64/cdef.S b/src/arm/64/cdef.S
index d2fb0da..8913cde 100644
--- a/src/arm/64/cdef.S
+++ b/src/arm/64/cdef.S
@@ -304,10 +304,8 @@ endconst
uabd v20.8h, v0.8h, \s2\().8h // abs(diff)
ushl v17.8h, v16.8h, \shift // abs(diff) >> shift
ushl v21.8h, v20.8h, \shift // abs(diff) >> shift
- sub v17.8h, \thresh_vec, v17.8h // threshold - (abs(diff) >> shift)
- sub v21.8h, \thresh_vec, v21.8h // threshold - (abs(diff) >> shift)
- smax v17.8h, v29.8h, v17.8h // imax(0, threshold - ())
- smax v21.8h, v29.8h, v21.8h // imax(0, threshold - ())
+ uqsub v17.8h, \thresh_vec, v17.8h // imax(0, threshold - (abs(diff) >> shift))
+ uqsub v21.8h, \thresh_vec, v21.8h // imax(0, threshold - (abs(diff) >> shift))
cmhi v18.8h, v0.8h, \s1\().8h // px > p0
cmhi v22.8h, v0.8h, \s2\().8h // px > p1
smin v17.8h, v17.8h, v16.8h // imin(abs(diff), imax())
@@ -334,7 +332,6 @@ function cdef_filter\w\()_neon, export=1
add x5, x9, w5, uxtw #1
movi v31.16b, #255
movi v30.8h, #15
- movi v29.8h, #0
dup v28.8h, w6 // damping
ushr v31.8h, v31.8h, #1 // INT16_MAX
@@ -344,10 +341,8 @@ function cdef_filter\w\()_neon, export=1
clz v26.8h, v27.8h // clz(threshold)
sub v24.8h, v30.8h, v24.8h // ulog2(threshold)
sub v26.8h, v30.8h, v26.8h // ulog2(threshold)
- sub v24.8h, v28.8h, v24.8h // damping - ulog2(threshold)
- sub v26.8h, v28.8h, v26.8h // damping - ulog2(threshold)
- smax v24.8h, v29.8h, v24.8h // shift = imax(0, damping - ulog2(threshold))
- smax v26.8h, v29.8h, v26.8h // shift = imax(0, damping - ulog2(threshold))
+ uqsub v24.8h, v28.8h, v24.8h // shift = imax(0, damping - ulog2(threshold))
+ uqsub v26.8h, v28.8h, v26.8h // shift = imax(0, damping - ulog2(threshold))
neg v24.8h, v24.8h // -shift
neg v26.8h, v26.8h // -shift