Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/arm/64
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2020-03-23 15:34:26 +0300
committer: Martin Storsjö <martin@martin.st> 2020-03-26 13:31:23 +0300
commit: f481d69b0ffac087504036375d505f4323d7ef5e (patch)
tree: 6ee72636f44417ed30f8d54f70eac43ed0be09d2 /src/arm/64
parent: 641ef4cc9fa2a12d53be2e75e9690e7b8ff4e605 (diff)
arm64: ipred: Do shifts on only half the register width when possible
In these cases, we only need the value of the first element.
Diffstat (limited to 'src/arm/64')
-rw-r--r-- src/arm/64/ipred.S 16
1 files changed, 8 insertions, 8 deletions
diff --git a/src/arm/64/ipred.S b/src/arm/64/ipred.S
index 6b77535..00a9112 100644
--- a/src/arm/64/ipred.S
+++ b/src/arm/64/ipred.S
@@ -1717,19 +1717,19 @@ function ipred_cfl_top_neon, export=1
4:
ld1r {v0.2s}, [x2]
uaddlv h0, v0.8b
- urshr v0.8h, v0.8h, #3
+ urshr v0.4h, v0.4h, #3
dup v0.8h, v0.h[0]
b L(ipred_cfl_splat_w4)
8:
ld1 {v0.8b}, [x2]
uaddlv h0, v0.8b
- urshr v0.8h, v0.8h, #3
+ urshr v0.4h, v0.4h, #3
dup v0.8h, v0.h[0]
b L(ipred_cfl_splat_w8)
16:
ld1 {v0.16b}, [x2]
uaddlv h0, v0.16b
- urshr v0.8h, v0.8h, #4
+ urshr v0.4h, v0.4h, #4
dup v0.8h, v0.h[0]
b L(ipred_cfl_splat_w16)
32:
@@ -1737,7 +1737,7 @@ function ipred_cfl_top_neon, export=1
uaddlv h2, v2.16b
uaddlv h3, v3.16b
add v2.4h, v2.4h, v3.4h
- urshr v2.8h, v2.8h, #5
+ urshr v2.4h, v2.4h, #5
dup v0.8h, v2.h[0]
b L(ipred_cfl_splat_w16)
@@ -1772,21 +1772,21 @@ function ipred_cfl_left_neon, export=1
L(ipred_cfl_left_h4):
ld1r {v0.2s}, [x2]
uaddlv h0, v0.8b
- urshr v0.8h, v0.8h, #3
+ urshr v0.4h, v0.4h, #3
dup v0.8h, v0.h[0]
br x9
L(ipred_cfl_left_h8):
ld1 {v0.8b}, [x2]
uaddlv h0, v0.8b
- urshr v0.8h, v0.8h, #3
+ urshr v0.4h, v0.4h, #3
dup v0.8h, v0.h[0]
br x9
L(ipred_cfl_left_h16):
ld1 {v0.16b}, [x2]
uaddlv h0, v0.16b
- urshr v0.8h, v0.8h, #4
+ urshr v0.4h, v0.4h, #4
dup v0.8h, v0.h[0]
br x9
@@ -1795,7 +1795,7 @@ L(ipred_cfl_left_h32):
uaddlv h2, v2.16b
uaddlv h3, v3.16b
add v2.4h, v2.4h, v3.4h
- urshr v2.8h, v2.8h, #5
+ urshr v2.4h, v2.4h, #5
dup v0.8h, v2.h[0]
br x9