Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/arm
diff options
context:
space:
mode:
author Martin Storsjö <martin@martin.st> 2019-11-13 12:09:05 +0300
committer Martin Storsjö <martin@martin.st> 2020-03-26 13:31:23 +0300
commit 641ef4cc9fa2a12d53be2e75e9690e7b8ff4e605 (patch)
tree dc7291072b0ebab231380c20cb4761747d02745a /src/arm
parent ba64f052353906398cf78e79bab7fbb8c468b002 (diff)
arm64: ipred: Avoid data dependencies with consecutive dup instructions
This is around one cycle faster.
Diffstat (limited to 'src/arm')
-rw-r--r-- src/arm/64/ipred.S 20
1 file changed, 10 insertions, 10 deletions
diff --git a/src/arm/64/ipred.S b/src/arm/64/ipred.S
index 96e88f4..6b77535 100644
--- a/src/arm/64/ipred.S
+++ b/src/arm/64/ipred.S
@@ -608,7 +608,7 @@ L(ipred_dc_w32):
cmp w4, #32
add v0.4h, v0.4h, v1.4h
add v0.4h, v0.4h, v2.4h
- ushl v0.4h, v0.4h, v17.4h
+ ushl v4.4h, v0.4h, v17.4h
b.eq 1f
// h = 8/16/64
cmp w4, #8
@@ -616,10 +616,10 @@ L(ipred_dc_w32):
mov w17, #(0x5556/2)
csel w16, w16, w17, eq
dup v16.4h, w16
- sqdmulh v0.4h, v0.4h, v16.4h
+ sqdmulh v4.4h, v4.4h, v16.4h
1:
- dup v0.16b, v0.b[0]
- dup v1.16b, v0.b[0]
+ dup v0.16b, v4.b[0]
+ dup v1.16b, v4.b[0]
2:
st1 {v0.16b, v1.16b}, [x0], x1
st1 {v0.16b, v1.16b}, [x6], x1
@@ -656,19 +656,19 @@ L(ipred_dc_w64):
cmp w4, #64
add v0.4h, v0.4h, v1.4h
add v0.4h, v0.4h, v3.4h
- ushl v0.4h, v0.4h, v17.4h
+ ushl v4.4h, v0.4h, v17.4h
b.eq 1f
// h = 16/32
mov w16, #(0x5556/2)
movk w16, #(0x3334/2), lsl #16
lsr w16, w16, w4
dup v16.4h, w16
- sqdmulh v0.4h, v0.4h, v16.4h
+ sqdmulh v4.4h, v4.4h, v16.4h
1:
- dup v0.16b, v0.b[0]
- dup v1.16b, v0.b[0]
- dup v2.16b, v0.b[0]
- dup v3.16b, v0.b[0]
+ dup v0.16b, v4.b[0]
+ dup v1.16b, v4.b[0]
+ dup v2.16b, v4.b[0]
+ dup v3.16b, v4.b[0]
2:
st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1