Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st>, 2020-03-03 15:49:33 +0300
committer: Martin Storsjö <martin@martin.st>, 2020-03-04 12:43:25 +0300
commit: 83c627165ae5991ac664f5d4d2c6aa7a772ee9a8 (patch)
tree: b52b9096ca3f0f5eb4c262d6630a67b2a237e4dc
parent: f4dac1a30b3893d0ff555d8d87a0be7c4b69866a (diff)
arm64: mc: Use more intuitive lane specifications for loads/stores
For loads and stores that transfer a full or half register (instead of a lanewise load/store), the lane specification in itself doesn't matter, only its size. This doesn't change the generated code, but makes the source more readable.
-rw-r--r-- src/arm/64/mc.S 32
1 files changed, 16 insertions, 16 deletions
diff --git a/src/arm/64/mc.S b/src/arm/64/mc.S
index 4392a2c..7166f32 100644
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -434,7 +434,7 @@ function blend_8bpc_neon, export=1
lsl w1, w1, #1
br x6
4:
- ld1 {v2.d}[0], [x5], #8
+ ld1 {v2.8b}, [x5], #8
ld1 {v1.d}[0], [x2], #8
ld1 {v0.s}[0], [x0]
subs w4, w4, #2
@@ -448,8 +448,8 @@ function blend_8bpc_neon, export=1
b.gt 4b
ret
8:
- ld1 {v2.2d}, [x5], #16
- ld1 {v1.2d}, [x2], #16
+ ld1 {v2.16b}, [x5], #16
+ ld1 {v1.16b}, [x2], #16
ld1 {v0.d}[0], [x0]
ld1 {v0.d}[1], [x8]
sub v3.16b, v4.16b, v2.16b
@@ -465,13 +465,13 @@ function blend_8bpc_neon, export=1
b.gt 8b
ret
16:
- ld1 {v1.2d, v2.2d}, [x5], #32
- ld1 {v5.2d, v6.2d}, [x2], #32
- ld1 {v0.2d}, [x0]
+ ld1 {v1.16b, v2.16b}, [x5], #32
+ ld1 {v5.16b, v6.16b}, [x2], #32
+ ld1 {v0.16b}, [x0]
subs w4, w4, #2
sub v7.16b, v4.16b, v1.16b
sub v20.16b, v4.16b, v2.16b
- ld1 {v3.2d}, [x8]
+ ld1 {v3.16b}, [x8]
umull v16.8h, v5.8b, v1.8b
umlal v16.8h, v0.8b, v7.8b
umull2 v17.8h, v5.16b, v1.16b
@@ -484,16 +484,16 @@ function blend_8bpc_neon, export=1
rshrn2 v18.16b, v17.8h, #6
rshrn v19.8b, v21.8h, #6
rshrn2 v19.16b, v22.8h, #6
- st1 {v18.2d}, [x0], x1
- st1 {v19.2d}, [x8], x1
+ st1 {v18.16b}, [x0], x1
+ st1 {v19.16b}, [x8], x1
b.gt 16b
ret
32:
- ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x5], #64
- ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x2], #64
- ld1 {v20.2d, v21.2d}, [x0]
+ ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x5], #64
+ ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x2], #64
+ ld1 {v20.16b, v21.16b}, [x0]
subs w4, w4, #2
- ld1 {v22.2d, v23.2d}, [x8]
+ ld1 {v22.16b, v23.16b}, [x8]
sub v5.16b, v4.16b, v0.16b
sub v6.16b, v4.16b, v1.16b
sub v30.16b, v4.16b, v2.16b
@@ -522,8 +522,8 @@ function blend_8bpc_neon, export=1
rshrn2 v27.16b, v1.8h, #6
rshrn v28.8b, v29.8h, #6
rshrn2 v28.16b, v21.8h, #6
- st1 {v24.2d, v25.2d}, [x0], x1
- st1 {v27.2d, v28.2d}, [x8], x1
+ st1 {v24.16b, v25.16b}, [x0], x1
+ st1 {v27.16b, v28.16b}, [x8], x1
b.gt 32b
ret
L(blend_tbl):
@@ -563,7 +563,7 @@ function blend_h_8bpc_neon, export=1
ret
4:
ld2r {v0.8b, v1.8b}, [x5], #2
- ld1 {v2.2s}, [x2], #8
+ ld1 {v2.8b}, [x2], #8
subs w4, w4, #2
ext v0.8b, v0.8b, v1.8b, #4
ld1 {v3.s}[0], [x0]