diff options
author | Martin Storsjö <martin@martin.st> | 2020-02-10 00:14:22 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2020-02-11 11:43:43 +0300 |
commit | 8fb306575cef9c1882cfc693b1c47e68e27fc71a (patch) | |
tree | a475b15fdfc6f1957e1b9dc656c6e9e5090ba905 /src/arm | |
parent | 8e8fb84dcda63e83671a41235f2d71e726a2e716 (diff) |
arm: looprestoration: Improve scheduling in box3/5_h slightly
Set flags further from the branch instructions that use them.
Diffstat (limited to 'src/arm')
-rw-r--r-- | src/arm/32/looprestoration.S | 4 | ||||
-rw-r--r-- | src/arm/64/looprestoration.S | 4 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/src/arm/32/looprestoration.S b/src/arm/32/looprestoration.S
index 066f77a..be3533f 100644
--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -925,11 +925,11 @@ L(box3_variable_shift_tbl):
         vmull.u8 q6, d9, d9
 
         add3 4
+        subs r5, r5, #4
         vst1.16 {d6}, [r1, :64]!
         vst1.16 {d14}, [r11, :64]!
         vst1.32 {q12}, [r0, :128]!
         vst1.32 {q8}, [r10, :128]!
-        subs r5, r5, #4
         ble 9f
         vext.8 q0, q0, q0, #4
         vext.8 q1, q1, q2, #8
@@ -1215,11 +1215,11 @@ L(box5_variable_shift_tbl):
         vmull.u8 q6, d9, d9
 
         add5 4
+        subs r5, r5, #4
         vst1.16 {d6}, [r1, :64]!
         vst1.16 {d14}, [r11, :64]!
         vst1.32 {q12}, [r0, :128]!
         vst1.32 {q10}, [r10, :128]!
-        subs r5, r5, #4
         ble 9f
         vext.8 q0, q0, q0, #4
         vext.8 q1, q1, q2, #8
diff --git a/src/arm/64/looprestoration.S b/src/arm/64/looprestoration.S
index c6a6ef7..af0ec1b 100644
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -844,11 +844,11 @@ L(box3_variable_shift_tbl):
         umull2 v6.8h, v4.16b, v4.16b
 
         add3 4
+        subs w5, w5, #4
         st1 {v3.4h}, [x1], #8
         st1 {v7.4h}, [x11], #8
         st1 {v26.4s}, [x0], #16
         st1 {v28.4s}, [x10], #16
-        subs w5, w5, #4
         b.le 9f
         ext v0.16b, v0.16b, v0.16b, #4
         ext v4.16b, v4.16b, v4.16b, #4
@@ -1114,11 +1114,11 @@ L(box5_variable_shift_tbl):
         umull2 v6.8h, v4.16b, v4.16b
 
         add5 4
+        subs w5, w5, #4
         st1 {v3.4h}, [x1], #8
         st1 {v7.4h}, [x11], #8
         st1 {v26.4s}, [x0], #16
         st1 {v28.4s}, [x10], #16
-        subs w5, w5, #4
         b.le 9f
         ext v0.16b, v0.16b, v0.16b, #4
         ext v1.16b, v1.16b, v2.16b, #8
```