diff options
author | Martin Storsjö <martin@martin.st> | 2020-12-09 12:44:19 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2020-12-13 00:51:37 +0300 |
commit | 2a448fde52f8ae0f6958e8e89ae6311e0b14a1d8 (patch) | |
tree | 6fa6f8103c5fc07a7968e4052753cb0b20b3574e | |
parent | c1a5e445d1da12c71f5407a032292ea89f1205c1 (diff) |
arm64: loopfilter16: Fix conditions for skipping parts of the filtering
As the arm64 16 bpc loopfilter operates on an 8 pixel region at a time,
inspect only the low 2 bits (each corresponding to 4 pixels) from these
registers, as we also shift them down by 2 bits at the end of the loop.
This should allow skipping the loopfilter altogether (or using a
smaller filter) in more cases.
-rw-r--r-- | src/arm/64/loopfilter16.S | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/src/arm/64/loopfilter16.S b/src/arm/64/loopfilter16.S
index 147a93d..e703020 100644
--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -785,7 +785,7 @@ function lpf_\dir\()_sb_\type\()_16bpc_neon, export=1
         orr w6, w6, w7 // vmask[0] |= vmask[1]
 
 1:
-        tst w6, #0x0f
+        tst w6, #0x03
 .ifc \dir, v
         ld1 {v0.8b}, [x4], #8
         ld1 {v1.8b}, [x3], #8
@@ -847,14 +847,14 @@ function lpf_\dir\()_sb_\type\()_16bpc_neon, export=1
         ushl v10.8h, v10.8h, v31.8h
 
 .ifc \type, y
-        tst w2, #0x0f
+        tst w2, #0x03
         b.eq 2f
         // wd16
         bl lpf_\dir\()_16_8_neon
         b 8f
 2:
 .endif
-        tst w7, #0x0f
+        tst w7, #0x03
         b.eq 3f
 .ifc \type, y
         // wd8