diff options
author | Martin Storsjö <martin@martin.st> | 2020-05-04 08:58:12 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2020-05-10 08:51:42 +0300 |
commit | a6711a5c2b12b74e4bc887c525c25a6981158930 (patch) | |
tree | 06d1b5aa1edb1a63d12fd2f9145f757275332c39 /src/arm | |
parent | 39d6c599352bff68038500756488e80f9cd31295 (diff) |
arm64: itx: Fix the eob checking for dct_dct_64x16
Before this, we never did the early exit from the first pass.
Before: Cortex A53 A72 A73
inv_txfm_add_64x16_dct_dct_1_8bpc_neon: 7275.7 5198.3 5250.9
inv_txfm_add_64x16_dct_dct_2_8bpc_neon: 7276.1 5197.0 5251.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon: 7275.8 5196.2 5254.5
inv_txfm_add_64x16_dct_dct_4_8bpc_neon: 7273.6 5198.8 5254.2
After:
inv_txfm_add_64x16_dct_dct_1_8bpc_neon: 5187.8 3763.8 3735.0
inv_txfm_add_64x16_dct_dct_2_8bpc_neon: 7280.6 5185.6 5256.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon: 7270.7 5179.8 5250.3
inv_txfm_add_64x16_dct_dct_4_8bpc_neon: 7271.7 5212.4 5256.4
The other related variants didn't have this bug and properly exited
early when possible.
Diffstat (limited to 'src/arm')
-rw-r--r-- | src/arm/64/itx.S | 4 |
1 file changed, 3 insertions, 1 deletion
diff --git a/src/arm/64/itx.S b/src/arm/64/itx.S
index 6ab6849..5f9c7b3 100644
--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -3218,7 +3218,6 @@ function inv_txfm_add_dct_dct_64x16_neon, export=1
         mov w8, #(16 - \i)
         cmp w3, w12
         b.lt 1f
-        ldrh w12, [x13], #2
 .endif
         add x7, x2, #(\i*2)
         mov x8, #16*2
@@ -3226,6 +3225,9 @@ function inv_txfm_add_dct_dct_64x16_neon, export=1
         bl inv_txfm_dct_clear_8x64_neon
         add x6, x4, #(\i*64*2)
         bl inv_txfm_horz_dct_64x8_neon
+.if \i < 8
+        ldrh w12, [x13], #2
+.endif
 .endr
         b 3f