From a6711a5c2b12b74e4bc887c525c25a6981158930 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Mon, 4 May 2020 08:58:12 +0300
Subject: arm64: itx: Fix the eob checking for dct_dct_64x16

Before this, we never did the early exit from the first pass.

Before:                                 Cortex A53     A72     A73
inv_txfm_add_64x16_dct_dct_1_8bpc_neon:     7275.7  5198.3  5250.9
inv_txfm_add_64x16_dct_dct_2_8bpc_neon:     7276.1  5197.0  5251.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon:     7275.8  5196.2  5254.5
inv_txfm_add_64x16_dct_dct_4_8bpc_neon:     7273.6  5198.8  5254.2
After:
inv_txfm_add_64x16_dct_dct_1_8bpc_neon:     5187.8  3763.8  3735.0
inv_txfm_add_64x16_dct_dct_2_8bpc_neon:     7280.6  5185.6  5256.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon:     7270.7  5179.8  5250.3
inv_txfm_add_64x16_dct_dct_4_8bpc_neon:     7271.7  5212.4  5256.4

The other related variants didn't have this bug and properly
exited early when possible.
---
 src/arm/64/itx.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/arm')

diff --git a/src/arm/64/itx.S b/src/arm/64/itx.S
index 6ab6849..5f9c7b3 100644
--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -3218,7 +3218,6 @@ function inv_txfm_add_dct_dct_64x16_neon, export=1
         mov             w8, #(16 - \i)
         cmp             w3, w12
         b.lt            1f
-        ldrh            w12, [x13], #2
 .endif
         add             x7, x2, #(\i*2)
         mov             x8, #16*2
@@ -3226,6 +3225,9 @@ function inv_txfm_add_dct_dct_64x16_neon, export=1
         bl              inv_txfm_dct_clear_8x64_neon
         add             x6, x4, #(\i*64*2)
         bl              inv_txfm_horz_dct_64x8_neon
+.if \i < 8
+        ldrh            w12, [x13], #2
+.endif
 .endr
         b               3f
 
-- 
cgit v1.2.3