diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2021-08-10 15:21:44 +0300 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2021-08-12 21:01:45 +0300 |
commit | 7050f0581db37bae99b0ff525d535dea913f670c (patch) | |
tree | 9bcbb0a922e5d35f01a1d68c4de8b09c5303628b | |
parent | a5cea27ce964683da327cf1b5d3203fbb70b6138 (diff) |
x86/itx: combine .write_8x4 and .round{1,2} into a single function
-rw-r--r-- | src/x86/itx16_sse.asm | 31 |
1 file changed, 11 insertions, 20 deletions
diff --git a/src/x86/itx16_sse.asm b/src/x86/itx16_sse.asm
index 1b3e352..a201313 100644
--- a/src/x86/itx16_sse.asm
+++ b/src/x86/itx16_sse.asm
@@ -1478,19 +1478,16 @@ cglobal idct_8x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
 %endif
     call m_suffix(idct_8x4_internal_8bpc, _ssse3).main
 .end:
-    call .round2
     lea r3, [strideq*3]
-    call .write_8x4_load
+    call .round2_and_write_8x4
     REPX {mova [cq+16*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
     RET
-.round2:
-    mova m4, [o(pw_2048)]
-.round1:
-    REPX {pmulhrsw x, m4}, m0, m1, m2, m3
-    ret
-.write_8x4_load:
+.round2_and_write_8x4:
     pxor m6, m6
     mova m5, [o(pixel_10bpc_max)]
+    mova m4, [o(pw_2048)]
+.round1_and_write_8x4:
+    REPX {pmulhrsw x, m4}, m0, m1, m2, m3
 .write_8x4:
     paddw m0, [dstq+strideq*0]
     paddw m1, [dstq+strideq*1]
@@ -2351,11 +2348,9 @@ cglobal iidentity_8x16_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
 .pass2_loop:
     call .main
 %if ARCH_X86_64
-    call m(idct_8x4_internal_16bpc).round1
-    call m(idct_8x4_internal_16bpc).write_8x4
+    call m(idct_8x4_internal_16bpc).round1_and_write_8x4
 %else
-    call m(idct_8x4_internal_16bpc).round2
-    call m(idct_8x4_internal_16bpc).write_8x4_load
+    call m(idct_8x4_internal_16bpc).round2_and_write_8x4
 %endif
     REPX {mova [cq+x*16], m6}, 0, 4, 8, 12, 16, 20, 24, 28
     dec r5d
@@ -2747,8 +2742,7 @@ cglobal idct_16x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
     lea r5, [o(itx8_start)]
 %endif
     call r4
-    call m(idct_8x4_internal_16bpc).round2
-    call m(idct_8x4_internal_16bpc).write_8x4_load
+    call m(idct_8x4_internal_16bpc).round2_and_write_8x4
     REPX {mova [cq+x*16], m6}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 %if ARCH_X86_64
     mova m0, m8
@@ -2766,8 +2760,7 @@ cglobal idct_16x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
     lea r5, [o(itx8_start)]
 %endif
     call r4
-    call m(idct_8x4_internal_16bpc).round2
-    call m(idct_8x4_internal_16bpc).write_8x4_load
+    call m(idct_8x4_internal_16bpc).round2_and_write_8x4
     RET
 
 INV_TXFM_16X4_FN adst, dct
@@ -4564,11 +4557,9 @@ cglobal iidentity_16x16_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
     mova m3, [cq+3*64+0]
     call m(iidentity_8x16_internal_16bpc).main
 %if ARCH_X86_64
-    call m(idct_8x4_internal_16bpc).round1
-    call m(idct_8x4_internal_16bpc).write_8x4
+    call m(idct_8x4_internal_16bpc).round1_and_write_8x4
 %else
-    call m(idct_8x4_internal_16bpc).round2
-    call m(idct_8x4_internal_16bpc).write_8x4_load
+    call m(idct_8x4_internal_16bpc).round2_and_write_8x4
 %endif
     REPX {mova [cq+x*16], m6}, 0, 4, 8, 12
     add cq, 16