Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com>, 2021-08-10 15:21:44 +0300
committer: Ronald S. Bultje <rsbultje@gmail.com>, 2021-08-12 21:01:45 +0300
commit: 7050f0581db37bae99b0ff525d535dea913f670c (patch)
tree: 9bcbb0a922e5d35f01a1d68c4de8b09c5303628b
parent: a5cea27ce964683da327cf1b5d3203fbb70b6138 (diff)
x86/itx: combine .write_8x4 and .round{1,2} into a single function
-rw-r--r-- src/x86/itx16_sse.asm | 31
1 file changed, 11 insertions, 20 deletions
diff --git a/src/x86/itx16_sse.asm b/src/x86/itx16_sse.asm
index 1b3e352..a201313 100644
--- a/src/x86/itx16_sse.asm
+++ b/src/x86/itx16_sse.asm
@@ -1478,19 +1478,16 @@ cglobal idct_8x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
%endif
call m_suffix(idct_8x4_internal_8bpc, _ssse3).main
.end:
- call .round2
lea r3, [strideq*3]
- call .write_8x4_load
+ call .round2_and_write_8x4
REPX {mova [cq+16*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
RET
-.round2:
- mova m4, [o(pw_2048)]
-.round1:
- REPX {pmulhrsw x, m4}, m0, m1, m2, m3
- ret
-.write_8x4_load:
+.round2_and_write_8x4:
pxor m6, m6
mova m5, [o(pixel_10bpc_max)]
+ mova m4, [o(pw_2048)]
+.round1_and_write_8x4:
+ REPX {pmulhrsw x, m4}, m0, m1, m2, m3
.write_8x4:
paddw m0, [dstq+strideq*0]
paddw m1, [dstq+strideq*1]
@@ -2351,11 +2348,9 @@ cglobal iidentity_8x16_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
.pass2_loop:
call .main
%if ARCH_X86_64
- call m(idct_8x4_internal_16bpc).round1
- call m(idct_8x4_internal_16bpc).write_8x4
+ call m(idct_8x4_internal_16bpc).round1_and_write_8x4
%else
- call m(idct_8x4_internal_16bpc).round2
- call m(idct_8x4_internal_16bpc).write_8x4_load
+ call m(idct_8x4_internal_16bpc).round2_and_write_8x4
%endif
REPX {mova [cq+x*16], m6}, 0, 4, 8, 12, 16, 20, 24, 28
dec r5d
@@ -2747,8 +2742,7 @@ cglobal idct_16x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
lea r5, [o(itx8_start)]
%endif
call r4
- call m(idct_8x4_internal_16bpc).round2
- call m(idct_8x4_internal_16bpc).write_8x4_load
+ call m(idct_8x4_internal_16bpc).round2_and_write_8x4
REPX {mova [cq+x*16], m6}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
%if ARCH_X86_64
mova m0, m8
@@ -2766,8 +2760,7 @@ cglobal idct_16x4_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
lea r5, [o(itx8_start)]
%endif
call r4
- call m(idct_8x4_internal_16bpc).round2
- call m(idct_8x4_internal_16bpc).write_8x4_load
+ call m(idct_8x4_internal_16bpc).round2_and_write_8x4
RET
INV_TXFM_16X4_FN adst, dct
@@ -4564,11 +4557,9 @@ cglobal iidentity_16x16_internal_16bpc, 0, 0, 0, dst, stride, c, eob, tx2
mova m3, [cq+3*64+0]
call m(iidentity_8x16_internal_16bpc).main
%if ARCH_X86_64
- call m(idct_8x4_internal_16bpc).round1
- call m(idct_8x4_internal_16bpc).write_8x4
+ call m(idct_8x4_internal_16bpc).round1_and_write_8x4
%else
- call m(idct_8x4_internal_16bpc).round2
- call m(idct_8x4_internal_16bpc).write_8x4_load
+ call m(idct_8x4_internal_16bpc).round2_and_write_8x4
%endif
REPX {mova [cq+x*16], m6}, 0, 4, 8, 12
add cq, 16