diff options
author | Matthias Dressel <code@deadcode.eu> | 2021-11-15 23:32:10 +0300 |
---|---|---|
committer | Matthias Dressel <code@deadcode.eu> | 2021-11-29 16:56:25 +0300 |
commit | 00f92f2ccbd954225a74892c53cd65b776917837 (patch) | |
tree | ad48b7cd1013e3fdd9ab01323ab6201fe6d869a1 /src/x86 | |
parent | 1f98769b337930e29845705a84664237e74226e9 (diff) |
x86/itx: Convert 8bpc WHT to SSE2
WHT uses no SSSE3 instructions. The 16bpc variant is already SSE2.
Diffstat (limited to 'src/x86')
-rw-r--r-- | src/x86/itx_init_tmpl.c | 6 | ||||
-rw-r--r-- | src/x86/itx_sse.asm | 7 |
2 files changed, 4 insertions, 9 deletions
```diff
diff --git a/src/x86/itx_init_tmpl.c b/src/x86/itx_init_tmpl.c
index 0a8e7ba..90024f9 100644
--- a/src/x86/itx_init_tmpl.c
+++ b/src/x86/itx_init_tmpl.c
@@ -132,7 +132,7 @@ decl_itx_bpc_fns(12, avx2);
 decl_itx_fns(sse4);
 decl_itx_fns(ssse3);
 decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_sse2);
+decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_4x4, sse2));
 
 COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c,
                                         const int bpc)
@@ -208,14 +208,12 @@ COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c,
 
     if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
 
-#if BITDEPTH == 16
     assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, sse2);
-#endif
 
     if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
 
 #if BITDEPTH == 8
-    assign_itx17_fn(, 4, 4, ssse3);
+    assign_itx16_fn(, 4, 4, ssse3);
     assign_itx16_fn(R, 4, 8, ssse3);
     assign_itx16_fn(R, 8, 4, ssse3);
     assign_itx16_fn(, 8, 8, ssse3);
diff --git a/src/x86/itx_sse.asm b/src/x86/itx_sse.asm
index 89938b5..7cbd9c3 100644
--- a/src/x86/itx_sse.asm
+++ b/src/x86/itx_sse.asm
@@ -430,6 +430,7 @@ cglobal iidentity_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
     paddw m2, m1 ;low: out3
 %endmacro
 
+INIT_XMM sse2
 cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
     mova m0, [coeffq+16*0]
     mova m1, [coeffq+16*1]
@@ -438,20 +439,15 @@ cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
     mova [coeffq+16*1], m2
     psraw m0, 2
     psraw m1, 2
-
     IWHT4_1D_PACKED
-
     punpckhwd m0, m1
     punpcklwd m3, m1, m2
     punpckhdq m1, m0, m3
     punpckldq m0, m3
-
     IWHT4_1D_PACKED
-
     shufpd m0, m2, 0x01
     ITX4_END 0, 3, 2, 1, 0
 
-
 %macro IDCT8_1D_PACKED 0
     mova m6, [o(pd_2048)]
     punpckhwd m4, m0, m3 ;unpacked in1 in7
@@ -566,6 +562,7 @@ cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
 %endif
 %endmacro
 
+INIT_XMM ssse3
 INV_TXFM_4X8_FN dct, dct
 INV_TXFM_4X8_FN dct, adst
 INV_TXFM_4X8_FN dct, flipadst
```