Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/x86
diff options
context:
space:
mode:
author: Matthias Dressel <code@deadcode.eu> 2021-11-15 23:32:10 +0300
committer: Matthias Dressel <code@deadcode.eu> 2021-11-29 16:56:25 +0300
commit: 00f92f2ccbd954225a74892c53cd65b776917837 (patch)
tree: ad48b7cd1013e3fdd9ab01323ab6201fe6d869a1 /src/x86
parent: 1f98769b337930e29845705a84664237e74226e9 (diff)
x86/itx: Convert 8bpc WHT to SSE2
WHT uses no SSSE3 instructions. The 16bpc variant is already SSE2.
Diffstat (limited to 'src/x86')
-rw-r--r-- src/x86/itx_init_tmpl.c | 6
-rw-r--r-- src/x86/itx_sse.asm | 7
2 files changed, 4 insertions, 9 deletions
diff --git a/src/x86/itx_init_tmpl.c b/src/x86/itx_init_tmpl.c
index 0a8e7ba..90024f9 100644
--- a/src/x86/itx_init_tmpl.c
+++ b/src/x86/itx_init_tmpl.c
@@ -132,7 +132,7 @@ decl_itx_bpc_fns(12, avx2);
decl_itx_fns(sse4);
decl_itx_fns(ssse3);
decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_sse2);
+decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_4x4, sse2));
COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c,
const int bpc)
@@ -208,14 +208,12 @@ COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c,
if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
-#if BITDEPTH == 16
assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, sse2);
-#endif
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
#if BITDEPTH == 8
- assign_itx17_fn(, 4, 4, ssse3);
+ assign_itx16_fn(, 4, 4, ssse3);
assign_itx16_fn(R, 4, 8, ssse3);
assign_itx16_fn(R, 8, 4, ssse3);
assign_itx16_fn(, 8, 8, ssse3);
diff --git a/src/x86/itx_sse.asm b/src/x86/itx_sse.asm
index 89938b5..7cbd9c3 100644
--- a/src/x86/itx_sse.asm
+++ b/src/x86/itx_sse.asm
@@ -430,6 +430,7 @@ cglobal iidentity_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
paddw m2, m1 ;low: out3
%endmacro
+INIT_XMM sse2
cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
mova m0, [coeffq+16*0]
mova m1, [coeffq+16*1]
@@ -438,20 +439,15 @@ cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
mova [coeffq+16*1], m2
psraw m0, 2
psraw m1, 2
-
IWHT4_1D_PACKED
-
punpckhwd m0, m1
punpcklwd m3, m1, m2
punpckhdq m1, m0, m3
punpckldq m0, m3
-
IWHT4_1D_PACKED
-
shufpd m0, m2, 0x01
ITX4_END 0, 3, 2, 1, 0
-
%macro IDCT8_1D_PACKED 0
mova m6, [o(pd_2048)]
punpckhwd m4, m0, m3 ;unpacked in1 in7
@@ -566,6 +562,7 @@ cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
%endif
%endmacro
+INIT_XMM ssse3
INV_TXFM_4X8_FN dct, dct
INV_TXFM_4X8_FN dct, adst
INV_TXFM_4X8_FN dct, flipadst