Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nathan E. Egge <unlord@xiph.org> 2021-01-16 17:06:09 +0300
committer: Nathan Egge <unlord@xiph.org> 2021-02-17 15:21:26 +0300
commit: ec95ea52cb1ed0bb59bf50ea14156b12cf78654c (patch)
tree: 1fda91dbbdce23049c42eaf90e815352329bdc1d
parent: 1d6aae4795a3e59ec4b416c0c3a22530a51fd471 (diff)
Add bpc suffix to cdef functions
-rw-r--r-- src/x86/cdef_avx2.asm 8
-rw-r--r-- src/x86/cdef_avx512.asm 11
-rw-r--r-- src/x86/cdef_init_tmpl.c 64
-rw-r--r-- src/x86/cdef_sse.asm 12
4 files changed, 49 insertions, 46 deletions
diff --git a/src/x86/cdef_avx2.asm b/src/x86/cdef_avx2.asm
index 685a127..f274a1d 100644
--- a/src/x86/cdef_avx2.asm
+++ b/src/x86/cdef_avx2.asm
@@ -39,7 +39,7 @@
%endmacro
%macro CDEF_FILTER_JMP_TABLE 1
-JMP_TABLE cdef_filter_%1, \
+JMP_TABLE cdef_filter_%1_8bpc, \
d6k0, d6k1, d7k0, d7k1, \
d0k0, d0k1, d1k0, d1k1, d2k0, d2k1, d3k0, d3k1, \
d4k0, d4k1, d5k0, d5k1, d6k0, d6k1, d7k0, d7k1, \
@@ -94,7 +94,7 @@ SECTION .text
%macro PREP_REGS 2 ; w, h
; off1/2/3[k] [6 total] from [tapq+12+(dir+0/2/6)*2+k]
mov dird, r6m
- lea tableq, [cdef_filter_%1x%2_jmptable]
+ lea tableq, [cdef_filter_%1x%2_8bpc_jmptable]
lea dirq, [tableq+dirq*2*4]
%if %1 == 4
%if %2 == 4
@@ -397,7 +397,7 @@ SECTION .text
%macro CDEF_FILTER 2 ; w, h
INIT_YMM avx2
-cglobal cdef_filter_%1x%2, 4, 9, 0, dst, stride, left, top, \
+cglobal cdef_filter_%1x%2_8bpc, 4, 9, 0, dst, stride, left, top, \
pri, sec, dir, damping, edge
%assign stack_offset_entry stack_offset
mov edged, edgem
@@ -1592,7 +1592,7 @@ CDEF_FILTER 4, 8
CDEF_FILTER 4, 4
INIT_YMM avx2
-cglobal cdef_dir, 3, 4, 15, src, stride, var, stride3
+cglobal cdef_dir_8bpc, 3, 4, 15, src, stride, var, stride3
lea stride3q, [strideq*3]
movq xm0, [srcq+strideq*0]
movq xm1, [srcq+strideq*1]
diff --git a/src/x86/cdef_avx512.asm b/src/x86/cdef_avx512.asm
index b1fa1ad..94fa818 100644
--- a/src/x86/cdef_avx512.asm
+++ b/src/x86/cdef_avx512.asm
@@ -109,7 +109,8 @@ DECLARE_REG_TMP 8, 5
; 5e 5f 50 51 52 53 54 55
INIT_ZMM avx512icl
-cglobal cdef_filter_4x4, 4, 8, 13, dst, stride, left, top, pri, sec, dir, damping, edge
+cglobal cdef_filter_4x4_8bpc, 4, 8, 13, dst, stride, left, top, \
+ pri, sec, dir, damping, edge
%define base r7-edge_mask
movq xmm0, [dstq+strideq*0]
movhps xmm0, [dstq+strideq*1]
@@ -269,8 +270,8 @@ DECLARE_REG_TMP 2, 7
; L8 L9 40 41 42 43 44 45 8e 8f 80 81 82 83 84 85
; La Lb 50 51 52 53 54 55 9e 9f 90 91 92 93 94 95
-cglobal cdef_filter_4x8, 4, 9, 22, dst, stride, left, top, \
- pri, sec, dir, damping, edge
+cglobal cdef_filter_4x8_8bpc, 4, 9, 22, dst, stride, left, top, \
+ pri, sec, dir, damping, edge
%define base r8-edge_mask
vpbroadcastd ym21, strided
mov r6d, edgem
@@ -504,8 +505,8 @@ ALIGN function_align
; 8e 8f 80 81 82 83 84 85 84 85 86 87 88 89 8a 8b
; 9e 9f 90 91 92 93 94 95 94 95 96 97 98 99 9a 9b
-cglobal cdef_filter_8x8, 4, 11, 32, 4*64, dst, stride, left, top, \
- pri, sec, dir, damping, edge
+cglobal cdef_filter_8x8_8bpc, 4, 11, 32, 4*64, dst, stride, left, top, \
+ pri, sec, dir, damping, edge
%define base r8-edge_mask
mov r6d, edgem
lea r10, [dstq+strideq*4-2]
diff --git a/src/x86/cdef_init_tmpl.c b/src/x86/cdef_init_tmpl.c
index edc3b5d..0c14497 100644
--- a/src/x86/cdef_init_tmpl.c
+++ b/src/x86/cdef_init_tmpl.c
@@ -28,20 +28,22 @@
#include "src/cpu.h"
#include "src/cdef.h"
-#define decl_cdef_size_fn(sz) \
- decl_cdef_fn(dav1d_cdef_filter_##sz##_avx512icl); \
- decl_cdef_fn(dav1d_cdef_filter_##sz##_avx2); \
- decl_cdef_fn(dav1d_cdef_filter_##sz##_sse4); \
- decl_cdef_fn(dav1d_cdef_filter_##sz##_ssse3); \
- decl_cdef_fn(dav1d_cdef_filter_##sz##_sse2)
+#define decl_cdef_fns(ext) \
+ decl_cdef_fn(BF(dav1d_cdef_filter_4x4, ext)); \
+ decl_cdef_fn(BF(dav1d_cdef_filter_4x8, ext)); \
+ decl_cdef_fn(BF(dav1d_cdef_filter_8x8, ext))
-decl_cdef_size_fn(4x4);
-decl_cdef_size_fn(4x8);
-decl_cdef_size_fn(8x8);
-
-decl_cdef_dir_fn(dav1d_cdef_dir_avx2);
-decl_cdef_dir_fn(dav1d_cdef_dir_sse4);
-decl_cdef_dir_fn(dav1d_cdef_dir_ssse3);
+#if BITDEPTH == 8
+decl_cdef_fns(avx512icl);
+decl_cdef_fns(avx2);
+decl_cdef_fns(sse4);
+decl_cdef_fns(ssse3);
+decl_cdef_fns(sse2);
+
+decl_cdef_dir_fn(BF(dav1d_cdef_dir, avx2));
+decl_cdef_dir_fn(BF(dav1d_cdef_dir, sse4));
+decl_cdef_dir_fn(BF(dav1d_cdef_dir, ssse3));
+#endif
COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
@@ -49,45 +51,45 @@ COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
#if BITDEPTH == 8
- c->fb[0] = dav1d_cdef_filter_8x8_sse2;
- c->fb[1] = dav1d_cdef_filter_4x8_sse2;
- c->fb[2] = dav1d_cdef_filter_4x4_sse2;
+ c->fb[0] = BF(dav1d_cdef_filter_8x8, sse2);
+ c->fb[1] = BF(dav1d_cdef_filter_4x8, sse2);
+ c->fb[2] = BF(dav1d_cdef_filter_4x4, sse2);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
#if BITDEPTH == 8
- c->dir = dav1d_cdef_dir_ssse3;
- c->fb[0] = dav1d_cdef_filter_8x8_ssse3;
- c->fb[1] = dav1d_cdef_filter_4x8_ssse3;
- c->fb[2] = dav1d_cdef_filter_4x4_ssse3;
+ c->dir = BF(dav1d_cdef_dir, ssse3);
+ c->fb[0] = BF(dav1d_cdef_filter_8x8, ssse3);
+ c->fb[1] = BF(dav1d_cdef_filter_4x8, ssse3);
+ c->fb[2] = BF(dav1d_cdef_filter_4x4, ssse3);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
#if BITDEPTH == 8
- c->dir = dav1d_cdef_dir_sse4;
- c->fb[0] = dav1d_cdef_filter_8x8_sse4;
- c->fb[1] = dav1d_cdef_filter_4x8_sse4;
- c->fb[2] = dav1d_cdef_filter_4x4_sse4;
+ c->dir = BF(dav1d_cdef_dir, sse4);
+ c->fb[0] = BF(dav1d_cdef_filter_8x8, sse4);
+ c->fb[1] = BF(dav1d_cdef_filter_4x8, sse4);
+ c->fb[2] = BF(dav1d_cdef_filter_4x4, sse4);
#endif
#if ARCH_X86_64
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
#if BITDEPTH == 8
- c->dir = dav1d_cdef_dir_avx2;
- c->fb[0] = dav1d_cdef_filter_8x8_avx2;
- c->fb[1] = dav1d_cdef_filter_4x8_avx2;
- c->fb[2] = dav1d_cdef_filter_4x4_avx2;
+ c->dir = BF(dav1d_cdef_dir, avx2);
+ c->fb[0] = BF(dav1d_cdef_filter_8x8, avx2);
+ c->fb[1] = BF(dav1d_cdef_filter_4x8, avx2);
+ c->fb[2] = BF(dav1d_cdef_filter_4x4, avx2);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
#if HAVE_AVX512ICL && BITDEPTH == 8
- c->fb[0] = dav1d_cdef_filter_8x8_avx512icl;
- c->fb[1] = dav1d_cdef_filter_4x8_avx512icl;
- c->fb[2] = dav1d_cdef_filter_4x4_avx512icl;
+ c->fb[0] = BF(dav1d_cdef_filter_8x8, avx512icl);
+ c->fb[1] = BF(dav1d_cdef_filter_4x8, avx512icl);
+ c->fb[2] = BF(dav1d_cdef_filter_4x4, avx512icl);
#endif
#endif
diff --git a/src/x86/cdef_sse.asm b/src/x86/cdef_sse.asm
index 2dcaf22..4c335ab 100644
--- a/src/x86/cdef_sse.asm
+++ b/src/x86/cdef_sse.asm
@@ -249,13 +249,13 @@ SECTION .text
%macro CDEF_FILTER 2 ; w, h
%if ARCH_X86_64
-cglobal cdef_filter_%1x%2, 4, 9, 16, 3 * 16 + (%2+4)*32, \
- dst, stride, left, top, pri, sec, edge, stride3, dst4
+cglobal cdef_filter_%1x%2_8bpc, 4, 9, 16, 3 * 16 + (%2+4)*32, \
+ dst, stride, left, top, pri, sec, edge, stride3, dst4
%define px rsp+3*16+2*32
%define base 0
%else
-cglobal cdef_filter_%1x%2, 2, 7, 8, - 7 * 16 - (%2+4)*32, \
- dst, stride, left, edge, stride3
+cglobal cdef_filter_%1x%2_8bpc, 2, 7, 8, - 7 * 16 - (%2+4)*32, \
+ dst, stride, left, edge, stride3
%define topq r2
%define dst4q r2
LEA r5, tap_table
@@ -758,7 +758,7 @@ cglobal cdef_filter_%1x%2, 2, 7, 8, - 7 * 16 - (%2+4)*32, \
%macro CDEF_DIR 0
%if ARCH_X86_64
-cglobal cdef_dir, 3, 5, 16, 32, src, stride, var, stride3
+cglobal cdef_dir_8bpc, 3, 5, 16, 32, src, stride, var, stride3
lea stride3q, [strideq*3]
movq m1, [srcq+strideq*0]
movhps m1, [srcq+strideq*1]
@@ -1030,7 +1030,7 @@ cglobal cdef_dir, 3, 5, 16, 32, src, stride, var, stride3
shr r1d, 10
mov [varq], r1d
%else
-cglobal cdef_dir, 2, 4, 8, 96, src, stride, var, stride3
+cglobal cdef_dir_8bpc, 2, 4, 8, 96, src, stride, var, stride3
%define base r2-shufw_6543210x
LEA r2, shufw_6543210x
pxor m0, m0