diff options
author | Nathan E. Egge <unlord@xiph.org> | 2021-01-16 17:06:09 +0300 |
---|---|---|
committer | Nathan Egge <unlord@xiph.org> | 2021-02-17 15:21:26 +0300 |
commit | ec95ea52cb1ed0bb59bf50ea14156b12cf78654c (patch) | |
tree | 1fda91dbbdce23049c42eaf90e815352329bdc1d | |
parent | 1d6aae4795a3e59ec4b416c0c3a22530a51fd471 (diff) |
Add bpc suffix to cdef functions
-rw-r--r-- | src/x86/cdef_avx2.asm | 8 | ||||
-rw-r--r-- | src/x86/cdef_avx512.asm | 11 | ||||
-rw-r--r-- | src/x86/cdef_init_tmpl.c | 64 | ||||
-rw-r--r-- | src/x86/cdef_sse.asm | 12 |
4 files changed, 49 insertions, 46 deletions
diff --git a/src/x86/cdef_avx2.asm b/src/x86/cdef_avx2.asm index 685a127..f274a1d 100644 --- a/src/x86/cdef_avx2.asm +++ b/src/x86/cdef_avx2.asm @@ -39,7 +39,7 @@ %endmacro %macro CDEF_FILTER_JMP_TABLE 1 -JMP_TABLE cdef_filter_%1, \ +JMP_TABLE cdef_filter_%1_8bpc, \ d6k0, d6k1, d7k0, d7k1, \ d0k0, d0k1, d1k0, d1k1, d2k0, d2k1, d3k0, d3k1, \ d4k0, d4k1, d5k0, d5k1, d6k0, d6k1, d7k0, d7k1, \ @@ -94,7 +94,7 @@ SECTION .text %macro PREP_REGS 2 ; w, h ; off1/2/3[k] [6 total] from [tapq+12+(dir+0/2/6)*2+k] mov dird, r6m - lea tableq, [cdef_filter_%1x%2_jmptable] + lea tableq, [cdef_filter_%1x%2_8bpc_jmptable] lea dirq, [tableq+dirq*2*4] %if %1 == 4 %if %2 == 4 @@ -397,7 +397,7 @@ SECTION .text %macro CDEF_FILTER 2 ; w, h INIT_YMM avx2 -cglobal cdef_filter_%1x%2, 4, 9, 0, dst, stride, left, top, \ +cglobal cdef_filter_%1x%2_8bpc, 4, 9, 0, dst, stride, left, top, \ pri, sec, dir, damping, edge %assign stack_offset_entry stack_offset mov edged, edgem @@ -1592,7 +1592,7 @@ CDEF_FILTER 4, 8 CDEF_FILTER 4, 4 INIT_YMM avx2 -cglobal cdef_dir, 3, 4, 15, src, stride, var, stride3 +cglobal cdef_dir_8bpc, 3, 4, 15, src, stride, var, stride3 lea stride3q, [strideq*3] movq xm0, [srcq+strideq*0] movq xm1, [srcq+strideq*1] diff --git a/src/x86/cdef_avx512.asm b/src/x86/cdef_avx512.asm index b1fa1ad..94fa818 100644 --- a/src/x86/cdef_avx512.asm +++ b/src/x86/cdef_avx512.asm @@ -109,7 +109,8 @@ DECLARE_REG_TMP 8, 5 ; 5e 5f 50 51 52 53 54 55 INIT_ZMM avx512icl -cglobal cdef_filter_4x4, 4, 8, 13, dst, stride, left, top, pri, sec, dir, damping, edge +cglobal cdef_filter_4x4_8bpc, 4, 8, 13, dst, stride, left, top, \ + pri, sec, dir, damping, edge %define base r7-edge_mask movq xmm0, [dstq+strideq*0] movhps xmm0, [dstq+strideq*1] @@ -269,8 +270,8 @@ DECLARE_REG_TMP 2, 7 ; L8 L9 40 41 42 43 44 45 8e 8f 80 81 82 83 84 85 ; La Lb 50 51 52 53 54 55 9e 9f 90 91 92 93 94 95 -cglobal cdef_filter_4x8, 4, 9, 22, dst, stride, left, top, \ - pri, sec, dir, damping, edge +cglobal cdef_filter_4x8_8bpc, 4, 9, 22, dst, stride, left, top, \ + pri, sec, dir, damping, edge %define base r8-edge_mask vpbroadcastd ym21, strided mov r6d, edgem @@ -504,8 +505,8 @@ ALIGN function_align ; 8e 8f 80 81 82 83 84 85 84 85 86 87 88 89 8a 8b ; 9e 9f 90 91 92 93 94 95 94 95 96 97 98 99 9a 9b -cglobal cdef_filter_8x8, 4, 11, 32, 4*64, dst, stride, left, top, \ - pri, sec, dir, damping, edge +cglobal cdef_filter_8x8_8bpc, 4, 11, 32, 4*64, dst, stride, left, top, \ + pri, sec, dir, damping, edge %define base r8-edge_mask mov r6d, edgem lea r10, [dstq+strideq*4-2] diff --git a/src/x86/cdef_init_tmpl.c b/src/x86/cdef_init_tmpl.c index edc3b5d..0c14497 100644 --- a/src/x86/cdef_init_tmpl.c +++ b/src/x86/cdef_init_tmpl.c @@ -28,20 +28,22 @@ #include "src/cpu.h" #include "src/cdef.h" -#define decl_cdef_size_fn(sz) \ - decl_cdef_fn(dav1d_cdef_filter_##sz##_avx512icl); \ - decl_cdef_fn(dav1d_cdef_filter_##sz##_avx2); \ - decl_cdef_fn(dav1d_cdef_filter_##sz##_sse4); \ - decl_cdef_fn(dav1d_cdef_filter_##sz##_ssse3); \ - decl_cdef_fn(dav1d_cdef_filter_##sz##_sse2) +#define decl_cdef_fns(ext) \ + decl_cdef_fn(BF(dav1d_cdef_filter_4x4, ext)); \ + decl_cdef_fn(BF(dav1d_cdef_filter_4x8, ext)); \ + decl_cdef_fn(BF(dav1d_cdef_filter_8x8, ext)) -decl_cdef_size_fn(4x4); -decl_cdef_size_fn(4x8); -decl_cdef_size_fn(8x8); - -decl_cdef_dir_fn(dav1d_cdef_dir_avx2); -decl_cdef_dir_fn(dav1d_cdef_dir_sse4); -decl_cdef_dir_fn(dav1d_cdef_dir_ssse3); +#if BITDEPTH == 8 +decl_cdef_fns(avx512icl); +decl_cdef_fns(avx2); +decl_cdef_fns(sse4); +decl_cdef_fns(ssse3); +decl_cdef_fns(sse2); + +decl_cdef_dir_fn(BF(dav1d_cdef_dir, avx2)); +decl_cdef_dir_fn(BF(dav1d_cdef_dir, sse4)); +decl_cdef_dir_fn(BF(dav1d_cdef_dir, ssse3)); +#endif COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) { const unsigned flags = dav1d_get_cpu_flags(); @@ -49,45 +51,45 @@ COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) { if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return; #if BITDEPTH == 8 - c->fb[0] = dav1d_cdef_filter_8x8_sse2; - c->fb[1] = dav1d_cdef_filter_4x8_sse2; - c->fb[2] = dav1d_cdef_filter_4x4_sse2; + c->fb[0] = BF(dav1d_cdef_filter_8x8, sse2); + c->fb[1] = BF(dav1d_cdef_filter_4x8, sse2); + c->fb[2] = BF(dav1d_cdef_filter_4x4, sse2); #endif if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return; #if BITDEPTH == 8 - c->dir = dav1d_cdef_dir_ssse3; - c->fb[0] = dav1d_cdef_filter_8x8_ssse3; - c->fb[1] = dav1d_cdef_filter_4x8_ssse3; - c->fb[2] = dav1d_cdef_filter_4x4_ssse3; + c->dir = BF(dav1d_cdef_dir, ssse3); + c->fb[0] = BF(dav1d_cdef_filter_8x8, ssse3); + c->fb[1] = BF(dav1d_cdef_filter_4x8, ssse3); + c->fb[2] = BF(dav1d_cdef_filter_4x4, ssse3); #endif if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return; #if BITDEPTH == 8 - c->dir = dav1d_cdef_dir_sse4; - c->fb[0] = dav1d_cdef_filter_8x8_sse4; - c->fb[1] = dav1d_cdef_filter_4x8_sse4; - c->fb[2] = dav1d_cdef_filter_4x4_sse4; + c->dir = BF(dav1d_cdef_dir, sse4); + c->fb[0] = BF(dav1d_cdef_filter_8x8, sse4); + c->fb[1] = BF(dav1d_cdef_filter_4x8, sse4); + c->fb[2] = BF(dav1d_cdef_filter_4x4, sse4); #endif #if ARCH_X86_64 if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; #if BITDEPTH == 8 - c->dir = dav1d_cdef_dir_avx2; - c->fb[0] = dav1d_cdef_filter_8x8_avx2; - c->fb[1] = dav1d_cdef_filter_4x8_avx2; - c->fb[2] = dav1d_cdef_filter_4x4_avx2; + c->dir = BF(dav1d_cdef_dir, avx2); + c->fb[0] = BF(dav1d_cdef_filter_8x8, avx2); + c->fb[1] = BF(dav1d_cdef_filter_4x8, avx2); + c->fb[2] = BF(dav1d_cdef_filter_4x4, avx2); #endif if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return; #if HAVE_AVX512ICL && BITDEPTH == 8 - c->fb[0] = dav1d_cdef_filter_8x8_avx512icl; - c->fb[1] = dav1d_cdef_filter_4x8_avx512icl; - c->fb[2] = dav1d_cdef_filter_4x4_avx512icl; + c->fb[0] = BF(dav1d_cdef_filter_8x8, avx512icl); + c->fb[1] = BF(dav1d_cdef_filter_4x8, avx512icl); + c->fb[2] = BF(dav1d_cdef_filter_4x4, avx512icl); #endif #endif diff --git a/src/x86/cdef_sse.asm b/src/x86/cdef_sse.asm index 2dcaf22..4c335ab 100644 --- a/src/x86/cdef_sse.asm +++ b/src/x86/cdef_sse.asm @@ -249,13 +249,13 @@ SECTION .text %macro CDEF_FILTER 2 ; w, h %if ARCH_X86_64 -cglobal cdef_filter_%1x%2, 4, 9, 16, 3 * 16 + (%2+4)*32, \ - dst, stride, left, top, pri, sec, edge, stride3, dst4 +cglobal cdef_filter_%1x%2_8bpc, 4, 9, 16, 3 * 16 + (%2+4)*32, \ + dst, stride, left, top, pri, sec, edge, stride3, dst4 %define px rsp+3*16+2*32 %define base 0 %else -cglobal cdef_filter_%1x%2, 2, 7, 8, - 7 * 16 - (%2+4)*32, \ - dst, stride, left, edge, stride3 +cglobal cdef_filter_%1x%2_8bpc, 2, 7, 8, - 7 * 16 - (%2+4)*32, \ + dst, stride, left, edge, stride3 %define topq r2 %define dst4q r2 LEA r5, tap_table @@ -758,7 +758,7 @@ cglobal cdef_filter_%1x%2, 2, 7, 8, - 7 * 16 - (%2+4)*32, \ %macro CDEF_DIR 0 %if ARCH_X86_64 -cglobal cdef_dir, 3, 5, 16, 32, src, stride, var, stride3 +cglobal cdef_dir_8bpc, 3, 5, 16, 32, src, stride, var, stride3 lea stride3q, [strideq*3] movq m1, [srcq+strideq*0] movhps m1, [srcq+strideq*1] @@ -1030,7 +1030,7 @@ cglobal cdef_dir, 3, 5, 16, 32, src, stride, var, stride3 shr r1d, 10 mov [varq], r1d %else -cglobal cdef_dir, 2, 4, 8, 96, src, stride, var, stride3 +cglobal cdef_dir_8bpc, 2, 4, 8, 96, src, stride, var, stride3 %define base r2-shufw_6543210x LEA r2, shufw_6543210x pxor m0, m0 |