From a9323ef58df2c0713e4115965df10c76818aadb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 16 Mar 2020 00:04:57 +0200 Subject: arm: ipred: Prepare for 16 bpc --- src/arm/32/ipred.S | 60 ++++++++-------- src/arm/64/ipred.S | 174 +++++++++++++++++++++++----------------------- src/arm/ipred_init_tmpl.c | 72 +++++++++---------- 3 files changed, 153 insertions(+), 153 deletions(-) (limited to 'src/arm') diff --git a/src/arm/32/ipred.S b/src/arm/32/ipred.S index 8e33f91..d850a0c 100644 --- a/src/arm/32/ipred.S +++ b/src/arm/32/ipred.S @@ -29,11 +29,11 @@ #include "src/arm/asm.S" #include "util.S" -// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_128_neon, export=1 +// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_128_8bpc_neon, export=1 push {r4, lr} ldr r4, [sp, #8] clz r3, r3 @@ -107,11 +107,11 @@ L(ipred_dc_128_tbl): pop {r4, pc} endfunc -// void ipred_v_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_v_neon, export=1 +// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_v_8bpc_neon, export=1 push {r4, lr} ldr lr, [sp, #8] clz r3, r3 @@ -189,11 +189,11 @@ L(ipred_v_tbl): pop {r4, pc} endfunc -// void ipred_h_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_h_neon, export=1 +// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_h_8bpc_neon, export=1 push {r4-r5, lr} ldr r4, [sp, #12] clz r3, r3 @@ -297,11 +297,11 @@ L(ipred_h_tbl): pop {r4-r5, pc} endfunc -// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_top_neon, export=1 +// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_top_8bpc_neon, export=1 push {r4-r5, lr} ldr r4, [sp, #12] clz r3, r3 @@ -418,11 +418,11 @@ L(ipred_dc_top_tbl): pop {r4-r5, pc} endfunc -// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_left_neon, export=1 +// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_left_8bpc_neon, export=1 push {r4-r5, lr} ldr r4, [sp, #12] sub r2, r2, r4 @@ -556,11 +556,11 @@ L(ipred_dc_left_w64): pop {r4-r5, pc} endfunc -// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_neon, export=1 +// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_8bpc_neon, export=1 push {r4-r6, lr} ldr r4, [sp, #16] sub r2, r2, r4 diff --git a/src/arm/64/ipred.S b/src/arm/64/ipred.S index bbb8145..e53665a 100644 --- a/src/arm/64/ipred.S +++ b/src/arm/64/ipred.S @@ -28,11 +28,11 @@ #include "src/arm/asm.S" #include "util.S" -// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_128_neon, export=1 +// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_128_8bpc_neon, export=1 clz w3, w3 adr x5, L(ipred_dc_128_tbl) sub w3, w3, #25 @@ -97,11 +97,11 @@ L(ipred_dc_128_tbl): .hword L(ipred_dc_128_tbl) - 4b endfunc -// void ipred_v_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_v_neon, export=1 +// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_v_8bpc_neon, export=1 clz w3, w3 adr x5, L(ipred_v_tbl) sub w3, w3, #25 @@ -170,11 +170,11 @@ L(ipred_v_tbl): .hword L(ipred_v_tbl) - 40b endfunc -// void ipred_h_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_h_neon, export=1 +// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_h_8bpc_neon, export=1 clz w3, w3 adr x5, L(ipred_h_tbl) sub w3, w3, #25 @@ -251,11 +251,11 @@ L(ipred_h_tbl): .hword L(ipred_h_tbl) - 4b endfunc -// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_top_neon, export=1 +// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_top_8bpc_neon, export=1 clz w3, w3 adr x5, L(ipred_dc_top_tbl) sub w3, w3, #25 @@ -351,11 +351,11 @@ L(ipred_dc_top_tbl): .hword L(ipred_dc_top_tbl) - 40b endfunc -// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_left_neon, export=1 +// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_left_8bpc_neon, export=1 sub x2, x2, w4, uxtw clz w3, w3 clz w7, w4 @@ -472,11 +472,11 @@ L(ipred_dc_left_tbl): .hword L(ipred_dc_left_tbl) - L(ipred_dc_left_w4) endfunc -// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_dc_neon, export=1 +// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_dc_8bpc_neon, export=1 sub x2, x2, w4, uxtw add w7, w3, w4 // width + height clz w3, w3 @@ -687,11 +687,11 @@ L(ipred_dc_tbl): .hword L(ipred_dc_tbl) - L(ipred_dc_w4) endfunc -// void ipred_paeth_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_paeth_neon, export=1 +// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_paeth_8bpc_neon, export=1 clz w9, w3 adr x5, L(ipred_paeth_tbl) sub w9, w9, #25 @@ -864,11 +864,11 @@ L(ipred_paeth_tbl): .hword L(ipred_paeth_tbl) - 40b endfunc -// void ipred_smooth_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_smooth_neon, export=1 +// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_smooth_8bpc_neon, export=1 movrel x10, X(sm_weights) add x11, x10, w4, uxtw add x10, x10, w3, uxtw @@ -1042,11 +1042,11 @@ L(ipred_smooth_tbl): .hword L(ipred_smooth_tbl) - 40b endfunc -// void ipred_smooth_v_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_smooth_v_neon, export=1 +// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_smooth_v_8bpc_neon, export=1 movrel x7, X(sm_weights) add x7, x7, w4, uxtw clz w9, w3 @@ -1180,11 +1180,11 @@ L(ipred_smooth_v_tbl): .hword L(ipred_smooth_v_tbl) - 40b endfunc -// void ipred_smooth_h_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int a, -// const int max_width, const int max_height); -function ipred_smooth_h_neon, export=1 +// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int a, +// const int max_width, const int max_height); +function ipred_smooth_h_8bpc_neon, export=1 movrel x8, X(sm_weights) add x8, x8, w3, uxtw clz w9, w3 @@ -1323,11 +1323,11 @@ L(ipred_smooth_h_tbl): .hword L(ipred_smooth_h_tbl) - 40b endfunc -// void ipred_filter_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, const int filt_idx, -// const int max_width, const int max_height); -function ipred_filter_neon, export=1 +// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, const int filt_idx, +// const int max_width, const int max_height); +function ipred_filter_8bpc_neon, export=1 and w5, w5, #511 movrel x6, X(filter_intra_taps) lsl w5, w5, #6 @@ -1483,10 +1483,10 @@ L(ipred_filter_tbl): .hword L(ipred_filter_tbl) - 40b endfunc -// void pal_pred_neon(pixel *dst, const ptrdiff_t stride, -// const uint16_t *const pal, const uint8_t *idx, -// const int w, const int h); -function pal_pred_neon, export=1 +// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const uint16_t *const pal, const uint8_t *idx, +// const int w, const int h); +function pal_pred_8bpc_neon, export=1 ld1 {v0.8h}, [x2] clz w9, w4 adr x6, L(pal_pred_tbl) @@ -1574,11 +1574,11 @@ L(pal_pred_tbl): .hword L(pal_pred_tbl) - 4b endfunc -// void ipred_cfl_128_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, -// const int16_t *ac, const int alpha); -function ipred_cfl_128_neon, export=1 +// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, +// const int16_t *ac, const int alpha); +function ipred_cfl_128_8bpc_neon, export=1 clz w9, w3 adr x7, L(ipred_cfl_128_tbl) sub w9, w9, #26 @@ -1695,11 +1695,11 @@ L(ipred_cfl_splat_tbl): .hword L(ipred_cfl_128_tbl) - L(ipred_cfl_splat_w4) endfunc -// void ipred_cfl_top_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, -// const int16_t *ac, const int alpha); -function ipred_cfl_top_neon, export=1 +// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, +// const int16_t *ac, const int alpha); +function ipred_cfl_top_8bpc_neon, export=1 clz w9, w3 adr x7, L(ipred_cfl_top_tbl) sub w9, w9, #26 @@ -1744,11 +1744,11 @@ L(ipred_cfl_top_tbl): .hword L(ipred_cfl_top_tbl) - 4b endfunc -// void ipred_cfl_left_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, -// const int16_t *ac, const int alpha); -function ipred_cfl_left_neon, export=1 +// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, +// const int16_t *ac, const int alpha); +function ipred_cfl_left_8bpc_neon, export=1 sub x2, x2, w4, uxtw clz w9, w3 clz w8, w4 @@ -1802,11 +1802,11 @@ L(ipred_cfl_left_tbl): .hword L(ipred_cfl_left_tbl) - L(ipred_cfl_left_h4) endfunc -// void ipred_cfl_neon(pixel *dst, const ptrdiff_t stride, -// const pixel *const topleft, -// const int width, const int height, -// const int16_t *ac, const int alpha); -function ipred_cfl_neon, export=1 +// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride, +// const pixel *const topleft, +// const int width, const int height, +// const int16_t *ac, const int alpha); +function ipred_cfl_8bpc_neon, export=1 sub x2, x2, w4, uxtw add w8, w3, w4 // width + height dup v1.8h, w6 // alpha @@ -1942,10 +1942,10 @@ L(ipred_cfl_tbl): .hword L(ipred_cfl_tbl) - L(ipred_cfl_w4) endfunc -// void cfl_ac_420_neon(int16_t *const ac, const pixel *const ypx, -// const ptrdiff_t stride, const int w_pad, -// const int h_pad, const int cw, const int ch); -function ipred_cfl_ac_420_neon, export=1 +// void cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx, +// const ptrdiff_t stride, const int w_pad, +// const int h_pad, const int cw, const int ch); +function ipred_cfl_ac_420_8bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 adr x7, L(ipred_cfl_ac_420_tbl) @@ -2260,10 +2260,10 @@ L(ipred_cfl_ac_420_w16_tbl): .hword L(ipred_cfl_ac_420_w16_tbl) - L(ipred_cfl_ac_420_w16_wpad3) endfunc -// void cfl_ac_422_neon(int16_t *const ac, const pixel *const ypx, -// const ptrdiff_t stride, const int w_pad, -// const int h_pad, const int cw, const int ch); -function ipred_cfl_ac_422_neon, export=1 +// void cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx, +// const ptrdiff_t stride, const int w_pad, +// const int h_pad, const int cw, const int ch); +function ipred_cfl_ac_422_8bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 adr x7, L(ipred_cfl_ac_422_tbl) diff --git a/src/arm/ipred_init_tmpl.c b/src/arm/ipred_init_tmpl.c index 5b3eb07..c838972 100644 --- a/src/arm/ipred_init_tmpl.c +++ b/src/arm/ipred_init_tmpl.c @@ -27,27 +27,27 @@ #include "src/cpu.h" #include "src/ipred.h" -decl_angular_ipred_fn(dav1d_ipred_dc_neon); -decl_angular_ipred_fn(dav1d_ipred_dc_128_neon); -decl_angular_ipred_fn(dav1d_ipred_dc_top_neon); -decl_angular_ipred_fn(dav1d_ipred_dc_left_neon); -decl_angular_ipred_fn(dav1d_ipred_h_neon); -decl_angular_ipred_fn(dav1d_ipred_v_neon); -decl_angular_ipred_fn(dav1d_ipred_paeth_neon); -decl_angular_ipred_fn(dav1d_ipred_smooth_neon); -decl_angular_ipred_fn(dav1d_ipred_smooth_v_neon); -decl_angular_ipred_fn(dav1d_ipred_smooth_h_neon); -decl_angular_ipred_fn(dav1d_ipred_filter_neon); +decl_angular_ipred_fn(BF(dav1d_ipred_dc, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_dc_128, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_dc_top, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_dc_left, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_h, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_v, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_paeth, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_smooth, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_smooth_h, neon)); +decl_angular_ipred_fn(BF(dav1d_ipred_filter, neon)); -decl_cfl_pred_fn(dav1d_ipred_cfl_neon); -decl_cfl_pred_fn(dav1d_ipred_cfl_128_neon); -decl_cfl_pred_fn(dav1d_ipred_cfl_top_neon); -decl_cfl_pred_fn(dav1d_ipred_cfl_left_neon); +decl_cfl_pred_fn(BF(dav1d_ipred_cfl, neon)); +decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, neon)); +decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, neon)); +decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, neon)); -decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_neon); -decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_neon); +decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_420, neon)); +decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_422, neon)); -decl_pal_pred_fn(dav1d_pal_pred_neon); +decl_pal_pred_fn(BF(dav1d_pal_pred, neon)); COLD void bitfn(dav1d_intra_pred_dsp_init_arm)(Dav1dIntraPredDSPContext *const c) { const unsigned flags = dav1d_get_cpu_flags(); @@ -55,28 +55,28 @@ COLD void bitfn(dav1d_intra_pred_dsp_init_arm)(Dav1dIntraPredDSPContext *const c if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return; #if BITDEPTH == 8 - c->intra_pred[DC_PRED] = dav1d_ipred_dc_neon; - c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_neon; - c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_neon; - c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_neon; - c->intra_pred[HOR_PRED] = dav1d_ipred_h_neon; - c->intra_pred[VERT_PRED] = dav1d_ipred_v_neon; + c->intra_pred[DC_PRED] = BF(dav1d_ipred_dc, neon); + c->intra_pred[DC_128_PRED] = BF(dav1d_ipred_dc_128, neon); + c->intra_pred[TOP_DC_PRED] = BF(dav1d_ipred_dc_top, neon); + c->intra_pred[LEFT_DC_PRED] = BF(dav1d_ipred_dc_left, neon); + c->intra_pred[HOR_PRED] = BF(dav1d_ipred_h, neon); + c->intra_pred[VERT_PRED] = BF(dav1d_ipred_v, neon); #if ARCH_AARCH64 - c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_neon; - c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_neon; - c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_neon; - c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_neon; - c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_neon; + c->intra_pred[PAETH_PRED] = BF(dav1d_ipred_paeth, neon); + c->intra_pred[SMOOTH_PRED] = BF(dav1d_ipred_smooth, neon); + c->intra_pred[SMOOTH_V_PRED] = BF(dav1d_ipred_smooth_v, neon); + c->intra_pred[SMOOTH_H_PRED] = BF(dav1d_ipred_smooth_h, neon); + c->intra_pred[FILTER_PRED] = BF(dav1d_ipred_filter, neon); - c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_neon; - c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_neon; - c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_neon; - c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_neon; + c->cfl_pred[DC_PRED] = BF(dav1d_ipred_cfl, neon); + c->cfl_pred[DC_128_PRED] = BF(dav1d_ipred_cfl_128, neon); + c->cfl_pred[TOP_DC_PRED] = BF(dav1d_ipred_cfl_top, neon); + c->cfl_pred[LEFT_DC_PRED] = BF(dav1d_ipred_cfl_left, neon); - c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_neon; - c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_neon; + c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_ipred_cfl_ac_420, neon); + c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_ipred_cfl_ac_422, neon); - c->pal_pred = dav1d_pal_pred_neon; + c->pal_pred = BF(dav1d_pal_pred, neon); #endif #endif } -- cgit v1.2.3