diff options
author | David Michael Barr <b@rr-dav.id.au> | 2018-11-09 13:41:04 +0300 |
---|---|---|
committer | Henrik Gramner <gramner@twoorioles.com> | 2018-11-14 00:14:23 +0300 |
commit | e10b855c530a3a5ed519fcb3f3ec085a6e0aa79e (patch) | |
tree | f7e4f2ac81025e7a0a7e94d93f4c560625e8f154 | |
parent | d401106b2a764422d6233cb2f5f3d90a8e5883c4 (diff) |
Pass dimensions to cfl_ac and derive log2sz
-rw-r--r-- | src/ipred.h | 4 | ||||
-rw-r--r-- | src/ipred_tmpl.c | 88 | ||||
-rw-r--r-- | src/recon_tmpl.c | 8 |
3 files changed, 19 insertions, 81 deletions
diff --git a/src/ipred.h b/src/ipred.h
index 43a7e3a..1ff368d 100644
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -52,7 +52,7 @@ typedef decl_angular_ipred_fn(*angular_ipred_fn);
  */
 #define decl_cfl_ac_fn(name) \
 void (name)(int16_t *ac, const pixel *y, ptrdiff_t stride, \
-            int w_pad, int h_pad)
+            int w_pad, int h_pad, int cw, int ch)
 typedef decl_cfl_ac_fn(*cfl_ac_fn);
 
 /*
@@ -77,7 +77,7 @@ typedef struct Dav1dIntraPredDSPContext {
     angular_ipred_fn intra_pred[N_IMPL_INTRA_PRED_MODES];
 
     // chroma-from-luma
-    cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+    cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */];
     cfl_pred_fn cfl_pred[DC_128_PRED + 1];
 
     // palette
diff --git a/src/ipred_tmpl.c b/src/ipred_tmpl.c
index e928bfb..9b5ce88 100644
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -614,7 +614,7 @@ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
 static NOINLINE void
 cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
          const int w_pad, const int h_pad, const int width, const int height,
-         const int ss_hor, const int ss_ver, const int log2sz)
+         const int ss_hor, const int ss_ver)
 {
     int y, x;
     int16_t *const ac_orig = ac;
@@ -642,6 +642,7 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
         ac += width;
     }
 
+    const int log2sz = ctz(width) + ctz(height);
     int sum = (1 << log2sz) >> 1;
     for (ac = ac_orig, y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
@@ -658,49 +659,17 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
     }
 }
 
-#define cfl_ac_fn(lw, lh, cw, ch, ss_hor, ss_ver, log2sz) \
-static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
-                                                    const pixel *const ypx, \
-                                                    const ptrdiff_t stride, \
-                                                    const int w_pad, \
-                                                    const int h_pad) \
+#define cfl_ac_fn(fmt, ss_hor, ss_ver) \
+static void cfl_ac_##fmt##_c(int16_t *const ac, const pixel *const ypx, \
+                             const ptrdiff_t stride, const int w_pad, \
+                             const int h_pad, const int cw, const int ch) \
 { \
-    cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
+    cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \
 }
 
-cfl_ac_fn( 8, 8, 4, 4, 1, 1, 4)
-cfl_ac_fn( 8, 16, 4, 8, 1, 1, 5)
-cfl_ac_fn( 8, 32, 4, 16, 1, 1, 6)
-cfl_ac_fn(16, 8, 8, 4, 1, 1, 5)
-cfl_ac_fn(16, 16, 8, 8, 1, 1, 6)
-cfl_ac_fn(16, 32, 8, 16, 1, 1, 7)
-cfl_ac_fn(32, 8, 16, 4, 1, 1, 6)
-cfl_ac_fn(32, 16, 16, 8, 1, 1, 7)
-cfl_ac_fn(32, 32, 16, 16, 1, 1, 8)
-
-cfl_ac_fn( 8, 4, 4, 4, 1, 0, 4)
-cfl_ac_fn( 8, 8, 4, 8, 1, 0, 5)
-cfl_ac_fn(16, 4, 8, 4, 1, 0, 5)
-cfl_ac_fn(16, 8, 8, 8, 1, 0, 6)
-cfl_ac_fn(16, 16, 8, 16, 1, 0, 7)
-cfl_ac_fn(32, 8, 16, 8, 1, 0, 7)
-cfl_ac_fn(32, 16, 16, 16, 1, 0, 8)
-cfl_ac_fn(32, 32, 16, 32, 1, 0, 9)
-
-cfl_ac_fn( 4, 4, 4, 4, 0, 0, 4)
-cfl_ac_fn( 4, 8, 4, 8, 0, 0, 5)
-cfl_ac_fn( 4, 16, 4, 16, 0, 0, 6)
-cfl_ac_fn( 8, 4, 8, 4, 0, 0, 5)
-cfl_ac_fn( 8, 8, 8, 8, 0, 0, 6)
-cfl_ac_fn( 8, 16, 8, 16, 0, 0, 7)
-cfl_ac_fn( 8, 32, 8, 32, 0, 0, 8)
-cfl_ac_fn(16, 4, 16, 4, 0, 0, 6)
-cfl_ac_fn(16, 8, 16, 8, 0, 0, 7)
-cfl_ac_fn(16, 16, 16, 16, 0, 0, 8)
-cfl_ac_fn(16, 32, 16, 32, 0, 0, 9)
-cfl_ac_fn(32, 8, 32, 8, 0, 0, 8)
-cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
-cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
+cfl_ac_fn(420, 1, 1)
+cfl_ac_fn(422, 1, 0)
+cfl_ac_fn(444, 0, 0)
 
 static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
                        const uint16_t *const pal, const uint8_t *idx,
@@ -730,40 +699,9 @@ void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
     c->intra_pred[Z3_PRED ] = ipred_z3_c;
     c->intra_pred[FILTER_PRED ] = ipred_filter_c;
 
-    // cfl functions are split per chroma subsampling type
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_4X4 ] = cfl_ac_8x8_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X8 ] = cfl_ac_8x16_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X16 ] = cfl_ac_8x32_to_4x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X4 ] = cfl_ac_16x8_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_8X8 ] = cfl_ac_16x16_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X16 ] = cfl_ac_16x32_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X4 ] = cfl_ac_32x8_to_16x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X8 ] = cfl_ac_32x16_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_16X16] = cfl_ac_32x32_to_16x16_c;
-
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4 ] = cfl_ac_8x4_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8 ] = cfl_ac_8x8_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16 ] = cfl_ac_16x16_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
-
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_4X4 ] = cfl_ac_4x4_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X8 ] = cfl_ac_4x8_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X16 ] = cfl_ac_4x16_to_4x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X4 ] = cfl_ac_8x4_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_8X8 ] = cfl_ac_8x8_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X16 ] = cfl_ac_8x16_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X32 ] = cfl_ac_8x32_to_8x32_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X4 ] = cfl_ac_16x4_to_16x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X8 ] = cfl_ac_16x8_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_16X16] = cfl_ac_16x16_to_16x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X32] = cfl_ac_16x32_to_16x32_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
 
     c->cfl_pred[DC_PRED ] = ipred_cfl_c;
     c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
diff --git a/src/recon_tmpl.c b/src/recon_tmpl.c
index d87c8c5..38f8de6 100644
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -856,10 +856,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
                 const int furthest_b =
                     ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
-                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1]
-                                 [b->uvtx](ac, y_src, f->cur.p.stride[0],
-                                           cbw4 - (furthest_r >> ss_hor),
-                                           cbh4 - (furthest_b >> ss_ver));
+                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
+                                                         cbw4 - (furthest_r >> ss_hor),
+                                                         cbh4 - (furthest_b >> ss_ver),
+                                                         cbw4 * 4, cbh4 * 4);
                 for (int pl = 0; pl < 2; pl++) {
                     if (!b->cfl_alpha[pl]) continue;
                     int angle = 0;