Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Michael Barr <b@rr-dav.id.au> 2018-11-09 13:41:04 +0300
committer: Henrik Gramner <gramner@twoorioles.com> 2018-11-14 00:14:23 +0300
commit: e10b855c530a3a5ed519fcb3f3ec085a6e0aa79e (patch)
tree: f7e4f2ac81025e7a0a7e94d93f4c560625e8f154
parent: d401106b2a764422d6233cb2f5f3d90a8e5883c4 (diff)
Pass dimensions to cfl_ac and derive log2sz
-rw-r--r-- src/ipred.h 4
-rw-r--r-- src/ipred_tmpl.c 88
-rw-r--r-- src/recon_tmpl.c 8
3 files changed, 19 insertions, 81 deletions
diff --git a/src/ipred.h b/src/ipred.h
index 43a7e3a..1ff368d 100644
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -52,7 +52,7 @@ typedef decl_angular_ipred_fn(*angular_ipred_fn);
*/
#define decl_cfl_ac_fn(name) \
void (name)(int16_t *ac, const pixel *y, ptrdiff_t stride, \
- int w_pad, int h_pad)
+ int w_pad, int h_pad, int cw, int ch)
typedef decl_cfl_ac_fn(*cfl_ac_fn);
/*
@@ -77,7 +77,7 @@ typedef struct Dav1dIntraPredDSPContext {
angular_ipred_fn intra_pred[N_IMPL_INTRA_PRED_MODES];
// chroma-from-luma
- cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+ cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */];
cfl_pred_fn cfl_pred[DC_128_PRED + 1];
// palette
diff --git a/src/ipred_tmpl.c b/src/ipred_tmpl.c
index e928bfb..9b5ce88 100644
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -614,7 +614,7 @@ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
static NOINLINE void
cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
const int w_pad, const int h_pad, const int width, const int height,
- const int ss_hor, const int ss_ver, const int log2sz)
+ const int ss_hor, const int ss_ver)
{
int y, x;
int16_t *const ac_orig = ac;
@@ -642,6 +642,7 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
ac += width;
}
+ const int log2sz = ctz(width) + ctz(height);
int sum = (1 << log2sz) >> 1;
for (ac = ac_orig, y = 0; y < height; y++) {
for (x = 0; x < width; x++)
@@ -658,49 +659,17 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
}
}
-#define cfl_ac_fn(lw, lh, cw, ch, ss_hor, ss_ver, log2sz) \
-static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
- const pixel *const ypx, \
- const ptrdiff_t stride, \
- const int w_pad, \
- const int h_pad) \
+#define cfl_ac_fn(fmt, ss_hor, ss_ver) \
+static void cfl_ac_##fmt##_c(int16_t *const ac, const pixel *const ypx, \
+ const ptrdiff_t stride, const int w_pad, \
+ const int h_pad, const int cw, const int ch) \
{ \
- cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
+ cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \
}
-cfl_ac_fn( 8, 8, 4, 4, 1, 1, 4)
-cfl_ac_fn( 8, 16, 4, 8, 1, 1, 5)
-cfl_ac_fn( 8, 32, 4, 16, 1, 1, 6)
-cfl_ac_fn(16, 8, 8, 4, 1, 1, 5)
-cfl_ac_fn(16, 16, 8, 8, 1, 1, 6)
-cfl_ac_fn(16, 32, 8, 16, 1, 1, 7)
-cfl_ac_fn(32, 8, 16, 4, 1, 1, 6)
-cfl_ac_fn(32, 16, 16, 8, 1, 1, 7)
-cfl_ac_fn(32, 32, 16, 16, 1, 1, 8)
-
-cfl_ac_fn( 8, 4, 4, 4, 1, 0, 4)
-cfl_ac_fn( 8, 8, 4, 8, 1, 0, 5)
-cfl_ac_fn(16, 4, 8, 4, 1, 0, 5)
-cfl_ac_fn(16, 8, 8, 8, 1, 0, 6)
-cfl_ac_fn(16, 16, 8, 16, 1, 0, 7)
-cfl_ac_fn(32, 8, 16, 8, 1, 0, 7)
-cfl_ac_fn(32, 16, 16, 16, 1, 0, 8)
-cfl_ac_fn(32, 32, 16, 32, 1, 0, 9)
-
-cfl_ac_fn( 4, 4, 4, 4, 0, 0, 4)
-cfl_ac_fn( 4, 8, 4, 8, 0, 0, 5)
-cfl_ac_fn( 4, 16, 4, 16, 0, 0, 6)
-cfl_ac_fn( 8, 4, 8, 4, 0, 0, 5)
-cfl_ac_fn( 8, 8, 8, 8, 0, 0, 6)
-cfl_ac_fn( 8, 16, 8, 16, 0, 0, 7)
-cfl_ac_fn( 8, 32, 8, 32, 0, 0, 8)
-cfl_ac_fn(16, 4, 16, 4, 0, 0, 6)
-cfl_ac_fn(16, 8, 16, 8, 0, 0, 7)
-cfl_ac_fn(16, 16, 16, 16, 0, 0, 8)
-cfl_ac_fn(16, 32, 16, 32, 0, 0, 9)
-cfl_ac_fn(32, 8, 32, 8, 0, 0, 8)
-cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
-cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
+cfl_ac_fn(420, 1, 1)
+cfl_ac_fn(422, 1, 0)
+cfl_ac_fn(444, 0, 0)
static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
const uint16_t *const pal, const uint8_t *idx,
@@ -730,40 +699,9 @@ void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
c->intra_pred[Z3_PRED ] = ipred_z3_c;
c->intra_pred[FILTER_PRED ] = ipred_filter_c;
- // cfl functions are split per chroma subsampling type
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_4X4 ] = cfl_ac_8x8_to_4x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X8 ] = cfl_ac_8x16_to_4x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X16 ] = cfl_ac_8x32_to_4x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X4 ] = cfl_ac_16x8_to_8x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_8X8 ] = cfl_ac_16x16_to_8x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X16 ] = cfl_ac_16x32_to_8x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X4 ] = cfl_ac_32x8_to_16x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X8 ] = cfl_ac_32x16_to_16x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_16X16] = cfl_ac_32x32_to_16x16_c;
-
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4 ] = cfl_ac_8x4_to_4x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8 ] = cfl_ac_8x8_to_4x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16 ] = cfl_ac_16x16_to_8x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
-
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_4X4 ] = cfl_ac_4x4_to_4x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X8 ] = cfl_ac_4x8_to_4x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X16 ] = cfl_ac_4x16_to_4x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X4 ] = cfl_ac_8x4_to_8x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_8X8 ] = cfl_ac_8x8_to_8x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X16 ] = cfl_ac_8x16_to_8x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X32 ] = cfl_ac_8x32_to_8x32_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X4 ] = cfl_ac_16x4_to_16x4_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X8 ] = cfl_ac_16x8_to_16x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_16X16] = cfl_ac_16x16_to_16x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X32] = cfl_ac_16x32_to_16x32_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+ c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
+ c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
+ c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
c->cfl_pred[DC_PRED ] = ipred_cfl_c;
c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
diff --git a/src/recon_tmpl.c b/src/recon_tmpl.c
index d87c8c5..38f8de6 100644
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -856,10 +856,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
const int furthest_b =
((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
- dsp->ipred.cfl_ac[f->cur.p.p.layout - 1]
- [b->uvtx](ac, y_src, f->cur.p.stride[0],
- cbw4 - (furthest_r >> ss_hor),
- cbh4 - (furthest_b >> ss_ver));
+ dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
+ cbw4 - (furthest_r >> ss_hor),
+ cbh4 - (furthest_b >> ss_ver),
+ cbw4 * 4, cbh4 * 4);
for (int pl = 0; pl < 2; pl++) {
if (!b->cfl_alpha[pl]) continue;
int angle = 0;