diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2021-06-18 00:12:22 +0300 |
---|---|---|
committer | Henrik Gramner <henrik@gramner.com> | 2021-08-20 00:22:09 +0300 |
commit | e3dbc3de9751e2e946b96ebfad06132b222c4252 (patch) | |
tree | 0baf4a164ad77669e6a07ec8a714959f78292da6 | |
parent | 3bae53c0b684b7fedf2f5b0b662a3bbd84631aaf (diff) |
refmvs: Make splat_mv into a DSP function
-rw-r--r-- | src/decode.c | 81 | ||||
-rw-r--r-- | src/internal.h | 1 | ||||
-rw-r--r-- | src/lib.c | 1 | ||||
-rw-r--r-- | src/refmvs.c | 28 | ||||
-rw-r--r-- | src/refmvs.h | 102 |
5 files changed, 102 insertions, 111 deletions
diff --git a/src/decode.c b/src/decode.c index d810ed2..1caea76 100644 --- a/src/decode.c +++ b/src/decode.c @@ -692,6 +692,68 @@ static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f, return seg_id; } +static inline void splat_oneref_mv(const Dav1dContext *const c, + Dav1dTileContext *const t, + const enum BlockSize bs, + const Av1Block *const b, + const int bw4, const int bh4) +{ + const enum InterPredMode mode = b->inter_mode; + const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { + .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 }, + .mv.mv[0] = b->mv[0], + .bs = bs, + .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2), + }; + c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); +} + +static inline void splat_intrabc_mv(const Dav1dContext *const c, + Dav1dTileContext *const t, + const enum BlockSize bs, + const Av1Block *const b, + const int bw4, const int bh4) +{ + const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { + .ref.ref = { 0, -1 }, + .mv.mv[0] = b->mv[0], + .bs = bs, + .mf = 0, + }; + c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); +} + +static inline void splat_tworef_mv(const Dav1dContext *const c, + Dav1dTileContext *const t, + const enum BlockSize bs, + const Av1Block *const b, + const int bw4, const int bh4) +{ + assert(bw4 >= 2 && bh4 >= 2); + const enum CompInterPredMode mode = b->inter_mode; + const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { + .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 }, + .mv.mv = { b->mv[0], b->mv[1] }, + .bs = bs, + .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2, + }; + c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); +} + +static inline void splat_intraref(const Dav1dContext *const c, + Dav1dTileContext *const t, + const enum BlockSize bs, + const int bw4, const int bh4) +{ + const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { + .ref.ref = { 0, -1 }, + .mv.mv[0].n = INVALID_MV, + .bs = bs, + .mf = 0, + }; + c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); +} + static int decode_b(Dav1dTileContext *const t, const enum BlockLevel bl, const enum BlockSize bs, @@ -1294,9 +1356,8 @@ static int decode_b(Dav1dTileContext *const t, } } } - if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { - splat_intraref(&t->rt, t->by, t->bx, bs); - } + if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) + splat_intraref(f->c, t, bs, bw4, bh4); } else if (IS_KEY_OR_INTRA(f->frame_hdr)) { // intra block copy refmvs_candidate mvstack[8]; @@ -1392,7 +1453,7 @@ static int decode_b(Dav1dTileContext *const t, if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; } - splat_intrabc_mv(&t->rt, t->by, t->bx, bs, b->mv[0]); + splat_intrabc_mv(f->c, t, bs, b, bw4, bh4); #define set_ctx(type, dir, diridx, off, mul, rep_macro) \ rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \ @@ -1934,14 +1995,10 @@ static int decode_b(Dav1dTileContext *const t, } // context updates - if (is_comp) { - splat_tworef_mv(&t->rt, t->by, t->bx, bs, b->inter_mode, - (refmvs_refpair) { .ref = { b->ref[0], b->ref[1] }}, - (refmvs_mvpair) { .mv = { [0] = b->mv[0], [1] = b->mv[1] }}); - } else { - splat_oneref_mv(&t->rt, t->by, t->bx, bs, b->inter_mode, - b->ref[0], b->mv[0], b->interintra_type); - } + if (is_comp) + splat_tworef_mv(f->c, t, bs, b, bw4, bh4); + else + splat_oneref_mv(f->c, t, bs, b, bw4, bh4); #define set_ctx(type, dir, diridx, off, mul, rep_macro) \ rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \ diff --git a/src/internal.h b/src/internal.h index fb84422..94cf923 100644 --- a/src/internal.h +++ b/src/internal.h @@ -134,6 +134,7 @@ struct Dav1dContext { CdfThreadContext cdf[8]; Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */]; + Dav1dRefmvsDSPContext refmvs_dsp; // tree to keep track of which edges are available struct { @@ -266,6 +266,7 @@ COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) { f->frame_thread.td.inited = 1; } } + dav1d_refmvs_dsp_init(&c->refmvs_dsp); // intra edge tree c->intra_edge.root[BL_128X128] = &c->intra_edge.branch_sb128[0].node; diff --git a/src/refmvs.c b/src/refmvs.c index 09f18db..115cb94 100644 --- a/src/refmvs.c +++ b/src/refmvs.c @@ -35,6 +35,7 @@ #include "common/intops.h" #include "src/env.h" +#include "src/mem.h" #include "src/refmvs.h" static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt, @@ -817,16 +818,16 @@ int dav1d_refmvs_init_frame(refmvs_frame *const rf, const ptrdiff_t r_stride = ((frm_hdr->width[0] + 127) & ~127) >> 2; const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1; if (r_stride != rf->r_stride || n_tile_rows != rf->n_tile_rows) { - if (rf->r) free(rf->r); - rf->r = malloc(sizeof(*rf->r) * 35 * r_stride * n_tile_rows); + if (rf->r) dav1d_freep_aligned(&rf->r); + rf->r = dav1d_alloc_aligned(sizeof(*rf->r) * 35 * r_stride * n_tile_rows, 64); if (!rf->r) return DAV1D_ERR(ENOMEM); rf->r_stride = r_stride; } const ptrdiff_t rp_stride = r_stride >> 1; if (rp_stride != rf->rp_stride || n_tile_rows != rf->n_tile_rows) { - if (rf->rp_proj) free(rf->rp_proj); - rf->rp_proj = malloc(sizeof(*rf->rp_proj) * 16 * rp_stride * n_tile_rows); + if (rf->rp_proj) dav1d_freep_aligned(&rf->rp_proj); + rf->rp_proj = dav1d_alloc_aligned(sizeof(*rf->rp_proj) * 16 * rp_stride * n_tile_rows, 64); if (!rf->rp_proj) return DAV1D_ERR(ENOMEM); rf->rp_stride = rp_stride; } @@ -902,6 +903,21 @@ void dav1d_refmvs_init(refmvs_frame *const rf) { } void dav1d_refmvs_clear(refmvs_frame *const rf) { - if (rf->r) free(rf->r); - if (rf->rp_proj) free(rf->rp_proj); + if (rf->r) dav1d_freep_aligned(&rf->r); + if (rf->rp_proj) dav1d_freep_aligned(&rf->rp_proj); +} + +static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv, + const int bx4, const int bw4, int bh4) +{ + do { + refmvs_block *const r = *rr++ + bx4; + for (int x = 0; x < bw4; x++) + r[x] = *rmv; + } while (--bh4); +} + +COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c) +{ + c->splat_mv = splat_mv_c; } diff --git a/src/refmvs.h b/src/refmvs.h index 976d209..92208d0 100644 --- a/src/refmvs.h +++ b/src/refmvs.h @@ -35,7 +35,6 @@ #include "common/intops.h" #include "src/intra_edge.h" -#include "src/levels.h" #include "src/tables.h" #define INVALID_MV 0x80008000 @@ -97,6 +96,14 @@ typedef struct refmvs_candidate { int weight; } refmvs_candidate; +#define decl_splat_mv_fn(name) \ +void (name)(refmvs_block **rr, const refmvs_block *rmv, int bx4, int bw4, int bh4) +typedef decl_splat_mv_fn(*splat_mv_fn); + +typedef struct Dav1dRefmvsDSPContext { + splat_mv_fn splat_mv; +} Dav1dRefmvsDSPContext; + // call once per frame thread void dav1d_refmvs_init(refmvs_frame *rf); void dav1d_refmvs_clear(refmvs_frame *rf); @@ -137,97 +144,6 @@ void dav1d_refmvs_find(const refmvs_tile *rt, int *ctx, const refmvs_refpair ref, enum BlockSize bs, enum EdgeFlags edge_flags, int by4, int bx4); -static inline void splat_oneref_mv(refmvs_tile *const rt, - const int by4, const int bx4, - const enum BlockSize bs, - const enum InterPredMode mode, - const int ref, const mv mv, - const int is_interintra) -{ - const int bw4 = dav1d_block_dimensions[bs][0]; - int bh4 = dav1d_block_dimensions[bs][1]; - refmvs_block **rr = &rt->r[(by4 & 31) + 5]; - - const refmvs_block tmpl = (refmvs_block) { - .ref.ref = { ref + 1, is_interintra ? 0 : -1 }, - .mv.mv[0] = mv, - .bs = bs, - .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2), - }; - do { - refmvs_block *r = *rr++ + bx4; - for (int x = 0; x < bw4; x++) - r[x] = tmpl; - } while (--bh4); -} - -static inline void splat_intrabc_mv(refmvs_tile *const rt, - const int by4, const int bx4, - const enum BlockSize bs, const mv mv) -{ - const int bw4 = dav1d_block_dimensions[bs][0]; - int bh4 = dav1d_block_dimensions[bs][1]; - refmvs_block **rr = &rt->r[(by4 & 31) + 5]; - - const refmvs_block tmpl = (refmvs_block) { - .ref.ref = { 0, -1 }, - .mv.mv[0] = mv, - .bs = bs, - .mf = 0, - }; - do { - refmvs_block *r = *rr++ + bx4; - for (int x = 0; x < bw4; x++) { - r[x] = tmpl; - } - } while (--bh4); -} - -static inline void splat_tworef_mv(refmvs_tile *const rt, - const int by4, const int bx4, - const enum BlockSize bs, - const enum CompInterPredMode mode, - const refmvs_refpair ref, - const refmvs_mvpair mv) -{ - const int bw4 = dav1d_block_dimensions[bs][0]; - int bh4 = dav1d_block_dimensions[bs][1]; - refmvs_block **rr = &rt->r[(by4 & 31) + 5]; - - assert(bw4 >= 2 && bh4 >= 2); - const refmvs_block tmpl = (refmvs_block) { - .ref.pair = ref.pair + 0x0101, - .mv = mv, - .bs = bs, - .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2, - }; - do { - refmvs_block *r = *rr++ + bx4; - for (int x = 0; x < bw4; x++) - r[x] = tmpl; - } while (--bh4); -} - -static inline void splat_intraref(refmvs_tile *const rt, - const int by4, const int bx4, - const enum BlockSize bs) -{ - const int bw4 = dav1d_block_dimensions[bs][0]; - int bh4 = dav1d_block_dimensions[bs][1]; - refmvs_block **rr = &rt->r[(by4 & 31) + 5]; - - const refmvs_block tmpl = (refmvs_block) { - .ref.ref = { 0, -1 }, - .mv.mv[0].n = INVALID_MV, - .bs = bs, - .mf = 0, - }; - do { - refmvs_block *r = *rr++ + bx4; - for (int x = 0; x < bw4; x++) { - r[x] = tmpl; - } - } while (--bh4); -} +void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *dsp); #endif /* DAV1D_SRC_REF_MVS_H */ |