From a029d6892c5c39f4cda629d4a3b676ef2e8288f6 Mon Sep 17 00:00:00 2001 From: Henrik Gramner Date: Mon, 18 Jul 2022 15:17:00 +0200 Subject: Adjust inlining attributes on some functions The code size increase of inlining every call to certain functions isn't a worthwhile trade-off, and most compilers actually end up overriding those particular inlining hints anyway. In some cases it's also better to split the function into separate luma and chroma functions. --- src/decode.c | 56 ++++++++++++++++++++++++++++++++++++-------------------- src/lf_mask.c | 53 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 67 insertions(+), 42 deletions(-) diff --git a/src/decode.c b/src/decode.c index b44f157..8a1ef7c 100644 --- a/src/decode.c +++ b/src/decode.c @@ -749,9 +749,9 @@ static inline void splat_intraref(const Dav1dContext *const c, c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); } -static inline void mc_lowest_px(int *const dst, const int by4, const int bh4, - const int mvy, const int ss_ver, - const struct ScalableMotionParams *const smp) +static void mc_lowest_px(int *const dst, const int by4, const int bh4, + const int mvy, const int ss_ver, + const struct ScalableMotionParams *const smp) { const int v_mul = 4 >> ss_ver; if (!smp->scale) { @@ -766,14 +766,11 @@ static inline void mc_lowest_px(int *const dst, const int by4, const int bh4, } } -static inline void affine_lowest_px(Dav1dTaskContext *const t, - int *const dst, const int is_chroma, - const uint8_t *const b_dim, - const Dav1dWarpedMotionParams *const wmp) +static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst, + const uint8_t *const b_dim, + const Dav1dWarpedMotionParams *const wmp, + const int ss_ver, const int ss_hor) { - const Dav1dFrameContext *const f = t->f; - const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; - const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; const int 
h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver; assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7)); const int32_t *const mat = wmp->matrix; @@ -792,6 +789,25 @@ static inline void affine_lowest_px(Dav1dTaskContext *const t, } } +static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst, + const uint8_t *const b_dim, + const Dav1dWarpedMotionParams *const wmp) +{ + affine_lowest_px(t, dst, b_dim, wmp, 0, 0); +} + +static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst, + const uint8_t *const b_dim, + const Dav1dWarpedMotionParams *const wmp) +{ + const Dav1dFrameContext *const f = t->f; + assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400); + if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444) + affine_lowest_px_luma(t, dst, b_dim, wmp); + else + affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1); +} + static void obmc_lowest_px(Dav1dTaskContext *const t, int (*const dst)[2], const int is_chroma, const uint8_t *const b_dim, @@ -2150,9 +2166,9 @@ static int decode_b(Dav1dTaskContext *const t, ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) { - affine_lowest_px(t, &lowest_px[b->ref[0]][0], 0, b_dim, - b->motion_mode == MM_WARP ? &t->warpmv : - &f->frame_hdr->gmv[b->ref[0]]); + affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim, + b->motion_mode == MM_WARP ? &t->warpmv : + &f->frame_hdr->gmv[b->ref[0]]); } else { mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y, 0, &f->svc[b->ref[0]][1]); @@ -2203,9 +2219,9 @@ static int decode_b(Dav1dTaskContext *const t, ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) { - affine_lowest_px(t, &lowest_px[b->ref[0]][1], 1, b_dim, - b->motion_mode == MM_WARP ? 
&t->warpmv : - &f->frame_hdr->gmv[b->ref[0]]); + affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim, + b->motion_mode == MM_WARP ? &t->warpmv : + &f->frame_hdr->gmv[b->ref[0]]); } else { mc_lowest_px(&lowest_px[b->ref[0]][1], t->by & ~ss_ver, bh4 << (bh4 == ss_ver), @@ -2220,8 +2236,8 @@ static int decode_b(Dav1dTaskContext *const t, // y for (int i = 0; i < 2; i++) { if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) { - affine_lowest_px(t, &lowest_px[b->ref[i]][0], 0, b_dim, - &f->frame_hdr->gmv[b->ref[i]]); + affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim, + &f->frame_hdr->gmv[b->ref[i]]); } else { mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4, b->mv[i].y, 0, &f->svc[b->ref[i]][1]); @@ -2233,8 +2249,8 @@ static int decode_b(Dav1dTaskContext *const t, if (b->inter_mode == GLOBALMV_GLOBALMV && imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]]) { - affine_lowest_px(t, &lowest_px[b->ref[i]][1], 1, b_dim, - &f->frame_hdr->gmv[b->ref[i]]); + affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim, + &f->frame_hdr->gmv[b->ref[i]]); } else { mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4, b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]); diff --git a/src/lf_mask.c b/src/lf_mask.c index 411c884..91fe4a0 100644 --- a/src/lf_mask.c +++ b/src/lf_mask.c @@ -212,13 +212,13 @@ static inline void mask_edges_intra(uint16_t (*const masks)[32][3][2], #undef set_ctx } -static inline void mask_edges_chroma(uint16_t (*const masks)[32][2][2], - const int cby4, const int cbx4, - const int cw4, const int ch4, - const int skip_inter, - const enum RectTxfmSize tx, - uint8_t *const a, uint8_t *const l, - const int ss_hor, const int ss_ver) +static void mask_edges_chroma(uint16_t (*const masks)[32][2][2], + const int cby4, const int cbx4, + const int cw4, const int ch4, + const int skip_inter, + const enum RectTxfmSize tx, + uint8_t *const a, uint8_t *const l, + const int ss_hor, const int ss_ver) { const TxfmInfo *const t_dim = 
&dav1d_txfm_dimensions[tx]; const int twl4 = t_dim->lw, thl4 = t_dim->lh; @@ -424,16 +424,14 @@ void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) { lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff; } -static inline void calc_lf_value(uint8_t (*const lflvl_values)[2], - const int is_chroma, const int base_lvl, - const int lf_delta, const int seg_delta, - const Dav1dLoopfilterModeRefDeltas *const mr_delta) +static void calc_lf_value(uint8_t (*const lflvl_values)[2], + const int base_lvl, const int lf_delta, + const int seg_delta, + const Dav1dLoopfilterModeRefDeltas *const mr_delta) { const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63); - if (!base_lvl && is_chroma) { - memset(lflvl_values, 0, 8 * 2); - } else if (!mr_delta) { + if (!mr_delta) { memset(lflvl_values, base, 8 * 2); } else { const int sh = base >= 32; @@ -449,6 +447,17 @@ static inline void calc_lf_value(uint8_t (*const lflvl_values)[2], } } +static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2], + const int base_lvl, const int lf_delta, + const int seg_delta, + const Dav1dLoopfilterModeRefDeltas *const mr_delta) +{ + if (!base_lvl) + memset(lflvl_values, 0, 8 * 2); + else + calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta); +} + void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2], const Dav1dFrameHeader *const hdr, const int8_t lf_delta[4]) @@ -467,16 +476,16 @@ void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2], const Dav1dSegmentationData *const segd = hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL; - calc_lf_value(lflvl_values[s][0], 0, hdr->loopfilter.level_y[0], + calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0], lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas); - calc_lf_value(lflvl_values[s][1], 0, hdr->loopfilter.level_y[1], + calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1], lf_delta[hdr->delta.lf.multi ? 1 : 0], segd ? 
segd->delta_lf_y_h : 0, mr_deltas); - calc_lf_value(lflvl_values[s][2], 1, hdr->loopfilter.level_u, - lf_delta[hdr->delta.lf.multi ? 2 : 0], - segd ? segd->delta_lf_u : 0, mr_deltas); - calc_lf_value(lflvl_values[s][3], 1, hdr->loopfilter.level_v, - lf_delta[hdr->delta.lf.multi ? 3 : 0], - segd ? segd->delta_lf_v : 0, mr_deltas); + calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u, + lf_delta[hdr->delta.lf.multi ? 2 : 0], + segd ? segd->delta_lf_u : 0, mr_deltas); + calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v, + lf_delta[hdr->delta.lf.multi ? 3 : 0], + segd ? segd->delta_lf_v : 0, mr_deltas); } } -- cgit v1.2.3