diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2020-01-29 16:17:11 +0300 |
---|---|---|
committer | Henrik Gramner <gramner@twoorioles.com> | 2020-02-01 16:17:56 +0300 |
commit | bb178db019da21d62fae8c255675efbd84327195 (patch) | |
tree | 9eef133c2a24b8612947dc92ef990d0f8f2069e7 /src/cdef_apply_tmpl.c | |
parent | dccc21b742f328060c124463c5d05f76472d758c (diff) |
Rework the CDEF top edge handling
Avoids some pointer chasing and simplifies the DSP code, at the cost
of making the initialization a little bit more complicated.
Also reduces memory usage by a small amount due to properly sizing
the buffers instead of always allocating enough space for 4:4:4.
Diffstat (limited to 'src/cdef_apply_tmpl.c')
-rw-r--r-- | src/cdef_apply_tmpl.c | 63 |
1 files changed, 27 insertions, 36 deletions
diff --git a/src/cdef_apply_tmpl.c b/src/cdef_apply_tmpl.c index 6fdd1f4..3c41ed5 100644 --- a/src/cdef_apply_tmpl.c +++ b/src/cdef_apply_tmpl.c @@ -39,24 +39,28 @@ enum Backup2x8Flags { BACKUP_2X8_UV = 1 << 1, }; -static void backup2lines(pixel *const dst[3][2], - /*const*/ pixel *const src[3], - const ptrdiff_t src_stride[2], int y_off, int w, +static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3], + const ptrdiff_t stride[2], const enum Dav1dPixelLayout layout) { - pixel_copy(dst[0][0], src[0] + (y_off - 2) * PXSTRIDE(src_stride[0]), w); - pixel_copy(dst[0][1], src[0] + (y_off - 1) * PXSTRIDE(src_stride[0]), w); - - if (layout == DAV1D_PIXEL_LAYOUT_I400) return; - const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; - const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; - - w >>= ss_hor; - y_off >>= ss_ver; - pixel_copy(dst[1][0], src[1] + (y_off - 2) * PXSTRIDE(src_stride[1]), w); - pixel_copy(dst[1][1], src[1] + (y_off - 1) * PXSTRIDE(src_stride[1]), w); - pixel_copy(dst[2][0], src[2] + (y_off - 2) * PXSTRIDE(src_stride[1]), w); - pixel_copy(dst[2][1], src[2] + (y_off - 1) * PXSTRIDE(src_stride[1]), w); + const ptrdiff_t y_stride = PXSTRIDE(stride[0]); + if (y_stride < 0) + pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride); + else + pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride); + + if (layout != DAV1D_PIXEL_LAYOUT_I400) { + const ptrdiff_t uv_stride = PXSTRIDE(stride[1]); + if (uv_stride < 0) { + const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7; + pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride); + pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride); + } else { + const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6; + pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride); + pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride); + } + } } static void backup2x8(pixel dst[3][8][2], @@ -113,11 +117,8 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, const int by_idx = by & 30; if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM; - if (edges & CDEF_HAVE_BOTTOM) { - // backup pre-filter data for next iteration - backup2lines(f->lf.cdef_line[!tf], ptrs, f->cur.stride, - 8, f->bw * 4, layout); - } + if (edges & CDEF_HAVE_BOTTOM) // backup pre-filter data for next iteration + backup2lines(f->lf.cdef_line[!tf], ptrs, f->cur.stride, layout); pixel lr_bak[2 /* idx */][3 /* plane */][8 /* y */][2 /* x */]; pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] }; @@ -125,7 +126,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, edges |= CDEF_HAVE_RIGHT; enum Backup2x8Flags prev_flag = 0; for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) { - const int sb128x = sbx >>1; + const int sb128x = sbx >> 1; const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx]; if (cdef_idx == -1 || @@ -188,18 +189,12 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); if (adj_y_pri_lvl || y_sec_lvl) dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], - (pixel *const [2]) { - &f->lf.cdef_line[tf][0][0][bx * 4], - &f->lf.cdef_line[tf][0][1][bx * 4], - }, + &f->lf.cdef_line[tf][0][bx * 4], adj_y_pri_lvl, y_sec_lvl, dir, damping, edges HIGHBD_CALL_SUFFIX); } else if (y_sec_lvl) dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], - (pixel *const [2]) { - &f->lf.cdef_line[tf][0][0][bx * 4], - &f->lf.cdef_line[tf][0][1][bx * 4], - }, + &f->lf.cdef_line[tf][0][bx * 4], 0, y_sec_lvl, 0, damping, edges HIGHBD_CALL_SUFFIX); if (uv_lvl) { @@ -207,12 +202,8 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, const int uvdir = uv_pri_lvl ? layout == DAV1D_PIXEL_LAYOUT_I422 ? ((const uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir] : dir : 0; for (int pl = 1; pl <= 2; pl++) { - dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], - lr_bak[bit][pl], - (pixel *const [2]) { - &f->lf.cdef_line[tf][pl][0][bx * 4 >> ss_hor], - &f->lf.cdef_line[tf][pl][1][bx * 4 >> ss_hor], - }, + dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], lr_bak[bit][pl], + &f->lf.cdef_line[tf][pl][bx * 4 >> ss_hor], uv_pri_lvl, uv_sec_lvl, uvdir, damping - 1, edges HIGHBD_CALL_SUFFIX); } |