diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2018-11-09 22:18:18 +0300 |
---|---|---|
committer | Henrik Gramner <gramner@twoorioles.com> | 2018-11-10 04:40:44 +0300 |
commit | 58fc51659634b48026da97eced714d214c97857a (patch) | |
tree | c02087a02355c50140e627f44fab5441a0df8356 /src/mc_tmpl.c | |
parent | 8b8e9fe85f6875a86ed66726e8964450a318cdc6 (diff) |
Split MC blend
The mstride == 0, mstride == 1, and mstride == w cases are very different
from each other, and splitting them into separate functions makes it easier
to optimize them.
Also add some further optimizations to the AVX2 asm that became possible
after this change.
Diffstat (limited to 'src/mc_tmpl.c')
-rw-r--r-- | src/mc_tmpl.c | 45 |
1 files changed, 37 insertions, 8 deletions
diff --git a/src/mc_tmpl.c b/src/mc_tmpl.c index c43745e..cef6972 100644 --- a/src/mc_tmpl.c +++ b/src/mc_tmpl.c @@ -373,19 +373,46 @@ static void mask_c(pixel *dst, const ptrdiff_t dst_stride, } while (--h); } -static void blend_c(pixel *dst, const ptrdiff_t dst_stride, - const pixel *tmp, const int w, const int h, - const uint8_t *mask, const ptrdiff_t m_stride) +#define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6) +static NOINLINE void +blend_internal_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, + const int w, int h, const uint8_t *mask, + const ptrdiff_t mask_stride) { - for (int y = 0; y < h; y++) { + do { for (int x = 0; x < w; x++) { -#define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6) - dst[x] = blend_px(dst[x], tmp[x], mask[m_stride == 1 ? 0 : x]); + dst[x] = blend_px(dst[x], tmp[x], mask[x]); } dst += PXSTRIDE(dst_stride); tmp += w; - mask += m_stride; - } + mask += mask_stride; + } while (--h); +} + +static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, + const int w, const int h, const uint8_t *mask) +{ + blend_internal_c(dst, dst_stride, tmp, w, h, mask, w); +} + +static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, + const int w, const int h) +{ + blend_internal_c(dst, dst_stride, tmp, w, h, &dav1d_obmc_masks[w], 0); +} + +static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, + const int w, int h) +{ + const uint8_t *mask = &dav1d_obmc_masks[h]; + do { + const int m = *mask++; + for (int x = 0; x < w; x++) { + dst[x] = blend_px(dst[x], tmp[x], m); + } + dst += PXSTRIDE(dst_stride); + tmp += w; + } while (--h); } static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, @@ -591,6 +618,8 @@ void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { c->w_avg = w_avg_c; c->mask = mask_c; c->blend = blend_c; + c->blend_v = blend_v_c; + c->blend_h = blend_h_c; c->w_mask[0] = w_mask_444_c; c->w_mask[1] = w_mask_422_c; c->w_mask[2] = 
w_mask_420_c; |