Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Henrik Gramner <gramner@twoorioles.com> 2018-11-09 22:18:18 +0300
committer Henrik Gramner <gramner@twoorioles.com> 2018-11-10 04:40:44 +0300
commit 58fc51659634b48026da97eced714d214c97857a (patch)
tree c02087a02355c50140e627f44fab5441a0df8356 /src/mc_tmpl.c
parent 8b8e9fe85f6875a86ed66726e8964450a318cdc6 (diff)
Split MC blend
The mstride == 0, mstride == 1, and mstride == w cases are very different from each other, and splitting them into separate functions makes it easier to optimize them. Also add some further optimizations to the AVX2 asm that became possible after this change.
Diffstat (limited to 'src/mc_tmpl.c')
-rw-r--r--src/mc_tmpl.c45
1 files changed, 37 insertions, 8 deletions
diff --git a/src/mc_tmpl.c b/src/mc_tmpl.c
index c43745e..cef6972 100644
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -373,19 +373,46 @@ static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
} while (--h);
}
-static void blend_c(pixel *dst, const ptrdiff_t dst_stride,
- const pixel *tmp, const int w, const int h,
- const uint8_t *mask, const ptrdiff_t m_stride)
+#define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6)
+static NOINLINE void
+blend_internal_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
+ const int w, int h, const uint8_t *mask,
+ const ptrdiff_t mask_stride)
{
- for (int y = 0; y < h; y++) {
+ do {
for (int x = 0; x < w; x++) {
-#define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6)
- dst[x] = blend_px(dst[x], tmp[x], mask[m_stride == 1 ? 0 : x]);
+ dst[x] = blend_px(dst[x], tmp[x], mask[x]);
}
dst += PXSTRIDE(dst_stride);
tmp += w;
- mask += m_stride;
- }
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
+ const int w, const int h, const uint8_t *mask)
+{
+ blend_internal_c(dst, dst_stride, tmp, w, h, mask, w);
+}
+
+static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
+ const int w, const int h)
+{
+ blend_internal_c(dst, dst_stride, tmp, w, h, &dav1d_obmc_masks[w], 0);
+}
+
+static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
+ const int w, int h)
+{
+ const uint8_t *mask = &dav1d_obmc_masks[h];
+ do {
+ const int m = *mask++;
+ for (int x = 0; x < w; x++) {
+ dst[x] = blend_px(dst[x], tmp[x], m);
+ }
+ dst += PXSTRIDE(dst_stride);
+ tmp += w;
+ } while (--h);
}
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
@@ -591,6 +618,8 @@ void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
c->w_avg = w_avg_c;
c->mask = mask_c;
c->blend = blend_c;
+ c->blend_v = blend_v_c;
+ c->blend_h = blend_h_c;
c->w_mask[0] = w_mask_444_c;
c->w_mask[1] = w_mask_422_c;
c->w_mask[2] = w_mask_420_c;