diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2018-11-05 16:11:34 +0300 |
---|---|---|
committer | Henrik Gramner <gramner@twoorioles.com> | 2018-11-05 19:52:42 +0300 |
commit | a0692eb8ef38e3a8715aa2e56f48d791e3bc8ceb (patch) | |
tree | 84b1630109869a836473da4f9bcc7d5783e2b427 /src/mc_tmpl.c | |
parent | 4a499fd51ad6e650d067fdfd6cce07f7209c54c4 (diff) |
Reorder the mc warp filter array
This is required so that the x86 SIMD code can use pmaddubsw without overflow.
Diffstat (limited to 'src/mc_tmpl.c')
-rw-r--r-- | src/mc_tmpl.c | 24 |
1 file changed, 20 insertions, 4 deletions
diff --git a/src/mc_tmpl.c b/src/mc_tmpl.c index 4765f42..5f5c3a2 100644 --- a/src/mc_tmpl.c +++ b/src/mc_tmpl.c @@ -442,6 +442,22 @@ w_mask_fns(420, 1, 1); #undef w_mask_fns +#define FILTER_WARP(src, x, F, stride) \ + (F[0] * src[x + -3 * stride] + \ + F[4] * src[x + -2 * stride] + \ + F[1] * src[x + -1 * stride] + \ + F[5] * src[x + +0 * stride] + \ + F[2] * src[x + +1 * stride] + \ + F[6] * src[x + +2 * stride] + \ + F[3] * src[x + +3 * stride] + \ + F[7] * src[x + +4 * stride]) + +#define FILTER_WARP_RND(src, x, F, stride, sh) \ + ((FILTER_WARP(src, x, F, stride) + ((1 << sh) >> 1)) >> sh) + +#define FILTER_WARP_CLIP(src, x, F, stride, sh) \ + iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh)) + static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *src, const ptrdiff_t src_stride, const int16_t *const abcd, int mx, int my) @@ -454,7 +470,7 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, const int8_t *const filter = dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; - mid_ptr[x] = FILTER_8TAP_RND(src, x, filter, 1, 3); + mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, 3); } src += PXSTRIDE(src_stride); mid_ptr += 8; @@ -466,7 +482,7 @@ static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, const int8_t *const filter = dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; - dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, filter, 8, 11); + dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, 11); } mid_ptr += 8; dst += PXSTRIDE(dst_stride); @@ -485,7 +501,7 @@ static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride, const int8_t *const filter = dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; - mid_ptr[x] = FILTER_8TAP_RND(src, x, filter, 1, 3); + mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, 3); } src += PXSTRIDE(src_stride); mid_ptr += 8; @@ -497,7 +513,7 @@ static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride, const int8_t *const filter = dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; 
- tmp[x] = FILTER_8TAP_RND(mid_ptr, x, filter, 8, 7); + tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7); } mid_ptr += 8; tmp += tmp_stride; |