diff options
author | Martin Storsjö <martin@martin.st> | 2019-02-13 11:53:35 +0300 |
---|---|---|
committer | Janne Grunau <janne-vlc@jannau.net> | 2019-02-14 01:32:37 +0300 |
commit | 8abcf5dc6739c85a7458985fe566ef4caf0537f8 (patch) | |
tree | 2d91e17f3c6dd408944ddf96302d90ff7977521d /src/mc_tmpl.c | |
parent | b3f0c9844be8610e23b0aa29e52f499de4eda083 (diff) |
tables: Restore the warp filter table order for architectures other than x86
This effectively reverts a0692eb8ef38 for other architectures. The
order that is beneficial for x86 SIMD is not beneficial for other
architectures.
For a NEON implementation of the warp filter, shuffling the filter
coefficients back into their natural order took 1/4 of the filter runtime.
Diffstat (limited to 'src/mc_tmpl.c')
-rw-r--r-- | src/mc_tmpl.c | 12 |
1 files changed, 12 insertions, 0 deletions
```diff
diff --git a/src/mc_tmpl.c b/src/mc_tmpl.c
index 2057db8..b1fe67a 100644
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -737,6 +737,7 @@ w_mask_fns(420, 1, 1);
 
 #undef w_mask_fns
 
+#if ARCH_X86
 #define FILTER_WARP(src, x, F, stride) \
     (F[0] * src[x + -3 * stride] + \
      F[4] * src[x + -2 * stride] + \
@@ -746,6 +747,17 @@ w_mask_fns(420, 1, 1);
      F[6] * src[x + +2 * stride] + \
      F[3] * src[x + +3 * stride] + \
      F[7] * src[x + +4 * stride])
+#else
+#define FILTER_WARP(src, x, F, stride) \
+    (F[0] * src[x + -3 * stride] + \
+     F[1] * src[x + -2 * stride] + \
+     F[2] * src[x + -1 * stride] + \
+     F[3] * src[x + +0 * stride] + \
+     F[4] * src[x + +1 * stride] + \
+     F[5] * src[x + +2 * stride] + \
+     F[6] * src[x + +3 * stride] + \
+     F[7] * src[x + +4 * stride])
+#endif
 
 #define FILTER_WARP_RND(src, x, F, stride, sh) \
     ((FILTER_WARP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))
```