Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJun Zhao <mypopydev@gmail.com>2018-07-09 19:41:00 +0300
committerJun Zhao <jun.zhao@intel.com>2018-07-31 14:17:51 +0300
commitd36b8394f4fa95403afd194ca9b7edbd8f470076 (patch)
treeeb11d875c5433aedd6344114e5a39c6dfd542673 /libavutil/x86/pixelutils_init.c
parentb8bf7408dc189668efa3e45b418349ce3a7c816e (diff)
avutil/pixelutils: sad_32x32 sse2/avx2 optimizations.
add ff_pixelutils_sad_32x32_sse2, ff_pixelutils_sad_{a,u}_32x32_sse2, ff_pixelutils_sad_32x32_avx22, ff_pixelutils_sad_{a,u}_32x32_avx2 use perf record/report profiling, get instructions:u for avx2 sad_32x32: 72.05% pixelutils pixelutils [.] block_sad_32x32_c 18.50% pixelutils pixelutils [.] block_sad_16x16_c 4.78% pixelutils pixelutils [.] block_sad_8x8_c 2.69% pixelutils pixelutils [.] block_sad_4x4_c 0.89% pixelutils pixelutils [.] block_sad_2x2_c 0.16% pixelutils pixelutils [.] ff_pixelutils_sad_32x32_avx2 0.16% pixelutils pixelutils [.] ff_pixelutils_sad_u_32x32_avx2 0.12% pixelutils pixelutils [.] ff_pixelutils_sad_a_32x32_avx2 sse2 sad_32x32 instructions:u like: 71.86% pixelutils pixelutils [.] block_sad_32x32_c 18.42% pixelutils pixelutils [.] block_sad_16x16_c 4.81% pixelutils pixelutils [.] block_sad_8x8_c 2.68% pixelutils pixelutils [.] block_sad_4x4_c 0.88% pixelutils pixelutils [.] block_sad_2x2_c 0.29% pixelutils pixelutils [.] ff_pixelutils_sad_32x32_sse2 0.26% pixelutils pixelutils [.] ff_pixelutils_sad_u_32x32_sse2 0.23% pixelutils pixelutils [.] ff_pixelutils_sad_a_32x32_sse2 Signed-off-by: Jun Zhao <mypopydev@gmail.com>
Diffstat (limited to 'libavutil/x86/pixelutils_init.c')
-rw-r--r--libavutil/x86/pixelutils_init.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c
index c24a533aea..dd05421b50 100644
--- a/libavutil/x86/pixelutils_init.c
+++ b/libavutil/x86/pixelutils_init.c
@@ -35,6 +35,20 @@ int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
+int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
int cpu_flags = av_get_cpu_flags();
@@ -61,4 +75,20 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned
}
}
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ switch (aligned) {
+ case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned
+ case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned
+ case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned
+ }
+ }
+
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ switch (aligned) {
+ case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned
+ case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned
+ case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned
+ }
+ }
}