Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin Vignali <martin.vignali@gmail.com>, 2017-11-21 10:34:20 +0300
committer: Martin Vignali <martin.vignali@gmail.com>, 2017-11-21 11:00:42 +0300
commit: b5ebe38443542c4d6ca285026670512da482d8e5 (patch)
tree: b066a44b00eef45a925ba8434c7a0aee121a0a92 /libavcodec/x86
parent: 48b7c45b0c6fa8f48eaa265f60bc672489ae97ee (diff)
avcodec/utvideodsp : add avx2 version for the dsp
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/utvideodsp.asm 20
-rw-r--r-- libavcodec/x86/utvideodsp_init.c 11
2 files changed, 31 insertions, 0 deletions
diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
index 55ef127ccb..b67a509dda 100644
--- a/libavcodec/x86/utvideodsp.asm
+++ b/libavcodec/x86/utvideodsp.asm
@@ -1,6 +1,7 @@
;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
+;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
@@ -45,7 +46,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
+%if mmsize == 32
+ vbroadcasti128 m3, [pb_128]
+%else
mova m3, [pb_128]
+%endif
.nextrow:
mov xq, wq
@@ -72,6 +77,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
INIT_XMM sse2
RESTORE_RGB_PLANES
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RESTORE_RGB_PLANES
+%endif
+
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
shl wd, 1
@@ -81,8 +91,13 @@ cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, sr
add src_rq, wq
add src_gq, wq
add src_bq, wq
+%if mmsize == 32
+ vbroadcasti128 m3, [pw_512]
+ vbroadcasti128 m4, [pw_1023]
+%else
mova m3, [pw_512]
mova m4, [pw_1023]
+%endif
neg wq
%if ARCH_X86_64 == 0
mov wm, wq
@@ -117,3 +132,8 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
INIT_XMM sse2
RESTORE_RGB_PLANES10
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RESTORE_RGB_PLANES10
+%endif
diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c
index f8b2a9b074..2b436c6c5c 100644
--- a/libavcodec/x86/utvideodsp_init.c
+++ b/libavcodec/x86/utvideodsp_init.c
@@ -28,9 +28,16 @@
void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes_avx2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height);
+
void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_avx2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height);
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{
@@ -40,4 +47,8 @@ av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
c->restore_rgb_planes = ff_restore_rgb_planes_sse2;
c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
}
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ c->restore_rgb_planes = ff_restore_rgb_planes_avx2;
+ c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
+ }
}