Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2015-12-24 05:51:45 +0300
committer James Almer <jamrial@gmail.com> 2015-12-24 19:05:21 +0300
commit 0988c68cf9cd39680b1e49cc2873ec38c8199905 (patch)
tree 261a6301e9f8ed2d61e6c791f3a2c94a0b0f5a75
parent ce4c85de6a402cdc8d5320184eb3694346f58909 (diff)
x86/vf_blend: simplify using macros
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavfilter/x86/vf_blend.asm 276
-rw-r--r-- libavfilter/x86/vf_blend_init.c 102
2 files changed, 53 insertions, 325 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index f0fb2ea4b9..d079b79b12 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -33,113 +33,26 @@ pb_255: times 16 db 255
SECTION .text
-INIT_XMM sse2
-cglobal blend_xor, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- pxor m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_or, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- por m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_and, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- pand m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_addition, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+%macro BLEND_INIT 2
+cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq
add bottomq, widthq
add dstq, widthq
sub endq, startq
neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- paddusb m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
+%endmacro
+%macro BLEND_END 0
add topq, top_linesizeq
add bottomq, bottom_linesizeq
add dstq, dst_linesizeq
sub endd, 1
jg .nextrow
REP_RET
+%endmacro
-cglobal blend_subtract, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
+%macro BLEND_SIMPLE 2
+BLEND_INIT %1, 2
.nextrow:
mov r10q, widthq
%define x r10q
@@ -147,26 +60,25 @@ cglobal blend_subtract, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize,
.loop:
movu m0, [topq + x]
movu m1, [bottomq + x]
- psubusb m0, m1
+ p%2 m0, m1
mova [dstq + x], m0
add r10q, mmsize
jl .loop
+BLEND_END
+%endmacro
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_difference128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+INIT_XMM sse2
+BLEND_SIMPLE xor, xor
+BLEND_SIMPLE or, or
+BLEND_SIMPLE and, and
+BLEND_SIMPLE addition, addusb
+BLEND_SIMPLE subtract, subusb
+BLEND_SIMPLE darken, minub
+BLEND_SIMPLE lighten, maxub
+
+BLEND_INIT difference128, 4
pxor m2, m2
mova m3, [pw_128]
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -182,21 +94,10 @@ cglobal blend_difference128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_line
movh [dstq + x], m0
add r10q, mmsize / 2
jl .loop
+BLEND_END
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_average, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT average, 3
pxor m2, m2
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -212,22 +113,11 @@ cglobal blend_average, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize,
movh [dstq + x], m0
add r10q, mmsize / 2
jl .loop
+BLEND_END
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_addition128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT addition128, 4
pxor m2, m2
mova m3, [pw_128]
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -243,48 +133,12 @@ cglobal blend_addition128, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesi
movh [dstq + x], m0
add r10q, mmsize / 2
jl .loop
+BLEND_END
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_darken, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- pminub m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_hardmix, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT hardmix, 5
mova m2, [pb_255]
mova m3, [pb_128]
mova m4, [pb_127]
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -299,46 +153,10 @@ cglobal blend_hardmix, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize,
mova [dstq + x], m1
add r10q, mmsize
jl .loop
+BLEND_END
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_lighten, 9, 11, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
- neg widthq
-.nextrow:
- mov r10q, widthq
- %define x r10q
-
- .loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
- pmaxub m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
- jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_phoenix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT phoenix, 4
mova m3, [pb_255]
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -355,22 +173,11 @@ cglobal blend_phoenix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize,
mova [dstq + x], m2
add r10q, mmsize
jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
+BLEND_END
INIT_XMM ssse3
-cglobal blend_difference, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT difference, 3
pxor m2, m2
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -386,22 +193,11 @@ cglobal blend_difference, 9, 11, 3, 0, top, top_linesize, bottom, bottom_linesiz
movh [dstq + x], m0
add r10q, mmsize / 2
jl .loop
+BLEND_END
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
-
-cglobal blend_negation, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
- add topq, widthq
- add bottomq, widthq
- add dstq, widthq
- sub endq, startq
+BLEND_INIT negation, 5
pxor m2, m2
mova m4, [pw_255]
- neg widthq
.nextrow:
mov r10q, widthq
%define x r10q
@@ -421,12 +217,6 @@ cglobal blend_negation, 9, 11, 5, 0, top, top_linesize, bottom, bottom_linesize,
movh [dstq + x], m0
add r10q, mmsize / 2
jl .loop
-
- add topq, top_linesizeq
- add bottomq, bottom_linesizeq
- add dstq, dst_linesizeq
- sub endd, 1
- jg .nextrow
-REP_RET
+BLEND_END
%endif
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 454d03030d..82b88487d8 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -23,90 +23,28 @@
#include "libavutil/x86/cpu.h"
#include "libavfilter/blend.h"
-void ff_blend_addition_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_addition128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_average_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_and_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_darken_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_xor_sse2(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_difference_ssse3(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
- struct FilterParams *param, double *values);
-
-void ff_blend_negation_ssse3(const uint8_t *top, ptrdiff_t top_linesize,
- const uint8_t *bottom, ptrdiff_t bottom_linesize,
- uint8_t *dst, ptrdiff_t dst_linesize,
- ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+#define BLEND_FUNC(name, opt) \
+void ff_blend_##name##_##opt(const uint8_t *top, ptrdiff_t top_linesize, \
+ const uint8_t *bottom, ptrdiff_t bottom_linesize, \
+ uint8_t *dst, ptrdiff_t dst_linesize, \
+ ptrdiff_t width, ptrdiff_t start, ptrdiff_t end, \
struct FilterParams *param, double *values);
+BLEND_FUNC(addition, sse2)
+BLEND_FUNC(addition128, sse2)
+BLEND_FUNC(average, sse2)
+BLEND_FUNC(and, sse2)
+BLEND_FUNC(darken, sse2)
+BLEND_FUNC(difference128, sse2)
+BLEND_FUNC(hardmix, sse2)
+BLEND_FUNC(lighten, sse2)
+BLEND_FUNC(or, sse2)
+BLEND_FUNC(phoenix, sse2)
+BLEND_FUNC(subtract, sse2)
+BLEND_FUNC(xor, sse2)
+BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(negation, ssse3)
+
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
int cpu_flags = av_get_cpu_flags();