Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2017-06-27 18:42:58 +0300
committer: James Almer <jamrial@gmail.com> 2017-06-27 19:17:23 +0300
commit: 0daa1cf0731830288b8cc875ca1ee641cfe422b2 (patch)
tree: 5b9c89c54ca905d2e3aee7e50f8ab05e197537c7 /libavfilter
parent: fa50d9360ba36ba2ee8f85f2c59e8d6af20e833a (diff)
x86/vf_blend: optimize difference and negation functions
Process more pixels per loop.

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/vf_blend.asm | 40
1 file changed, 24 insertions, 16 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 25f6f5affc..d5e512e6e0 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4
BLEND_END
%macro BLEND_ABS 0
-BLEND_INIT difference, 3
+BLEND_INIT difference, 5
pxor m2, m2
.nextrow:
mov xq, widthq
.loop:
- movh m0, [topq + xq]
- movh m1, [bottomq + xq]
+ movu m0, [topq + xq]
+ movu m1, [bottomq + xq]
+ punpckhbw m3, m0, m2
punpcklbw m0, m2
+ punpckhbw m4, m1, m2
punpcklbw m1, m2
psubw m0, m1
+ psubw m3, m4
ABS1 m0, m1
- packuswb m0, m0
- movh [dstq + xq], m0
- add xq, mmsize / 2
+ ABS1 m3, m4
+ packuswb m0, m3
+ mova [dstq + xq], m0
+ add xq, mmsize
jl .loop
BLEND_END
@@ -311,26 +315,30 @@ BLEND_INIT extremity, 8
jl .loop
BLEND_END
-BLEND_INIT negation, 5
+BLEND_INIT negation, 8
pxor m2, m2
mova m4, [pw_255]
.nextrow:
mov xq, widthq
.loop:
- movh m0, [topq + xq]
- movh m1, [bottomq + xq]
+ movu m0, [topq + xq]
+ movu m1, [bottomq + xq]
+ punpckhbw m5, m0, m2
punpcklbw m0, m2
+ punpckhbw m6, m1, m2
punpcklbw m1, m2
- mova m3, m4
- psubw m3, m0
+ psubw m3, m4, m0
+ psubw m7, m4, m5
psubw m3, m1
+ psubw m7, m6
ABS1 m3, m1
- mova m0, m4
- psubw m0, m3
- packuswb m0, m0
- movh [dstq + xq], m0
- add xq, mmsize / 2
+ ABS1 m7, m1
+ psubw m0, m4, m3
+ psubw m1, m4, m7
+ packuswb m0, m1
+ mova [dstq + xq], m0
+ add xq, mmsize
jl .loop
BLEND_END
%endmacro