Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2014-03-24 02:05:17 +0400
committer Michael Niedermayer <michaelni@gmx.at> 2014-03-24 05:34:02 +0400
commit 63dbba655e7b09bd5bd09d3a8eab270152bb803f (patch)
tree c6bd91a05ab7f5d6c3fd8ea5a68f60a5f0bb2914 /libswresample/x86
parent fa25c4c400649bcc7693107d2d4b9d1fa137173e (diff)
swresample/resample: sse float linear interpolation
About two times faster

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswresample/x86')
-rw-r--r-- libswresample/x86/resample_mmx.h 35
1 files changed, 35 insertions, 0 deletions
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
index 28a317ce78..a0df6e1e1e 100644
--- a/libswresample/x86/resample_mmx.h
+++ b/libswresample/x86/resample_mmx.h
@@ -156,3 +156,38 @@ __asm__ volatile(\
"r" (((uint8_t*)filter)-len),\
"r" (dst+dst_index)\
);
+
+#define LINEAR_CORE_FLT_SSE /* SSE inner loop: two single-precision dot products of the same source samples against two coefficient arrays; the sums are stored to val and v2. Expects c, src, sample_index, filter, val and v2 in the expanding scope. */ \
+ x86_reg len= -4*c->filter_length; /* negative byte offset (4 bytes per float tap) that counts up towards zero */\
+__asm__ volatile(\
+ "xorps %%xmm0, %%xmm0 \n\t" /* xmm0 = 0: four partial sums for val */\
+ "xorps %%xmm2, %%xmm2 \n\t" /* xmm2 = 0: four partial sums for v2 */\
+ "1: \n\t" /* loop head: each iteration consumes 4 floats (16 bytes) */\
+ "movups (%3, %0), %%xmm1 \n\t" /* unaligned load of 4 source samples */\
+ "movaps %%xmm1, %%xmm3 \n\t" /* second copy of the samples for the other coefficient array */\
+ "mulps (%4, %0), %%xmm1 \n\t" /* samples * filter[]; NOTE(review): mulps with a memory operand needs a 16-byte aligned address - confirm filter alignment */\
+ "mulps (%5, %0), %%xmm3 \n\t" /* samples * (filter + c->filter_alloc)[]; same alignment requirement - confirm filter_alloc keeps it */\
+ "addps %%xmm1, %%xmm0 \n\t" /* accumulate products for val */\
+ "addps %%xmm3, %%xmm2 \n\t" /* accumulate products for v2 */\
+ "add $16, %0 \n\t" /* advance the offset by 4 floats; sets the sign flag tested below */\
+ " js 1b \n\t" /* repeat while the offset is still negative; body always runs once - assumes filter_length is a positive multiple of 4, TODO confirm */\
+ "movhlps %%xmm0, %%xmm1 \n\t" /* horizontal sum, step 1: xmm1[0..1] = xmm0[2..3] */\
+ "movhlps %%xmm2, %%xmm3 \n\t" /* same for the v2 accumulator */\
+ "addps %%xmm1, %%xmm0 \n\t" /* xmm0[0] = s0+s2, xmm0[1] = s1+s3 */\
+ "addps %%xmm3, %%xmm2 \n\t" /* same for the v2 accumulator */\
+ "movss %%xmm0, %%xmm1 \n\t" /* step 2: keep lane 0 (s0+s2) in xmm1[0] */\
+ "movss %%xmm2, %%xmm3 \n\t" /* same for the v2 accumulator */\
+ "shufps $1, %%xmm0, %%xmm0 \n\t" /* move lane 1 (s1+s3) into lane 0 */\
+ "shufps $1, %%xmm2, %%xmm2 \n\t" /* same for the v2 accumulator */\
+ "addps %%xmm1, %%xmm0 \n\t" /* lane 0 now holds the sum of all four partial sums */\
+ "addps %%xmm3, %%xmm2 \n\t" /* same for the v2 accumulator */\
+ "movss %%xmm0, %1 \n\t" /* store the low lane to val */\
+ "movss %%xmm2, %2 \n\t" /* store the low lane to v2 */\
+ : "+r" (len), /* %0: running byte offset, read and modified by the loop */\
+ "=m" (val), /* %1: first result, written directly to memory */\
+ "=m" (v2) /* %2: second result, written directly to memory */\
+ : "r" (((uint8_t*)(src+sample_index))-len), /* %3: sample pointer pre-biased by -len, so (%3,%0) starts at src+sample_index and stops when len reaches 0 */\
+ "r" (((uint8_t*)filter)-len), /* %4: first coefficient array, biased the same way */\
+ "r" (((uint8_t*)(filter+c->filter_alloc))-len) /* %5: coefficient array filter_alloc elements after filter (presumably the neighbouring phase used for linear interpolation - verify) */\
+ XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3") /* names the scratch XMM registers used above (macro defined elsewhere); NOTE(review): memory read through %3-%5 is not declared as an input and there is no "memory" clobber - confirm this is safe */\
+);