Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2013-06-30 14:15:12 +0400
committer: Michael Niedermayer <michaelni@gmx.at> 2013-06-30 14:15:12 +0400
commit: 6e76e6a05a5685b904dc0d1cd610d81ffe43bbc5 (patch)
tree: 7c520597874b28e57afee76ffae5f0d1c40162e3 /libavutil/x86/lls.asm
parent: a285079bc75a6e3b9aa27910351044b79bc0c490 (diff)
parent: b545179fdff1ccfbbb9d422e4e9720cb6c6d9191 (diff)
Merge commit 'b545179fdff1ccfbbb9d422e4e9720cb6c6d9191'
* commit 'b545179fdff1ccfbbb9d422e4e9720cb6c6d9191': x86: lpc: simd av_evaluate_lls Conflicts: libavutil/x86/lls.asm Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86/lls.asm')
-rw-r--r-- libavutil/x86/lls.asm 37
1 files changed, 37 insertions, 0 deletions
diff --git a/libavutil/x86/lls.asm b/libavutil/x86/lls.asm
index 59398b4867..ae18f3a737 100644
--- a/libavutil/x86/lls.asm
+++ b/libavutil/x86/lls.asm
@@ -196,3 +196,40 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
.ret:
REP_RET
%endif
+
+INIT_XMM sse2
+cglobal evaluate_lls, 3,4,2, ctx, var, order, i
+ ; Returns sum(var[k] * coefs[k]) for k = 0..order, where coefs = ctx + LLSModel.coeff + order*MAX_VARS*8 (8-byte elements). The sum is left in the low half of m0 and, on x86-32, also pushed to st0.
+ ; Three arguments are declared as loaded because order is read right below; with only two, x86-32 (stack-passed args) would leave orderd undefined.
+ ; This function is often called on the same buffer as update_lls, but with an offset, so they can't both be aligned. var is loaded in halves rather than with movu to avoid store-forwarding stalls, since the input was initialized immediately prior to this function using scalar math.
+ ; NOTE(review): the first pair and one loop iteration always run, so for order < 3 elements up to index 3 are multiplied in; presumably the coefficients past index order are zero -- confirm against callers.
+ %define coefsq ctxq
+ mov id, orderd ; i = order, saved before order is scaled
+ imul orderd, MAX_VARS ; order *= MAX_VARS: element offset of the row selected by order
+ lea coefsq, [ctxq + LLSModel.coeff + orderq*8] ; coefs (aliases the ctx register) = start of that coefficient row
+ movsd m0, [varq] ; m0 = { var[0], var[1] }, loaded as two 8-byte halves
+ movhpd m0, [varq + 8]
+ mulpd m0, [coefsq] ; m0 = { var[0]*coefs[0], var[1]*coefs[1] }
+ lea coefsq, [coefsq + iq*8] ; rebase both pointers to element index order ...
+ lea varq, [varq + iq*8]
+ neg iq ; ... and index them with i = 2 - order, counting up towards 0
+ add iq, 2
+.loop: ; two elements per iteration, accumulated lane-wise in m0
+ movsd m1, [varq + iq*8]
+ movhpd m1, [varq + iq*8 + 8]
+ mulpd m1, [coefsq + iq*8]
+ addpd m0, m1
+ add iq, 2
+ jl .loop ; i < 0: at least two elements remain
+ jg .skip1 ; i > 0: element index order was already covered by the last pair
+ movsd m1, [varq + iq*8] ; i == 0: exactly one element (index order) is left; movsd from memory zeroes the high half
+ mulsd m1, [coefsq + iq*8]
+ addpd m0, m1 ; high half of m1 is zero, so only the low lane changes
+.skip1:
+ movhlps m1, m0 ; horizontal add: low(m0) += high(m0)
+ addsd m0, m1
+%if ARCH_X86_32
+ movsd r0m, m0 ; spill through the first argument's stack slot so the double can be returned in st0
+ fld qword r0m
+%endif
+ RET