From 4b7166c9d57d3cc7ece02171338a27e129b12250 Mon Sep 17 00:00:00 2001
From: Lynne
Date: Wed, 24 Apr 2019 12:19:48 +0100
Subject: x86/opusdsp: replace loads with shuffles

Has a slight speedup.
Can't be carried over to aarch64, since it has no shufps-like instruction.

Reviewed-by: Paul B Mahol
Signed-off-by: James Almer
---
 libavcodec/x86/opusdsp.asm | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'libavcodec/x86')

diff --git a/libavcodec/x86/opusdsp.asm b/libavcodec/x86/opusdsp.asm
index 6c99821b89..f5d206a8b1 100644
--- a/libavcodec/x86/opusdsp.asm
+++ b/libavcodec/x86/opusdsp.asm
@@ -24,9 +24,6 @@ SECTION_RODATA
 
          ; 0.85..^1    0.85..^2    0.85..^3    0.85..^4
 tab_st: dd 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
-tab_x0: dd 0x0, 0x3f599a00, 0x3f599a00, 0x3f599a00
-tab_x1: dd 0x0, 0x0, 0x3f38f671, 0x3f38f671
-tab_x2: dd 0x0, 0x0, 0x0, 0x3f1d382a
 
 SECTION .text
 
@@ -45,9 +42,9 @@ cglobal opus_deemphasis, 4, 4, 8, out, in, coeff, len
 %endif
 
     movaps m4, [tab_st]
-    movaps m5, [tab_x0]
-    movaps m6, [tab_x1]
-    movaps m7, [tab_x2]
+    VBROADCASTSS m5, m4
+    shufps m6, m4, m4, q1111
+    shufps m7, m4, m4, q2222
 
 .loop:
     movaps m1, [inq] ; x0, x1, x2, x3
--
cgit v1.2.3