Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com>, 2010-07-26 17:50:59 +0400
committer: Ronald S. Bultje <rsbultje@gmail.com>, 2010-07-26 17:50:59 +0400
commit: 2208053bd3b06703632a82f7bd93c18eb5df18a1 (patch)
tree: 9cb059a5d1919dc1b4f52d0d0e3012951c3641b9 /libavcodec
parent: 065a20cb07b03ba7d57d0012dd75a57cb1d09d1e (diff)
Split pextrw macro-spaghetti into several opt-specific macros; this will make
future new optimizations (imagine an sse5) much easier. Also fix a bug where we used the direction (%2) rather than the optimization (%1) to enable this, which meant it was never actually used. Originally committed as revision 24507 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/vp8dsp.asm 79
1 files changed, 49 insertions, 30 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 0e0155b3a0..602d974e67 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -2085,44 +2085,22 @@ INNER_LOOPFILTER ssse3, h, 6, 8, 13
; 3 is a pointer to the destination's 5th line
; 4 is a pointer to the destination's 4th line
; 5/6 is -stride and +stride
-; 7 is optimization string
-%macro WRITE_8W 7
-%ifidn %7, sse4
- pextrw [%4+%5*4], %1, 0
- pextrw [%3+%5*4], %1, 1
- pextrw [%4+%5*2], %1, 2
- pextrw [%4+%5 ], %1, 3
- pextrw [%4 ], %1, 4
- pextrw [%3 ], %1, 5
- pextrw [%3+%6 ], %1, 6
- pextrw [%3+%6*2], %1, 7
-%else
+%macro WRITE_2x4W 6
movd %3, %1
-%if mmsize == 8
punpckhdq %1, %1
-%else
- psrldq %1, 4
-%endif
mov [%4+%5*4], %3w
shr %3, 16
add %4, %6
mov [%4+%5*4], %3w
movd %3, %1
-%if mmsize == 16
- psrldq %1, 4
-%endif
add %4, %5
mov [%4+%5*2], %3w
shr %3, 16
mov [%4+%5 ], %3w
movd %3, %2
-%if mmsize == 8
punpckhdq %2, %2
-%else
- psrldq %2, 4
-%endif
mov [%4 ], %3w
shr %3, 16
mov [%4+%6 ], %3w
@@ -2132,10 +2110,46 @@ INNER_LOOPFILTER ssse3, h, 6, 8, 13
mov [%4+%6 ], %3w
shr %3, 16
mov [%4+%6*2], %3w
-%if mmsize == 8
add %4, %5
-%endif
-%endif
+%endmacro
+
+%macro WRITE_8W_SSE2 5
+ movd %2, %1
+ psrldq %1, 4
+ mov [%3+%4*4], %2w
+ shr %2, 16
+ add %3, %5
+ mov [%3+%4*4], %2w
+
+ movd %2, %1
+ psrldq %1, 4
+ add %3, %4
+ mov [%3+%4*2], %2w
+ shr %2, 16
+ mov [%3+%4 ], %2w
+
+ movd %2, %1
+ psrldq %1, 4
+ mov [%3 ], %2w
+ shr %2, 16
+ mov [%3+%5 ], %2w
+
+ movd %2, %1
+ add %3, %5
+ mov [%3+%5 ], %2w
+ shr %2, 16
+ mov [%3+%5*2], %2w
+%endmacro
+
+%macro WRITE_8W_SSE4 5
+ pextrw [%3+%4*4], %1, 0
+ pextrw [%2+%4*4], %1, 1
+ pextrw [%3+%4*2], %1, 2
+ pextrw [%3+%4 ], %1, 3
+ pextrw [%3 ], %1, 4
+ pextrw [%2 ], %1, 5
+ pextrw [%2+%5 ], %1, 6
+ pextrw [%2+%5*2], %1, 7
%endmacro
%macro MBEDGE_LOOPFILTER 5
@@ -2671,17 +2685,20 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%if mmsize == 8 ; mmx/mmxext (h)
WRITE_4x2D 1, 2, 3, 4, dst_reg, dst2_reg, mstride_reg, stride_reg
add dst_reg, 4
- WRITE_8W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg, %4
+ WRITE_2x4W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg
%else ; sse2 (h)
lea dst8_reg, [dst8_reg+mstride_reg+1]
WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4
lea dst_reg, [dst2_reg+mstride_reg+4]
lea dst8_reg, [dst8_reg+mstride_reg+4]
- WRITE_8W m5, m5, dst2_reg, dst_reg, mstride_reg, stride_reg, %2
-%ifidn %2, sse4
+%ifidn %1, sse4
+ add dst2_reg, 4
+%endif
+ WRITE_8W m5, dst2_reg, dst_reg, mstride_reg, stride_reg
+%ifidn %1, sse4
lea dst2_reg, [dst8_reg+ stride_reg]
%endif
- WRITE_8W m6, m6, dst2_reg, dst8_reg, mstride_reg, stride_reg, %2
+ WRITE_8W m6, dst2_reg, dst8_reg, mstride_reg, stride_reg
%endif
%endif
@@ -2725,6 +2742,7 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
%define SPLATB_REG SPLATB_REG_SSE2
+%define WRITE_8W WRITE_8W_SSE2
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
%ifdef m8
MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
@@ -2744,6 +2762,7 @@ MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
+%define WRITE_8W WRITE_8W_SSE4
%ifdef m8
MBEDGE_LOOPFILTER sse4, h, 5, 16, 16
%else