From 648b422e171d5eab18f6c6fd346e4050d717b936 Mon Sep 17 00:00:00 2001 From: gxw Date: Mon, 21 Oct 2019 15:56:47 +0800 Subject: avcodec/mips: msa optimizations for vc1dsp Performance of WMV3 decoding has improved from 3.66x to 5.23x, tested on 3A4000. Reviewed-by: Shiyou Yin Signed-off-by: Michael Niedermayer --- libavutil/mips/generic_macros_msa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'libavutil/mips') diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index c25509e483..267d4e6ca5 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -299,6 +299,7 @@ #define LD_SB4(...) LD_V4(v16i8, __VA_ARGS__) #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__) #define LD_SH4(...) LD_V4(v8i16, __VA_ARGS__) +#define LD_SW4(...) LD_V4(v4i32, __VA_ARGS__) #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \ { \ @@ -337,6 +338,7 @@ #define LD_SB8(...) LD_V8(v16i8, __VA_ARGS__) #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__) #define LD_SH8(...) LD_V8(v8i16, __VA_ARGS__) +#define LD_SW8(...) LD_V8(v4i32, __VA_ARGS__) #define LD_V16(RTYPE, psrc, stride, \ out0, out1, out2, out3, out4, out5, out6, out7, \ @@ -1382,6 +1384,7 @@ out4, out5, out6, out7); \ } #define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__) +#define ILVR_B8_SW(...) ILVR_B8(v4i32, __VA_ARGS__) /* Description : Interleave right half of halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 -- cgit v1.2.3