diff options
Diffstat (limited to 'libavresample/x86/audio_mix.asm')
-rw-r--r-- | libavresample/x86/audio_mix.asm | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm index dbc79e585d..8a4cf061cd 100644 --- a/libavresample/x86/audio_mix.asm +++ b/libavresample/x86/audio_mix.asm @@ -21,6 +21,7 @@ %include "x86inc.asm" %include "x86util.asm" +%include "util.asm" SECTION_TEXT @@ -64,3 +65,88 @@ MIX_2_TO_1_FLTP_FLT INIT_YMM avx MIX_2_TO_1_FLTP_FLT %endif + +;----------------------------------------------------------------------------- +; void ff_mix_2_to_1_s16p_flt(int16_t **src, float **matrix, int len, +; int out_ch, int in_ch); +;----------------------------------------------------------------------------- + +%macro MIX_2_TO_1_S16P_FLT 0 +cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1 + mov src1q, [srcq+gprsize] + mov srcq, [srcq] + sub src1q, srcq + mov matrixq, [matrixq ] + VBROADCASTSS m4, [matrixq ] + VBROADCASTSS m5, [matrixq+4] + ALIGN 16 +.loop: + mova m0, [srcq ] + mova m2, [srcq+src1q] + S16_TO_S32_SX 0, 1 + S16_TO_S32_SX 2, 3 + cvtdq2ps m0, m0 + cvtdq2ps m1, m1 + cvtdq2ps m2, m2 + cvtdq2ps m3, m3 + mulps m0, m4 + mulps m1, m4 + mulps m2, m5 + mulps m3, m5 + addps m0, m2 + addps m1, m3 + cvtps2dq m0, m0 + cvtps2dq m1, m1 + packssdw m0, m1 + mova [srcq], m0 + add srcq, mmsize + sub lend, mmsize/2 + jg .loop + REP_RET +%endmacro + +INIT_XMM sse2 +MIX_2_TO_1_S16P_FLT +INIT_XMM sse4 +MIX_2_TO_1_S16P_FLT + +;----------------------------------------------------------------------------- +; void ff_mix_2_to_1_s16p_q8(int16_t **src, int16_t **matrix, int len, +; int out_ch, int in_ch); +;----------------------------------------------------------------------------- + +INIT_XMM sse2 +cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1 + mov src1q, [srcq+gprsize] + mov srcq, [srcq] + sub src1q, srcq + mov matrixq, [matrixq] + movd m4, [matrixq] + movd m5, [matrixq] + SPLATW m4, m4, 0 + SPLATW m5, m5, 1 + pxor m0, m0 + punpcklwd m4, m0 + punpcklwd m5, m0 + ALIGN 16 +.loop: + mova m0, [srcq ] + mova m2, [srcq+src1q] + punpckhwd m1, m0, m0 + punpcklwd m0, m0 + punpckhwd m3, m2, m2 + punpcklwd m2, m2 + pmaddwd m0, m4 + pmaddwd m1, m4 + pmaddwd m2, m5 + pmaddwd m3, m5 + paddd m0, m2 + paddd m1, m3 + psrad m0, 8 + psrad m1, 8 + packssdw m0, m1 + mova [srcq], m0 + add srcq, mmsize + sub lend, mmsize/2 + jg .loop + REP_RET |