diff options
Diffstat (limited to 'libavcodec/x86/sbcdsp.asm')
-rw-r--r-- | libavcodec/x86/sbcdsp.asm | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm new file mode 100644 index 0000000000..d68d3a9ae8 --- /dev/null +++ b/libavcodec/x86/sbcdsp.asm @@ -0,0 +1,168 @@ +;****************************************************************************** +;* SIMD optimized SBC encoder DSP functions +;* +;* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org> +;* Copyright (C) 2008-2010 Nokia Corporation +;* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> +;* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> +;* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Fixed-point parameters used by the routines in this file.
%define SBC_PROTO_FIXED_SCALE 16 ; arithmetic right shift applied to the first-stage sums
%define SCALE_OUT_BITS        15 ; subtracted from the MSB index of every scale factor

SECTION_RODATA

; Two dwords of 0x8000, i.e. 1 << (SBC_PROTO_FIXED_SCALE - 1).
; Added to the first-stage sums before the SBC_PROTO_FIXED_SCALE shift, and
; reused as the initial value of the OR-accumulator in sbc_calc_scalefactors.
scale_mask: times 2 dd 1 << (SBC_PROTO_FIXED_SCALE - 1)

SECTION .text

;-------------------------------------------------------------------
; NIDN op, dst, src
; Emits "op dst, src" unless dst and src are textually identical.
;-------------------------------------------------------------------
%macro NIDN 3
%ifnidn %2, %3
    %1            %2, %3
%endif
%endmacro

;-------------------------------------------------------------------
; ANALYZE_MAC out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
;
;   tmp1 = pmaddwd(in1, consts[offset    ])
;   tmp2 = pmaddwd(in2, consts[offset + 8])
;   out1 += add1
;   out2 += add2
;
; Every copy/add whose two operands are the same text is omitted (see
; NIDN), so passing tmpN == inN multiplies in place and passing
; outN == addN leaves outN holding just the product.
; Uses constsq, hence only valid inside a function with a "consts" argument.
;-------------------------------------------------------------------
%macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
    NIDN movq,    %5, %3
    NIDN movq,    %6, %4
    pmaddwd       %5, [constsq+%9]
    pmaddwd       %6, [constsq+%9+8]
    NIDN paddd,   %1, %7
    NIDN paddd,   %2, %8
%endmacro

;-------------------------------------------------------------------
; ANALYZE_MAC_IN out1, out2, tmp1, tmp2, add1, add2, offset
; ANALYZE_MAC with both inputs read from memory: [inq+offset] and
; [inq+offset+8]. The same offset indexes the constant table.
;-------------------------------------------------------------------
%macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
    ANALYZE_MAC   %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
%endmacro

;-------------------------------------------------------------------
; ANALYZE_MAC_REG out1, out2, in, tmp1, tmp2, offset, pack
; ANALYZE_MAC with one register feeding both multiplies; the products in
; tmp1/tmp2 are the values added to out1/out2.
; When the last argument is the literal word "pack", "in" is first shifted
; right by SBC_PROTO_FIXED_SCALE and packed to signed 16-bit with saturation
; (in place). Any other word skips that step.
;-------------------------------------------------------------------
%macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
%ifidn %7, pack
    psrad         %3, SBC_PROTO_FIXED_SCALE
    packssdw      %3, %3
%endif
    ANALYZE_MAC   %1, %2, %3, %3, %4, %5, %4, %5, %6
%endmacro

;*******************************************************************
;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Reads 80 bytes from in and 112 bytes from consts, writes 16 bytes
; (four int32) to out.
; NOTE(review): no emms is issued here - presumably the caller takes
; care of it; confirm against the C side.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
    ; first stage: m0/m1 = rounding term + sum of five in*consts products
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
%assign tap 16
%rep 4                                  ; byte offsets 16, 32, 48, 64
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, tap
%assign tap tap+16
%endrep
%undef tap

    ; second stage: scale m0 and m1 down to int16, multiply by the next
    ; constants and accumulate into m0/m2
    ANALYZE_MAC_REG  m0, m2, m0, m0, m2, 80, pack
    ANALYZE_MAC_REG  m0, m2, m1, m1, m3, 96, pack

    movq          [outq  ], m0
    movq          [outq+8], m2

    RET


;*******************************************************************
;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Reads 160 bytes from in and 288 bytes from consts, writes 32 bytes
; (eight int32) to out. Uses all of m0-m7.
; NOTE(review): no emms is issued here - presumably the caller takes
; care of it; confirm against the C side.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
    ; first stage: four accumulators m0-m3, each seeded with the rounding
    ; term and fed by every second 16-byte group of in/consts
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
    ANALYZE_MAC_IN   m2, m3, m2, m3, [scale_mask], [scale_mask], 16
%assign tap 32
%rep 4                                  ; byte offsets 32/48, 64/80, 96/112, 128/144
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, tap
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, tap+16
%assign tap tap+32
%endrep
%undef tap

    ; second stage, outputs 0-3: m0-m3 are scaled and packed in place here
    ; and stay packed for the next group
    ANALYZE_MAC_REG  m4, m5, m0, m4, m5, 160, pack
    ANALYZE_MAC_REG  m4, m5, m1, m6, m7, 192, pack
    ANALYZE_MAC_REG  m4, m5, m2, m6, m7, 224, pack
    ANALYZE_MAC_REG  m4, m5, m3, m6, m7, 256, pack

    movq          [outq  ], m4
    movq          [outq+8], m5

    ; second stage, outputs 4-7: inputs are already packed, and are
    ; consumed (multiplied in place) since nothing needs them afterwards
    ANALYZE_MAC_REG  m0, m5, m0, m0, m5, 176, no
    ANALYZE_MAC_REG  m0, m5, m1, m1, m7, 208, no
    ANALYZE_MAC_REG  m0, m5, m2, m2, m7, 240, no
    ANALYZE_MAC_REG  m0, m5, m3, m3, m7, 272, no

    movq          [outq+16], m0
    movq          [outq+24], m5

    RET


;-------------------------------------------------------------------
; STORE_SCALE_FACTOR byte_offset
; Helper for sbc_calc_scalefactors: takes the low dword of m0, finds the
; index of its highest set bit, subtracts SCALE_OUT_BITS and stores the
; result at scale_factor[subbands + byte_offset]. Clobbers ofsd.
; The accumulator always has bit 15 set (it is seeded from scale_mask), so
; bsr never sees a zero source and the stored value is never negative.
;-------------------------------------------------------------------
%macro STORE_SCALE_FACTOR 1 ; byte offset within the current pair
    movd          ofsd, m0
    bsr           ofsd, ofsd
    sub           ofsd, SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq + %1], ofsd
%endmacro

;*******************************************************************
;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
;                              uint32_t scale_factor[2][8],
;                              int blocks, int channels, int subbands)
;
; Handles two adjacent int32 columns per outer iteration, walking from the
; last pair back to byte offset 0. For each column the values of all
; blocks are folded (x > 0 -> x - 1, x == 0 -> 0, x < 0 -> ~x) and ORed
; together on top of the scale_mask seed; the scale factor is the MSB index
; of that OR minus SCALE_OUT_BITS.
; Rows are 64 bytes apart, matching the [2][8] int32 layout above.
; The scanned row width is 4 * subbands bytes, doubled only when
; channels >= 2.
; NOTE(review): blocks >= 1 and an even, non-zero column count appear to be
; assumed - nothing here checks them.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, col, ofs
    movq          m3, [scale_mask]      ; seed, kept for every column pair

    ; subbands = byte width of the row part to scan
    shl           subbandsd, 2
    cmp           channelsd, 2
    jl            .next_pair
    shl           subbandsd, 1

.next_pair:
    sub           subbandsq, 8          ; step back one pair of int32 columns
    lea           colq, [sb_sample_fq + subbandsq]

    ; ofs = (blocks - 1) * 64, the byte offset of the last block row
    lea           ofsq, [blocksq - 1]
    shl           ofsd, 6

    movq          m0, m3
.next_block:
    pxor          m2, m2
    movq          m1, [colq+ofsq]
    pcmpgtd       m1, m2                ; m1 = (x > 0) ? -1 : 0
    paddd         m1, [colq+ofsq]       ; m1 = x - (x > 0)
    pcmpgtd       m2, m1                ; m2 = (m1 < 0) ? -1 : 0
    pxor          m1, m2                ; complement the negative lanes
    por           m0, m1

    sub           ofsq, 64
    jns           .next_block           ; row 0 is processed last

    STORE_SCALE_FACTOR 0
    psrlq         m0, 32                ; bring the second column down
    STORE_SCALE_FACTOR 4

    test          subbandsq, subbandsq
    jg            .next_pair

    emms
    RET