Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Rostislav Pehlivanov <rpehlivanov@ob-encoder.com>, 2016-06-23 20:06:56 +0300
committer: Rostislav Pehlivanov <atomnuker@gmail.com>, 2016-07-12 01:30:11 +0300
commit: 80721cc1ff1f1c8c460c136184ed6416a73b4bfd (patch)
tree: ff11e24dd41ac6254ccd6a54e6c3ceb1d3781875 /libavcodec/x86
parent: 244d22452c7d40891627e5f7e376558d6b5d8582 (diff)
diracdsp: add dequantization SIMD
Currently unused, to be used in the following commits. Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/diracdsp.asm | 37
-rw-r--r-- libavcodec/x86/diracdsp_init.c | 6
2 files changed, 43 insertions, 0 deletions
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
index a042413c3a..8e9f0fbf02 100644
--- a/libavcodec/x86/diracdsp.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -263,3 +263,40 @@ ADD_RECT sse2
HPEL_FILTER sse2
ADD_OBMC 32, sse2
ADD_OBMC 16, sse2
+
+INIT_XMM sse4
+
+; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
+cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h ; per 32-bit element: dst = sign(src) * ((|src| * qf + qs) >> 2)
+ movd m2, qfd ; low dword of m2 = qf
+ movd m3, qsd ; low dword of m3 = qs
+ SPLATD m2 ; SPLATD is defined outside this view; presumably broadcasts qf to every dword lane
+ SPLATD m3 ; likewise for qs
+ mov r4, tot_hq ; r4 = saved tot_h; NOTE(review): r4 should be the qs register under x86inc numbering, free after the movd above
+ mov r3, dstq ; r3 = start of the current dst row; NOTE(review): r3 should be the qf register, also free by now
+
+ .loop_v: ; one iteration per row, tot_v counts rows down
+ mov tot_hq, r4 ; reload the per-row element counter
+ mov dstq, r3 ; rewind dst to the start of the current row
+
+ .loop_h: ; one iteration per vector of 32-bit coefficients
+ movu m0, [srcq] ; unaligned load; src is typed uint8_t* but is read as packed dwords
+
+ pabsd m1, m0 ; m1 = |coeff|
+ pmulld m1, m2 ; m1 *= qf (low 32 bits of each product)
+ paddd m1, m3 ; m1 += qs
+ psrld m1, 2 ; logical shift right by 2 on the magnitude
+ psignd m1, m0 ; reapply the sign of the source coeff; zero inputs yield zero
+
+ movu [dstq], m1 ; unaligned store of the dequantized vector
+
+ add srcq, mmsize ; src is never rewound or offset by stride: it is consumed contiguously across rows
+ add dstq, mmsize
+ sub tot_hd, 4 ; 4 elements consumed per iteration
+ jg .loop_h ; signed test after the body: a full vector is always processed, even if fewer than 4 elements remained
+
+ add r3, strideq ; advance the saved dst row start by stride (added directly to the pointer, so a byte offset)
+ dec tot_vd
+ jg .loop_v ; signed test after the body: at least one row is always processed
+
+ RET
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index 5fae79891b..26b885d530 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
+
#if HAVE_YASM
#define HPEL_FILTER(MMSIZE, EXT) \
@@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
}
+
+ if (EXTERNAL_SSE4(mm_flags)) {
+ c->dequant_subband[1] = ff_dequant_subband_32_sse4;
+ }
}