github.com/FFmpeg/FFmpeg.git
author     Christophe GISQUET <christophe.gisquet@gmail.com>  2012-03-03 18:09:36 +0400
committer  Ronald S. Bultje <rsbultje@gmail.com>  2012-03-07 22:29:52 +0400
commit     7e1ce6a6acd83cf2d5b21df94d2134b1553635ef (patch)
tree       641b0483bf41da2c03c2a6cf4cad2e777c12c2f7 /libavcodec/arm
parent     dabf8dd34afdbb6dc9dc7603d7a5228fc67de4c8 (diff)
dsputil: remove shift parameter from scalarproduct_int16
There is only one caller, which does not need the shifting. Other use cases
are situations where different roundings would be needed. The x86 and neon
versions are modified accordingly.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
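For context, a minimal plain-C sketch of what the function computes once the
shift parameter is gone: a straight signed 16-bit dot product accumulated in
32 bits. This is an illustration only (the function name is made up, and it is
not code from this commit); the removed NEON path in the diff below
additionally right-shifted each 32-bit product by the shift amount before
accumulating it, which the remaining caller does not need.

#include <stdint.h>

/* Illustration only: the simplified semantics, i.e. an int16 dot product
 * with a 32-bit accumulator and no post-multiply shift. */
int32_t scalarproduct_int16_sketch(const int16_t *v1, const int16_t *v2,
                                   int len)
{
    int32_t res = 0;
    while (len--)
        res += *v1++ * *v2++;
    return res;
}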
Diffstat (limited to 'libavcodec/arm')
-rw-r--r--  libavcodec/arm/dsputil_init_neon.c |  3
-rw-r--r--  libavcodec/arm/int_neon.S          | 28
2 files changed, 3 insertions, 28 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 68e5b3ed42..b2931fe525 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -171,8 +171,7 @@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len,
-                                    int shift);
+int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3, int len, int mul);
diff --git a/libavcodec/arm/int_neon.S b/libavcodec/arm/int_neon.S
index 8bb58afb18..ea479bb580 100644
--- a/libavcodec/arm/int_neon.S
+++ b/libavcodec/arm/int_neon.S
@@ -29,32 +29,8 @@ function ff_scalarproduct_int16_neon, export=1
vmov.i16 q1, #0
vmov.i16 q2, #0
vmov.i16 q3, #0
- negs r3, r3
- beq 2f
-
- vdup.s32 q12, r3
1: vld1.16 {d16-d17}, [r0]!
vld1.16 {d20-d21}, [r1,:128]!
- vmull.s16 q12, d16, d20
- vld1.16 {d18-d19}, [r0]!
- vmull.s16 q13, d17, d21
- vld1.16 {d22-d23}, [r1,:128]!
- vmull.s16 q14, d18, d22
- vmull.s16 q15, d19, d23
- vshl.s32 q8, q12, q12
- vshl.s32 q9, q13, q12
- vadd.s32 q0, q0, q8
- vshl.s32 q10, q14, q12
- vadd.s32 q1, q1, q9
- vshl.s32 q11, q15, q12
- vadd.s32 q2, q2, q10
- vadd.s32 q3, q3, q11
- subs r2, r2, #16
- bne 1b
- b 3f
-
-2: vld1.16 {d16-d17}, [r0]!
- vld1.16 {d20-d21}, [r1,:128]!
vmlal.s16 q0, d16, d20
vld1.16 {d18-d19}, [r0]!
vmlal.s16 q1, d17, d21
@@ -62,9 +38,9 @@ function ff_scalarproduct_int16_neon, export=1
vmlal.s16 q2, d18, d22
vmlal.s16 q3, d19, d23
subs r2, r2, #16
- bne 2b
+ bne 1b
-3: vpadd.s32 d16, d0, d1
+ vpadd.s32 d16, d0, d1
vpadd.s32 d17, d2, d3
vpadd.s32 d10, d4, d5
vpadd.s32 d11, d6, d7
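For readers less fluent in NEON assembly, a hypothetical C-intrinsics sketch of
the structure the function keeps after this patch: sixteen int16 elements per
iteration fed into four widening multiply-accumulate accumulators (the
vmlal.s16 path above), then a single reduction at the end (the vpadd tail that
starts above). The helper name is invented and, like the assembly loop, the
sketch assumes len is a multiple of 16.

#include <arm_neon.h>
#include <stdint.h>

/* Hypothetical rendering of the simplified loop with NEON intrinsics.
 * Four independent accumulators keep the multiply-accumulate latency off
 * the critical path; everything is summed only once, after the loop. */
int32_t scalarproduct_int16_neon_sketch(const int16_t *v1, const int16_t *v2,
                                        int len)
{
    int32x4_t acc0 = vdupq_n_s32(0), acc1 = vdupq_n_s32(0);
    int32x4_t acc2 = vdupq_n_s32(0), acc3 = vdupq_n_s32(0);

    for (int i = 0; i < len; i += 16) {
        int16x8_t a0 = vld1q_s16(v1 + i),     b0 = vld1q_s16(v2 + i);
        int16x8_t a1 = vld1q_s16(v1 + i + 8), b1 = vld1q_s16(v2 + i + 8);

        /* Widening multiply-accumulate, the intrinsic form of vmlal.s16 */
        acc0 = vmlal_s16(acc0, vget_low_s16(a0),  vget_low_s16(b0));
        acc1 = vmlal_s16(acc1, vget_high_s16(a0), vget_high_s16(b0));
        acc2 = vmlal_s16(acc2, vget_low_s16(a1),  vget_low_s16(b1));
        acc3 = vmlal_s16(acc3, vget_high_s16(a1), vget_high_s16(b1));
    }

    /* Horizontal reduction of the four accumulators to one int32 */
    int32x4_t sum = vaddq_s32(vaddq_s32(acc0, acc1), vaddq_s32(acc2, acc3));
    int32x2_t s   = vadd_s32(vget_low_s32(sum), vget_high_s32(sum));
    return vget_lane_s32(vpadd_s32(s, s), 0);
}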