github.com/FFmpeg/FFmpeg.git
author     Christophe GISQUET <christophe.gisquet@gmail.com>  2012-03-03 18:09:36 +0400
committer  Ronald S. Bultje <rsbultje@gmail.com>  2012-03-07 22:29:52 +0400
commit     7e1ce6a6acd83cf2d5b21df94d2134b1553635ef (patch)
tree       641b0483bf41da2c03c2a6cf4cad2e777c12c2f7 /libavcodec/arm
parent     dabf8dd34afdbb6dc9dc7603d7a5228fc67de4c8 (diff)
dsputil: remove shift parameter from scalarproduct_int16
There is only one caller, which does not need the shifting. Other use cases
are situations where different roundings would be needed. The x86 and neon
versions are modified accordingly.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
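For context, a minimal plain-C sketch of what the function computes once the
shift parameter is gone: a straight signed 16-bit dot product accumulated in
32 bits. This is an illustration only (the function name is made up, and it is
not code from this commit); the removed NEON path in the diff below
additionally right-shifted each 32-bit product by the shift amount before
accumulating it, which the remaining caller does not need.

#include <stdint.h>

/* Illustration only: the simplified semantics, i.e. an int16 dot product
 * with a 32-bit accumulator and no post-multiply shift. */
int32_t scalarproduct_int16_sketch(const int16_t *v1, const int16_t *v2,
                                   int len)
{
    int32_t res = 0;
    while (len--)
        res += *v1++ * *v2++;
    return res;
}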
Diffstat (limited to 'libavcodec/arm')
-rw-r--r--  libavcodec/arm/dsputil_init_neon.c |  3
-rw-r--r--  libavcodec/arm/int_neon.S          | 28
2 files changed, 3 insertions, 28 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 68e5b3ed42..b2931fe525 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -171,8 +171,7 @@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len,
-                                    int shift);
+int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3, int len, int mul);
diff --git a/libavcodec/arm/int_neon.S b/libavcodec/arm/int_neon.S
index 8bb58afb18..ea479bb580 100644
--- a/libavcodec/arm/int_neon.S
+++ b/libavcodec/arm/int_neon.S
@@ -29,32 +29,8 @@ function ff_scalarproduct_int16_neon, export=1
vmov.i16 q1, #0
vmov.i16 q2, #0
vmov.i16 q3, #0
- negs r3, r3
- beq 2f
-
- vdup.s32 q12, r3
1: vld1.16 {d16-d17}, [r0]!
vld1.16 {d20-d21}, [r1,:128]!
- vmull.s16 q12, d16, d20
- vld1.16 {d18-d19}, [r0]!
- vmull.s16 q13, d17, d21
- vld1.16 {d22-d23}, [r1,:128]!
- vmull.s16 q14, d18, d22
- vmull.s16 q15, d19, d23
- vshl.s32 q8, q12, q12
- vshl.s32 q9, q13, q12
- vadd.s32 q0, q0, q8
- vshl.s32 q10, q14, q12
- vadd.s32 q1, q1, q9
- vshl.s32 q11, q15, q12
- vadd.s32 q2, q2, q10
- vadd.s32 q3, q3, q11
- subs r2, r2, #16
- bne 1b
- b 3f
-
-2: vld1.16 {d16-d17}, [r0]!
- vld1.16 {d20-d21}, [r1,:128]!
vmlal.s16 q0, d16, d20
vld1.16 {d18-d19}, [r0]!
vmlal.s16 q1, d17, d21
@@ -62,9 +38,9 @@ function ff_scalarproduct_int16_neon, export=1
vmlal.s16 q2, d18, d22
vmlal.s16 q3, d19, d23
subs r2, r2, #16
- bne 2b
+ bne 1b
-3: vpadd.s32 d16, d0, d1
+ vpadd.s32 d16, d0, d1
vpadd.s32 d17, d2, d3
vpadd.s32 d10, d4, d5
vpadd.s32 d11, d6, d7
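For readers less fluent in NEON assembly, a hypothetical C-intrinsics sketch of
the structure the function keeps after this patch: sixteen int16 elements per
iteration fed into four widening multiply-accumulate accumulators (the
vmlal.s16 path above), then a single reduction at the end (the vpadd tail that
starts above). The helper name is invented and, like the assembly loop, the
sketch assumes len is a multiple of 16.

#include <arm_neon.h>
#include <stdint.h>

/* Hypothetical rendering of the simplified loop with NEON intrinsics.
 * Four independent accumulators keep the multiply-accumulate latency off
 * the critical path; everything is summed only once, after the loop. */
int32_t scalarproduct_int16_neon_sketch(const int16_t *v1, const int16_t *v2,
                                        int len)
{
    int32x4_t acc0 = vdupq_n_s32(0), acc1 = vdupq_n_s32(0);
    int32x4_t acc2 = vdupq_n_s32(0), acc3 = vdupq_n_s32(0);

    for (int i = 0; i < len; i += 16) {
        int16x8_t a0 = vld1q_s16(v1 + i),     b0 = vld1q_s16(v2 + i);
        int16x8_t a1 = vld1q_s16(v1 + i + 8), b1 = vld1q_s16(v2 + i + 8);

        /* Widening multiply-accumulate, the intrinsic form of vmlal.s16 */
        acc0 = vmlal_s16(acc0, vget_low_s16(a0),  vget_low_s16(b0));
        acc1 = vmlal_s16(acc1, vget_high_s16(a0), vget_high_s16(b0));
        acc2 = vmlal_s16(acc2, vget_low_s16(a1),  vget_low_s16(b1));
        acc3 = vmlal_s16(acc3, vget_high_s16(a1), vget_high_s16(b1));
    }

    /* Horizontal reduction of the four accumulators to one int32 */
    int32x4_t sum = vaddq_s32(vaddq_s32(acc0, acc1), vaddq_s32(acc2, acc3));
    int32x2_t s   = vadd_s32(vget_low_s32(sum), vget_high_s32(sum));
    return vget_lane_s32(vpadd_s32(s, s), 0);
}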