diff options
author | Timothy B. Terriberry <tterribe@xiph.org> | 2013-11-27 23:06:59 +0400 |
---|---|---|
committer | Timothy B. Terriberry <tterribe@xiph.org> | 2013-11-27 23:08:33 +0400 |
commit | c16a96b45886283d23f8bfda5bad115397f21689 (patch) | |
tree | 9cff3b7dbd56522a8781e2019a16463c5234fda5 | |
parent | a1740d9ad176ed3716b5c35cba6c47da733b789c (diff) |
Minor speedups to celt_pitch_xcorr_edsp(). (tag: v1.1-rc2)
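This moves some work out of the main loop for the unaligned case: the 16-bit shift of the carried-over y sample (previously `MOV r8, r10, LSR #16` on every iteration) is replaced by a single `LSL #16` before the loop and a single `LSR #16` after it.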
-rw-r--r-- | celt/arm/celt_pitch_xcorr_arm.s | 16 |
1 file changed, 8 insertions, 8 deletions
diff --git a/celt/arm/celt_pitch_xcorr_arm.s b/celt/arm/celt_pitch_xcorr_arm.s index 0ad8f5be..09917b16 100644 --- a/celt/arm/celt_pitch_xcorr_arm.s +++ b/celt/arm/celt_pitch_xcorr_arm.s @@ -371,20 +371,19 @@ celt_pitch_xcorr_edsp PROC LDRH r8, [r5], #2 BLE celt_pitch_xcorr_edsp_process1u_loop4_done LDR r6, [r4], #4 + MOV r8, r8, LSL #16 +celt_pitch_xcorr_edsp_process1u_loop4 LDR r9, [r5], #4 + SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) LDR r7, [r4], #4 -celt_pitch_xcorr_edsp_process1u_loop4 - SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - SUBS r12, r12, #4 ; j-=4 SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1) - LDR r10, [r5], #4 + LDR r8, [r5], #4 SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) + SUBS r12, r12, #4 ; j-=4 + SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) LDRGT r6, [r4], #4 - SMLATB r14, r7, r10, r14 ; sum = MAC16_16(sum, x_3, y_3) - LDRGT r9, [r5], #4 - MOV r8, r10, LSR #16 - LDRGT r7, [r4], #4 BGT celt_pitch_xcorr_edsp_process1u_loop4 + MOV r8, r8, LSR #16 celt_pitch_xcorr_edsp_process1u_loop4_done ADDS r12, r12, #4 celt_pitch_xcorr_edsp_process1u_loop1 @@ -530,6 +529,7 @@ celt_pitch_xcorr_edsp_process1a_loop_done ADDS r12, r12, #1 LDRGEH r6, [r4], #2 LDRGEH r8, [r5], #2 + ; Stall SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) ; maxcorr = max(maxcorr, sum) CMP r0, r14 |