Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Will Newton <will.newton@linaro.org> 2013-06-21 13:10:37 +0400
committer Will Newton <will.newton@linaro.org> 2013-06-21 13:10:37 +0400
commit c8af05790785011377b5cbc57397f3908581b628 (patch)
tree 7782d90e02ca9cd1adc9c90b4f6ea4b071e42d29 /newlib/libc/machine
parent a1a7a74e6bca2a35d9ab1949cb48c066b9509782 (diff)
strlen-armv7.S: Import latest strlen cortex-strings code.
Import the latest version of strlen from the Linaro cortex-strings package. This version is faster across a variety of block sizes and alignments on ARMv7.

newlib/ChangeLog:
2013-06-21 Will Newton <will.newton@linaro.org>
* libc/machine/arm/strlen-armv7.S: Import latest strlen code from Linaro cortex-strings.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- newlib/libc/machine/arm/strlen-armv7.S 196
1 files changed, 113 insertions, 83 deletions
diff --git a/newlib/libc/machine/arm/strlen-armv7.S b/newlib/libc/machine/arm/strlen-armv7.S
index d6e2831ff..1aa51c9fb 100644
--- a/newlib/libc/machine/arm/strlen-armv7.S
+++ b/newlib/libc/machine/arm/strlen-armv7.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2011, Linaro Limited
+/* Copyright (c) 2010-2011,2013 Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -28,100 +28,130 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- This strlen routine is optimised on a Cortex-A9 and should work on
- all ARMv7 processors. This routine is reasonably fast for short
- strings, but is probably slower than a simple implementation if all
- your strings are very short */
-
-@ 2011-02-08 david.gilbert@linaro.org
-@ Extracted from local git 6848613a
-@ 2011-10-13 david.gilbert@linaro.org
-@ Extracted from cortex-strings bzr rev 63
-@ Integrate to newlib, flip to ldrd
-@ Pull in Endian macro from my memchr
+ Assumes:
+ ARMv6T2, AArch32
+ */
#include "arm_asm.h"
-@ NOTE: This ifdef MUST match the ones in arm/strlen.c
-@ We fallback to the one in arm/strlen.c for size optimised or
-@ for older arch's
+/* NOTE: This ifdef MUST match the ones in arm/strlen.c
+ We fallback to the one in arm/strlen.c for size optimised or
+ for older architectures. */
#if defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__) && \
!(defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
(defined (__thumb__) && !defined (__thumb2__)))
-@ this lets us check a flag in a 00/ff byte easily in either endianness
+ .macro def_fn f p2align=0 /* Open a global function named f, aligned to 2^p2align bytes (default 0). */
+ .text /* Place the function in the code section. */
+ .p2align \p2align
+ .global \f
+ .type \f, %function /* Mark the symbol as a function for the object file. */
+\f:
+ .endm
+
#ifdef __ARMEB__ /* Big-endian: the lowest-addressed byte of a loaded word is the most significant. */
-#define CHARTSTMASK(c) 1<<(31-(c*8))
+#define S2LO lsl /* Shift towards the lower-addressed bytes (not used by strlen below). */
+#define S2HI lsr /* Shift towards the higher-addressed bytes. */
#else /* Little-endian: the lowest-addressed byte is the least significant. */
-#define CHARTSTMASK(c) 1<<(c*8)
+#define S2LO lsr /* Shift towards the lower-addressed bytes (not used by strlen below). */
+#define S2HI lsl /* Shift towards the higher-addressed bytes. */
#endif
-@------------------------------------------------------------------------------
+ /* This code requires Thumb. */
+ .thumb
.syntax unified
- .arch armv7-a
-
- .thumb_func
- .align 2
- .p2align 4,,15
- .global strlen
- .type strlen,%function
-strlen:
- @ r0 = string
- @ returns count of bytes in string not including terminator
- mov r1, r0
- push { r4,r6 }
- mvns r6, #0 @ all F
- movs r4, #0
- tst r0, #7
- beq 2f
-
-1:
- ldrb r2, [r1], #1
- tst r1, #7 @ Hit alignment yet?
- cbz r2, 10f @ Exit if we found the 0
- bne 1b
-
- @ So we're now aligned
-2:
- ldrd r2,r3,[r1],#8
- uadd8 r2, r2, r6 @ Par add 0xff - sets the GE bits for bytes!=0
- sel r2, r4, r6 @ bytes are 00 for none-00 bytes,
- @ or ff for 00 bytes - NOTE INVERSION
- uadd8 r3, r3, r6 @ Par add 0xff - sets the GE bits for bytes!=0
- sel r3, r2, r6 @ chained...bytes are 00 for none-00 bytes,
- @ or ff for 00 bytes - NOTE INVERSION
- cmp r3, #0
- beq 2b
-
-strlenendtmp:
- @ One (or more) of the bytes we loaded was 0 - but which one?
- @ r2 has the mask corresponding to the first loaded word
- @ r3 has a combined mask of the two words - but if r2 was all-non 0
- @ then it's just the 2nd words
- cmp r2, #0
- itte eq
- moveq r2, r3 @ the end is in the 2nd word
- subeq r1,r1,#3
- subne r1,r1,#7
-
- @ r1 currently points to the 2nd byte of the word containing the 0
- tst r2, # CHARTSTMASK(0) @ 1st character
- bne 10f
- adds r1,r1,#1
- tst r2, # CHARTSTMASK(1) @ 2nd character
- ittt eq
- addeq r1,r1,#1
- tsteq r2, # (3<<15) @ 2nd & 3rd character
- @ If not the 3rd must be the last one
- addeq r1,r1,#1
-
-10:
- @ r0 is still at the beginning, r1 is pointing 1 byte after the nul
- sub r0, r1, r0
- subs r0, r0, #1
- pop { r4, r6 }
+
+/* Parameters and result. */
+#define srcin r0 /* Incoming string pointer. */
+#define result r0 /* Running byte count; reuses r0 once srcin has been consumed. */
+
+/* Internal variables. */
+#define src r1 /* 8-byte-aligned read pointer. */
+#define data1a r2 /* First loaded word, then its NUL mask. */
+#define data1b r3 /* Second loaded word, then the combined NUL mask. */
+#define const_m1 r12 /* Constant 0xffffffff. */
+#define const_0 r4 /* Constant 0; callee-saved register, spilled on entry. */
+#define tmp1 r4 /* Shares r4 with const_0: only live before const_0 is written. */
+#define tmp2 r5 /* Scratch for the misaligned entry path; spilled on entry. */
+
+def_fn strlen p2align=6 /* In: r0 = string. Out: r0 = byte count up to, not including, the first NUL. */
+ pld [srcin, #0]
+ strd r4, r5, [sp, #-8]! /* Spill r4/r5; they back const_0/tmp1 and tmp2. */
+ bic src, srcin, #7 /* src = srcin rounded down to an 8-byte boundary. */
+ mvn const_m1, #0 /* const_m1 = 0xffffffff. */
+ ands tmp1, srcin, #7 /* tmp1 = offset of srcin inside its 8-byte block; Z set when aligned. */
+ pld [src, #32]
+ bne.w .Lmisaligned8 /* Taken when srcin is not 8-byte aligned. */
+ mov const_0, #0 /* tmp1 is no longer needed, so r4 becomes const_0. */
+ mov result, #-8 /* Biased by -8 because the loop adds 8 before the first check. */
+.Lloop_aligned:
+ /* Bytes 0-7 of the current 32-byte group. */
+ ldrd data1a, data1b, [src]
+ pld [src, #64]
+ add result, result, #8
+.Lstart_realigned:
+ uadd8 data1a, data1a, const_m1 /* Per-byte add of 0xff: GE<0:3> set for each non-zero byte. */
+ sel data1a, const_0, const_m1 /* 0x00 where GE is set, 0xff where the byte was NUL. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Non-NUL lanes inherit data1a's mask; used as the mask only if d1a == 0. */
+ cbnz data1b, .Lnull_found /* Non-zero means a NUL in either word. */
+
+ /* Bytes 8-15. */
+ ldrd data1a, data1b, [src, #8]
+ uadd8 data1a, data1a, const_m1 /* Per-byte add of 0xff: GE<0:3> set for each non-zero byte. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0x00 where GE is set, 0xff where the byte was NUL. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Non-NUL lanes inherit data1a's mask; used as the mask only if d1a == 0. */
+ cbnz data1b, .Lnull_found
+
+ /* Bytes 16-23. */
+ ldrd data1a, data1b, [src, #16]
+ uadd8 data1a, data1a, const_m1 /* Per-byte add of 0xff: GE<0:3> set for each non-zero byte. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0x00 where GE is set, 0xff where the byte was NUL. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Non-NUL lanes inherit data1a's mask; used as the mask only if d1a == 0. */
+ cbnz data1b, .Lnull_found
+
+ /* Bytes 24-31; src then advances to the next 32-byte group. */
+ ldrd data1a, data1b, [src, #24]
+ add src, src, #32
+ uadd8 data1a, data1a, const_m1 /* Per-byte add of 0xff: GE<0:3> set for each non-zero byte. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0x00 where GE is set, 0xff where the byte was NUL. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Non-NUL lanes inherit data1a's mask; used as the mask only if d1a == 0. */
+ cmp data1b, #0
+ beq .Lloop_aligned /* No NUL in these 32 bytes: keep scanning. */
+
+.Lnull_found: /* result = bytes counted before the 8-byte chunk that holds the NUL. */
+ cmp data1a, #0
+ itt eq /* eq: first word has no NUL, so it is in the second word. */
+ addeq result, result, #4
+ moveq data1a, data1b
+#ifndef __ARMEB__
+ rev data1a, data1a /* Little-endian: move the lowest-addressed byte to the top for clz. */
+#endif
+ clz data1a, data1a /* Leading zero bits = 8 * bytes before the first NUL in this word. */
+ ldrd r4, r5, [sp], #8 /* Restore r4/r5 and pop the 8-byte spill area. */
+ add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
+.Lmisaligned8: /* Entry for a start that is not 8-byte aligned; tmp1 = srcin & 7. */
+ ldrd data1a, data1b, [src] /* Load the aligned block that contains the first string byte. */
+ and tmp2, tmp1, #3 /* Offset of the first string byte within its word. */
+ rsb result, tmp1, #0 /* result = -tmp1, cancelling the bytes that precede the string. */
+ lsl tmp2, tmp2, #3 /* Bytes -> bits. */
+ tst tmp1, #4 /* ne if the string starts in the second word; flags stay live to itt. */
+ pld [src, #64]
+ S2HI tmp2, const_m1, tmp2 /* Mask: zero in the (tmp1 & 3) lowest-addressed byte lanes, ones elsewhere. */
+ orn data1a, data1a, tmp2 /* OR with ~mask: force those leading bytes to 0xff so they cannot match NUL. */
+ itt ne
+ ornne data1b, data1b, tmp2 /* String starts in the second word: mask its leading bytes instead. */
+ movne data1a, const_m1 /* The whole first word precedes the string: make it all non-NUL. */
+ mov const_0, #0 /* tmp1 is no longer needed, so r4 becomes const_0. */
+ b .Lstart_realigned /* Join the main loop at the NUL check for bytes 0-7. */
+ .size strlen, . - strlen
+
#endif