Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Blake <eblake@redhat.com>, 2008-05-22 01:46:04 +0400
committer: Eric Blake <eblake@redhat.com>, 2008-05-22 01:46:04 +0400
commit: 4962a9453ac3a9c23cba3c7e952242a6831f0cb3 (patch)
tree: e0c8a50b1aac0bb6858285dea3d6800000122c26 /newlib/libc/machine
parent: 804c0cc6d0eae37680bfc2900832958ad4915519 (diff)
Optimize strchr for x86.
* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned searches aren't penalized. Special-case searching for 0.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- newlib/libc/machine/i386/strchr.S | 113
1 files changed, 100 insertions, 13 deletions
diff --git a/newlib/libc/machine/i386/strchr.S b/newlib/libc/machine/i386/strchr.S
index fe425d2d8..1d98b8149 100644
--- a/newlib/libc/machine/i386/strchr.S
+++ b/newlib/libc/machine/i386/strchr.S
@@ -1,6 +1,6 @@
/*
* ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
@@ -9,7 +9,7 @@
*/
#include "i386mach.h"
-
+
.global SYM (strchr)
SOTYPE_FUNCTION(strchr)
@@ -21,14 +21,45 @@ SYM (strchr):
pushl ebx
xorl ebx,ebx
movl 8(ebp),edi
- movb 12(ebp),bl
+ addb 12(ebp),bl
+
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0). */
+ je L25
-#ifndef __OPTIMIZE_SIZE__
-/* check if string is aligned, if not do check one byte at a time */
+/* Do byte-wise checks until string is aligned. */
test $3,edi
- jne L9
+ je L5
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L14
+ cmpb bl,cl
+ je L19
+ incl edi
+
+ test $3,edi
+ je L5
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L14
+ cmpb bl,cl
+ je L19
+ incl edi
+
+ test $3,edi
+ je L5
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L14
+ cmpb bl,cl
+ je L19
+ incl edi
/* create 4 byte mask which is just the desired byte repeated 4 times */
+L5:
movl ebx,ecx
sall $8,ebx
subl $4,edi
@@ -49,15 +80,14 @@ L10:
testl $-2139062144,edx
jne L9
- movl ebx,eax
- xorl ecx,eax
- leal -16843009(eax),edx
- notl eax
- andl eax,edx
+ xorl ebx,ecx
+ leal -16843009(ecx),edx
+ notl ecx
+ andl ecx,edx
testl $-2139062144,edx
je L10
#endif /* not __OPTIMIZE_SIZE__ */
-
+
/* loop while (*s && *s++ != c) */
L9:
leal -1(edi),eax
@@ -69,7 +99,7 @@ L15:
je L14
cmpb bl,dl
jne L15
-
+
L14:
/* if (*s == c) return address otherwise return NULL */
cmpb bl,(eax)
@@ -83,3 +113,60 @@ L19:
leave
ret
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0). */
+#if 0
+ /* Hideous performance on modern machines. */
+L25:
+ cld
+ movl $-1,ecx
+ xor eax,eax
+ repnz
+ scasb
+ leal -1(edi),eax
+ jmp L19
+#endif
+L25:
+/* Do byte-wise checks until string is aligned. */
+ test $3,edi
+ je L26
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L19
+ incl edi
+
+ test $3,edi
+ je L26
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L19
+ incl edi
+
+ test $3,edi
+ je L26
+ movl edi,eax
+ movb (eax),cl
+ testb cl,cl
+ je L19
+ incl edi
+
+L26:
+ subl $4,edi
+
+/* loop performing 4 byte mask checking for desired 0 byte */
+ .p2align 4,,7
+L27:
+ addl $4,edi
+ movl (edi),ecx
+ leal -16843009(ecx),edx
+ movl ecx,eax
+ notl eax
+ andl eax,edx
+ testl $-2139062144,edx
+ je L27
+
+ jmp L9
+
+#endif /* !__OPTIMIZE_SIZE__ */