Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Johnston <jjohnstn@redhat.com>2008-05-27 03:31:08 +0400
committerJeff Johnston <jjohnstn@redhat.com>2008-05-27 03:31:08 +0400
commit70bff2d5033567544fa1970b02699060974b2d70 (patch)
tree63f10ba544f805523216ade4dfb2bd0043fee46e
parenta6bd72a27873294887681d3bd102d848e5777e2c (diff)
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memchr. * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned searches aren't penalized. * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
-rw-r--r--newlib/ChangeLog8
-rw-r--r--newlib/libc/machine/i386/memchr.S88
-rw-r--r--newlib/libc/string/memchr.c81
3 files changed, 124 insertions, 53 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 02670c59c..fd19b2f85 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,5 +1,13 @@
2008-05-26 Eric Blake <ebb9@byu.net>
+ Optimize the generic and x86 memchr.
+ * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]:
+ Pre-align pointer so unaligned searches aren't penalized.
+ * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
+ operations are faster than repnz byte searches.
+
+2008-05-26 Eric Blake <ebb9@byu.net>
+
Optimize the generic and x86 memset.
* libc/string/memset.c (memset) [!__OPTIMIZE_SIZE__]:
Pre-align pointer so unaligned stores aren't penalized.
diff --git a/newlib/libc/machine/i386/memchr.S b/newlib/libc/machine/i386/memchr.S
index d29a04521..7639685be 100644
--- a/newlib/libc/machine/i386/memchr.S
+++ b/newlib/libc/machine/i386/memchr.S
@@ -1,6 +1,6 @@
/*
* ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
@@ -9,21 +9,23 @@
*/
#include "i386mach.h"
-
+
.global SYM (memchr)
SOTYPE_FUNCTION(memchr)
SYM (memchr):
pushl ebp
movl esp,ebp
- pushl edi
- movl 12(ebp),eax
- movl 16(ebp),ecx
- movl 8(ebp),edi
+ pushl edi
+ movzbl 12(ebp),eax
+ movl 16(ebp),ecx
+ movl 8(ebp),edi
xorl edx,edx
testl ecx,ecx
- jz L1
+ jz L20
+
+#ifdef __OPTIMIZE_SIZE__
cld
repnz
@@ -31,9 +33,79 @@ SYM (memchr):
setnz dl
decl edi
+
+#else /* !__OPTIMIZE_SIZE__ */
+/* Do byte-wise checks until string is aligned. */
+ testl $3,edi
+ je L5
+ cmpb (edi),al
+ je L15
+ incl edi
+ decl ecx
+ je L20
+
+ testl $3,edi
+ je L5
+ cmpb (edi),al
+ je L15
+ incl edi
+ decl ecx
+ je L20
+
+ testl $3,edi
+ je L5
+ cmpb (edi),al
+ je L15
+ incl edi
+ decl ecx
+ je L20
+
+/* Create a mask, then check a word at a time. */
+L5:
+ movb al,ah
+ movl eax,edx
+ sall $16,edx
+ orl edx,eax
+ pushl ebx
+
+ .p2align 4,,7
+L8:
+ subl $4,ecx
+ jc L9
+ movl (edi),edx
+ addl $4,edi
+ xorl eax,edx
+ leal -16843009(edx),ebx
+ notl edx
+ andl edx,ebx
+ testl $-2139062144,ebx
+ je L8
+
+ subl $4,edi
+
+L9:
+ popl ebx
+ xorl edx,edx
+ addl $4,ecx
+ je L20
+
+/* Final byte-wise checks. */
+ .p2align 4,,7
+L10:
+ cmpb (edi),al
+ je L15
+ incl edi
+ decl ecx
+ jne L10
+
+ xorl edi,edi
+
+#endif /* !__OPTIMIZE_SIZE__ */
+
+L15:
decl edx
andl edi,edx
-L1:
+L20:
movl edx,eax
leal -4(ebp),esp
diff --git a/newlib/libc/string/memchr.c b/newlib/libc/string/memchr.c
index 7c1a584b7..13ed88186 100644
--- a/newlib/libc/string/memchr.c
+++ b/newlib/libc/string/memchr.c
@@ -20,7 +20,7 @@ DESCRIPTION
This function searches memory starting at <<*<[src]>>> for the
character <[c]>. The search only ends with the first
occurrence of <[c]>, or after <[length]> characters; in
- particular, <<NULL>> does not terminate the search.
+ particular, <<NUL>> does not terminate the search.
RETURNS
If the character <[c]> is found within <[length]> characters
@@ -64,6 +64,9 @@ QUICKREF
#error long int is not a 32bit or 64bit byte
#endif
+/* DETECTCHAR returns nonzero if (long)X contains the byte used
+ to fill (long)MASK. */
+#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
_PTR
_DEFUN (memchr, (src_void, c, length),
@@ -71,73 +74,61 @@ _DEFUN (memchr, (src_void, c, length),
int c _AND
size_t length)
{
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
+ unsigned char d = c;
- c &= 0xff;
-
- while (length--)
+#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+ unsigned long *asrc;
+ unsigned long mask;
+ int i;
+
+ while (UNALIGNED (src))
{
- if (*src == c)
- return (char *) src;
+ if (!length--)
+ return NULL;
+ if (*src == d)
+ return (void *) src;
src++;
}
- return NULL;
-#else
- _CONST unsigned char *src = (_CONST unsigned char *) src_void;
- unsigned long *asrc;
- unsigned long buffer;
- unsigned long mask;
- int i, j;
- c &= 0xff;
-
- /* If the size is small, or src is unaligned, then
- use the bytewise loop. We can hope this is rare. */
- if (!TOO_SMALL (length) && !UNALIGNED (src))
+ if (!TOO_SMALL (length))
{
- /* The fast code reads the ASCII one word at a time and only
+ /* If we get this far, we know that length is large and src is
+ word-aligned. */
+ /* The fast code reads the source one word at a time and only
performs the bytewise search on word-sized segments if they
- contain the search character, which is detected by XORing
+ contain the search character, which is detected by XORing
the word-sized segment with a word-sized block of the search
- character and then detecting for the presence of NULL in the
+ character and then detecting for the presence of NUL in the
result. */
- asrc = (unsigned long*) src;
- mask = 0;
- for (i = 0; i < LBLOCKSIZE; i++)
- mask = (mask << 8) + c;
+ asrc = (unsigned long *) src;
+ mask = d << 8 | d;
+ mask = mask << 16 | mask;
+ for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
+ mask = (mask << i) | mask;
while (length >= LBLOCKSIZE)
{
- buffer = *asrc;
- buffer ^= mask;
- if (DETECTNULL (buffer))
- {
- src = (unsigned char*) asrc;
- for ( j = 0; j < LBLOCKSIZE; j++ )
- {
- if (*src == c)
- return (char*) src;
- src++;
- }
- }
+ if (DETECTCHAR (*asrc, mask))
+ break;
length -= LBLOCKSIZE;
asrc++;
}
-
+
/* If there are fewer than LBLOCKSIZE characters left,
then we resort to the bytewise loop. */
- src = (unsigned char*) asrc;
+ src = (unsigned char *) asrc;
}
+#endif /* not PREFER_SIZE_OVER_SPEED */
+
while (length--)
- {
- if (*src == c)
- return (char*) src;
+ {
+ if (*src == d)
+ return (void *) src;
src++;
- }
+ }
return NULL;
-#endif /* not PREFER_SIZE_OVER_SPEED */
}