From 70bff2d5033567544fa1970b02699060974b2d70 Mon Sep 17 00:00:00 2001 From: Jeff Johnston Date: Mon, 26 May 2008 23:31:08 +0000 Subject: 2008-05-26 Eric Blake Optimize the generic and x86 memchr. * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned searches aren't penalized. * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches. --- newlib/ChangeLog | 8 ++++ newlib/libc/machine/i386/memchr.S | 88 +++++++++++++++++++++++++++++++++++---- newlib/libc/string/memchr.c | 81 ++++++++++++++++------------------- 3 files changed, 124 insertions(+), 53 deletions(-) diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 02670c59c..fd19b2f85 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,11 @@ +2008-05-26 Eric Blake + + Optimize the generic and x86 memchr. + * libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: + Pre-align pointer so unaligned searches aren't penalized. + * libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word + operations are faster than repnz byte searches. + 2008-05-26 Eric Blake Optimize the generic and x86 memset. diff --git a/newlib/libc/machine/i386/memchr.S b/newlib/libc/machine/i386/memchr.S index d29a04521..7639685be 100644 --- a/newlib/libc/machine/i386/memchr.S +++ b/newlib/libc/machine/i386/memchr.S @@ -1,6 +1,6 @@ /* * ==================================================== - * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved. + * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved. 
* * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice @@ -9,21 +9,23 @@ */ #include "i386mach.h" - + .global SYM (memchr) SOTYPE_FUNCTION(memchr) SYM (memchr): pushl ebp movl esp,ebp - pushl edi - movl 12(ebp),eax - movl 16(ebp),ecx - movl 8(ebp),edi + pushl edi + movzbl 12(ebp),eax + movl 16(ebp),ecx + movl 8(ebp),edi xorl edx,edx testl ecx,ecx - jz L1 + jz L20 + +#ifdef __OPTIMIZE_SIZE__ cld repnz @@ -31,9 +33,79 @@ SYM (memchr): setnz dl decl edi + +#else /* !__OPTIMIZE_SIZE__ */ +/* Do byte-wise checks until string is aligned. */ + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + +/* Create a mask, then check a word at a time. */ +L5: + movb al,ah + movl eax,edx + sall $16,edx + orl edx,eax + pushl ebx + + .p2align 4,,7 +L8: + subl $4,ecx + jc L9 + movl (edi),edx + addl $4,edi + xorl eax,edx + leal -16843009(edx),ebx + notl edx + andl edx,ebx + testl $-2139062144,ebx + je L8 + + subl $4,edi + +L9: + popl ebx + xorl edx,edx + addl $4,ecx + je L20 + +/* Final byte-wise checks. */ + .p2align 4,,7 +L10: + cmpb (edi),al + je L15 + incl edi + decl ecx + jne L10 + + xorl edi,edi + +#endif /* !__OPTIMIZE_SIZE__ */ + +L15: decl edx andl edi,edx -L1: +L20: movl edx,eax leal -4(ebp),esp diff --git a/newlib/libc/string/memchr.c b/newlib/libc/string/memchr.c index 7c1a584b7..13ed88186 100644 --- a/newlib/libc/string/memchr.c +++ b/newlib/libc/string/memchr.c @@ -20,7 +20,7 @@ DESCRIPTION This function searches memory starting at <<*<[src]>>> for the character <[c]>. The search only ends with the first occurrence of <[c]>, or after <[length]> characters; in - particular, <<NULL>> does not terminate the search. + particular, <<NUL>> does not terminate the search. 
RETURNS If the character <[c]> is found within <[length]> characters @@ -64,6 +64,9 @@ QUICKREF #error long int is not a 32bit or 64bit byte #endif +/* DETECTCHAR returns nonzero if (long)X contains the byte used + to fill (long)MASK. */ +#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK)) _PTR _DEFUN (memchr, (src_void, c, length), @@ -71,73 +74,61 @@ _DEFUN (memchr, (src_void, c, length), int c _AND size_t length) { -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) _CONST unsigned char *src = (_CONST unsigned char *) src_void; + unsigned char d = c; - c &= 0xff; - - while (length--) +#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) + unsigned long *asrc; + unsigned long mask; + int i; + + while (UNALIGNED (src)) { - if (*src == c) - return (char *) src; + if (!length--) + return NULL; + if (*src == d) + return (void *) src; src++; } - return NULL; -#else - _CONST unsigned char *src = (_CONST unsigned char *) src_void; - unsigned long *asrc; - unsigned long buffer; - unsigned long mask; - int i, j; - c &= 0xff; - - /* If the size is small, or src is unaligned, then - use the bytewise loop. We can hope this is rare. */ - if (!TOO_SMALL (length) && !UNALIGNED (src)) + if (!TOO_SMALL (length)) { - /* The fast code reads the ASCII one word at a time and only + /* If we get this far, we know that length is large and src is + word-aligned. */ + /* The fast code reads the source one word at a time and only performs the bytewise search on word-sized segments if they - contain the search character, which is detected by XORing + contain the search character, which is detected by XORing the word-sized segment with a word-sized block of the search - character and then detecting for the presence of NULL in the + character and then detecting for the presence of NUL in the result. 
*/ - asrc = (unsigned long*) src; - mask = 0; - for (i = 0; i < LBLOCKSIZE; i++) - mask = (mask << 8) + c; + asrc = (unsigned long *) src; + mask = d << 8 | d; + mask = mask << 16 | mask; + for (i = 32; i < LBLOCKSIZE * 8; i <<= 1) + mask = (mask << i) | mask; while (length >= LBLOCKSIZE) { - buffer = *asrc; - buffer ^= mask; - if (DETECTNULL (buffer)) - { - src = (unsigned char*) asrc; - for ( j = 0; j < LBLOCKSIZE; j++ ) - { - if (*src == c) - return (char*) src; - src++; - } - } + if (DETECTCHAR (*asrc, mask)) + break; length -= LBLOCKSIZE; asrc++; } - + /* If there are fewer than LBLOCKSIZE characters left, then we resort to the bytewise loop. */ - src = (unsigned char*) asrc; + src = (unsigned char *) asrc; } +#endif /* not PREFER_SIZE_OVER_SPEED */ + while (length--) - { - if (*src == c) - return (char*) src; + { + if (*src == d) + return (void *) src; src++; - } + } return NULL; -#endif /* not PREFER_SIZE_OVER_SPEED */ } -- cgit v1.2.3