Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeff Johnston <jjohnstn@redhat.com> 2008-05-27 03:23:15 +0400
committer: Jeff Johnston <jjohnstn@redhat.com> 2008-05-27 03:23:15 +0400
commit: a6bd72a27873294887681d3bd102d848e5777e2c (patch)
tree: 4da6a66d14c0993b5445d9bf6c5df596b72c47ed /newlib/libc/machine
parent: cae28869c106eb342dd5a1c8242f933efab6f772 (diff)
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memset. * libc/string/memset.c (memset) [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned stores aren't penalized. * libc/machine/i386/memset.S (memset): [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned stores aren't penalized. Prefer 8-byte over 4-byte alignment. Reduce register pressure.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- newlib/libc/machine/i386/memset.S | 68
1 file changed, 54 insertions(+), 14 deletions(-)
diff --git a/newlib/libc/machine/i386/memset.S b/newlib/libc/machine/i386/memset.S
index ce40820ff..36637fc21 100644
--- a/newlib/libc/machine/i386/memset.S
+++ b/newlib/libc/machine/i386/memset.S
@@ -1,6 +1,6 @@
/*
* ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
@@ -18,43 +18,83 @@ SYM (memset):
pushl ebp
movl esp,ebp
pushl edi
- pushl ebx
movl 8(ebp),edi
movl 12(ebp),eax
movl 16(ebp),ecx
cld
#ifndef __OPTIMIZE_SIZE__
- andl $255,eax
- movl ecx,ebx
- testl $3,edi
- jne .L19
+/* Less than 16 bytes won't benefit from the 'rep stosl' loop. */
cmpl $16,ecx
jbe .L19
+ cbw
+ testl $7,edi
+ je .L10
- movl eax,edx
- sall $8,eax
- orl edx,eax
+/* It turns out that 8-byte aligned 'rep stosl' outperforms
+ 4-byte aligned on some x86 platforms. */
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+
+ movb al,(edi)
+ incl edi
+ decl ecx
+ testl $7,edi
+ je .L10
+
+ movb al,(edi)
+ incl edi
+ decl ecx
+
+/* At this point, ecx>8 and edi%8==0. */
+.L10:
+ movb al,ah
movl eax,edx
sall $16,edx
orl edx,eax
+ movl ecx,edx
shrl $2,ecx
- andl $3,ebx
+ andl $3,edx
rep
stosl
- movl ebx,ecx
+ movl edx,ecx
#endif /* not __OPTIMIZE_SIZE__ */
-
+
.L19:
rep
stosb
movl 8(ebp),eax
- leal -8(ebp),esp
- popl ebx
+ leal -4(ebp),esp
popl edi
leave
ret