diff options
Diffstat (limited to 'newlib/libc/machine/arm')
20 files changed, 853 insertions, 3045 deletions
diff --git a/newlib/libc/machine/arm/Makefile.am b/newlib/libc/machine/arm/Makefile.am index dc17690ac..c5e797e70 100644 --- a/newlib/libc/machine/arm/Makefile.am +++ b/newlib/libc/machine/arm/Makefile.am @@ -8,65 +8,17 @@ AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -if HAVE_THUMB1 -if OPT_SIZE -STRLEN_SRC=strlen.c -STRLEN_OBJ=$(lpfx)strlen.o -else -STRLEN_SRC= -STRLEN_OBJ= -endif -else -STRLEN_SRC=strlen.c -STRLEN_OBJ=$(lpfx)strlen.o -endif - -if HAVE_ARMV7 -MEMCHR_SRC=memchr.S -MEMCHR_OBJ=$(lpfx)memchr.o -else -MEMCHR_SRC= -MEMCHR_OBJ= -endif - -if OPT_SIZE -MEMCPY_SRC= -MEMCPY_OBJ= -else -if HAVE_ARMV7A -MEMCPY_SRC=memcpy.S -MEMCPY_OBJ=$(lpfx)memcpy.o -else -if HAVE_ARMV7M -MEMCPY_SRC=memcpy.S -MEMCPY_OBJ=$(lpfx)memcpy.o -else -MEMCPY_SRC= -MEMCPY_OBJ= -endif !HAVE_ARMV7M -endif !HAVE_ARMV7A -endif !OPT_SIZE - -lib_a_SOURCES = setjmp.S access.c strcmp.S strcpy.c \ - $(MEMCPY_SRC) $(MEMCHR_SRC) $(STRLEN_SRC) \ - strlen-armv7.S aeabi_memcpy.c aeabi_memcpy-armv7a.S - +lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.S strcpy.c \ + memcpy.S memcpy-stub.c memchr-stub.c memchr.S \ + strlen.c strlen-armv7.S lib_a_CCASFLAGS=$(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) -lib_a_LIBADD = $(STRLEN_OBJ) $(MEMCHR_OBJ) $(MEMCPY_OBJ) -lib_a_DEPENDENCIES = $(STRLEN_OBJ) $(MEMCHR_OBJ) $(MEMCPY_OBJ) ACLOCAL_AMFLAGS = -I ../../.. -I ../../../.. 
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host MEMCPY_DEP=memcpy-armv7a.S memcpy-armv7m.S -STRCMP_DEP=strcmp-arm-tiny.S strcmp-armv4.S strcmp-armv4t.S strcmp-armv6.S \ - strcmp-armv6m.S strcmp-armv7.S strcmp-armv7m.S $(lpfx)memcpy.o: $(MEMCPY_DEP) $(lpfx)memcpy.obj: $(MEMCPY_DEP) - -$(lpfx)strcmp.o: $(STRCMP_DEP) - -$(lpfx)strcmp.obj: $(STRCMP_DEP) diff --git a/newlib/libc/machine/arm/Makefile.in b/newlib/libc/machine/arm/Makefile.in index db65405ca..975103f6c 100644 --- a/newlib/libc/machine/arm/Makefile.in +++ b/newlib/libc/machine/arm/Makefile.in @@ -68,24 +68,13 @@ CONFIG_CLEAN_VPATH_FILES = LIBRARIES = $(noinst_LIBRARIES) ARFLAGS = cru lib_a_AR = $(AR) $(ARFLAGS) -@HAVE_THUMB1_FALSE@am__DEPENDENCIES_1 = $(lpfx)strlen.o -@HAVE_THUMB1_TRUE@@OPT_SIZE_TRUE@am__DEPENDENCIES_1 = $(lpfx)strlen.o -@HAVE_ARMV7_TRUE@am__DEPENDENCIES_2 = $(lpfx)memchr.o -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_TRUE@@OPT_SIZE_FALSE@am__DEPENDENCIES_3 = $(lpfx)memcpy.o -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@am__DEPENDENCIES_3 = \ -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@ $(lpfx)memcpy.o -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_TRUE@@OPT_SIZE_FALSE@am__objects_1 = lib_a-memcpy.$(OBJEXT) -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@am__objects_1 = \ -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@ lib_a-memcpy.$(OBJEXT) -@HAVE_ARMV7_TRUE@am__objects_2 = lib_a-memchr.$(OBJEXT) -@HAVE_THUMB1_FALSE@am__objects_3 = lib_a-strlen.$(OBJEXT) -@HAVE_THUMB1_TRUE@@OPT_SIZE_TRUE@am__objects_3 = \ -@HAVE_THUMB1_TRUE@@OPT_SIZE_TRUE@ lib_a-strlen.$(OBJEXT) +lib_a_LIBADD = am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-access.$(OBJEXT) \ - lib_a-strcmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) $(am__objects_1) \ - $(am__objects_2) $(am__objects_3) lib_a-strlen-armv7.$(OBJEXT) \ - lib_a-aeabi_memcpy.$(OBJEXT) \ - lib_a-aeabi_memcpy-armv7a.$(OBJEXT) + lib_a-strlen.$(OBJEXT) lib_a-strcmp.$(OBJEXT) \ + lib_a-strcpy.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \ + lib_a-memcpy-stub.$(OBJEXT) lib_a-memchr-stub.$(OBJEXT) \ + lib_a-memchr.$(OBJEXT) 
lib_a-strlen.$(OBJEXT) \ + lib_a-strlen-armv7.$(OBJEXT) lib_a_OBJECTS = $(am_lib_a_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = @@ -116,7 +105,6 @@ CC = @CC@ CCAS = @CCAS@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ @@ -212,38 +200,15 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -@HAVE_THUMB1_FALSE@STRLEN_SRC = strlen.c -@HAVE_THUMB1_TRUE@@OPT_SIZE_FALSE@STRLEN_SRC = -@HAVE_THUMB1_TRUE@@OPT_SIZE_TRUE@STRLEN_SRC = strlen.c -@HAVE_THUMB1_FALSE@STRLEN_OBJ = $(lpfx)strlen.o -@HAVE_THUMB1_TRUE@@OPT_SIZE_FALSE@STRLEN_OBJ = -@HAVE_THUMB1_TRUE@@OPT_SIZE_TRUE@STRLEN_OBJ = $(lpfx)strlen.o -@HAVE_ARMV7_FALSE@MEMCHR_SRC = -@HAVE_ARMV7_TRUE@MEMCHR_SRC = memchr.S -@HAVE_ARMV7_FALSE@MEMCHR_OBJ = -@HAVE_ARMV7_TRUE@MEMCHR_OBJ = $(lpfx)memchr.o -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_FALSE@@OPT_SIZE_FALSE@MEMCPY_SRC = -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_TRUE@@OPT_SIZE_FALSE@MEMCPY_SRC = memcpy.S -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@MEMCPY_SRC = memcpy.S -@OPT_SIZE_TRUE@MEMCPY_SRC = -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_FALSE@@OPT_SIZE_FALSE@MEMCPY_OBJ = -@HAVE_ARMV7A_FALSE@@HAVE_ARMV7M_TRUE@@OPT_SIZE_FALSE@MEMCPY_OBJ = $(lpfx)memcpy.o -@HAVE_ARMV7A_TRUE@@OPT_SIZE_FALSE@MEMCPY_OBJ = $(lpfx)memcpy.o -@OPT_SIZE_TRUE@MEMCPY_OBJ = -lib_a_SOURCES = setjmp.S access.c strcmp.S strcpy.c \ - $(MEMCPY_SRC) $(MEMCHR_SRC) $(STRLEN_SRC) \ - strlen-armv7.S aeabi_memcpy.c aeabi_memcpy-armv7a.S +lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.S strcpy.c \ + memcpy.S memcpy-stub.c memchr-stub.c memchr.S \ + strlen.c strlen-armv7.S lib_a_CCASFLAGS = $(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) -lib_a_LIBADD = $(STRLEN_OBJ) $(MEMCHR_OBJ) $(MEMCPY_OBJ) -lib_a_DEPENDENCIES = $(STRLEN_OBJ) $(MEMCHR_OBJ) $(MEMCPY_OBJ) ACLOCAL_AMFLAGS = -I ../../.. -I ../../../.. 
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host MEMCPY_DEP = memcpy-armv7a.S memcpy-armv7m.S -STRCMP_DEP = strcmp-arm-tiny.S strcmp-armv4.S strcmp-armv4t.S strcmp-armv6.S \ - strcmp-armv6m.S strcmp-armv7.S strcmp-armv7m.S - all: all-am .SUFFIXES: @@ -332,12 +297,6 @@ lib_a-strlen-armv7.o: strlen-armv7.S lib_a-strlen-armv7.obj: strlen-armv7.S $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strlen-armv7.obj `if test -f 'strlen-armv7.S'; then $(CYGPATH_W) 'strlen-armv7.S'; else $(CYGPATH_W) '$(srcdir)/strlen-armv7.S'; fi` -lib_a-aeabi_memcpy-armv7a.o: aeabi_memcpy-armv7a.S - $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-aeabi_memcpy-armv7a.o `test -f 'aeabi_memcpy-armv7a.S' || echo '$(srcdir)/'`aeabi_memcpy-armv7a.S - -lib_a-aeabi_memcpy-armv7a.obj: aeabi_memcpy-armv7a.S - $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-aeabi_memcpy-armv7a.obj `if test -f 'aeabi_memcpy-armv7a.S'; then $(CYGPATH_W) 'aeabi_memcpy-armv7a.S'; else $(CYGPATH_W) '$(srcdir)/aeabi_memcpy-armv7a.S'; fi` - .c.o: $(COMPILE) -c $< @@ -350,23 +309,29 @@ lib_a-access.o: access.c lib_a-access.obj: access.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +lib_a-strlen.o: strlen.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.c' || echo '$(srcdir)/'`strlen.c + +lib_a-strlen.obj: strlen.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.obj `if test -f 'strlen.c'; then $(CYGPATH_W) 'strlen.c'; else $(CYGPATH_W) '$(srcdir)/strlen.c'; fi` + 
lib_a-strcpy.o: strcpy.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.o `test -f 'strcpy.c' || echo '$(srcdir)/'`strcpy.c lib_a-strcpy.obj: strcpy.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.obj `if test -f 'strcpy.c'; then $(CYGPATH_W) 'strcpy.c'; else $(CYGPATH_W) '$(srcdir)/strcpy.c'; fi` -lib_a-strlen.o: strlen.c - $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.c' || echo '$(srcdir)/'`strlen.c +lib_a-memcpy-stub.o: memcpy-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy-stub.o `test -f 'memcpy-stub.c' || echo '$(srcdir)/'`memcpy-stub.c -lib_a-strlen.obj: strlen.c - $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.obj `if test -f 'strlen.c'; then $(CYGPATH_W) 'strlen.c'; else $(CYGPATH_W) '$(srcdir)/strlen.c'; fi` +lib_a-memcpy-stub.obj: memcpy-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy-stub.obj `if test -f 'memcpy-stub.c'; then $(CYGPATH_W) 'memcpy-stub.c'; else $(CYGPATH_W) '$(srcdir)/memcpy-stub.c'; fi` -lib_a-aeabi_memcpy.o: aeabi_memcpy.c - $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-aeabi_memcpy.o `test -f 'aeabi_memcpy.c' || echo '$(srcdir)/'`aeabi_memcpy.c +lib_a-memchr-stub.o: memchr-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memchr-stub.o `test -f 'memchr-stub.c' || echo '$(srcdir)/'`memchr-stub.c -lib_a-aeabi_memcpy.obj: aeabi_memcpy.c - $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o 
lib_a-aeabi_memcpy.obj `if test -f 'aeabi_memcpy.c'; then $(CYGPATH_W) 'aeabi_memcpy.c'; else $(CYGPATH_W) '$(srcdir)/aeabi_memcpy.c'; fi` +lib_a-memchr-stub.obj: memchr-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memchr-stub.obj `if test -f 'memchr-stub.c'; then $(CYGPATH_W) 'memchr-stub.c'; else $(CYGPATH_W) '$(srcdir)/memchr-stub.c'; fi` ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ @@ -543,10 +508,6 @@ $(lpfx)memcpy.o: $(MEMCPY_DEP) $(lpfx)memcpy.obj: $(MEMCPY_DEP) -$(lpfx)strcmp.o: $(STRCMP_DEP) - -$(lpfx)strcmp.obj: $(STRCMP_DEP) - # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/newlib/libc/machine/arm/acle-compat.h b/newlib/libc/machine/arm/acle-compat.h deleted file mode 100644 index 888ae2ea8..000000000 --- a/newlib/libc/machine/arm/acle-compat.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARM_ARCH - -/* ACLE standardises a set of pre-defines that describe the ARM architecture. - These were mostly implemented in GCC around GCC-4.8; older versions - have no, or only partial support. To provide a level of backwards - compatibility we try to work out what the definitions should be, given - the older pre-defines that GCC did produce. This isn't complete, but - it should be enough for use by routines that depend on this header. */ - -/* No need to handle ARMv8, GCC had ACLE support before that. */ - -# ifdef __ARM_ARCH_7__ -/* The common subset of ARMv7 in all profiles. 
*/ -# define __ARM_ARCH 7 -# define __ARM_ARCH_ISA_THUMB 2 -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_LDREX 7 -# define __ARM_FEATURE_UNALIGNED -# endif - -# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) -# define __ARM_ARCH 7 -# define __ARM_ARCH_ISA_THUMB 2 -# define __ARM_ARCH_ISA_ARM -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_SIMD32 -# define __ARM_FEATURE_DSP -# define __ARM_FEATURE_QBIT -# define __ARM_FEATURE_SAT -# define __ARM_FEATURE_LDREX 15 -# define __ARM_FEATURE_UNALIGNED -# ifdef __ARM_ARCH_7A__ -# define __ARM_ARCH_PROFILE 'A' -# else -# define __ARM_ARCH_PROFILE 'R' -# endif -# endif - -# ifdef __ARM_ARCH_7EM__ -# define __ARM_ARCH 7 -# define __ARM_ARCH_ISA_THUMB 2 -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_SIMD32 -# define __ARM_FEATURE_DSP -# define __ARM_FEATURE_QBIT -# define __ARM_FEATURE_SAT -# define __ARM_FEATURE_LDREX 7 -# define __ARM_FEATURE_UNALIGNED -# define __ARM_ARCH_PROFILE 'M' -# endif - -# ifdef __ARM_ARCH_7M__ -# define __ARM_ARCH 7 -# define __ARM_ARCH_ISA_THUMB 2 -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_QBIT -# define __ARM_FEATURE_SAT -# define __ARM_FEATURE_LDREX 7 -# define __ARM_FEATURE_UNALIGNED -# define __ARM_ARCH_PROFILE 'M' -# endif - -# ifdef __ARM_ARCH_6T2__ -# define __ARM_ARCH 6 -# define __ARM_ARCH_ISA_THUMB 2 -# define __ARM_ARCH_ISA_ARM -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_SIMD32 -# define __ARM_FEATURE_DSP -# define __ARM_FEATURE_QBIT -# define __ARM_FEATURE_SAT -# define __ARM_FEATURE_LDREX 4 -# define __ARM_FEATURE_UNALIGNED -# endif - -# ifdef __ARM_ARCH_6M__ -# define __ARM_ARCH 6 -# define __ARM_ARCH_ISA_THUMB 1 -# define __ARM_ARCH_PROFILE 'M' -# endif - -# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \ - || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \ - || defined (__ARM_ARCH_6ZK__) -# define __ARM_ARCH 6 -# define __ARM_ARCH_ISA_THUMB 1 -# define __ARM_ARCH_ISA_ARM -# define __ARM_FEATURE_CLZ -# define 
__ARM_FEATURE_SIMD32 -# define __ARM_FEATURE_DSP -# define __ARM_FEATURE_QBIT -# define __ARM_FEATURE_SAT -# define __ARM_FEATURE_UNALIGNED -# ifndef __thumb__ -# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) -# define __ARM_FEATURE_LDREX 15 -# else -# define __ARM_FEATURE_LDREX 4 -# endif -# endif -# endif - -# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__) -# define __ARM_ARCH 5 -# define __ARM_ARCH_ISA_ARM -# ifdef __ARM_ARCH_5TE__ -# define __ARM_ARCH_ISA_THUMB 1 -# endif -# define __ARM_FEATURE_CLZ -# define __ARM_FEATURE_DSP -# endif - -# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__) -# define __ARM_ARCH 5 -# define __ARM_ARCH_ISA_ARM -# ifdef __ARM_ARCH_5TE__ -# define __ARM_ARCH_ISA_THUMB 1 -# endif -# define __ARM_FEATURE_CLZ -# endif - -# ifdef __ARM_ARCH_4T__ -# define __ARM_ARCH 4 -# define __ARM_ARCH_ISA_ARM -# define __ARM_ARCH_ISA_THUMB 1 -# endif - -# ifdef __ARM_ARCH_4__ -# define __ARM_ARCH 4 -# define __ARM_ARCH_ISA_ARM -# endif - -# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__) -# define __ARM_ARCH 3 -# define __ARM_ARCH_ISA_ARM -# endif - -# ifdef __ARM_ARCH_2__ -# define __ARM_ARCH 2 -# define __ARM_ARCH_ISA_ARM -# endif - -# ifdef __ARMEB__ -# define __ARM_BIG_ENDIAN -# endif - -/* If we still don't know what the target architecture is, then we're - probably not using GCC. */ -# ifndef __ARM_ARCH -# error Unable to determine architecture version. -# endif - -#endif /* __ARM_ARCH */ - diff --git a/newlib/libc/machine/arm/aeabi_memcpy-armv7a.S b/newlib/libc/machine/arm/aeabi_memcpy-armv7a.S deleted file mode 100644 index 53e3330ff..000000000 --- a/newlib/libc/machine/arm/aeabi_memcpy-armv7a.S +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arm_asm.h" - -/* NOTE: This ifdef MUST match the one in aeabi_memcpy.c. */ -#if defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) && \ - (defined (__ARM_NEON__) || !defined (__SOFTFP__)) - - .syntax unified - .global __aeabi_memcpy - .type __aeabi_memcpy, %function -__aeabi_memcpy: - /* Assumes that n >= 0, and dst, src are valid pointers. - If there is at least 8 bytes to copy, use LDRD/STRD. - If src and dst are misaligned with different offsets, - first copy byte by byte until dst is aligned, - and then copy using LDRD/STRD and shift if needed. - When less than 8 left, copy a word and then byte by byte. */ - - /* Save registers (r0 holds the return value): - optimized push {r0, r4, r5, lr}. 
- To try and improve performance, stack layout changed, - i.e., not keeping the stack looking like users expect - (highest numbered register at highest address). */ - push {r0, lr} - strd r4, r5, [sp, #-8]! - - /* Get copying of tiny blocks out of the way first. */ - /* Is there at least 4 bytes to copy? */ - subs r2, r2, #4 - blt copy_less_than_4 /* If n < 4. */ - - /* Check word alignment. */ - ands ip, r0, #3 /* ip = last 2 bits of dst. */ - bne dst_not_word_aligned /* If dst is not word-aligned. */ - - /* Get here if dst is word-aligned. */ - ands ip, r1, #3 /* ip = last 2 bits of src. */ - bne src_not_word_aligned /* If src is not word-aligned. */ -word_aligned: - /* Get here if source and dst both are word-aligned. - The number of bytes remaining to copy is r2+4. */ - - /* Is there is at least 64 bytes to copy? */ - subs r2, r2, #60 - blt copy_less_than_64 /* If r2 + 4 < 64. */ - - /* First, align the destination buffer to 8-bytes, - to make sure double loads and stores don't cross cache line boundary, - as they are then more expensive even if the data is in the cache - (require two load/store issue cycles instead of one). - If only one of the buffers is not 8-bytes aligned, - then it's more important to align dst than src, - because there is more penalty for stores - than loads that cross cacheline boundary. - This check and realignment are only worth doing - if there is a lot to copy. */ - - /* Get here if dst is word aligned, - i.e., the 2 least significant bits are 0. - If dst is not 2w aligned (i.e., the 3rd bit is not set in dst), - then copy 1 word (4 bytes). */ - ands r3, r0, #4 - beq two_word_aligned /* If dst already two-word aligned. */ - ldr r3, [r1], #4 - str r3, [r0], #4 - subs r2, r2, #4 - blt copy_less_than_64 - -two_word_aligned: - /* TODO: Align to cacheline (useful for PLD optimization). */ - - /* Every loop iteration copies 64 bytes. 
*/ -1: - .irp offset, #0, #8, #16, #24, #32, #40, #48, #56 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 1b /* If there is more to copy. */ - -copy_less_than_64: - - /* Get here if less than 64 bytes to copy, -64 <= r2 < 0. - Restore the count if there is more than 7 bytes to copy. */ - adds r2, r2, #56 - blt copy_less_than_8 - - /* Copy 8 bytes at a time. */ -2: - ldrd r4, r5, [r1], #8 - strd r4, r5, [r0], #8 - subs r2, r2, #8 - bge 2b /* If there is more to copy. */ - -copy_less_than_8: - - /* Get here if less than 8 bytes to copy, -8 <= r2 < 0. - Check if there is more to copy. */ - cmn r2, #8 - beq return /* If r2 + 8 == 0. */ - - /* Restore the count if there is more than 3 bytes to copy. */ - adds r2, r2, #4 - blt copy_less_than_4 - - /* Copy 4 bytes. */ - ldr r3, [r1], #4 - str r3, [r0], #4 - -copy_less_than_4: - /* Get here if less than 4 bytes to copy, -4 <= r2 < 0. */ - - /* Restore the count, check if there is more to copy. */ - adds r2, r2, #4 - beq return /* If r2 == 0. */ - - /* Get here with r2 is in {1,2,3}={01,10,11}. */ - /* Logical shift left r2, insert 0s, update flags. */ - lsls r2, r2, #31 - - /* Copy byte by byte. - Condition ne means the last bit of r2 is 0. - Condition cs means the second to last bit of r2 is set, - i.e., r2 is 1 or 3. */ - itt ne - ldrbne r3, [r1], #1 - strbne r3, [r0], #1 - - itttt cs - ldrbcs r4, [r1], #1 - ldrbcs r5, [r1] - strbcs r4, [r0], #1 - strbcs r5, [r0] - -return: - /* Restore registers: optimized pop {r0, r4, r5, pc} */ - ldrd r4, r5, [sp], #8 - pop {r0, pc} /* This is the only return point of memcpy. */ - -dst_not_word_aligned: - - /* Get here when dst is not aligned and ip has the last 2 bits of dst, - i.e., ip is the offset of dst from word. - The number of bytes that remains to copy is r2 + 4, - i.e., there are at least 4 bytes to copy. - Write a partial word (0 to 3 bytes), such that dst becomes - word-aligned. 
*/ - - /* If dst is at ip bytes offset from a word (with 0 < ip < 4), - then there are (4 - ip) bytes to fill up to align dst to the next - word. */ - rsb ip, ip, #4 /* ip = #4 - ip. */ - cmp ip, #2 - - /* Copy byte by byte with conditionals. */ - itt gt - ldrbgt r3, [r1], #1 - strbgt r3, [r0], #1 - - itt ge - ldrbge r4, [r1], #1 - strbge r4, [r0], #1 - - ldrb lr, [r1], #1 - strb lr, [r0], #1 - - /* Update the count. - ip holds the number of bytes we have just copied. */ - subs r2, r2, ip /* r2 = r2 - ip. */ - blt copy_less_than_4 /* If r2 < ip. */ - - /* Get here if there are more than 4 bytes to copy. - Check if src is aligned. If beforehand src and dst were not word - aligned but congruent (same offset), then now they are both - word-aligned, and we can copy the rest efficiently (without - shifting). */ - ands ip, r1, #3 /* ip = last 2 bits of src. */ - beq word_aligned /* If r1 is word-aligned. */ - -src_not_word_aligned: - /* Get here when src is not word-aligned, but dst is word-aligned. - The number of bytes that remains to copy is r2+4. */ - - /* Copy word by word using LDR when alignment can be done in hardware, - i.e., SCTLR.A is set, supporting unaligned access in LDR and STR. */ - subs r2, r2, #60 - blt 8f - -7: - /* Copy 64 bytes in every loop iteration. */ - .irp offset, #0, #4, #8, #12, #16, #20, #24, #28, #32, #36, #40, #44, #48, #52, #56, #60 - ldr r3, [r1, \offset] - str r3, [r0, \offset] - .endr - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 7b - -8: - /* Get here if less than 64 bytes to copy, -64 <= r2 < 0. - Check if there is more than 3 bytes to copy. */ - adds r2, r2, #60 - blt copy_less_than_4 - -9: - /* Get here if there is less than 64 but at least 4 bytes to copy, - where the number of bytes to copy is r2+4. 
*/ - ldr r3, [r1], #4 - str r3, [r0], #4 - subs r2, r2, #4 - bge 9b - - b copy_less_than_4 - - - .syntax unified - .global __aeabi_memcpy4 - .type __aeabi_memcpy4, %function -__aeabi_memcpy4: - /* Assumes that both of its arguments are 4-byte aligned. */ - - push {r0, lr} - strd r4, r5, [sp, #-8]! - - /* Is there at least 4 bytes to copy? */ - subs r2, r2, #4 - blt copy_less_than_4 /* If n < 4. */ - - bl word_aligned - - .syntax unified - .global __aeabi_memcpy8 - .type __aeabi_memcpy8, %function -__aeabi_memcpy8: - /* Assumes that both of its arguments are 8-byte aligned. */ - - push {r0, lr} - strd r4, r5, [sp, #-8]! - - /* Is there at least 4 bytes to copy? */ - subs r2, r2, #4 - blt copy_less_than_4 /* If n < 4. */ - - /* Is there at least 8 bytes to copy? */ - subs r2, r2, #4 - blt copy_less_than_8 /* If n < 8. */ - - /* Is there at least 64 bytes to copy? */ - subs r2, r2, #56 - blt copy_less_than_64 /* if n + 8 < 64. */ - - bl two_word_aligned - -#endif diff --git a/newlib/libc/machine/arm/aeabi_memcpy.c b/newlib/libc/machine/arm/aeabi_memcpy.c deleted file mode 100644 index 9837c35fe..000000000 --- a/newlib/libc/machine/arm/aeabi_memcpy.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stddef.h> -#include <_ansi.h> - -/* According to the Run-time ABI for the ARM Architecture. This - function is allowed to corrupt only the integer core register - permitted to be corrupted by the [AAPCS] (r0-r3, ip, lr, and - CPSR). - - The FP registers are used in memcpy for target __ARM_ARCH_7A. - Therefore, we can't just simply use alias to support the function - aeabi_memcpy for target __ARM_ARCH_7A. Instead, we choose the - previous versions of memcpy to suppport it as an alternative. */ - -/* NOTE: This ifdef MUST match the one in aeabi_memcpy-armv7a.S. */ -#if defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) && \ - (defined (__ARM_NEON__) || !defined (__SOFTFP__)) - -/* Defined in aeabi_memcpy-armv7a.S. */ - -#else -/* Support the alias for the __aeabi_memcpy which may - assume memory alignment. */ -void __aeabi_memcpy4 (void *dest, const void *source, size_t n) - _ATTRIBUTE ((alias ("__aeabi_memcpy"))); - -void __aeabi_memcpy8 (void *dest, const void *source, size_t n) - _ATTRIBUTE ((alias ("__aeabi_memcpy"))); - -/* Support the routine __aeabi_memcpy. Can't alias to memcpy - because it's not defined in the same translation unit. 
*/ -void __aeabi_memcpy (void *dest, const void *source, size_t n) -{ - extern void memcpy (void *dest, const void *source, size_t n); - memcpy (dest, source, n); -} -#endif diff --git a/newlib/libc/machine/arm/arm_asm.h b/newlib/libc/machine/arm/arm_asm.h index 1bb5edb23..5a63a8d9e 100644 --- a/newlib/libc/machine/arm/arm_asm.h +++ b/newlib/libc/machine/arm/arm_asm.h @@ -29,35 +29,35 @@ #ifndef ARM_ASM__H #define ARM_ASM__H -#include "acle-compat.h" - -#if __ARM_ARCH >= 7 && defined (__ARM_ARCH_ISA_ARM) -# define _ISA_ARM_7 -#endif - -#if __ARM_ARCH >= 6 && defined (__ARM_ARCH_ISA_ARM) -# define _ISA_ARM_6 +/* First define some macros that keep everything else sane. */ +#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) +#define _ISA_ARM_7 #endif -#if __ARM_ARCH >= 5 -# define _ISA_ARM_5 +#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6__) || \ + defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6T2__) || \ + defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) || \ + defined (__ARM_ARCH_6Z__) +#define _ISA_ARM_6 #endif -#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB >= 1 -# define _ISA_ARM_4T +#if defined (_ISA_ARM_6) || defined (__ARM_ARCH_5__) || \ + defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) || \ + defined (__ARM_ARCH_5TEJ__) +#define _ISA_ARM_5 #endif -#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB == 0 -# define _ISA_ARM_4 +#if defined (_ISA_ARM_5) || defined (__ARM_ARCH_4T__) +#define _ISA_ARM_4T #endif - -#if __ARM_ARCH_ISA_THUMB >= 2 -# define _ISA_THUMB_2 +#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7__) || \ + defined (__ARM_ARCH_7EM__) +#define _ISA_THUMB_2 #endif -#if __ARM_ARCH_ISA_THUMB >= 1 -# define _ISA_THUMB_1 +#if defined (_ISA_THUMB_2) || defined (__ARM_ARCH_6M__) +#define _ISA_THUMB_1 #endif diff --git a/newlib/libc/machine/arm/configure b/newlib/libc/machine/arm/configure index 48551009c..bf0c669dc 100755 --- a/newlib/libc/machine/arm/configure +++ b/newlib/libc/machine/arm/configure @@ -1,6 +1,6 @@ #! 
/bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.68 for newlib 2.2.0. +# Generated by GNU Autoconf 2.68 for newlib 2.1.0. # # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -556,25 +556,14 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='newlib' PACKAGE_TARNAME='newlib' -PACKAGE_VERSION='2.2.0' -PACKAGE_STRING='newlib 2.2.0' +PACKAGE_VERSION='2.1.0' +PACKAGE_STRING='newlib 2.1.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' ac_unique_file="Makefile.am" ac_subst_vars='LTLIBOBJS LIBOBJS -CFLAGS -HAVE_ARMV7M_FALSE -HAVE_ARMV7M_TRUE -HAVE_ARMV7A_FALSE -HAVE_ARMV7A_TRUE -HAVE_ARMV7_FALSE -HAVE_ARMV7_TRUE -OPT_SIZE_FALSE -OPT_SIZE_TRUE -HAVE_THUMB1_FALSE -HAVE_THUMB1_TRUE sys_dir machine_dir libm_machine_dir @@ -1250,7 +1239,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures newlib 2.2.0 to adapt to many kinds of systems. +\`configure' configures newlib 2.1.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1320,7 +1309,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of newlib 2.2.0:";; + short | recursive ) echo "Configuration of newlib 2.1.0:";; esac cat <<\_ACEOF @@ -1411,7 +1400,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -newlib configure 2.2.0 +newlib configure 2.1.0 generated by GNU Autoconf 2.68 Copyright (C) 2010 Free Software Foundation, Inc. @@ -1466,7 +1455,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by newlib $as_me 2.2.0, which was +It was created by newlib $as_me 2.1.0, which was generated by GNU Autoconf 2.68. 
Invocation command line was $ $0 $@ @@ -2538,7 +2527,7 @@ fi # Define the identity of the package. PACKAGE='newlib' - VERSION='2.2.0' + VERSION='2.1.0' # Some tools Automake needs. @@ -3421,218 +3410,6 @@ OBJEXT=${oext} -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using thumb1" >&5 -$as_echo_n "checking whether we are using thumb1... " >&6; } -if ${acnewlib_cv_thumb1_processor+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.c <<EOF - -#if defined (__thumb__) && !defined (__thumb2__) - #define _THUMB1 - #else - #error "not thumb1" -#endif -int main () { - return 0; -} -EOF -if { ac_try='${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } -then - acnewlib_cv_thumb1_processor=yes; -else - acnewlib_cv_thumb1_processor=no; -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acnewlib_cv_thumb1_processor" >&5 -$as_echo "$acnewlib_cv_thumb1_processor" >&6; } - - if test x"$acnewlib_cv_thumb1_processor" = x"yes"; then - HAVE_THUMB1_TRUE= - HAVE_THUMB1_FALSE='#' -else - HAVE_THUMB1_TRUE='#' - HAVE_THUMB1_FALSE= -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the size is preferred" >&5 -$as_echo_n "checking whether the size is preferred... " >&6; } -if ${acnewlib_cv_opt_size+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.c <<EOF - -#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) - #define OPT_SIZE - #else - #error "not need for size optimization." -#endif -int main () { - return 0; -} -EOF -if { ac_try='${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; } -then - acnewlib_cv_opt_size=yes; -else - acnewlib_cv_opt_size=no; -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acnewlib_cv_opt_size" >&5 -$as_echo "$acnewlib_cv_opt_size" >&6; } - - if test x"$acnewlib_cv_opt_size" = x"yes"; then - OPT_SIZE_TRUE= - OPT_SIZE_FALSE='#' -else - OPT_SIZE_TRUE='#' - OPT_SIZE_FALSE= -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether armv7 processor is supported" >&5 -$as_echo_n "checking whether armv7 processor is supported... " >&6; } -if ${acnewlib_cv_armv7_processor+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.c <<EOF - -#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6T2__) - #define HAVE_ARMV7 - #else - #error "ARMV7 is not supported." -#endif -int main () { - return 0; -} -EOF -if { ac_try='${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } -then - acnewlib_cv_armv7_processor=yes; -else - acnewlib_cv_armv7_processor=no; -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acnewlib_cv_armv7_processor" >&5 -$as_echo "$acnewlib_cv_armv7_processor" >&6; } - - if test x"$acnewlib_cv_armv7_processor" = x"yes"; then - HAVE_ARMV7_TRUE= - HAVE_ARMV7_FALSE='#' -else - HAVE_ARMV7_TRUE='#' - HAVE_ARMV7_FALSE= -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether armv7a processor is supported" >&5 -$as_echo_n "checking whether armv7a processor is supported... " >&6; } -if ${acnewlib_cv_armv7a_processor+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.c <<EOF - -#if defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) - #define HAVE_ARMV7A - #else - #error "ARMV7A is not supported." 
-#endif -int main () { - return 0; -} -EOF -if { ac_try='${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } -then - acnewlib_cv_armv7a_processor=yes; -else - acnewlib_cv_armv7a_processor=no; -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acnewlib_cv_armv7a_processor" >&5 -$as_echo "$acnewlib_cv_armv7a_processor" >&6; } - - if test x"$acnewlib_cv_armv7a_processor" = x"yes"; then - HAVE_ARMV7A_TRUE= - HAVE_ARMV7A_FALSE='#' -else - HAVE_ARMV7A_TRUE='#' - HAVE_ARMV7A_FALSE= -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether armv7m processor is supported" >&5 -$as_echo_n "checking whether armv7m processor is supported... " >&6; } -if ${acnewlib_cv_armv7m_processor+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.c <<EOF - -#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__) - #define HAVE_ARMV7M - #else - #error "ARMV7M is not supported." -#endif -int main () { - return 0; -} -EOF -if { ac_try='${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; } -then - acnewlib_cv_armv7m_processor=yes; -else - acnewlib_cv_armv7m_processor=no; -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acnewlib_cv_armv7m_processor" >&5 -$as_echo "$acnewlib_cv_armv7m_processor" >&6; } - - if test x"$acnewlib_cv_armv7m_processor" = x"yes"; then - HAVE_ARMV7M_TRUE= - HAVE_ARMV7M_FALSE='#' -else - HAVE_ARMV7M_TRUE='#' - HAVE_ARMV7M_FALSE= -fi - - - - ac_config_files="$ac_config_files Makefile" cat >confcache <<\_ACEOF @@ -3821,26 +3598,6 @@ if test -z "${USE_LIBTOOL_TRUE}" && test -z "${USE_LIBTOOL_FALSE}"; then as_fn_error $? "conditional \"USE_LIBTOOL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -if test -z "${HAVE_THUMB1_TRUE}" && test -z "${HAVE_THUMB1_FALSE}"; then - as_fn_error $? "conditional \"HAVE_THUMB1\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi -if test -z "${OPT_SIZE_TRUE}" && test -z "${OPT_SIZE_FALSE}"; then - as_fn_error $? "conditional \"OPT_SIZE\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi -if test -z "${HAVE_ARMV7_TRUE}" && test -z "${HAVE_ARMV7_FALSE}"; then - as_fn_error $? "conditional \"HAVE_ARMV7\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi -if test -z "${HAVE_ARMV7A_TRUE}" && test -z "${HAVE_ARMV7A_FALSE}"; then - as_fn_error $? "conditional \"HAVE_ARMV7A\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi -if test -z "${HAVE_ARMV7M_TRUE}" && test -z "${HAVE_ARMV7M_FALSE}"; then - as_fn_error $? "conditional \"HAVE_ARMV7M\" was never defined. -Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 -fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 @@ -4250,7 +4007,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by newlib $as_me 2.2.0, which was +This file was extended by newlib $as_me 2.1.0, which was generated by GNU Autoconf 2.68. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -4307,7 +4064,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -newlib config.status 2.2.0 +newlib config.status 2.1.0 configured by $0, generated by GNU Autoconf 2.68, with options \\"\$ac_cs_config\\" diff --git a/newlib/libc/machine/arm/configure.in b/newlib/libc/machine/arm/configure.in index edf922238..6236338f9 100644 --- a/newlib/libc/machine/arm/configure.in +++ b/newlib/libc/machine/arm/configure.in @@ -10,133 +10,5 @@ AC_CONFIG_AUX_DIR(../../../..) NEWLIB_CONFIGURE(../../..) -dnl Check for Thumb1 supported. -AC_CACHE_CHECK(whether we are using thumb1, - acnewlib_cv_thumb1_processor, [dnl -cat > conftest.c <<EOF - -#if defined (__thumb__) && !defined (__thumb2__) - #define _THUMB1 - #else - #error "not thumb1" -#endif -int main () { - return 0; -} -EOF -if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&AS_MESSAGE_LOG_FD]) -then - acnewlib_cv_thumb1_processor=yes; -else - acnewlib_cv_thumb1_processor=no; -fi -rm -f conftest*]) - -AM_CONDITIONAL(HAVE_THUMB1, test x"$acnewlib_cv_thumb1_processor" = x"yes") - -dnl Check for whether the size is preferred. -AC_CACHE_CHECK(whether the size is preferred, - acnewlib_cv_opt_size, [dnl -cat > conftest.c <<EOF - -#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) - #define OPT_SIZE - #else - #error "not need for size optimization." 
-#endif -int main () { - return 0; -} -EOF -if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&AS_MESSAGE_LOG_FD]) -then - acnewlib_cv_opt_size=yes; -else - acnewlib_cv_opt_size=no; -fi -rm -f conftest*]) - -AM_CONDITIONAL(OPT_SIZE, test x"$acnewlib_cv_opt_size" = x"yes") - -dnl Check for whether ARM_7 or ARM_ARCH_6T2 is defined. -dnl This macro is used to support memchr() for old CPU. -AC_CACHE_CHECK(whether armv7 processor is supported, - acnewlib_cv_armv7_processor, [dnl -cat > conftest.c <<EOF - -#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6T2__) - #define HAVE_ARMV7 - #else - #error "ARMV7 is not supported." -#endif -int main () { - return 0; -} -EOF -if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&AS_MESSAGE_LOG_FD]) -then - acnewlib_cv_armv7_processor=yes; -else - acnewlib_cv_armv7_processor=no; -fi -rm -f conftest*]) - -AM_CONDITIONAL(HAVE_ARMV7, test x"$acnewlib_cv_armv7_processor" = x"yes") - -dnl Check for whether ARM_ARCH_7A is defined. -AC_CACHE_CHECK(whether armv7a processor is supported, - acnewlib_cv_armv7a_processor, [dnl -cat > conftest.c <<EOF - -#if defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) - #define HAVE_ARMV7A - #else - #error "ARMV7A is not supported." -#endif -int main () { - return 0; -} -EOF -if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&AS_MESSAGE_LOG_FD]) -then - acnewlib_cv_armv7a_processor=yes; -else - acnewlib_cv_armv7a_processor=no; -fi -rm -f conftest*]) - -AM_CONDITIONAL(HAVE_ARMV7A, test x"$acnewlib_cv_armv7a_processor" = x"yes") - -dnl Check for whether ARM_ARCH_7M is defined. -AC_CACHE_CHECK(whether armv7m processor is supported, - acnewlib_cv_armv7m_processor, [dnl -cat > conftest.c <<EOF - -#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__) - #define HAVE_ARMV7M - #else - #error "ARMV7M is not supported." 
-#endif -int main () { - return 0; -} -EOF -if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -c -o conftest.o conftest.c - 1>&AS_MESSAGE_LOG_FD]) -then - acnewlib_cv_armv7m_processor=yes; -else - acnewlib_cv_armv7m_processor=no; -fi -rm -f conftest*]) - -AM_CONDITIONAL(HAVE_ARMV7M, test x"$acnewlib_cv_armv7m_processor" = x"yes") - -AC_SUBST(CFLAGS) - AC_CONFIG_FILES([Makefile]) AC_OUTPUT diff --git a/newlib/libc/machine/arm/memchr-stub.c b/newlib/libc/machine/arm/memchr-stub.c new file mode 100644 index 000000000..04d9d5fd9 --- /dev/null +++ b/newlib/libc/machine/arm/memchr-stub.c @@ -0,0 +1,42 @@ +/* Copyright (c) 2010-2011, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "arm_asm.h" + + +/* Note: This ifdef MUST match the one in memchr.S */ +#if defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__) + /* Do nothing - the memchr.S will get used */ +#else + /* For an older CPU we just fall back to the .c code */ +#include "../../string/memchr.c" +#endif + diff --git a/newlib/libc/machine/arm/memchr.S b/newlib/libc/machine/arm/memchr.S index a17dfa314..56bfef09a 100644 --- a/newlib/libc/machine/arm/memchr.S +++ b/newlib/libc/machine/arm/memchr.S @@ -45,10 +45,8 @@ @ Make conditional on CPU type @ tidy -@ This code requires armv6t2 or later. Uses Thumb2. - .syntax unified - .arch armv6t2 + .arch armv7-a #include "arm_asm.h" @@ -164,19 +162,5 @@ memchr: pop {r4,r5,r6,r7} subs r0,r0,#1 bx lr -#else - /* For an older CPU we just fall back to the .c code. */ - - /* Leave this field blank. So the memchr() is not defined, and this will - automatically pull in the default C definition of memchr() from - ../../string/memchr.c. No need to include this file explicitely. - The lib_a-memchr.o will not be generated, so it won't replace the - default lib_a-memchr.o which is generated by ../../string/memchr.c. - See the commands in configure.in and Makefile.am for more details. 
- - However, if we need to rewrite this function to be more efficient, we - can add the corresponding assembly code into this field and change the - commands in configure.in and Makefile.am to allow the corresponding - lib_a-memchr.o to be generated. - */ + #endif diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S b/newlib/libc/machine/arm/memcpy-stub.c index 158133f4e..449d31a50 100644 --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S +++ b/newlib/libc/machine/arm/memcpy-stub.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 ARM Ltd + * Copyright (c) 2013 ARM Ltd * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,21 +26,15 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* Tiny version of strcmp in ARM state. Used only when optimizing - for size. Also supports Thumb-2. */ +/* The sole purpose of this file is to include the plain memcpy provided + in newlib. An optimized version of memcpy is provided in the assembly + file memcpy.S in this directory. */ +#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ + (!((defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED)) \ + || defined (__ARM_ARCH_7EM__) || defined (__ARM_ARCH_7M__)))) - .syntax unified -def_fn strcmp - .cfi_startproc -1: - ldrb r2, [r0], #1 - ldrb r3, [r1], #1 - cmp r2, #1 - it cs - cmpcs r2, r3 - beq 1b -2: - subs r0, r2, r3 - RETURN - .cfi_endproc - .size strcmp, . - strcmp +#include "../../string/memcpy.c" + +#else + /* Do nothing. See memcpy.S in this directory. */ +#endif diff --git a/newlib/libc/machine/arm/memcpy.S b/newlib/libc/machine/arm/memcpy.S index 399752424..734a19776 100644 --- a/newlib/libc/machine/arm/memcpy.S +++ b/newlib/libc/machine/arm/memcpy.S @@ -27,19 +27,7 @@ */ #if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) - /* Leave this field blank. 
So the memcpy() is not defined, and this will - automatically pull in the default C definition of memcpy() from - ../../string/memcpy.c. No need to include this file explicitely. - The lib_a-memcpy.o will not be generated, so it won't replace the default - lib_a-memcpy.o which is generated by ../../string/memcpy.c. - See the commands in configure.in and Makefile.am for more details. - - However, if we need to rewrite this function to be more efficient, we - can add the corresponding assembly code into this field and change the - commands in configure.in and Makefile.am to allow the corresponding - lib_a-memcpy.o to be generated. - */ - + /* Do nothing here. See memcpy-stub.c in the same directory. */ #elif defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) #include "memcpy-armv7a.S" @@ -47,5 +35,5 @@ #include "memcpy-armv7m.S" #else - /* Leave this filed blank. See the commands above. */ + /* Do nothing here. See memcpy-stub.c in the same directory. */ #endif diff --git a/newlib/libc/machine/arm/strcmp-armv4.S b/newlib/libc/machine/arm/strcmp-armv4.S deleted file mode 100644 index b18c3dbe0..000000000 --- a/newlib/libc/machine/arm/strcmp-armv4.S +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /* Basic ARM implementation. This should run on anything except - for ARMv6-M, but there are better implementations for later - revisions of the architecture. This version can support ARMv4T - ARM/Thumb interworking. */ -/* Parameters and result. */ -#define src1 r0 -#define src2 r1 -#define result r0 /* Overlaps src1. */ - -/* Internal variables. */ -#define data1 r2 -#define data2 r3 -#define magic1 r4 -#define tmp2 r5 -#define tmp1 r12 -#define syndrome r12 /* Overlaps tmp1 */ - - .arm -def_fn strcmp - .cfi_startproc - eor tmp1, src1, src2 - tst tmp1, #3 - /* Strings not at same byte offset from a word boundary. */ - bne .Lstrcmp_unaligned - ands tmp1, src1, #3 - bic src1, src1, #3 - bic src2, src2, #3 - ldr data1, [src1], #4 - ldreq data2, [src2], #4 - beq 1f - /* Although s1 and s2 have identical initial alignment, they are - not currently word aligned. Rather than comparing bytes, - make sure that any bytes fetched from before the addressed - bytes are forced to 0xff. Then they will always compare - equal. */ - eor tmp1, tmp1, #3 - mvn data2, #MSB - lsl tmp1, tmp1, #3 - S2LO tmp1, data2, tmp1 - ldr data2, [src2], #4 - orr data1, data1, tmp1 - orr data2, data2, tmp1 -1: - /* Load the 'magic' constant 0x01010101. 
*/ - str r4, [sp, #-4]! - .cfi_def_cfa_offset 4 - .cfi_offset 4, -4 - mov magic1, #1 - orr magic1, magic1, magic1, lsl #8 - orr magic1, magic1, magic1, lsl #16 - .p2align 2 -4: - sub syndrome, data1, magic1 - cmp data1, data2 - /* check for any zero bytes in first word */ - biceq syndrome, syndrome, data1 - tsteq syndrome, magic1, lsl #7 - ldreq data1, [src1], #4 - ldreq data2, [src2], #4 - beq 4b -2: - /* There's a zero or a different byte in the word */ - S2HI result, data1, #24 - S2LO data1, data1, #8 - cmp result, #1 - cmpcs result, data2, S2HI #24 - S2LOEQ data2, data2, #8 - beq 2b - /* On a big-endian machine, RESULT contains the desired byte in bits - 0-7; on a little-endian machine they are in bits 24-31. In - both cases the other bits in RESULT are all zero. For DATA2 the - interesting byte is at the other end of the word, but the - other bits are not necessarily zero. We need a signed result - representing the differnece in the unsigned bytes, so for the - little-endian case we can't just shift the interesting bits - up. */ -#ifdef __ARM_BIG_ENDIAN - sub result, result, data2, lsr #24 -#else - and data2, data2, #255 - rsb result, data2, result, lsr #24 -#endif - ldr r4, [sp], #4 - .cfi_restore 4 - .cfi_def_cfa_offset 0 - RETURN - - -#if 0 - /* The assembly code below is based on the following alogrithm. 
*/ -#ifdef __ARM_BIG_ENDIAN -#define RSHIFT << -#define LSHIFT >> -#else -#define RSHIFT >> -#define LSHIFT << -#endif - -#define body(shift) \ - mask = 0xffffffffU RSHIFT shift; \ - data1 = *src1++; \ - data2 = *src2++; \ - do \ - { \ - tmp2 = data1 & mask; \ - if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \ - { \ - data2 RSHIFT= shift; \ - break; \ - } \ - if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \ - { \ - /* See comment in assembler below re syndrome on big-endian */\ - if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \ - data2 RSHIFT= shift; \ - else \ - { \ - data2 = *src2; \ - tmp2 = data1 RSHIFT (32 - shift); \ - data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ - } \ - break; \ - } \ - data2 = *src2++; \ - tmp2 ^= data1; \ - if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \ - { \ - tmp2 = data1 >> (32 - shift); \ - data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \ - break; \ - } \ - data1 = *src1++; \ - } while (1) - - const unsigned* src1; - const unsigned* src2; - unsigned data1, data2; - unsigned mask; - unsigned shift; - unsigned b1 = 0x01010101; - char c1, c2; - unsigned tmp2; - - while (((unsigned) s1) & 3) - { - c1 = *s1++; - c2 = *s2++; - if (c1 == 0 || c1 != c2) - return c1 - (int)c2; - } - src1 = (unsigned*) (((unsigned)s1) & ~3); - src2 = (unsigned*) (((unsigned)s2) & ~3); - tmp2 = ((unsigned) s2) & 3; - if (tmp2 == 1) - { - body(8); - } - else if (tmp2 == 2) - { - body(16); - } - else - { - body (24); - } - - do - { -#ifdef __ARM_BIG_ENDIAN - c1 = (char) tmp2 >> 24; - c2 = (char) data2 >> 24; -#else /* not __ARM_BIG_ENDIAN */ - c1 = (char) tmp2; - c2 = (char) data2; -#endif /* not __ARM_BIG_ENDIAN */ - tmp2 RSHIFT= 8; - data2 RSHIFT= 8; - } while (c1 != 0 && c1 == c2); - return c1 - c2; -#endif /* 0 */ - - - /* First of all, compare bytes until src1(sp1) is word-aligned. 
*/ -.Lstrcmp_unaligned: - tst src1, #3 - beq 2f - ldrb data1, [src1], #1 - ldrb data2, [src2], #1 - cmp data1, #1 - cmpcs data1, data2 - beq .Lstrcmp_unaligned - sub result, data1, data2 - RETURN - -2: - stmfd sp!, {r4, r5} - .cfi_def_cfa_offset 8 - .cfi_offset 4, -8 - .cfi_offset 5, -4 - mov magic1, #1 - orr magic1, magic1, magic1, lsl #8 - orr magic1, magic1, magic1, lsl #16 - - ldr data1, [src1], #4 - and tmp2, src2, #3 - bic src2, src2, #3 - ldr data2, [src2], #4 - cmp tmp2, #2 - beq .Loverlap2 - bhi .Loverlap1 - - /* Critical inner Loop: Block with 3 bytes initial overlap */ - .p2align 2 -.Loverlap3: - bic tmp2, data1, #MSB - cmp tmp2, data2, S2LO #8 - sub syndrome, data1, magic1 - bic syndrome, syndrome, data1 - bne 4f - ands syndrome, syndrome, magic1, lsl #7 - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #24 - bne 6f - ldr data1, [src1], #4 - b .Loverlap3 -4: - S2LO data2, data2, #8 - b .Lstrcmp_tail - -5: -#ifdef __ARM_BIG_ENDIAN - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00. */ - tst data1, #0xff000000 - tstne data1, #0x00ff0000 - tstne data1, #0x0000ff00 - beq .Lstrcmp_done_equal -#else - bics syndrome, syndrome, #0xff000000 - bne .Lstrcmp_done_equal -#endif - ldrb data2, [src2] - S2LO tmp2, data1, #24 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #24 -#endif - b .Lstrcmp_tail - -6: - S2LO tmp2, data1, #24 - and data2, data2, #LSB - b .Lstrcmp_tail - - /* Critical inner Loop: Block with 2 bytes initial overlap. 
*/ - .p2align 2 -.Loverlap2: - S2HI tmp2, data1, #16 - sub syndrome, data1, magic1 - S2LO tmp2, tmp2, #16 - bic syndrome, syndrome, data1 - cmp tmp2, data2, S2LO #16 - bne 4f - ands syndrome, syndrome, magic1, lsl #7 - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #16 - bne 6f - ldr data1, [src1], #4 - b .Loverlap2 - -5: -#ifdef __ARM_BIG_ENDIAN - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00 */ - tst data1, #0xff000000 - tstne data1, #0x00ff0000 - beq .Lstrcmp_done_equal -#else - lsls syndrome, syndrome, #16 - bne .Lstrcmp_done_equal -#endif - ldrh data2, [src2] - S2LO tmp2, data1, #16 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #16 -#endif - b .Lstrcmp_tail - -6: - S2HI data2, data2, #16 - S2LO tmp2, data1, #16 -4: - S2LO data2, data2, #16 - b .Lstrcmp_tail - - /* Critical inner Loop: Block with 1 byte initial overlap. */ - .p2align 2 -.Loverlap1: - and tmp2, data1, #LSB - cmp tmp2, data2, S2LO #24 - sub syndrome, data1, magic1 - bic syndrome, syndrome, data1 - bne 4f - ands syndrome, syndrome, magic1, lsl #7 - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #8 - bne 6f - ldr data1, [src1], #4 - b .Loverlap1 -4: - S2LO data2, data2, #24 - b .Lstrcmp_tail -5: - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00. 
*/ - tst data1, #LSB - beq .Lstrcmp_done_equal - ldr data2, [src2], #4 -6: - S2LO tmp2, data1, #8 - bic data2, data2, #MSB - b .Lstrcmp_tail -.Lstrcmp_done_equal: - mov result, #0 - .cfi_remember_state - ldmfd sp!, {r4, r5} - .cfi_restore 4 - .cfi_restore 5 - .cfi_def_cfa_offset 0 - RETURN - -.Lstrcmp_tail: - .cfi_restore_state - and r2, tmp2, #LSB - and result, data2, #LSB - cmp result, #1 - cmpcs result, r2 - S2LOEQ tmp2, tmp2, #8 - S2LOEQ data2, data2, #8 - beq .Lstrcmp_tail - sub result, r2, result - ldmfd sp!, {r4, r5} - .cfi_restore 4 - .cfi_restore 5 - .cfi_def_cfa_offset 0 - RETURN - .cfi_endproc - .size strcmp, . - strcmp diff --git a/newlib/libc/machine/arm/strcmp-armv4t.S b/newlib/libc/machine/arm/strcmp-armv4t.S deleted file mode 100644 index 2716b87d2..000000000 --- a/newlib/libc/machine/arm/strcmp-armv4t.S +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /* This version is only used when we want a very basic Thumb1 - implementation or for size, otherwise we use the base ARMv4 - version. This is also suitable for ARMv6-M. */ - - .thumb - .syntax unified - .arch armv4t - .eabi_attribute Tag_also_compatible_with, "\006\013" /* ARMv6-M. */ - .eabi_attribute Tag_ARM_ISA_use, 0 -def_fn strcmp - .cfi_startproc -1: - ldrb r2, [r0] - ldrb r3, [r1] - cmp r2, #0 - beq 2f - adds r0, r0, #1 - adds r1, r1, #1 - cmp r2, r3 - beq 1b -2: - subs r0, r2, r3 - bx lr - .cfi_endproc - .size strcmp, . - strcmp diff --git a/newlib/libc/machine/arm/strcmp-armv6.S b/newlib/libc/machine/arm/strcmp-armv6.S deleted file mode 100644 index a557fc57e..000000000 --- a/newlib/libc/machine/arm/strcmp-armv6.S +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /* Implementation of strcmp for ARMv6. Use ldrd to support wider - loads, provided the data is sufficiently aligned. Use - saturating arithmetic to optimize the compares. */ - - /* Build Options: - STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first - byte in the string. If comparing completely random strings - the pre-check will save time, since there is a very high - probability of a mismatch in the first character: we save - significant overhead if this is the common case. However, - if strings are likely to be identical (eg because we're - verifying a hit in a hash table), then this check is largely - redundant. */ - - .arm - -/* Parameters and result. */ -#define src1 r0 -#define src2 r1 -#define result r0 /* Overlaps src1. */ - -/* Internal variables. */ -#define tmp1 r4 -#define tmp2 r5 -#define const_m1 r12 - -/* Additional internal variables for 64-bit aligned data. */ -#define data1a r2 -#define data1b r3 -#define data2a r6 -#define data2b r7 -#define syndrome_a tmp1 -#define syndrome_b tmp2 - -/* Additional internal variables for 32-bit aligned data. 
*/ -#define data1 r2 -#define data2 r3 -#define syndrome tmp2 - - - /* Macro to compute and return the result value for word-aligned - cases. */ - .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 -#ifdef __ARM_BIG_ENDIAN - /* If data1 contains a zero byte, then syndrome will contain a 1 in - bit 7 of that byte. Otherwise, the highest set bit in the - syndrome will highlight the first different bit. It is therefore - sufficient to extract the eight bits starting with the syndrome - bit. */ - clz tmp1, \synd - lsl r1, \d2, tmp1 - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsl \d1, \d1, tmp1 - .cfi_remember_state - lsr result, \d1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1, lsr #24 - bx lr -#else - /* To use the big-endian trick we'd have to reverse all three words. - that's slower than this approach. */ - rev \synd, \synd - clz tmp1, \synd - bic tmp1, tmp1, #7 - lsr r1, \d2, tmp1 - .cfi_remember_state - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsr \d1, \d1, tmp1 - and result, \d1, #255 - and r1, r1, #255 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1 - - bx lr -#endif - .endm - - .text - .p2align 5 -.Lstrcmp_start_addr: -#ifndef STRCMP_NO_PRECHECK -.Lfastpath_exit: - sub r0, r2, r3 - bx lr -#endif -def_fn strcmp -#ifndef STRCMP_NO_PRECHECK - ldrb r2, [src1] - ldrb r3, [src2] - cmp r2, #1 - cmpcs r2, r3 - bne .Lfastpath_exit -#endif - .cfi_startproc - strd r4, r5, [sp, #-16]! - .cfi_def_cfa_offset 16 - .cfi_offset 4, -16 - .cfi_offset 5, -12 - orr tmp1, src1, src2 - strd r6, r7, [sp, #8] - .cfi_offset 6, -8 - .cfi_offset 7, -4 - mvn const_m1, #0 - tst tmp1, #7 - beq .Lloop_aligned8 - -.Lnot_aligned: - eor tmp1, src1, src2 - tst tmp1, #7 - bne .Lmisaligned8 - - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. 
*/ - and tmp1, src1, #7 - bic src1, src1, #7 - and tmp2, tmp1, #3 - bic src2, src2, #7 - lsl tmp2, tmp2, #3 /* Bytes -> bits. */ - ldrd data1a, data1b, [src1], #16 - tst tmp1, #4 - ldrd data2a, data2b, [src2], #16 - /* In ARM code we can't use ORN, but with do have MVN with a - register shift. */ - mvn tmp1, const_m1, S2HI tmp2 - orr data1a, data1a, tmp1 - orr data2a, data2a, tmp1 - beq .Lstart_realigned8 - orr data1b, data1b, tmp1 - mov data1a, const_m1 - orr data2b, data2b, tmp1 - mov data2a, const_m1 - b .Lstart_realigned8 - - /* Unwind the inner loop by a factor of 2, giving 16 bytes per - pass. */ - .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ - .p2align 2 /* Always word aligned. */ -.Lloop_aligned8: - ldrd data1a, data1b, [src1], #16 - ldrd data2a, data2b, [src2], #16 -.Lstart_realigned8: - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ - bne .Ldiff_found - - ldrd data1a, data1b, [src1, #-8] - ldrd data2a, data2b, [src2, #-8] - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. 
*/ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ - beq .Lloop_aligned8 - -.Ldiff_found: - cmp syndrome_a, #0 - bne .Ldiff_in_a - -.Ldiff_in_b: - strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 - -.Ldiff_in_a: - .cfi_restore_state - strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 - - .cfi_restore_state -.Lmisaligned8: - tst tmp1, #3 - bne .Lmisaligned4 - ands tmp1, src1, #3 - bne .Lmutual_align4 - - /* Unrolled by a factor of 2, to reduce the number of post-increment - operations. */ -.Lloop_aligned4: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned4: - uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cmp syndrome, #0 - bne .Laligned4_done - - ldr data1, [src1, #-4] - ldr data2, [src2, #-4] - uadd8 syndrome, data1, const_m1 - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cmp syndrome, #0 - beq .Lloop_aligned4 - -.Laligned4_done: - strcmp_epilogue_aligned syndrome, data1, data2, 0 - -.Lmutual_align4: - .cfi_restore_state - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. */ - lsl tmp1, tmp1, #3 /* Bytes -> bits. */ - bic src1, src1, #3 - ldr data1, [src1], #8 - bic src2, src2, #3 - ldr data2, [src2], #8 - - /* In ARM code we can't use ORN, but with do have MVN with a - register shift. 
*/ - mvn tmp1, const_m1, S2HI tmp1 - orr data1, data1, tmp1 - orr data2, data2, tmp1 - b .Lstart_realigned4 - -.Lmisaligned4: - ands tmp1, src1, #3 - beq .Lsrc1_aligned - sub src2, src2, tmp1 - bic src1, src1, #3 - lsls tmp1, tmp1, #31 - ldr data1, [src1], #4 - beq .Laligned_m2 - bcs .Laligned_m1 - -#ifdef STRCMP_NO_PRECHECK - ldrb data2, [src2, #1] - uxtb tmp1, data1, ror #BYTE1_OFFSET - cmp tmp1, #1 - cmpcs tmp1, data2 - bne .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - cmp tmp1, #1 - cmpcs tmp1, data2 - bne .Lmisaligned_exit - -.Laligned_m1: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - cmp tmp1, #1 - cmpcs tmp1, data2 - beq .Lsrc1_aligned - -#else /* STRCMP_NO_PRECHECK */ - /* If we've done the pre-check, then we don't need to check the - first byte again here. */ - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - cmp tmp1, #1 - cmpcs tmp1, data2 - bne .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - cmp tmp1, #1 - cmpcs tmp1, data2 - beq .Laligned_m1 -#endif - -.Lmisaligned_exit: - .cfi_remember_state - sub result, tmp1, data2 - ldr r4, [sp], #16 - .cfi_restore 4 - bx lr - -#ifndef STRCMP_NO_PRECHECK -.Laligned_m1: - add src2, src2, #4 -#endif -.Lsrc1_aligned: - .cfi_restore_state - /* src1 is word aligned, but src2 has no common alignment - with it. */ - ldr data1, [src1], #4 - lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ - - bic src2, src2, #3 - ldr data2, [src2], #4 - bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ - bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ - - /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. 
*/ -.Loverlap3: - bic tmp1, data1, #MSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #8 - sel syndrome, syndrome, const_m1 - bne 4f - cmp syndrome, #0 - ldreq data2, [src2], #4 - bne 5f - - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #24 - bne 6f - ldr data1, [src1], #4 - b .Loverlap3 -4: - S2LO data2, data2, #8 - b .Lstrcmp_tail - -5: - bics syndrome, syndrome, #MSB - bne .Lstrcmp_done_equal - - /* We can only get here if the MSB of data1 contains 0, so - fast-path the exit. */ - ldrb result, [src2] - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 Not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - neg result, result - bx lr - -6: - .cfi_restore_state - S2LO data1, data1, #24 - and data2, data2, #LSB - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ -.Loverlap2: - and tmp1, data1, const_m1, S2LO #16 - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #16 - sel syndrome, syndrome, const_m1 - bne 4f - cmp syndrome, #0 - ldreq data2, [src2], #4 - bne 5f - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #16 - bne 6f - ldr data1, [src1], #4 - b .Loverlap2 -4: - S2LO data2, data2, #16 - b .Lstrcmp_tail -5: - ands syndrome, syndrome, const_m1, S2LO #16 - bne .Lstrcmp_done_equal - - ldrh data2, [src2] - S2LO data1, data1, #16 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #16 -#endif - b .Lstrcmp_tail - -6: - S2LO data1, data1, #16 - and data2, data2, const_m1, S2LO #16 - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. 
*/ -.Loverlap1: - and tmp1, data1, #LSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #24 - sel syndrome, syndrome, const_m1 - bne 4f - cmp syndrome, #0 - ldreq data2, [src2], #4 - bne 5f - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #8 - bne 6f - ldr data1, [src1], #4 - b .Loverlap1 -4: - S2LO data2, data2, #24 - b .Lstrcmp_tail -5: - tst syndrome, #LSB - bne .Lstrcmp_done_equal - ldr data2, [src2] -6: - S2LO data1, data1, #8 - bic data2, data2, #MSB - b .Lstrcmp_tail - -.Lstrcmp_done_equal: - mov result, #0 - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - bx lr - -.Lstrcmp_tail: - .cfi_restore_state -#ifndef __ARM_BIG_ENDIAN - rev data1, data1 - rev data2, data2 - /* Now everything looks big-endian... */ -#endif - uadd8 tmp1, data1, const_m1 - eor tmp1, data1, data2 - sel syndrome, tmp1, const_m1 - clz tmp1, syndrome - lsl data1, data1, tmp1 - lsl data2, data2, tmp1 - lsr result, data1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - sub result, result, data2, lsr #24 - bx lr - .cfi_endproc - .size strcmp, . - .Lstrcmp_start_addr diff --git a/newlib/libc/machine/arm/strcmp-armv6m.S b/newlib/libc/machine/arm/strcmp-armv6m.S deleted file mode 100644 index 932841d19..000000000 --- a/newlib/libc/machine/arm/strcmp-armv6m.S +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Implementation of strcmp for ARMv6m. This version is only used in - ARMv6-M when we want an efficient implementation. Otherwize if the - code size is preferred, strcmp-armv4t.S will be used. 
*/ - - .thumb_func - .syntax unified - .arch armv6-m - - .macro DoSub n, label - subs r0, r0, r1 -#ifdef __ARM_BIG_ENDIAN - lsrs r1, r4, \n -#else - lsls r1, r4, \n -#endif - orrs r1, r0 - bne \label - .endm - - .macro Byte_Test n, label - lsrs r0, r2, \n - lsrs r1, r3, \n - DoSub \n, \label - .endm - - .text -def_fn strcmp - .cfi_startproc - mov r2, r0 - push {r4, r5, r6, lr} - orrs r2, r1 - lsls r2, r2, #30 - bne 6f - ldr r5, =0x01010101 - lsls r6, r5, #7 -1: - ldmia r0!, {r2} - ldmia r1!, {r3} - subs r4, r2, r5 - bics r4, r2 - ands r4, r6 - beq 3f - -#ifdef __ARM_BIG_ENDIAN - Byte_Test #24, 4f - Byte_Test #16, 4f - Byte_Test #8, 4f - - b 7f -3: - cmp r2, r3 - beq 1b - cmp r2, r3 -#else - uxtb r0, r2 - uxtb r1, r3 - DoSub #24, 2f - - uxth r0, r2 - uxth r1, r3 - DoSub #16, 2f - - lsls r0, r2, #8 - lsls r1, r3, #8 - lsrs r0, r0, #8 - lsrs r1, r1, #8 - DoSub #8, 2f - - lsrs r0, r2, #24 - lsrs r1, r3, #24 - subs r0, r0, r1 -2: - pop {r4, r5, r6, pc} - -3: - cmp r2, r3 - beq 1b - rev r0, r2 - rev r1, r3 - cmp r0, r1 -#endif - - bls 5f - movs r0, #1 -4: - pop {r4, r5, r6, pc} -5: - movs r0, #0 - mvns r0, r0 - pop {r4, r5, r6, pc} -6: - ldrb r2, [r0, #0] - ldrb r3, [r1, #0] - adds r0, #1 - adds r1, #1 - cmp r2, #0 - beq 7f - cmp r2, r3 - bne 7f - ldrb r2, [r0, #0] - ldrb r3, [r1, #0] - adds r0, #1 - adds r1, #1 - cmp r2, #0 - beq 7f - cmp r2, r3 - beq 6b -7: - subs r0, r2, r3 - pop {r4, r5, r6, pc} - .cfi_endproc - .size strcmp, . - strcmp diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S deleted file mode 100644 index e2c47ff20..000000000 --- a/newlib/libc/machine/arm/strcmp-armv7.S +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /* Implementation of strcmp for ARMv7 when DSP instructions are - available. Use ldrd to support wider loads, provided the data - is sufficiently aligned. Use saturating arithmetic to optimize - the compares. */ - - /* Build Options: - STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first - byte in the string. If comparing completely random strings - the pre-check will save time, since there is a very high - probability of a mismatch in the first character: we save - significant overhead if this is the common case. However, - if strings are likely to be identical (eg because we're - verifying a hit in a hash table), then this check is largely - redundant. */ - - /* This version uses Thumb-2 code. 
*/ - .thumb - .syntax unified - -/* Parameters and result. */ -#define src1 r0 -#define src2 r1 -#define result r0 /* Overlaps src1. */ - -/* Internal variables. */ -#define tmp1 r4 -#define tmp2 r5 -#define const_m1 r12 - -/* Additional internal variables for 64-bit aligned data. */ -#define data1a r2 -#define data1b r3 -#define data2a r6 -#define data2b r7 -#define syndrome_a tmp1 -#define syndrome_b tmp2 - -/* Additional internal variables for 32-bit aligned data. */ -#define data1 r2 -#define data2 r3 -#define syndrome tmp2 - - - /* Macro to compute and return the result value for word-aligned - cases. */ - .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 -#ifdef __ARM_BIG_ENDIAN - /* If data1 contains a zero byte, then syndrome will contain a 1 in - bit 7 of that byte. Otherwise, the highest set bit in the - syndrome will highlight the first different bit. It is therefore - sufficient to extract the eight bits starting with the syndrome - bit. */ - clz tmp1, \synd - lsl r1, \d2, tmp1 - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsl \d1, \d1, tmp1 - .cfi_remember_state - lsr result, \d1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1, lsr #24 - bx lr -#else - /* To use the big-endian trick we'd have to reverse all three words. - that's slower than this approach. 
*/ - rev \synd, \synd - clz tmp1, \synd - bic tmp1, tmp1, #7 - lsr r1, \d2, tmp1 - .cfi_remember_state - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsr \d1, \d1, tmp1 - and result, \d1, #255 - and r1, r1, #255 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1 - - bx lr -#endif - .endm - - .text - .p2align 5 -.Lstrcmp_start_addr: -#ifndef STRCMP_NO_PRECHECK -.Lfastpath_exit: - sub r0, r2, r3 - bx lr - nop -#endif -def_fn strcmp -#ifndef STRCMP_NO_PRECHECK - ldrb r2, [src1] - ldrb r3, [src2] - cmp r2, #1 - it cs - cmpcs r2, r3 - bne .Lfastpath_exit -#endif - .cfi_startproc - strd r4, r5, [sp, #-16]! - .cfi_def_cfa_offset 16 - .cfi_offset 4, -16 - .cfi_offset 5, -12 - orr tmp1, src1, src2 - strd r6, r7, [sp, #8] - .cfi_offset 6, -8 - .cfi_offset 7, -4 - mvn const_m1, #0 - lsl r2, tmp1, #29 - cbz r2, .Lloop_aligned8 - -.Lnot_aligned: - eor tmp1, src1, src2 - tst tmp1, #7 - bne .Lmisaligned8 - - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. */ - and tmp1, src1, #7 - bic src1, src1, #7 - and tmp2, tmp1, #3 - bic src2, src2, #7 - lsl tmp2, tmp2, #3 /* Bytes -> bits. */ - ldrd data1a, data1b, [src1], #16 - tst tmp1, #4 - ldrd data2a, data2b, [src2], #16 - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. */ - S2HI tmp1, const_m1, tmp2 - orn data1a, data1a, tmp1 - orn data2a, data2a, tmp1 - beq .Lstart_realigned8 - orn data1b, data1b, tmp1 - mov data1a, const_m1 - orn data2b, data2b, tmp1 - mov data2a, const_m1 - b .Lstart_realigned8 - - /* Unwind the inner loop by a factor of 2, giving 16 bytes per - pass. */ - .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ - .p2align 2 /* Always word aligned. 
*/ -.Lloop_aligned8: - ldrd data1a, data1b, [src1], #16 - ldrd data2a, data2b, [src2], #16 -.Lstart_realigned8: - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - cbnz syndrome_a, .Ldiff_in_a - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - cbnz syndrome_b, .Ldiff_in_b - - ldrd data1a, data1b, [src1, #-8] - ldrd data2a, data2b, [src2, #-8] - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - /* Can't use CBZ for backwards branch. */ - orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ - beq .Lloop_aligned8 - -.Ldiff_found: - cbnz syndrome_a, .Ldiff_in_a - -.Ldiff_in_b: - strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 - -.Ldiff_in_a: - .cfi_restore_state - strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 - - .cfi_restore_state -.Lmisaligned8: - tst tmp1, #3 - bne .Lmisaligned4 - ands tmp1, src1, #3 - bne .Lmutual_align4 - - /* Unrolled by a factor of 2, to reduce the number of post-increment - operations. */ -.Lloop_aligned4: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned4: - uadd8 syndrome, data1, const_m1 /* Only need GE bits. 
*/ - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cbnz syndrome, .Laligned4_done - ldr data1, [src1, #-4] - ldr data2, [src2, #-4] - uadd8 syndrome, data1, const_m1 - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cmp syndrome, #0 - beq .Lloop_aligned4 - -.Laligned4_done: - strcmp_epilogue_aligned syndrome, data1, data2, 0 - -.Lmutual_align4: - .cfi_restore_state - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. */ - lsl tmp1, tmp1, #3 /* Bytes -> bits. */ - bic src1, src1, #3 - ldr data1, [src1], #8 - bic src2, src2, #3 - ldr data2, [src2], #8 - - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. */ - S2HI tmp1, const_m1, tmp1 - orn data1, data1, tmp1 - orn data2, data2, tmp1 - b .Lstart_realigned4 - -.Lmisaligned4: - ands tmp1, src1, #3 - beq .Lsrc1_aligned - sub src2, src2, tmp1 - bic src1, src1, #3 - lsls tmp1, tmp1, #31 - ldr data1, [src1], #4 - beq .Laligned_m2 - bcs .Laligned_m1 - -#ifdef STRCMP_NO_PRECHECK - ldrb data2, [src2, #1] - uxtb tmp1, data1, ror #BYTE1_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m1: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - add src2, src2, #4 - cbnz data2, .Lsrc1_aligned -#else /* STRCMP_NO_PRECHECK */ - /* If we've done the pre-check, then we don't need to check the - first byte again here. 
*/ - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbnz data2, .Laligned_m1 -#endif - -.Lmisaligned_exit: - .cfi_remember_state - mov result, tmp1 - ldr r4, [sp], #16 - .cfi_restore 4 - bx lr - -#ifndef STRCMP_NO_PRECHECK -.Laligned_m1: - add src2, src2, #4 -#endif -.Lsrc1_aligned: - .cfi_restore_state - /* src1 is word aligned, but src2 has no common alignment - with it. */ - ldr data1, [src1], #4 - lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ - - bic src2, src2, #3 - ldr data2, [src2], #4 - bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ - bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ - - /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */ -.Loverlap3: - bic tmp1, data1, #MSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #8 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #24 - bne 6f - ldr data1, [src1], #4 - b .Loverlap3 -4: - S2LO data2, data2, #8 - b .Lstrcmp_tail - -5: - bics syndrome, syndrome, #MSB - bne .Lstrcmp_done_equal - - /* We can only get here if the MSB of data1 contains 0, so - fast-path the exit. */ - ldrb result, [src2] - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 Not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - neg result, result - bx lr - -6: - .cfi_restore_state - S2LO data1, data1, #24 - and data2, data2, #LSB - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. 
*/ -.Loverlap2: - and tmp1, data1, const_m1, S2LO #16 - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #16 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #16 - bne 6f - ldr data1, [src1], #4 - b .Loverlap2 -4: - S2LO data2, data2, #16 - b .Lstrcmp_tail -5: - ands syndrome, syndrome, const_m1, S2LO #16 - bne .Lstrcmp_done_equal - - ldrh data2, [src2] - S2LO data1, data1, #16 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #16 -#endif - b .Lstrcmp_tail - -6: - S2LO data1, data1, #16 - and data2, data2, const_m1, S2LO #16 - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ -.Loverlap1: - and tmp1, data1, #LSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #24 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #8 - bne 6f - ldr data1, [src1], #4 - b .Loverlap1 -4: - S2LO data2, data2, #24 - b .Lstrcmp_tail -5: - tst syndrome, #LSB - bne .Lstrcmp_done_equal - ldr data2, [src2] -6: - S2LO data1, data1, #8 - bic data2, data2, #MSB - b .Lstrcmp_tail - -.Lstrcmp_done_equal: - mov result, #0 - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - bx lr - -.Lstrcmp_tail: - .cfi_restore_state -#ifndef __ARM_BIG_ENDIAN - rev data1, data1 - rev data2, data2 - /* Now everything looks big-endian... */ -#endif - uadd8 tmp1, data1, const_m1 - eor tmp1, data1, data2 - sel syndrome, tmp1, const_m1 - clz tmp1, syndrome - lsl data1, data1, tmp1 - lsl data2, data2, tmp1 - lsr result, data1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - sub result, result, data2, lsr #24 - bx lr - .cfi_endproc - .size strcmp, . 
- .Lstrcmp_start_addr diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S deleted file mode 100644 index d66d393fb..000000000 --- a/newlib/libc/machine/arm/strcmp-armv7m.S +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Very similar to the generic code, but uses Thumb2 as implemented - in ARMv7-M. */ - -/* Parameters and result. */ -#define src1 r0 -#define src2 r1 -#define result r0 /* Overlaps src1. */ - -/* Internal variables. 
*/ -#define data1 r2 -#define data2 r3 -#define tmp2 r5 -#define tmp1 r12 -#define syndrome r12 /* Overlaps tmp1 */ - - .thumb - .syntax unified -def_fn strcmp - .cfi_startproc - eor tmp1, src1, src2 - tst tmp1, #3 - /* Strings not at same byte offset from a word boundary. */ - bne .Lstrcmp_unaligned - ands tmp1, src1, #3 - bic src1, src1, #3 - bic src2, src2, #3 - ldr data1, [src1], #4 - it eq - ldreq data2, [src2], #4 - beq 4f - /* Although s1 and s2 have identical initial alignment, they are - not currently word aligned. Rather than comparing bytes, - make sure that any bytes fetched from before the addressed - bytes are forced to 0xff. Then they will always compare - equal. */ - eor tmp1, tmp1, #3 - mvn data2, #MSB - lsl tmp1, tmp1, #3 - S2LO tmp1, data2, tmp1 - ldr data2, [src2], #4 - orr data1, data1, tmp1 - orr data2, data2, tmp1 - .p2align 2 - /* Critical loop. */ -4: - sub syndrome, data1, #0x01010101 - cmp data1, data2 - /* check for any zero bytes in first word */ - itttt eq - biceq syndrome, syndrome, data1 - tsteq syndrome, #0x80808080 - ldreq data1, [src1], #4 - ldreq data2, [src2], #4 - beq 4b -2: - /* There's a zero or a different byte in the word */ - S2HI result, data1, #24 - S2LO data1, data1, #8 - cmp result, #1 - it cs - cmpcs result, data2, S2HI #24 - it eq - S2LOEQ data2, data2, #8 - beq 2b - /* On a big-endian machine, RESULT contains the desired byte in bits - 0-7; on a little-endian machine they are in bits 24-31. In - both cases the other bits in RESULT are all zero. For DATA2 the - interesting byte is at the other end of the word, but the - other bits are not necessarily zero. We need a signed result - representing the differnece in the unsigned bytes, so for the - little-endian case we can't just shift the interesting bits - up. 
*/ -#ifdef __ARM_BIG_ENDIAN - sub result, result, data2, lsr #24 -#else - and data2, data2, #255 - lsrs result, result, #24 - subs result, result, data2 -#endif - RETURN - - -#if 0 - /* The assembly code below is based on the following alogrithm. */ -#ifdef __ARM_BIG_ENDIAN -#define RSHIFT << -#define LSHIFT >> -#else -#define RSHIFT >> -#define LSHIFT << -#endif - -#define body(shift) \ - mask = 0xffffffffU RSHIFT shift; \ - data1 = *src1++; \ - data2 = *src2++; \ - do \ - { \ - tmp2 = data1 & mask; \ - if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \ - { \ - data2 RSHIFT= shift; \ - break; \ - } \ - if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \ - { \ - /* See comment in assembler below re syndrome on big-endian */\ - if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \ - data2 RSHIFT= shift; \ - else \ - { \ - data2 = *src2; \ - tmp2 = data1 RSHIFT (32 - shift); \ - data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ - } \ - break; \ - } \ - data2 = *src2++; \ - tmp2 ^= data1; \ - if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \ - { \ - tmp2 = data1 >> (32 - shift); \ - data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \ - break; \ - } \ - data1 = *src1++; \ - } while (1) - - const unsigned* src1; - const unsigned* src2; - unsigned data1, data2; - unsigned mask; - unsigned shift; - unsigned b1 = 0x01010101; - char c1, c2; - unsigned tmp2; - - while (((unsigned) s1) & 3) - { - c1 = *s1++; - c2 = *s2++; - if (c1 == 0 || c1 != c2) - return c1 - (int)c2; - } - src1 = (unsigned*) (((unsigned)s1) & ~3); - src2 = (unsigned*) (((unsigned)s2) & ~3); - tmp2 = ((unsigned) s2) & 3; - if (tmp2 == 1) - { - body(8); - } - else if (tmp2 == 2) - { - body(16); - } - else - { - body (24); - } - - do - { -#ifdef __ARM_BIG_ENDIAN - c1 = (char) tmp2 >> 24; - c2 = (char) data2 >> 24; -#else /* not __ARM_BIG_ENDIAN */ - c1 = (char) tmp2; - c2 = (char) data2; -#endif /* not __ARM_BIG_ENDIAN */ - tmp2 RSHIFT= 8; - data2 RSHIFT= 8; - } while 
(c1 != 0 && c1 == c2); - return c1 - c2; -#endif /* 0 */ - - - /* First of all, compare bytes until src1(sp1) is word-aligned. */ -.Lstrcmp_unaligned: - tst src1, #3 - beq 2f - ldrb data1, [src1], #1 - ldrb data2, [src2], #1 - cmp data1, #1 - it cs - cmpcs data1, data2 - beq .Lstrcmp_unaligned - sub result, data1, data2 - bx lr - -2: - stmfd sp!, {r5} - .cfi_def_cfa_offset 4 - .cfi_offset 5, -4 - - ldr data1, [src1], #4 - and tmp2, src2, #3 - bic src2, src2, #3 - ldr data2, [src2], #4 - cmp tmp2, #2 - beq .Loverlap2 - bhi .Loverlap1 - - /* Critical inner Loop: Block with 3 bytes initial overlap */ - .p2align 2 -.Loverlap3: - bic tmp2, data1, #MSB - cmp tmp2, data2, S2LO #8 - sub syndrome, data1, #0x01010101 - bic syndrome, syndrome, data1 - bne 4f - ands syndrome, syndrome, #0x80808080 - it eq - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #24 - bne 6f - ldr data1, [src1], #4 - b .Loverlap3 -4: - S2LO data2, data2, #8 - b .Lstrcmp_tail - -5: -#ifdef __ARM_BIG_ENDIAN - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00. */ - tst data1, #0xff000000 - itt ne - tstne data1, #0x00ff0000 - tstne data1, #0x0000ff00 - beq .Lstrcmp_done_equal -#else - bics syndrome, syndrome, #0xff000000 - bne .Lstrcmp_done_equal -#endif - ldrb data2, [src2] - S2LO tmp2, data1, #24 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #24 -#endif - b .Lstrcmp_tail - -6: - S2LO tmp2, data1, #24 - and data2, data2, #LSB - b .Lstrcmp_tail - - /* Critical inner Loop: Block with 2 bytes initial overlap. 
*/ - .p2align 2 -.Loverlap2: - S2HI tmp2, data1, #16 - sub syndrome, data1, #0x01010101 - S2LO tmp2, tmp2, #16 - bic syndrome, syndrome, data1 - cmp tmp2, data2, S2LO #16 - bne 4f - ands syndrome, syndrome, #0x80808080 - it eq - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #16 - bne 6f - ldr data1, [src1], #4 - b .Loverlap2 - -5: -#ifdef __ARM_BIG_ENDIAN - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00 */ - tst data1, #0xff000000 - it ne - tstne data1, #0x00ff0000 - beq .Lstrcmp_done_equal -#else - lsls syndrome, syndrome, #16 - bne .Lstrcmp_done_equal -#endif - ldrh data2, [src2] - S2LO tmp2, data1, #16 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #16 -#endif - b .Lstrcmp_tail - -6: - S2HI data2, data2, #16 - S2LO tmp2, data1, #16 -4: - S2LO data2, data2, #16 - b .Lstrcmp_tail - - /* Critical inner Loop: Block with 1 byte initial overlap. */ - .p2align 2 -.Loverlap1: - and tmp2, data1, #LSB - cmp tmp2, data2, S2LO #24 - sub syndrome, data1, #0x01010101 - bic syndrome, syndrome, data1 - bne 4f - ands syndrome, syndrome, #0x80808080 - it eq - ldreq data2, [src2], #4 - bne 5f - eor tmp2, tmp2, data1 - cmp tmp2, data2, S2HI #8 - bne 6f - ldr data1, [src1], #4 - b .Loverlap1 -4: - S2LO data2, data2, #24 - b .Lstrcmp_tail -5: - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00. 
*/ - tst data1, #LSB - beq .Lstrcmp_done_equal - ldr data2, [src2], #4 -6: - S2LO tmp2, data1, #8 - bic data2, data2, #MSB - b .Lstrcmp_tail -.Lstrcmp_done_equal: - mov result, #0 - .cfi_remember_state - ldmfd sp!, {r5} - .cfi_restore 5 - .cfi_def_cfa_offset 0 - RETURN - -.Lstrcmp_tail: - .cfi_restore_state - and r2, tmp2, #LSB - and result, data2, #LSB - cmp result, #1 - it cs - cmpcs result, r2 - itt eq - S2LOEQ tmp2, tmp2, #8 - S2LOEQ data2, data2, #8 - beq .Lstrcmp_tail - sub result, r2, result - ldmfd sp!, {r5} - .cfi_restore 5 - .cfi_def_cfa_offset 0 - RETURN - .cfi_endproc - .size strcmp, . - strcmp diff --git a/newlib/libc/machine/arm/strcmp.S b/newlib/libc/machine/arm/strcmp.S index 7b4d7fc90..f3e738776 100644 --- a/newlib/libc/machine/arm/strcmp.S +++ b/newlib/libc/machine/arm/strcmp.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 ARM Ltd + * Copyright (c) 2012 ARM Ltd * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,72 +26,769 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* Wrapper for the various implementations of strcmp. 
*/ - #include "arm_asm.h" -#ifdef __ARM_BIG_ENDIAN -#define S2LO lsl -#define S2LOEQ lsleq -#define S2HI lsr +#ifdef __ARMEB__ +#define S2LOMEM lsl +#define S2LOMEMEQ lsleq +#define S2HIMEM lsr #define MSB 0x000000ff #define LSB 0xff000000 #define BYTE0_OFFSET 24 #define BYTE1_OFFSET 16 #define BYTE2_OFFSET 8 #define BYTE3_OFFSET 0 -#else /* not __ARM_BIG_ENDIAN */ -#define S2LO lsr -#define S2LOEQ lsreq -#define S2HI lsl +#else /* not __ARMEB__ */ +#define S2LOMEM lsr +#define S2LOMEMEQ lsreq +#define S2HIMEM lsl #define BYTE0_OFFSET 0 #define BYTE1_OFFSET 8 #define BYTE2_OFFSET 16 #define BYTE3_OFFSET 24 #define MSB 0xff000000 #define LSB 0x000000ff -#endif /* not __ARM_BIG_ENDIAN */ +#endif /* not __ARMEB__ */ + +.syntax unified + +#if defined (__thumb__) + .thumb + .thumb_func +#if !defined (__thumb2__) + /* If we have thumb1 only, we need to explictly mark the + compatibility. */ + .arch armv4t + .eabi_attribute Tag_also_compatible_with, "\006\013" /* v6-M. */ + .eabi_attribute Tag_ARM_ISA_use, 0 +#endif +#endif + .global strcmp + .type strcmp, %function +strcmp: + +#if (defined (__thumb__) && !defined (__thumb2__)) +1: + ldrb r2, [r0] + ldrb r3, [r1] + adds r0, r0, #1 + adds r1, r1, #1 + cmp r2, #0 + beq 2f + cmp r2, r3 + beq 1b +2: + subs r0, r2, r3 + bx lr +#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) +1: + ldrb r2, [r0], #1 + ldrb r3, [r1], #1 + cmp r2, #1 + it cs + cmpcs r2, r3 + beq 1b + subs r0, r2, r3 + RETURN + + +#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)) + /* Use LDRD whenever possible. */ + +/* The main thing to look out for when comparing large blocks is that + the loads do not cross a page boundary when loading past the index + of the byte with the first difference or the first string-terminator. 
+ + For example, if the strings are identical and the string-terminator + is at index k, byte by byte comparison will not load beyond address + s1+k and s2+k; word by word comparison may load up to 3 bytes beyond + k; double word - up to 7 bytes. If the load of these bytes crosses + a page boundary, it might cause a memory fault (if the page is not mapped) + that would not have happened in byte by byte comparison. + + If an address is (double) word aligned, then a load of a (double) word + from that address will not cross a page boundary. + Therefore, the algorithm below considers word and double-word alignment + of strings separately. */ + +/* High-level description of the algorithm. + + * The fast path: if both strings are double-word aligned, + use LDRD to load two words from each string in every loop iteration. + * If the strings have the same offset from a word boundary, + use LDRB to load and compare byte by byte until + the first string is aligned to a word boundary (at most 3 bytes). + This is optimized for quick return on short unaligned strings. + * If the strings have the same offset from a double-word boundary, + use LDRD to load two words from each string in every loop iteration, as in the fast path. + * If the strings do not have the same offset from a double-word boundary, + load a word from the second string before the loop to initialize the queue. + Use LDRD to load two words from every string in every loop iteration. + Inside the loop, load the second word from the second string only after comparing + the first word, using the queued value, to guarantee safety across page boundaries. + * If the strings do not have the same offset from a word boundary, + use LDR and a shift queue. Order of loads and comparisons matters, + similarly to the previous case. + + * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value. + * The only difference between ARM and Thumb modes is the use of CBZ instruction. 
+ * The only difference between big and little endian is the use of REV in little endian + to compute the return value, instead of MOV. + * No preload. [TODO.] +*/ + + .macro m_cbz reg label +#ifdef __thumb2__ + cbz \reg, \label +#else /* not defined __thumb2__ */ + cmp \reg, #0 + beq \label +#endif /* not defined __thumb2__ */ + .endm /* m_cbz */ + + .macro m_cbnz reg label +#ifdef __thumb2__ + cbnz \reg, \label +#else /* not defined __thumb2__ */ + cmp \reg, #0 + bne \label +#endif /* not defined __thumb2__ */ + .endm /* m_cbnz */ + + .macro init + /* Macro to save temporary registers and prepare magic values. */ + subs sp, sp, #16 + strd r4, r5, [sp, #8] + strd r6, r7, [sp] + mvn r6, #0 /* all F */ + mov r7, #0 /* all 0 */ + .endm /* init */ + + .macro magic_compare_and_branch w1 w2 label + /* Macro to compare registers w1 and w2 and conditionally branch to label. */ + cmp \w1, \w2 /* Are w1 and w2 the same? */ + magic_find_zero_bytes \w1 + it eq + cmpeq ip, #0 /* Is there a zero byte in w1? */ + bne \label + .endm /* magic_compare_and_branch */ + + .macro magic_find_zero_bytes w1 + /* Macro to find all-zero bytes in w1, result is in ip. */ +#if (defined (__ARM_FEATURE_DSP)) + uadd8 ip, \w1, r6 + sel ip, r7, r6 +#else /* not defined (__ARM_FEATURE_DSP) */ + /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors. + Coincidentally, these processors only have Thumb-2 mode, where we can use the + (large) magic constant available directly as an immediate in instructions. + Note that we cannot use the magic constant in ARM mode, where we need + to create the constant in a register. 
*/ + sub ip, \w1, #0x01010101 + bic ip, ip, \w1 + and ip, ip, #0x80808080 +#endif /* not defined (__ARM_FEATURE_DSP) */ + .endm /* magic_find_zero_bytes */ + + .macro setup_return w1 w2 +#ifdef __ARMEB__ + mov r1, \w1 + mov r2, \w2 +#else /* not __ARMEB__ */ + rev r1, \w1 + rev r2, \w2 +#endif /* not __ARMEB__ */ + .endm /* setup_return */ + + /* + optpld r0, #0 + optpld r1, #0 + */ + + /* Are both strings double-word aligned? */ + orr ip, r0, r1 + tst ip, #7 + bne .Ldo_align + + /* Fast path. */ + init + +.Ldoubleword_aligned: + + /* Get here when the strings to compare are double-word aligned. */ + /* Compare two words in every iteration. */ + .p2align 2 +2: + /* + optpld r0, #16 + optpld r1, #16 + */ + + /* Load the next double-word from each string. */ + ldrd r2, r3, [r0], #8 + ldrd r4, r5, [r1], #8 + + magic_compare_and_branch w1=r2, w2=r4, label=.Lreturn_24 + magic_compare_and_branch w1=r3, w2=r5, label=.Lreturn_35 + b 2b + +.Ldo_align: + /* Is the first string word-aligned? */ + ands ip, r0, #3 + beq .Lword_aligned_r0 + + /* Fast compare byte by byte until the first string is word-aligned. */ + /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes + to read until the next word boundary is 4-ip. */ + bic r0, r0, #3 + ldr r2, [r0], #4 + lsls ip, ip, #31 + beq .Lbyte2 + bcs .Lbyte3 + +.Lbyte1: + ldrb ip, [r1], #1 + uxtb r3, r2, ror #BYTE1_OFFSET + subs ip, r3, ip + bne .Lfast_return + m_cbz reg=r3, label=.Lfast_return + +.Lbyte2: + ldrb ip, [r1], #1 + uxtb r3, r2, ror #BYTE2_OFFSET + subs ip, r3, ip + bne .Lfast_return + m_cbz reg=r3, label=.Lfast_return + +.Lbyte3: + ldrb ip, [r1], #1 + uxtb r3, r2, ror #BYTE3_OFFSET + subs ip, r3, ip + bne .Lfast_return + m_cbnz reg=r3, label=.Lword_aligned_r0 + +.Lfast_return: + mov r0, ip + bx lr + +.Lword_aligned_r0: + init + /* The first string is word-aligned. */ + /* Is the second string word-aligned? 
*/ + ands ip, r1, #3 + bne .Lstrcmp_unaligned + +.Lword_aligned: + /* The strings are word-aligned. */ + /* Is the first string double-word aligned? */ + tst r0, #4 + beq .Ldoubleword_aligned_r0 + + /* If r0 is not double-word aligned yet, align it by loading + and comparing the next word from each string. */ + ldr r2, [r0], #4 + ldr r4, [r1], #4 + magic_compare_and_branch w1=r2 w2=r4 label=.Lreturn_24 + +.Ldoubleword_aligned_r0: + /* Get here when r0 is double-word aligned. */ + /* Is r1 doubleword_aligned? */ + tst r1, #4 + beq .Ldoubleword_aligned + + /* Get here when the strings to compare are word-aligned, + r0 is double-word aligned, but r1 is not double-word aligned. */ + + /* Initialize the queue. */ + ldr r5, [r1], #4 + + /* Compare two words in every iteration. */ + .p2align 2 +3: + /* + optpld r0, #16 + optpld r1, #16 + */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm + /* Load the next double-word from each string and compare. */ + ldrd r2, r3, [r0], #8 + magic_compare_and_branch w1=r2 w2=r5 label=.Lreturn_25 + ldrd r4, r5, [r1], #8 + magic_compare_and_branch w1=r3 w2=r4 label=.Lreturn_34 + b 3b -#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) \ - || (__ARM_ARCH == 6 && __ARM_ARCH_PROFILE == 'M') + .macro miscmp_word offsetlo offsethi + /* Macro to compare misaligned strings. */ + /* r0, r1 are word-aligned, and at least one of the strings + is not double-word aligned. */ + /* Compare one word in every loop iteration. */ + /* OFFSETLO is the original bit-offset of r1 from a word-boundary, + OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */ -# if defined (__thumb__) && !defined (__thumb2__) -/* Thumb1 only variant. If size is preferred, use strcmp-armv4t.S. - If speed is preferred, the strcmp function in strcmp-armv6m.S - will be used. */ + /* Initialize the shift queue. 
*/ + ldr r5, [r1], #4 -# if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) -# include "strcmp-armv4t.S" -# else -# include "strcmp-armv6m.S" -# endif + /* Compare one word from each string in every loop iteration. */ + .p2align 2 +7: + ldr r3, [r0], #4 + S2LOMEM r5, r5, #\offsetlo + magic_find_zero_bytes w1=r3 + cmp r7, ip, S2HIMEM #\offsetlo + and r2, r3, r6, S2LOMEM #\offsetlo + it eq + cmpeq r2, r5 + bne .Lreturn_25 + ldr r5, [r1], #4 + cmp ip, #0 + eor r3, r2, r3 + S2HIMEM r2, r5, #\offsethi + it eq + cmpeq r3, r2 + bne .Lreturn_32 + b 7b + .endm /* miscmp_word */ -# else -# include "strcmp-arm-tiny.S" -# endif +.Lstrcmp_unaligned: + /* r0 is word-aligned, r1 is at offset ip from a word. */ + /* Align r1 to the (previous) word-boundary. */ + bic r1, r1, #3 -#elif __ARM_ARCH >= 7 + /* Unaligned comparison word by word using LDRs. */ + cmp ip, #2 + beq .Lmiscmp_word_16 /* If ip == 2. */ + bge .Lmiscmp_word_24 /* If ip == 3. */ + miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */ +.Lmiscmp_word_16: miscmp_word offsetlo=16 offsethi=16 +.Lmiscmp_word_24: miscmp_word offsetlo=24 offsethi=8 -# ifdef __ARM_FEATURE_SIMD32 -# include "strcmp-armv7.S" -# else -# include "strcmp-armv7m.S" -# endif -#elif __ARM_ARCH >= 6 +.Lreturn_32: + setup_return w1=r3, w2=r2 + b .Ldo_return +.Lreturn_34: + setup_return w1=r3, w2=r4 + b .Ldo_return +.Lreturn_25: + setup_return w1=r2, w2=r5 + b .Ldo_return +.Lreturn_35: + setup_return w1=r3, w2=r5 + b .Ldo_return +.Lreturn_24: + setup_return w1=r2, w2=r4 -# include "strcmp-armv6.S" +.Ldo_return: +#ifdef __ARMEB__ + mov r0, ip +#else /* not __ARMEB__ */ + rev r0, ip +#endif /* not __ARMEB__ */ + + /* Restore temporaries early, before computing the return value. */ + ldrd r6, r7, [sp] + ldrd r4, r5, [sp, #8] + adds sp, sp, #16 + + /* There is a zero or a different byte between r1 and r2. */ + /* r0 contains a mask of all-zero bytes in r1. */ + /* Using r0 and not ip here because cbz requires low register. 
*/ + m_cbz reg=r0, label=.Lcompute_return_value + clz r0, r0 + /* r0 contains the number of bits on the left of the first all-zero byte in r1. */ + rsb r0, r0, #24 + /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */ + lsr r1, r1, r0 + lsr r2, r2, r0 + +.Lcompute_return_value: + movs r0, #1 + cmp r1, r2 + /* The return value is computed as follows. + If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return. + If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0, + which means r0:=r0-r0-1 and r0 is #-1 at return. + If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1, + which means r0:=r0-r0 and r0 is #0 at return. + (C==0 and Z==1) cannot happen because the carry bit is "not borrow". */ + it ls + sbcls r0, r0, r0 + bx lr + + +#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) + defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || + (defined (__thumb__) && !defined (__thumb2__))) */ + + /* Use LDR whenever possible. */ + +#ifdef __thumb2__ +#define magic1(REG) 0x01010101 +#define magic2(REG) 0x80808080 +#else +#define magic1(REG) REG +#define magic2(REG) REG, lsl #7 +#endif + + optpld r0 + optpld r1 + eor r2, r0, r1 + tst r2, #3 + /* Strings not at same byte offset from a word boundary. */ + bne .Lstrcmp_unaligned + ands r2, r0, #3 + bic r0, r0, #3 + bic r1, r1, #3 + ldr ip, [r0], #4 + it eq + ldreq r3, [r1], #4 + beq 1f + /* Although s1 and s2 have identical initial alignment, they are + not currently word aligned. Rather than comparing bytes, + make sure that any bytes fetched from before the addressed + bytes are forced to 0xff. Then they will always compare + equal. */ + eor r2, r2, #3 + lsl r2, r2, #3 + mvn r3, MSB + S2LOMEM r2, r3, r2 + ldr r3, [r1], #4 + orr ip, ip, r2 + orr r3, r3, r2 +1: +#ifndef __thumb2__ + /* Load the 'magic' constant 0x01010101. */ + str r4, [sp, #-4]! 
+ mov r4, #1 + orr r4, r4, r4, lsl #8 + orr r4, r4, r4, lsl #16 +#endif + .p2align 2 +4: + optpld r0, #8 + optpld r1, #8 + sub r2, ip, magic1(r4) + cmp ip, r3 + itttt eq + /* check for any zero bytes in first word */ + biceq r2, r2, ip + tsteq r2, magic2(r4) + ldreq ip, [r0], #4 + ldreq r3, [r1], #4 + beq 4b +2: + /* There's a zero or a different byte in the word */ + S2HIMEM r0, ip, #24 + S2LOMEM ip, ip, #8 + cmp r0, #1 + it cs + cmpcs r0, r3, S2HIMEM #24 + it eq + S2LOMEMEQ r3, r3, #8 + beq 2b + /* On a big-endian machine, r0 contains the desired byte in bits + 0-7; on a little-endian machine they are in bits 24-31. In + both cases the other bits in r0 are all zero. For r3 the + interesting byte is at the other end of the word, but the + other bits are not necessarily zero. We need a signed result + representing the difference in the unsigned bytes, so for the + little-endian case we can't just shift the interesting bits + up. */ +#ifdef __ARMEB__ + sub r0, r0, r3, lsr #24 +#else + and r3, r3, #255 +#ifdef __thumb2__ + /* No RSB instruction in Thumb2 */ + lsr r0, r0, #24 + sub r0, r0, r3 +#else + rsb r0, r3, r0, lsr #24 +#endif +#endif +#ifndef __thumb2__ + ldr r4, [sp], #4 +#endif + RETURN + + +.Lstrcmp_unaligned: + +#if 0 + /* The assembly code below is based on the following algorithm. 
*/ +#ifdef __ARMEB__ +#define RSHIFT << +#define LSHIFT >> +#else +#define RSHIFT >> +#define LSHIFT << +#endif + +#define body(shift) \ + mask = 0xffffffffU RSHIFT shift; \ + w1 = *wp1++; \ + w2 = *wp2++; \ + do \ + { \ + t1 = w1 & mask; \ + if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \ + { \ + w2 RSHIFT= shift; \ + break; \ + } \ + if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \ + { \ + /* See comment in assembler below re syndrome on big-endian */\ + if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \ + w2 RSHIFT= shift; \ + else \ + { \ + w2 = *wp2; \ + t1 = w1 RSHIFT (32 - shift); \ + w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ + } \ + break; \ + } \ + w2 = *wp2++; \ + t1 ^= w1; \ + if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \ + { \ + t1 = w1 >> (32 - shift); \ + w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \ + break; \ + } \ + w1 = *wp1++; \ + } while (1) + + const unsigned* wp1; + const unsigned* wp2; + unsigned w1, w2; + unsigned mask; + unsigned shift; + unsigned b1 = 0x01010101; + char c1, c2; + unsigned t1; + + while (((unsigned) s1) & 3) + { + c1 = *s1++; + c2 = *s2++; + if (c1 == 0 || c1 != c2) + return c1 - (int)c2; + } + wp1 = (unsigned*) (((unsigned)s1) & ~3); + wp2 = (unsigned*) (((unsigned)s2) & ~3); + t1 = ((unsigned) s2) & 3; + if (t1 == 1) + { + body(8); + } + else if (t1 == 2) + { + body(16); + } + else + { + body (24); + } + + do + { +#ifdef __ARMEB__ + c1 = (char) t1 >> 24; + c2 = (char) w2 >> 24; +#else /* not __ARMEB__ */ + c1 = (char) t1; + c2 = (char) w2; +#endif /* not __ARMEB__ */ + t1 RSHIFT= 8; + w2 RSHIFT= 8; + } while (c1 != 0 && c1 == c2); + return c1 - c2; +#endif /* 0 */ + + + wp1 .req r0 + wp2 .req r1 + b1 .req r2 + w1 .req r4 + w2 .req r5 + t1 .req ip + @ r3 is scratch + + /* First of all, compare bytes until wp1(sp1) is word-aligned. 
*/ +1: + tst wp1, #3 + beq 2f + ldrb r2, [wp1], #1 + ldrb r3, [wp2], #1 + cmp r2, #1 + it cs + cmpcs r2, r3 + beq 1b + sub r0, r2, r3 + RETURN + +2: + str r5, [sp, #-4]! + str r4, [sp, #-4]! + //stmfd sp!, {r4, r5} + mov b1, #1 + orr b1, b1, b1, lsl #8 + orr b1, b1, b1, lsl #16 + + and t1, wp2, #3 + bic wp2, wp2, #3 + ldr w1, [wp1], #4 + ldr w2, [wp2], #4 + cmp t1, #2 + beq 2f + bhi 3f + + /* Critical inner Loop: Block with 3 bytes initial overlap */ + .p2align 2 +1: + bic t1, w1, MSB + cmp t1, w2, S2LOMEM #8 + sub r3, w1, b1 + bic r3, r3, w1 + bne 4f + ands r3, r3, b1, lsl #7 + it eq + ldreq w2, [wp2], #4 + bne 5f + eor t1, t1, w1 + cmp t1, w2, S2HIMEM #24 + bne 6f + ldr w1, [wp1], #4 + b 1b +4: + S2LOMEM w2, w2, #8 + b 8f + +5: +#ifdef __ARMEB__ + /* The syndrome value may contain false ones if the string ends + with the bytes 0x01 0x00 */ + tst w1, #0xff000000 + itt ne + tstne w1, #0x00ff0000 + tstne w1, #0x0000ff00 + beq 7f #else + bics r3, r3, #0xff000000 + bne 7f +#endif + ldrb w2, [wp2] + S2LOMEM t1, w1, #24 +#ifdef __ARMEB__ + lsl w2, w2, #24 +#endif + b 8f + +6: + S2LOMEM t1, w1, #24 + and w2, w2, LSB + b 8f -# include "strcmp-armv4.S" + /* Critical inner Loop: Block with 2 bytes initial overlap */ + .p2align 2 +2: + S2HIMEM t1, w1, #16 + sub r3, w1, b1 + S2LOMEM t1, t1, #16 + bic r3, r3, w1 + cmp t1, w2, S2LOMEM #16 + bne 4f + ands r3, r3, b1, lsl #7 + it eq + ldreq w2, [wp2], #4 + bne 5f + eor t1, t1, w1 + cmp t1, w2, S2HIMEM #16 + bne 6f + ldr w1, [wp1], #4 + b 2b +5: +#ifdef __ARMEB__ + /* The syndrome value may contain false ones if the string ends + with the bytes 0x01 0x00 */ + tst w1, #0xff000000 + it ne + tstne w1, #0x00ff0000 + beq 7f +#else + lsls r3, r3, #16 + bne 7f +#endif + ldrh w2, [wp2] + S2LOMEM t1, w1, #16 +#ifdef __ARMEB__ + lsl w2, w2, #16 #endif + b 8f + +6: + S2HIMEM w2, w2, #16 + S2LOMEM t1, w1, #16 +4: + S2LOMEM w2, w2, #16 + b 8f + + /* Critical inner Loop: Block with 1 byte initial overlap */ + .p2align 2 +3: + and t1, w1, LSB + 
cmp t1, w2, S2LOMEM #24 + sub r3, w1, b1 + bic r3, r3, w1 + bne 4f + ands r3, r3, b1, lsl #7 + it eq + ldreq w2, [wp2], #4 + bne 5f + eor t1, t1, w1 + cmp t1, w2, S2HIMEM #8 + bne 6f + ldr w1, [wp1], #4 + b 3b +4: + S2LOMEM w2, w2, #24 + b 8f +5: + /* The syndrome value may contain false ones if the string ends + with the bytes 0x01 0x00 */ + tst w1, LSB + beq 7f + ldr w2, [wp2], #4 +6: + S2LOMEM t1, w1, #8 + bic w2, w2, MSB + b 8f +7: + mov r0, #0 + //ldmfd sp!, {r4, r5} + ldr r4, [sp], #4 + ldr r5, [sp], #4 + RETURN +8: + and r2, t1, LSB + and r0, w2, LSB + cmp r0, #1 + it cs + cmpcs r0, r2 + itt eq + S2LOMEMEQ t1, t1, #8 + S2LOMEMEQ w2, w2, #8 + beq 8b + sub r0, r2, r0 + //ldmfd sp!, {r4, r5} + ldr r4, [sp], #4 + ldr r5, [sp], #4 + RETURN + +#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) + defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || + (defined (__thumb__) && !defined (__thumb2__))) */ diff --git a/newlib/libc/machine/arm/strlen.c b/newlib/libc/machine/arm/strlen.c index 7e59e755d..b8de22994 100644 --- a/newlib/libc/machine/arm/strlen.c +++ b/newlib/libc/machine/arm/strlen.c @@ -34,24 +34,6 @@ #if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ (defined (__thumb__) && !defined (__thumb2__)) -# if !defined (PREFER_SIZE_OVER_SPEED) && !defined (__OPTIMIZE_SIZE__) -/* Thumb1 only variant. - If speed is preferred, the strlen() function in ../../string/strlen.c - will be used. - - Leave this field blank. So the strlen() is not defined, and this will - automatically pull in the default C definition of strlen() from - ../../string/strlen.c. No need to include this file explicitely. - The lib_a-strlen.o will not be generated, so it won't replace the default - lib_a-strlen.o which is generated by ../../string/strlen.c. See the - commands in configure.in and Makefile.am for more details. 
- - However, if we need to rewrite this function to be more efficient, - we can add the corresponding assembly code into this field and change - the commands in configure.in and Makefile.am to allow the corresponding - lib_a-strlen.o to be generated. -*/ -# else size_t strlen (const char* str) { @@ -61,7 +43,7 @@ strlen (const char* str) asm ("mov %0, #0\n" "1:\n\t" "ldrb %1, [%2, %0]\n\t" - "add %0, %0, #1\n\t" + "add %0, %0, #1\n\t" "cmp %1, #0\n\t" "bne 1b" : "=&r" (len), "=&r" (scratch) : "r" (str) : "memory", "cc"); @@ -76,7 +58,6 @@ strlen (const char* str) return end - str - 1; #endif } -#endif #else #if !(defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__)) |