diff options
author | cvs2svn <> | 2013-06-05 11:57:40 +0400 |
---|---|---|
committer | cvs2svn <> | 2013-06-05 11:57:40 +0400 |
commit | 310a2eeef1d094d59ea9897be0ddb1b33869b885 (patch) | |
tree | 6c061f692be2a0be45d81351d4b9dfc06ec9fe9c /newlib/libc/machine/arm/memcpy-armv7a.S | |
parent | c82eac05c783fa1cb82eac9216dab1ed8aa482ae (diff) |
This commit was manufactured by cvs2svn to create tag 'cygwin-cygwin-1_7_19-release
1_7_19-release'.
Sprout from cygwin-64bit-premerge-branch 2013-04-22 17:11:23 UTC cvs2svn 'This commit was manufactured by cvs2svn to create branch 'cygwin-64bit-'
Cherrypick from master 2013-06-05 07:57:39 UTC Corinna Vinschen <corinna@vinschen.de> ' * faq-programming.xml: Convert url to refer to new flat faq.html file.':
COPYING.NEWLIB
ChangeLog
config.guess
config.sub
config/ChangeLog
config/bootstrap-asan.mk
config/dfp.m4
config/picflag.m4
include/ChangeLog
include/elf/ChangeLog
include/elf/aarch64.h
include/elf/common.h
include/elf/mips.h
include/elf/msp430.h
include/opcode/ChangeLog
include/opcode/avr.h
include/opcode/mips.h
include/opcode/msp430.h
newlib/ChangeLog
newlib/MAINTAINERS
newlib/configure
newlib/configure.host
newlib/configure.in
newlib/libc/configure
newlib/libc/configure.in
newlib/libc/ctype/ctype_.c
newlib/libc/ctype/isalnum.c
newlib/libc/ctype/isalpha.c
newlib/libc/ctype/isblank.c
newlib/libc/ctype/iscntrl.c
newlib/libc/ctype/isdigit.c
newlib/libc/ctype/islower.c
newlib/libc/ctype/isprint.c
newlib/libc/ctype/ispunct.c
newlib/libc/ctype/isxdigit.c
newlib/libc/include/machine/ieeefp.h
newlib/libc/include/machine/setjmp.h
newlib/libc/include/reent.h
newlib/libc/include/sys/cdefs.h
newlib/libc/include/sys/config.h
newlib/libc/include/sys/features.h
newlib/libc/include/sys/reent.h
newlib/libc/include/sys/stat.h
newlib/libc/libc.texinfo
newlib/libc/machine/arm/Makefile.am
newlib/libc/machine/arm/Makefile.in
newlib/libc/machine/arm/memcpy-armv7a.S
newlib/libc/machine/arm/memcpy-armv7m.S
newlib/libc/machine/arm/memcpy-stub.c
newlib/libc/machine/arm/memcpy.S
newlib/libc/machine/arm/strcmp.S
newlib/libc/machine/configure
newlib/libc/machine/configure.in
newlib/libc/machine/powerpc/Makefile.am
newlib/libc/machine/powerpc/Makefile.in
newlib/libc/reent/reent.c
newlib/libc/stdio/fgetc.c
newlib/libc/stdio/fgetwc.c
newlib/libc/stdio/fgetws.c
newlib/libc/stdio/findfp.c
newlib/libc/stdio/fputc.c
newlib/libc/stdio/fputwc.c
newlib/libc/stdio/fputws.c
newlib/libc/stdio/getc.c
newlib/libc/stdio/getchar.c
newlib/libc/stdio/local.h
newlib/libc/stdio/putc.c
newlib/libc/stdio/putchar.c
newlib/libc/stdio/scanf.c
newlib/libc/stdio/setvbuf.c
newlib/libc/stdio/ungetwc.c
newlib/libc/stdio/vfscanf.c
newlib/libc/stdio/vfwscanf.c
newlib/libc/stdio/viprintf.c
newlib/libc/stdio/viscanf.c
newlib/libc/stdio/vprintf.c
newlib/libc/stdio/vscanf.c
newlib/libc/stdio/vwprintf.c
newlib/libc/stdio/vwscanf.c
newlib/libc/stdio/wscanf.c
newlib/libc/stdlib/Makefile.am
newlib/libc/stdlib/Makefile.in
newlib/libc/stdlib/__atexit.c
newlib/libc/stdlib/__call_atexit.c
newlib/libc/stdlib/ecvtbuf.c
newlib/libc/stdlib/mblen.c
newlib/libc/stdlib/mbrlen.c
newlib/libc/stdlib/mbrtowc.c
newlib/libc/stdlib/mbtowc.c
newlib/libc/stdlib/nano-mallocr.c
newlib/libc/stdlib/rand.c
newlib/libc/stdlib/strtod.c
newlib/libc/stdlib/wcrtomb.c
newlib/libc/stdlib/wctob.c
newlib/libc/stdlib/wctomb.c
newlib/libc/string/strtok.c
newlib/libc/time/asctime.c
newlib/libc/time/gmtime.c
newlib/libc/time/lcltime.c
newlib/libm/libm.texinfo
winsup/cygserver/ChangeLog
winsup/cygserver/ChangeLog.64bit
winsup/cygserver/Makefile.in
winsup/cygserver/bsd_helper.cc
winsup/cygserver/bsd_helper.h
winsup/cygserver/bsd_log.cc
winsup/cygserver/bsd_log.h
winsup/cygserver/bsd_mutex.cc
winsup/cygserver/client.cc
winsup/cygserver/cygserver.cc
winsup/cygserver/msg.cc
winsup/cygserver/process.h
winsup/cygserver/sem.cc
winsup/cygserver/shm.cc
winsup/cygserver/sysv_shm.cc
winsup/cygserver/threaded_queue.h
winsup/cygwin/ChangeLog
winsup/cygwin/ChangeLog.64bit
winsup/cygwin/Makefile.in
winsup/cygwin/aclocal.m4
winsup/cygwin/advapi32.cc
winsup/cygwin/autoload.cc
winsup/cygwin/automode.c
winsup/cygwin/binmode.c
winsup/cygwin/child_info.h
winsup/cygwin/common.din
winsup/cygwin/configure
winsup/cygwin/configure.ac
winsup/cygwin/cpuid.h
winsup/cygwin/cygerrno.h
winsup/cygwin/cygheap.cc
winsup/cygwin/cygheap.h
winsup/cygwin/cygmagic
winsup/cygwin/cygmalloc.h
winsup/cygwin/cygserver.h
winsup/cygwin/cygserver_ipc.h
winsup/cygwin/cygthread.cc
winsup/cygwin/cygtls.cc
winsup/cygwin/cygtls.h
winsup/cygwin/cygwin.sc.in
winsup/cygwin/dcrt0.cc
winsup/cygwin/debug.h
winsup/cygwin/devices.cc
winsup/cygwin/devices.h
winsup/cygwin/devices.in
winsup/cygwin/dir.cc
winsup/cygwin/dlfcn.cc
winsup/cygwin/dll_init.cc
winsup/cygwin/dll_init.h
winsup/cygwin/dtable.cc
winsup/cygwin/environ.cc
winsup/cygwin/environ.h
winsup/cygwin/errno.cc
winsup/cygwin/exception.h
winsup/cygwin/exceptions.cc
winsup/cygwin/external.cc
winsup/cygwin/fcntl.cc
winsup/cygwin/fenv.cc
winsup/cygwin/fhandler.cc
winsup/cygwin/fhandler.h
winsup/cygwin/fhandler_clipboard.cc
winsup/cygwin/fhandler_console.cc
winsup/cygwin/fhandler_dev.cc
winsup/cygwin/fhandler_disk_file.cc
winsup/cygwin/fhandler_dsp.cc
winsup/cygwin/fhandler_fifo.cc
winsup/cygwin/fhandler_floppy.cc
winsup/cygwin/fhandler_mailslot.cc
winsup/cygwin/fhandler_mem.cc
winsup/cygwin/fhandler_netdrive.cc
winsup/cygwin/fhandler_proc.cc
winsup/cygwin/fhandler_process.cc
winsup/cygwin/fhandler_procnet.cc
winsup/cygwin/fhandler_procsys.cc
winsup/cygwin/fhandler_procsysvipc.cc
winsup/cygwin/fhandler_random.cc
winsup/cygwin/fhandler_raw.cc
winsup/cygwin/fhandler_registry.cc
winsup/cygwin/fhandler_serial.cc
winsup/cygwin/fhandler_socket.cc
winsup/cygwin/fhandler_tape.cc
winsup/cygwin/fhandler_termios.cc
winsup/cygwin/fhandler_tty.cc
winsup/cygwin/fhandler_virtual.cc
winsup/cygwin/fhandler_virtual.h
winsup/cygwin/fhandler_windows.cc
winsup/cygwin/fhandler_zero.cc
winsup/cygwin/flock.cc
winsup/cygwin/fork.cc
winsup/cygwin/gendef
winsup/cygwin/gentls_offsets
winsup/cygwin/glob.cc
winsup/cygwin/globals.cc
winsup/cygwin/grp.cc
winsup/cygwin/heap.cc
winsup/cygwin/hookapi.cc
winsup/cygwin/i686.din
winsup/cygwin/include/a.out.h
winsup/cygwin/include/asm/byteorder.h
winsup/cygwin/include/bits/wordsize.h
winsup/cygwin/include/cygwin/acl.h
winsup/cygwin/include/cygwin/config.h
winsup/cygwin/include/cygwin/cygwin_dll.h
winsup/cygwin/include/cygwin/grp.h
winsup/cygwin/include/cygwin/if.h
winsup/cygwin/include/cygwin/ipc.h
winsup/cygwin/include/cygwin/msg.h
winsup/cygwin/include/cygwin/sem.h
winsup/cygwin/include/cygwin/shm.h
winsup/cygwin/include/cygwin/signal.h
winsup/cygwin/include/cygwin/socket.h
winsup/cygwin/include/cygwin/stat.h
winsup/cygwin/include/cygwin/stdlib.h
winsup/cygwin/include/cygwin/sysproto.h
winsup/cygwin/include/cygwin/time.h
winsup/cygwin/include/cygwin/types.h
winsup/cygwin/include/cygwin/version.h
winsup/cygwin/include/fcntl.h
winsup/cygwin/include/fts.h
winsup/cygwin/include/ftw.h
winsup/cygwin/include/glob.h
winsup/cygwin/include/inttypes.h
winsup/cygwin/include/io.h
winsup/cygwin/include/limits.h
winsup/cygwin/include/mntent.h
winsup/cygwin/include/stdint.h
winsup/cygwin/include/sys/cygwin.h
winsup/cygwin/include/sys/dirent.h
winsup/cygwin/include/sys/resource.h
winsup/cygwin/include/sys/socket.h
winsup/cygwin/include/sys/strace.h
winsup/cygwin/init.cc
winsup/cygwin/ioctl.cc
winsup/cygwin/ipc.cc
winsup/cygwin/kernel32.cc
winsup/cygwin/lc_msg.h
winsup/cygwin/lib/_cygwin_crt0_common.cc
winsup/cygwin/lib/crt0.h
winsup/cygwin/lib/cygwin_attach_dll.c
winsup/cygwin/lib/premain0.c
winsup/cygwin/lib/premain1.c
winsup/cygwin/lib/premain2.c
winsup/cygwin/lib/premain3.c
winsup/cygwin/libc/arc4random.cc
winsup/cygwin/libc/base64.c
winsup/cygwin/libc/bsdlib.cc
winsup/cygwin/libc/fts.c
winsup/cygwin/libc/ftw.c
winsup/cygwin/libc/inet_network.c
winsup/cygwin/libc/minires-os-if.c
winsup/cygwin/libc/minires.c
winsup/cygwin/libc/nftw.c
winsup/cygwin/libc/rcmd.cc
winsup/cygwin/libc/rexec.cc
winsup/cygwin/libstdcxx_wrapper.cc
winsup/cygwin/localtime.cc
winsup/cygwin/malloc_wrapper.cc
winsup/cygwin/miscfuncs.cc
winsup/cygwin/mkimport
winsup/cygwin/mktemp.cc
winsup/cygwin/mmap.cc
winsup/cygwin/mount.cc
winsup/cygwin/mount.h
winsup/cygwin/msg.cc
winsup/cygwin/mtinfo.h
winsup/cygwin/net.cc
winsup/cygwin/netdb.cc
winsup/cygwin/nfs.h
winsup/cygwin/nlsfuncs.cc
winsup/cygwin/ntdll.h
winsup/cygwin/ntea.cc
winsup/cygwin/passwd.cc
winsup/cygwin/path.cc
winsup/cygwin/path.h
winsup/cygwin/perprocess.h
winsup/cygwin/pinfo.cc
winsup/cygwin/pinfo.h
winsup/cygwin/pipe.cc
winsup/cygwin/poll.cc
winsup/cygwin/posix.sgml
winsup/cygwin/posix_ipc.cc
winsup/cygwin/profil.c
winsup/cygwin/profil.h
winsup/cygwin/pseudo-reloc.cc
winsup/cygwin/pwdgrp.h
winsup/cygwin/regex/engine.c
winsup/cygwin/regex/regcomp.c
winsup/cygwin/registry.cc
winsup/cygwin/regparm.h
winsup/cygwin/release/1.7.19
winsup/cygwin/resource.cc
winsup/cygwin/sched.cc
winsup/cygwin/sec_acl.cc
winsup/cygwin/sec_auth.cc
winsup/cygwin/sec_helper.cc
winsup/cygwin/security.cc
winsup/cygwin/security.h
winsup/cygwin/select.cc
winsup/cygwin/select.h
winsup/cygwin/sem.cc
winsup/cygwin/shared.cc
winsup/cygwin/shared_info.h
winsup/cygwin/shm.cc
winsup/cygwin/signal.cc
winsup/cygwin/sigproc.cc
winsup/cygwin/sigproc.h
winsup/cygwin/smallprint.cc
winsup/cygwin/spawn.cc
winsup/cygwin/speclib
winsup/cygwin/spinlock.h
winsup/cygwin/strace.cc
winsup/cygwin/strfuncs.cc
winsup/cygwin/strsig.cc
winsup/cygwin/sync.cc
winsup/cygwin/sync.h
winsup/cygwin/syscalls.cc
winsup/cygwin/sysconf.cc
winsup/cygwin/syslog.cc
winsup/cygwin/termios.cc
winsup/cygwin/textmode.c
winsup/cygwin/textreadmode.c
winsup/cygwin/thread.cc
winsup/cygwin/thread.h
winsup/cygwin/timer.cc
winsup/cygwin/times.cc
winsup/cygwin/tlsoffsets.h
winsup/cygwin/tlsoffsets64.h
winsup/cygwin/tty.cc
winsup/cygwin/tty.h
winsup/cygwin/uinfo.cc
winsup/cygwin/wait.cc
winsup/cygwin/winbase.h
winsup/cygwin/wincap.cc
winsup/cygwin/wincap.h
winsup/cygwin/window.cc
winsup/cygwin/winlean.h
winsup/cygwin/winsup.h
winsup/cygwin/wow64.cc
winsup/cygwin/wow64.h
winsup/cygwin/x86_64.din
winsup/doc/.cvsignore
winsup/doc/ChangeLog
winsup/doc/Makefile.in
winsup/doc/Wishlist
winsup/doc/bodysnatcher.pl
winsup/doc/configure
winsup/doc/configure.ac
winsup/doc/cygserver.xml
winsup/doc/cygwin-api.in.xml
winsup/doc/cygwin-ug-net.xml
winsup/doc/cygwin.xsl
winsup/doc/cygwinenv.xml
winsup/doc/dll.xml
winsup/doc/effectively.xml
winsup/doc/faq-api.xml
winsup/doc/faq-copyright.xml
winsup/doc/faq-programming.xml
winsup/doc/faq-resources.xml
winsup/doc/faq-setup.xml
winsup/doc/faq-using.xml
winsup/doc/faq-what.xml
winsup/doc/faq.xml
winsup/doc/filemodes.xml
winsup/doc/gcc.xml
winsup/doc/gdb.xml
winsup/doc/highlights.xml
winsup/doc/legal.xml
winsup/doc/new-features.xml
winsup/doc/ntsec.xml
winsup/doc/ov-ex-unix.xml
winsup/doc/ov-ex-win.xml
winsup/doc/overview.xml
winsup/doc/pathnames.xml
winsup/doc/programming.xml
winsup/doc/setup-env.xml
winsup/doc/setup-files.xml
winsup/doc/setup-locale.xml
winsup/doc/setup-maxmem.xml
winsup/doc/setup-net.xml
winsup/doc/specialnames.xml
winsup/doc/textbinary.xml
winsup/doc/ug-info.xml
winsup/doc/using.xml
winsup/doc/windres.xml
winsup/doc/xidepend
winsup/lsaauth/ChangeLog
winsup/lsaauth/ChangeLog.64bit
winsup/lsaauth/Makefile.in
winsup/lsaauth/configure
winsup/lsaauth/configure.ac
winsup/utils/ChangeLog
winsup/utils/ChangeLog.64bit
winsup/utils/Makefile.in
winsup/utils/aclocal.m4
winsup/utils/configure
winsup/utils/cygcheck.cc
winsup/utils/dumper.cc
winsup/utils/dumper.h
winsup/utils/kill.cc
winsup/utils/ldd.cc
winsup/utils/locale.cc
winsup/utils/mkgroup.c
winsup/utils/mkpasswd.c
winsup/utils/module_info.cc
winsup/utils/mount.cc
winsup/utils/parse_pe.cc
winsup/utils/passwd.c
winsup/utils/path.cc
winsup/utils/ps.cc
winsup/utils/regtool.cc
winsup/utils/ssp.c
winsup/utils/strace.cc
winsup/utils/tzset.c
winsup/utils/utils.xml
Delete:
COPYING3
COPYING3.LIB
config.rpath
configure.ac
ltgcc.m4
newlib/libc/machine/aarch64/Makefile.am
newlib/libc/machine/aarch64/Makefile.in
newlib/libc/machine/aarch64/aclocal.m4
newlib/libc/machine/aarch64/configure
newlib/libc/machine/aarch64/configure.in
newlib/libc/machine/aarch64/memcmp-stub.c
newlib/libc/machine/aarch64/memcmp.S
newlib/libc/machine/aarch64/memcpy-stub.c
newlib/libc/machine/aarch64/memcpy.S
newlib/libc/machine/aarch64/memmove-stub.c
newlib/libc/machine/aarch64/memmove.S
newlib/libc/machine/aarch64/memset-stub.c
newlib/libc/machine/aarch64/memset.S
newlib/libc/machine/aarch64/setjmp.S
newlib/libc/machine/aarch64/strcmp-stub.c
newlib/libc/machine/aarch64/strcmp.S
newlib/libc/machine/aarch64/strlen-stub.c
newlib/libc/machine/aarch64/strlen.S
newlib/libc/machine/aarch64/strncmp-stub.c
newlib/libc/machine/aarch64/strncmp.S
newlib/libc/machine/aarch64/strnlen-stub.c
newlib/libc/machine/aarch64/strnlen.S
newlib/libc/machine/epiphany/Makefile.am
newlib/libc/machine/epiphany/Makefile.in
newlib/libc/machine/epiphany/aclocal.m4
newlib/libc/machine/epiphany/configure
newlib/libc/machine/epiphany/configure.in
newlib/libc/machine/epiphany/machine/stdlib.h
newlib/libc/machine/epiphany/setjmp.S
newlib/libc/machine/powerpc/times.c
newlib/libc/sys/epiphany/Makefile.am
newlib/libc/sys/epiphany/Makefile.in
newlib/libc/sys/epiphany/aclocal.m4
newlib/libc/sys/epiphany/configure
newlib/libc/sys/epiphany/configure.in
newlib/libc/sys/epiphany/e_printf.c
newlib/libm/machine/aarch64/Makefile.am
newlib/libm/machine/aarch64/Makefile.in
newlib/libm/machine/aarch64/aclocal.m4
newlib/libm/machine/aarch64/configure
newlib/libm/machine/aarch64/configure.in
newlib/libm/machine/aarch64/s_ceil.c
newlib/libm/machine/aarch64/s_floor.c
newlib/libm/machine/aarch64/s_fma.c
newlib/libm/machine/aarch64/s_fmax.c
newlib/libm/machine/aarch64/s_fmin.c
newlib/libm/machine/aarch64/s_llrint.c
newlib/libm/machine/aarch64/s_llround.c
newlib/libm/machine/aarch64/s_lrint.c
newlib/libm/machine/aarch64/s_lround.c
newlib/libm/machine/aarch64/s_nearbyint.c
newlib/libm/machine/aarch64/s_rint.c
newlib/libm/machine/aarch64/s_round.c
newlib/libm/machine/aarch64/s_trunc.c
newlib/libm/machine/aarch64/sf_ceil.c
newlib/libm/machine/aarch64/sf_floor.c
newlib/libm/machine/aarch64/sf_fma.c
newlib/libm/machine/aarch64/sf_fmax.c
newlib/libm/machine/aarch64/sf_fmin.c
newlib/libm/machine/aarch64/sf_llrint.c
newlib/libm/machine/aarch64/sf_llround.c
newlib/libm/machine/aarch64/sf_lrint.c
newlib/libm/machine/aarch64/sf_lround.c
newlib/libm/machine/aarch64/sf_nearbyint.c
newlib/libm/machine/aarch64/sf_rint.c
newlib/libm/machine/aarch64/sf_round.c
newlib/libm/machine/aarch64/sf_trunc.c
winsup/cygwin/cygwin.din
winsup/cygwin/cygwin.sc
winsup/doc/cygserver.sgml
winsup/doc/cygwin-api.in.sgml
winsup/doc/cygwin-ug-net.in.sgml
winsup/doc/cygwin-ug.in.sgml
winsup/doc/cygwin.dsl
winsup/doc/cygwinenv.sgml
winsup/doc/dll.sgml
winsup/doc/effectively.sgml
winsup/doc/faq-sections.xml
winsup/doc/filemodes.sgml
winsup/doc/gcc.sgml
winsup/doc/gdb.sgml
winsup/doc/legal.sgml
winsup/doc/new-features.sgml
winsup/doc/ntsec.sgml
winsup/doc/overview.sgml
winsup/doc/overview2.sgml
winsup/doc/pathnames.sgml
winsup/doc/programming.sgml
winsup/doc/setup-net.sgml
winsup/doc/setup.sgml
winsup/doc/setup2.sgml
winsup/doc/textbinary.sgml
winsup/doc/using.sgml
winsup/doc/windres.sgml
winsup/utils/utils.sgml
Diffstat (limited to 'newlib/libc/machine/arm/memcpy-armv7a.S')
-rw-r--r-- | newlib/libc/machine/arm/memcpy-armv7a.S | 619 |
1 files changed, 619 insertions, 0 deletions
diff --git a/newlib/libc/machine/arm/memcpy-armv7a.S b/newlib/libc/machine/arm/memcpy-armv7a.S new file mode 100644 index 000000000..de5bf9ad8 --- /dev/null +++ b/newlib/libc/machine/arm/memcpy-armv7a.S @@ -0,0 +1,619 @@ +/* Copyright (c) 2013, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This memcpy routine is optimised for Cortex-A15 cores and takes advantage + of VFP or NEON when built with the appropriate flags. + + Assumptions: + + ARMv6 (ARMv7-a if using Neon) + ARM state + Unaligned accesses + LDRD/STRD support unaligned word accesses + + If compiled with GCC, this file should be enclosed within following + pre-processing check: + if defined (__ARM_ARCH_7A__) && defined (__ARM_FEATURE_UNALIGNED) + + */ + .syntax unified + /* This implementation requires ARM state. */ + .arm + +#ifdef __ARM_NEON__ + + .fpu neon + .arch armv7-a +# define FRAME_SIZE 4 +# define USE_VFP +# define USE_NEON + +#elif !defined (__SOFTFP__) + + .arch armv6 + .fpu vfpv2 +# define FRAME_SIZE 32 +# define USE_VFP + +#else + .arch armv6 +# define FRAME_SIZE 32 + +#endif + +/* Old versions of GAS incorrectly implement the NEON align semantics. */ +#ifdef BROKEN_ASM_NEON_ALIGN +#define ALIGN(addr, align) addr,:align +#else +#define ALIGN(addr, align) addr:align +#endif + +#define PC_OFFSET 8 /* PC pipeline compensation. */ +#define INSN_SIZE 4 + +/* Call parameters. */ +#define dstin r0 +#define src r1 +#define count r2 + +/* Locals. */ +#define tmp1 r3 +#define dst ip +#define tmp2 r10 + +#ifndef USE_NEON +/* For bulk copies using GP registers. */ +#define A_l r2 /* Call-clobbered. */ +#define A_h r3 /* Call-clobbered. */ +#define B_l r4 +#define B_h r5 +#define C_l r6 +#define C_h r7 +#define D_l r8 +#define D_h r9 +#endif + +/* Number of lines ahead to pre-fetch data. If you change this the code + below will need adjustment to compensate. */ + +#define prefetch_lines 5 + +#ifdef USE_VFP + .macro cpy_line_vfp vreg, base + vstr \vreg, [dst, #\base] + vldr \vreg, [src, #\base] + vstr d0, [dst, #\base + 8] + vldr d0, [src, #\base + 8] + vstr d1, [dst, #\base + 16] + vldr d1, [src, #\base + 16] + vstr d2, [dst, #\base + 24] + vldr d2, [src, #\base + 24] + vstr \vreg, [dst, #\base + 32] + vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] + vstr d0, [dst, #\base + 40] + vldr d0, [src, #\base + 40] + vstr d1, [dst, #\base + 48] + vldr d1, [src, #\base + 48] + vstr d2, [dst, #\base + 56] + vldr d2, [src, #\base + 56] + .endm + + .macro cpy_tail_vfp vreg, base + vstr \vreg, [dst, #\base] + vldr \vreg, [src, #\base] + vstr d0, [dst, #\base + 8] + vldr d0, [src, #\base + 8] + vstr d1, [dst, #\base + 16] + vldr d1, [src, #\base + 16] + vstr d2, [dst, #\base + 24] + vldr d2, [src, #\base + 24] + vstr \vreg, [dst, #\base + 32] + vstr d0, [dst, #\base + 40] + vldr d0, [src, #\base + 40] + vstr d1, [dst, #\base + 48] + vldr d1, [src, #\base + 48] + vstr d2, [dst, #\base + 56] + vldr d2, [src, #\base + 56] + .endm +#endif + + .macro def_fn f p2align=0 + .text + .p2align \p2align + .global \f + .type \f, %function +\f: + .endm + +def_fn memcpy p2align=6 + + mov dst, dstin /* Preserve dstin, we need to return it. */ + cmp count, #64 + bge .Lcpy_not_short + /* Deal with small copies quickly by dropping straight into the + exit block. */ + +.Ltail63unaligned: +#ifdef USE_NEON + and tmp1, count, #0x38 + rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) + add pc, pc, tmp1 + vld1.8 {d0}, [src]! /* 14 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 12 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 10 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 8 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 6 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 4 words to go. */ + vst1.8 {d0}, [dst]! + vld1.8 {d0}, [src]! /* 2 words to go. */ + vst1.8 {d0}, [dst]! + + tst count, #4 + ldrne tmp1, [src], #4 + strne tmp1, [dst], #4 +#else + /* Copy up to 15 full words of data. May not be aligned. */ + /* Cannot use VFP for unaligned data. */ + and tmp1, count, #0x3c + add dst, dst, tmp1 + add src, src, tmp1 + rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2) + /* Jump directly into the sequence below at the correct offset. */ + add pc, pc, tmp1, lsl #1 + + ldr tmp1, [src, #-60] /* 15 words to go. */ + str tmp1, [dst, #-60] + + ldr tmp1, [src, #-56] /* 14 words to go. */ + str tmp1, [dst, #-56] + ldr tmp1, [src, #-52] + str tmp1, [dst, #-52] + + ldr tmp1, [src, #-48] /* 12 words to go. */ + str tmp1, [dst, #-48] + ldr tmp1, [src, #-44] + str tmp1, [dst, #-44] + + ldr tmp1, [src, #-40] /* 10 words to go. */ + str tmp1, [dst, #-40] + ldr tmp1, [src, #-36] + str tmp1, [dst, #-36] + + ldr tmp1, [src, #-32] /* 8 words to go. */ + str tmp1, [dst, #-32] + ldr tmp1, [src, #-28] + str tmp1, [dst, #-28] + + ldr tmp1, [src, #-24] /* 6 words to go. */ + str tmp1, [dst, #-24] + ldr tmp1, [src, #-20] + str tmp1, [dst, #-20] + + ldr tmp1, [src, #-16] /* 4 words to go. */ + str tmp1, [dst, #-16] + ldr tmp1, [src, #-12] + str tmp1, [dst, #-12] + + ldr tmp1, [src, #-8] /* 2 words to go. */ + str tmp1, [dst, #-8] + ldr tmp1, [src, #-4] + str tmp1, [dst, #-4] +#endif + + lsls count, count, #31 + ldrhcs tmp1, [src], #2 + ldrbne src, [src] /* Src is dead, use as a scratch. */ + strhcs tmp1, [dst], #2 + strbne src, [dst] + bx lr + +.Lcpy_not_short: + /* At least 64 bytes to copy, but don't know the alignment yet. */ + str tmp2, [sp, #-FRAME_SIZE]! + and tmp2, src, #3 + and tmp1, dst, #3 + cmp tmp1, tmp2 + bne .Lcpy_notaligned + +#ifdef USE_VFP + /* Magic dust alert! Force VFP on Cortex-A9. Experiments show + that the FP pipeline is much better at streaming loads and + stores. This is outside the critical loop. */ + vmov.f32 s0, s0 +#endif + + /* SRC and DST have the same mutual 32-bit alignment, but we may + still need to pre-copy some bytes to get to natural alignment. + We bring DST into full 64-bit alignment. */ + lsls tmp2, dst, #29 + beq 1f + rsbs tmp2, tmp2, #0 + sub count, count, tmp2, lsr #29 + ldrmi tmp1, [src], #4 + strmi tmp1, [dst], #4 + lsls tmp2, tmp2, #2 + ldrhcs tmp1, [src], #2 + ldrbne tmp2, [src], #1 + strhcs tmp1, [dst], #2 + strbne tmp2, [dst], #1 + +1: + subs tmp2, count, #64 /* Use tmp2 for count. */ + blt .Ltail63aligned + + cmp tmp2, #512 + bge .Lcpy_body_long + +.Lcpy_body_medium: /* Count in tmp2. */ +#ifdef USE_VFP +1: + vldr d0, [src, #0] + subs tmp2, tmp2, #64 + vldr d1, [src, #8] + vstr d0, [dst, #0] + vldr d0, [src, #16] + vstr d1, [dst, #8] + vldr d1, [src, #24] + vstr d0, [dst, #16] + vldr d0, [src, #32] + vstr d1, [dst, #24] + vldr d1, [src, #40] + vstr d0, [dst, #32] + vldr d0, [src, #48] + vstr d1, [dst, #40] + vldr d1, [src, #56] + vstr d0, [dst, #48] + add src, src, #64 + vstr d1, [dst, #56] + add dst, dst, #64 + bge 1b + tst tmp2, #0x3f + beq .Ldone + +.Ltail63aligned: /* Count in tmp2. */ + and tmp1, tmp2, #0x38 + add dst, dst, tmp1 + add src, src, tmp1 + rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) + add pc, pc, tmp1 + + vldr d0, [src, #-56] /* 14 words to go. */ + vstr d0, [dst, #-56] + vldr d0, [src, #-48] /* 12 words to go. */ + vstr d0, [dst, #-48] + vldr d0, [src, #-40] /* 10 words to go. */ + vstr d0, [dst, #-40] + vldr d0, [src, #-32] /* 8 words to go. */ + vstr d0, [dst, #-32] + vldr d0, [src, #-24] /* 6 words to go. */ + vstr d0, [dst, #-24] + vldr d0, [src, #-16] /* 4 words to go. */ + vstr d0, [dst, #-16] + vldr d0, [src, #-8] /* 2 words to go. */ + vstr d0, [dst, #-8] +#else + sub src, src, #8 + sub dst, dst, #8 +1: + ldrd A_l, A_h, [src, #8] + strd A_l, A_h, [dst, #8] + ldrd A_l, A_h, [src, #16] + strd A_l, A_h, [dst, #16] + ldrd A_l, A_h, [src, #24] + strd A_l, A_h, [dst, #24] + ldrd A_l, A_h, [src, #32] + strd A_l, A_h, [dst, #32] + ldrd A_l, A_h, [src, #40] + strd A_l, A_h, [dst, #40] + ldrd A_l, A_h, [src, #48] + strd A_l, A_h, [dst, #48] + ldrd A_l, A_h, [src, #56] + strd A_l, A_h, [dst, #56] + ldrd A_l, A_h, [src, #64]! + strd A_l, A_h, [dst, #64]! + subs tmp2, tmp2, #64 + bge 1b + tst tmp2, #0x3f + bne 1f + ldr tmp2,[sp], #FRAME_SIZE + bx lr +1: + add src, src, #8 + add dst, dst, #8 + +.Ltail63aligned: /* Count in tmp2. */ + /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but + we know that the src and dest are 32-bit aligned so we can use + LDRD/STRD to improve efficiency. */ + /* TMP2 is now negative, but we don't care about that. The bottom + six bits still tell us how many bytes are left to copy. */ + + and tmp1, tmp2, #0x38 + add dst, dst, tmp1 + add src, src, tmp1 + rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) + add pc, pc, tmp1 + ldrd A_l, A_h, [src, #-56] /* 14 words to go. */ + strd A_l, A_h, [dst, #-56] + ldrd A_l, A_h, [src, #-48] /* 12 words to go. */ + strd A_l, A_h, [dst, #-48] + ldrd A_l, A_h, [src, #-40] /* 10 words to go. */ + strd A_l, A_h, [dst, #-40] + ldrd A_l, A_h, [src, #-32] /* 8 words to go. */ + strd A_l, A_h, [dst, #-32] + ldrd A_l, A_h, [src, #-24] /* 6 words to go. */ + strd A_l, A_h, [dst, #-24] + ldrd A_l, A_h, [src, #-16] /* 4 words to go. */ + strd A_l, A_h, [dst, #-16] + ldrd A_l, A_h, [src, #-8] /* 2 words to go. */ + strd A_l, A_h, [dst, #-8] + +#endif + tst tmp2, #4 + ldrne tmp1, [src], #4 + strne tmp1, [dst], #4 + lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ + ldrhcs tmp1, [src], #2 + ldrbne tmp2, [src] + strhcs tmp1, [dst], #2 + strbne tmp2, [dst] + +.Ldone: + ldr tmp2, [sp], #FRAME_SIZE + bx lr + +.Lcpy_body_long: /* Count in tmp2. */ + + /* Long copy. We know that there's at least (prefetch_lines * 64) + bytes to go. */ +#ifdef USE_VFP + /* Don't use PLD. Instead, read some data in advance of the current + copy position into a register. This should act like a PLD + operation but we won't have to repeat the transfer. */ + + vldr d3, [src, #0] + vldr d4, [src, #64] + vldr d5, [src, #128] + vldr d6, [src, #192] + vldr d7, [src, #256] + + vldr d0, [src, #8] + vldr d1, [src, #16] + vldr d2, [src, #24] + add src, src, #32 + + subs tmp2, tmp2, #prefetch_lines * 64 * 2 + blt 2f +1: + cpy_line_vfp d3, 0 + cpy_line_vfp d4, 64 + cpy_line_vfp d5, 128 + add dst, dst, #3 * 64 + add src, src, #3 * 64 + cpy_line_vfp d6, 0 + cpy_line_vfp d7, 64 + add dst, dst, #2 * 64 + add src, src, #2 * 64 + subs tmp2, tmp2, #prefetch_lines * 64 + bge 1b + +2: + cpy_tail_vfp d3, 0 + cpy_tail_vfp d4, 64 + cpy_tail_vfp d5, 128 + add src, src, #3 * 64 + add dst, dst, #3 * 64 + cpy_tail_vfp d6, 0 + vstr d7, [dst, #64] + vldr d7, [src, #64] + vstr d0, [dst, #64 + 8] + vldr d0, [src, #64 + 8] + vstr d1, [dst, #64 + 16] + vldr d1, [src, #64 + 16] + vstr d2, [dst, #64 + 24] + vldr d2, [src, #64 + 24] + vstr d7, [dst, #64 + 32] + add src, src, #96 + vstr d0, [dst, #64 + 40] + vstr d1, [dst, #64 + 48] + vstr d2, [dst, #64 + 56] + add dst, dst, #128 + add tmp2, tmp2, #prefetch_lines * 64 + b .Lcpy_body_medium +#else + /* Long copy. Use an SMS style loop to maximize the I/O + bandwidth of the core. We don't have enough spare registers + to synthesise prefetching, so use PLD operations. */ + /* Pre-bias src and dst. */ + sub src, src, #8 + sub dst, dst, #8 + pld [src, #8] + pld [src, #72] + subs tmp2, tmp2, #64 + pld [src, #136] + ldrd A_l, A_h, [src, #8] + strd B_l, B_h, [sp, #8] + ldrd B_l, B_h, [src, #16] + strd C_l, C_h, [sp, #16] + ldrd C_l, C_h, [src, #24] + strd D_l, D_h, [sp, #24] + pld [src, #200] + ldrd D_l, D_h, [src, #32]! + b 1f + .p2align 6 +2: + pld [src, #232] + strd A_l, A_h, [dst, #40] + ldrd A_l, A_h, [src, #40] + strd B_l, B_h, [dst, #48] + ldrd B_l, B_h, [src, #48] + strd C_l, C_h, [dst, #56] + ldrd C_l, C_h, [src, #56] + strd D_l, D_h, [dst, #64]! + ldrd D_l, D_h, [src, #64]! + subs tmp2, tmp2, #64 +1: + strd A_l, A_h, [dst, #8] + ldrd A_l, A_h, [src, #8] + strd B_l, B_h, [dst, #16] + ldrd B_l, B_h, [src, #16] + strd C_l, C_h, [dst, #24] + ldrd C_l, C_h, [src, #24] + strd D_l, D_h, [dst, #32] + ldrd D_l, D_h, [src, #32] + bcs 2b + /* Save the remaining bytes and restore the callee-saved regs. */ + strd A_l, A_h, [dst, #40] + add src, src, #40 + strd B_l, B_h, [dst, #48] + ldrd B_l, B_h, [sp, #8] + strd C_l, C_h, [dst, #56] + ldrd C_l, C_h, [sp, #16] + strd D_l, D_h, [dst, #64] + ldrd D_l, D_h, [sp, #24] + add dst, dst, #72 + tst tmp2, #0x3f + bne .Ltail63aligned + ldr tmp2, [sp], #FRAME_SIZE + bx lr +#endif + +.Lcpy_notaligned: + pld [src] + pld [src, #64] + /* There's at least 64 bytes to copy, but there is no mutual + alignment. */ + /* Bring DST to 64-bit alignment. */ + lsls tmp2, dst, #29 + pld [src, #(2 * 64)] + beq 1f + rsbs tmp2, tmp2, #0 + sub count, count, tmp2, lsr #29 + ldrmi tmp1, [src], #4 + strmi tmp1, [dst], #4 + lsls tmp2, tmp2, #2 + ldrbne tmp1, [src], #1 + ldrhcs tmp2, [src], #2 + strbne tmp1, [dst], #1 + strhcs tmp2, [dst], #2 +1: + pld [src, #(3 * 64)] + subs count, count, #64 + ldrmi tmp2, [sp], #FRAME_SIZE + bmi .Ltail63unaligned + pld [src, #(4 * 64)] + +#ifdef USE_NEON + vld1.8 {d0-d3}, [src]! + vld1.8 {d4-d7}, [src]! + subs count, count, #64 + bmi 2f +1: + pld [src, #(4 * 64)] + vst1.8 {d0-d3}, [ALIGN (dst, 64)]! + vld1.8 {d0-d3}, [src]! + vst1.8 {d4-d7}, [ALIGN (dst, 64)]! + vld1.8 {d4-d7}, [src]! + subs count, count, #64 + bpl 1b +2: + vst1.8 {d0-d3}, [ALIGN (dst, 64)]! + vst1.8 {d4-d7}, [ALIGN (dst, 64)]! + ands count, count, #0x3f +#else + /* Use an SMS style loop to maximize the I/O bandwidth. */ + sub src, src, #4 + sub dst, dst, #8 + subs tmp2, count, #64 /* Use tmp2 for count. */ + ldr A_l, [src, #4] + ldr A_h, [src, #8] + strd B_l, B_h, [sp, #8] + ldr B_l, [src, #12] + ldr B_h, [src, #16] + strd C_l, C_h, [sp, #16] + ldr C_l, [src, #20] + ldr C_h, [src, #24] + strd D_l, D_h, [sp, #24] + ldr D_l, [src, #28] + ldr D_h, [src, #32]! + b 1f + .p2align 6 +2: + pld [src, #(5 * 64) - (32 - 4)] + strd A_l, A_h, [dst, #40] + ldr A_l, [src, #36] + ldr A_h, [src, #40] + strd B_l, B_h, [dst, #48] + ldr B_l, [src, #44] + ldr B_h, [src, #48] + strd C_l, C_h, [dst, #56] + ldr C_l, [src, #52] + ldr C_h, [src, #56] + strd D_l, D_h, [dst, #64]! + ldr D_l, [src, #60] + ldr D_h, [src, #64]! + subs tmp2, tmp2, #64 +1: + strd A_l, A_h, [dst, #8] + ldr A_l, [src, #4] + ldr A_h, [src, #8] + strd B_l, B_h, [dst, #16] + ldr B_l, [src, #12] + ldr B_h, [src, #16] + strd C_l, C_h, [dst, #24] + ldr C_l, [src, #20] + ldr C_h, [src, #24] + strd D_l, D_h, [dst, #32] + ldr D_l, [src, #28] + ldr D_h, [src, #32] + bcs 2b + + /* Save the remaining bytes and restore the callee-saved regs. */ + strd A_l, A_h, [dst, #40] + add src, src, #36 + strd B_l, B_h, [dst, #48] + ldrd B_l, B_h, [sp, #8] + strd C_l, C_h, [dst, #56] + ldrd C_l, C_h, [sp, #16] + strd D_l, D_h, [dst, #64] + ldrd D_l, D_h, [sp, #24] + add dst, dst, #72 + ands count, tmp2, #0x3f +#endif + ldr tmp2, [sp], #FRAME_SIZE + bne .Ltail63unaligned + bx lr + + .size memcpy, . - memcpy |