diff options
Diffstat (limited to 'newlib/libc/machine/aarch64/strrchr.S')
-rw-r--r-- | newlib/libc/machine/aarch64/strrchr.S | 86 |
1 files changed, 28 insertions, 58 deletions
diff --git a/newlib/libc/machine/aarch64/strrchr.S b/newlib/libc/machine/aarch64/strrchr.S index d64fc09b1..b0574228b 100644 --- a/newlib/libc/machine/aarch64/strrchr.S +++ b/newlib/libc/machine/aarch64/strrchr.S @@ -1,32 +1,9 @@ /* - strrchr - find last instance of a character in a string - - Copyright (c) 2014, ARM Limited - All rights Reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the company nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - + * strrchr - find last position of a character in a string. + * + * Copyright (c) 2014-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) /* See strchr-stub.c */ #else @@ -37,6 +14,8 @@ * Neon Available. */ +#include "asmdefs.h" + /* Arguments and results. */ #define srcin x0 #define chrin w1 @@ -78,17 +57,8 @@ in the original string a count_trailing_zeros() operation will identify exactly which byte is causing the termination, and why. */ -/* Locals and temporaries. */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn strrchr +ENTRY (strrchr) + PTR_ARG (0) /* Magic constant 0x40100401 to allow us to identify which lane matches the requested byte. Magic constant 0x80200802 used similarly for NUL termination. */ @@ -100,7 +70,7 @@ def_fn strrchr mov src_offset, #0 ands tmp1, srcin, #31 add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ - b.eq .Laligned + b.eq L(aligned) /* Input string is not 32-byte aligned. Rather than forcing the padding bytes to a safe value, we calculate the syndrome @@ -118,45 +88,45 @@ def_fn strrchr and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64 - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 - mov nul_match, vhas_nul1.2d[0] + addp vend1.16b, vhas_nul1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vend1.d[0] lsl tmp1, tmp1, #1 mov const_m1, #~0 - mov chr_match, vhas_chr1.2d[0] lsr tmp3, const_m1, tmp1 + mov chr_match, vend1.d[1] bic nul_match, nul_match, tmp3 // Mask padding bits. bic chr_match, chr_match, tmp3 // Mask padding bits. - cbnz nul_match, .Ltail + cbnz nul_match, L(tail) -.Lloop: + .p2align 4 +L(loop): cmp chr_match, #0 csel src_match, src, src_match, ne csel src_offset, chr_match, src_offset, ne -.Laligned: +L(aligned): ld1 {vdata1.16b, vdata2.16b}, [src], #32 - cmeq vhas_nul1.16b, vdata1.16b, #0 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 + uminp vend1.16b, vdata1.16b, vdata2.16b and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + cmeq vend1.16b, vend1.16b, 0 addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - addp vend1.16b, vend1.16b, vend1.16b // 128->64 - addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 - mov nul_match, vend1.2d[0] - mov chr_match, vhas_chr1.2d[0] - cbz nul_match, .Lloop + addp vend1.16b, vend1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vend1.d[0] + mov chr_match, vend1.d[1] + cbz nul_match, L(loop) + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_nul2.16b, vdata2.16b, #0 and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b - mov nul_match, vhas_nul1.2d[0] + mov nul_match, vhas_nul1.d[0] -.Ltail: +L(tail): /* Work out exactly where the string ends. */ sub tmp4, nul_match, #1 eor tmp4, tmp4, nul_match @@ -178,5 +148,5 @@ def_fn strrchr ret - .size strrchr, . - strrchr +END (strrchr) #endif |