]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/strchrnul.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / aarch64 / strchrnul.S
CommitLineData
be9d4ccc
RE
1/* strchrnul - find a character or nul in a string
2
2b778ceb 3 Copyright (C) 2014-2021 Free Software Foundation, Inc.
be9d4ccc
RE
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
5a82c748 19 <https://www.gnu.org/licenses/>. */
be9d4ccc
RE
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
f7de454f
AC
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
be9d4ccc
RE
27 */
28
be9d4ccc
RE
29#define srcin x0 /* In: pointer to the start of the string. */
30#define chrin w1 /* In: character to search for; broadcast as a byte. */
be9d4ccc
RE
31#define result x0 /* Out: returned pointer; same register as srcin. */
32
be9d4ccc 33#define src x2 /* 16-byte-aligned address of the chunk being examined. */
f7de454f
AC
34#define tmp1 x1 /* Scratch for the syndrome; overlaps chrin (w1 is the low half of x1). */
35#define tmp2 x3 /* Scratch: shift count used to drop bytes before srcin. */
36#define tmp2w w3 /* 32-bit view of tmp2, used to build the nibble mask constant. */
be9d4ccc
RE
37
38#define vrepchr v0 /* chrin replicated into all 16 byte lanes. */
f7de454f
AC
39#define vdata v1 /* The 16 string bytes currently under test. */
40#define qdata q1 /* 128-bit view of vdata, used by the ldr in the loop. */
41#define vhas_nul v2 /* Not referenced by the code visible in this file. */
42#define vhas_chr v3 /* Per-byte mask: 0xff where the byte equals chrin or is NUL. */
43#define vrepmask v4 /* 0xf00f in every halfword: selects one nibble per byte. */
44#define vend v5 /* Pairwise-reduced (128->64 bit) form of vhas_chr. */
45#define dend d5 /* Low 64 bits of vend, moved to tmp1 with fmov. */
46
47/* Core algorithm:
48
49 For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
50 per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
51 requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
52 set likewise for odd bytes so that adjacent bytes can be merged. Since the
53 bits in the syndrome reflect the order in which things occur in the original
54 string, counting trailing zeros identifies exactly which byte matched. */
be9d4ccc
RE
55
/* __strchrnul: return a pointer to the first byte in the string at srcin
   that equals the low byte of chrin, or to the terminating NUL if that comes
   first.

   In:  x0 = srcin, w1 = chrin.
   Out: x0 = result.
   Writes x1-x3 and v0, v1, v3-v5 (through the aliases defined above).

   Every load is from a 16-byte-aligned address: src is srcin rounded down,
   then advanced in steps of 16.  */
56ENTRY (__strchrnul)
/* PTR_ARG comes from <sysdep.h>, which is not shown here; presumably it
   normalises pointer argument 0 for ILP32 -- confirm against sysdep.h.  */
45b1e17e 57 PTR_ARG (0)
f7de454f 58 bic src, srcin, 15 /* src = srcin rounded down to a 16-byte boundary. */
be9d4ccc 59 dup vrepchr.16b, chrin /* Broadcast the target byte; chrin (w1) is dead after this. */
f7de454f
AC
60 ld1 {vdata.16b}, [src] /* Load the aligned chunk that contains srcin. */
61 mov tmp2w, 0xf00f
62 dup vrepmask.8h, tmp2w /* Even byte lanes get 0x0f, odd byte lanes get 0xf0. */
63 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* 0xff where byte == chrin, else 0x00. */
/* Unsigned vhas_chr >= vdata: always true where the byte matched (0xff), and
   true for a non-match (0x00) only when the data byte is itself 0.  The
   result is therefore 0xff for "matches chrin or is NUL".  */
64 cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b
/* tmp2 = srcin * 4.  The register shift below uses only the low 6 bits,
   i.e. 4 * (srcin % 16): four syndrome bits per byte that precedes srcin.  */
65 lsl tmp2, srcin, 2
66 and vhas_chr.16b, vhas_chr.16b, vrepmask.16b /* Keep one nibble per byte. */
67 addp vend.16b, vhas_chr.16b, vhas_chr.16b /* Merge byte pairs: 128->64 bit syndrome. */
68 fmov tmp1, dend /* Syndrome to a general register (overwrites x1). */
69 lsr tmp1, tmp1, tmp2 /* Drop nibbles of bytes before srcin; bit 0 now maps to srcin. */
70 cbz tmp1, L(loop) /* Nothing found in the first chunk: scan further chunks. */
be9d4ccc 71
f7de454f
AC
/* rbit followed by clz counts trailing zeros; four bits per byte, so the
   count divided by 4 is the byte offset from srcin.  */
72 rbit tmp1, tmp1
73 clz tmp1, tmp1
74 add result, srcin, tmp1, lsr 2
75 ret
76
77 .p2align 4 /* Align the loop entry to 16 bytes. */
be9d4ccc 78L(loop):
f7de454f
AC
79 ldr qdata, [src, 16]! /* Pre-index: src += 16, then load that chunk. */
80 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* Same match-or-NUL test as above. */
81 cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b
82 umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* Pairwise max: dend != 0 iff any lane is set. */
83 fmov tmp1, dend
be9d4ccc
RE
84 cbz tmp1, L(loop) /* No match and no NUL in this chunk: keep going. */
85
f7de454f
AC
/* A hit is somewhere in the chunk at src; build the 4-bits-per-byte
   syndrome to locate it.  */
86 and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
87 addp vend.16b, vhas_chr.16b, vhas_chr.16b /* Merge byte pairs: 128->64 bit syndrome. */
88 fmov tmp1, dend
/* The bit reversal is skipped when __AARCH64EB__ is defined, so clz alone
   is used there.  NOTE(review): presumably because ldr of a q register on
   big-endian puts the first string byte in the most significant lane --
   confirm.  */
89#ifndef __AARCH64EB__
be9d4ccc 90 rbit tmp1, tmp1
f7de454f
AC
91#endif
92 clz tmp1, tmp1
93 add result, src, tmp1, lsr 2 /* result = src + bit index / 4. */
be9d4ccc
RE
94 ret
95
96END(__strchrnul)
/* weak_alias is defined outside this file; presumably it makes strchrnul a
   weak alias of __strchrnul -- confirm against its definition.  */
97weak_alias (__strchrnul, strchrnul)