]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 2012-2024 Free Software Foundation, Inc. |
d542f8ed MS |
2 | |
3 | This file is part of the GNU C Library. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library. If not, see | |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
d542f8ed MS |
18 | |
19 | #include <sysdep.h> | |
20 | ||
21 | /* Assumptions: | |
22 | * | |
a365ac45 AC |
23 | * ARMv8-a, AArch64, Advanced SIMD. |
24 | * MTE compatible. | |
d542f8ed MS |
25 | */ |
26 | ||
436e4d5b SP |
/* Default the entry-point name to __strlen unless STRLEN is already defined. */
27 | #ifndef STRLEN |
28 | # define STRLEN __strlen | |
29 | #endif | |
30 | ||
/* Register roles.  srcin and result deliberately share x0: the incoming
   pointer is only overwritten once the final length is being produced. */
d542f8ed | 31 | #define srcin x0 |
a365ac45 | 32 | #define result x0 |
d542f8ed | 33 | |
/* src    - address of the current 16-byte chunk (srcin rounded down, then advanced).
   synd   - 64-bit syndrome: the NUL-match mask moved to a general register.
   tmp    - scratch for the leading-zero count on the loop exit path.
   shift  - srcin * 4, used to drop mask nibbles for bytes before srcin. */
d542f8ed | 34 | #define src x1 |
a365ac45 AC |
35 | #define synd x2 |
36 | #define tmp x3 | |
a365ac45 AC |
37 | #define shift x4 |
38 | ||
/* data/vdata are the 128-bit scalar and vector views of register 0 (the
   loaded chunk); vhas_nul holds the per-byte compare-with-zero result;
   vend/dend are the vector and 64-bit scalar views of register 2 (the
   reduced mask that is moved into synd). */
39 | #define data q0 | |
40 | #define vdata v0 | |
41 | #define vhas_nul v1 | |
3c998069 DK |
42 | #define vend v2 |
43 | #define dend d2 | |
a365ac45 AC |
44 | |
45 | /* Core algorithm: | |
03c8ce50 WD |
46 | Process the string in 16-byte aligned chunks. Compute a 64-bit mask with |
47 | four bits per byte using the shrn instruction. Counting trailing zeros then | |
48 | identifies the first zero byte. */ | |
a365ac45 AC |
49 | |
/* STRLEN: return in result (x0) the number of bytes before the first NUL
   byte of the string whose address is passed in srcin (x0).
   Registers written: x0-x4 and v0-v2 (see the alias definitions).
   All loads are 16 bytes wide from 16-byte aligned addresses. */
50 | ENTRY (STRLEN) | |
45b1e17e | 51 | PTR_ARG (0) |
/* First chunk: round srcin down to a 16-byte boundary and load the whole
   aligned chunk, which may include bytes that precede the string. */
a365ac45 | 52 | bic src, srcin, 15 |
a365ac45 | 53 | ld1 {vdata.16b}, [src] |
a365ac45 AC |
/* Each byte lane becomes 0xff where the data byte is zero, else 0x00. */
54 | cmeq vhas_nul.16b, vdata.16b, 0 |
/* shift = srcin * 4.  The register-controlled lsr below uses the shift
   amount modulo 64, so this is effectively (srcin & 15) * 4: four mask
   bits for every byte that lies before srcin in the chunk. */
55 | lsl shift, srcin, 2 | |
3c998069 | 56 | shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ |
a365ac45 AC |
57 | fmov synd, dend |
/* Discard the mask nibbles belonging to bytes before srcin. */
58 | lsr synd, synd, shift | |
/* No NUL at or after srcin in the first chunk: continue with the loop. */
59 | cbz synd, L(loop) | |
60 | ||
/* rbit + clz counts trailing zeros of synd; dividing by 4 (bits per byte)
   gives the index of the first NUL relative to srcin, i.e. the length.
   NOTE(review): this rbit is unconditional while the one after the loop is
   skipped for __AARCH64EB__ - presumably because this chunk is loaded with
   ld1 and the loop chunks with ldr; confirm against the architecture
   manual before changing either load. */
61 | rbit synd, synd | |
62 | clz result, synd | |
63 | lsr result, result, 2 | |
c435989f WD |
64 | ret |
65 | |
a365ac45 AC |
/* Main loop, aligned to 32 bytes and unrolled to test two chunks per
   iteration.  src is only advanced (by 32, pre-index writeback) on the
   second load, so the first load of each iteration reads src + 16. */
66 | .p2align 5 |
67 | L(loop): | |
03c8ce50 WD |
68 | ldr data, [src, 16] |
69 | cmeq vhas_nul.16b, vdata.16b, 0 | |
/* Pairwise byte maximum with both operands equal folds the 128-bit match
   mask into the low 64 bits, so dend is nonzero iff any byte matched. */
70 | umaxp vend.16b, vhas_nul.16b, vhas_nul.16b | |
71 | fmov synd, dend | |
/* NUL found in the chunk at src + 16. */
72 | cbnz synd, L(loop_end) | |
73 | ldr data, [src, 32]! | |
a365ac45 AC |
74 | cmeq vhas_nul.16b, vdata.16b, 0 |
75 | umaxp vend.16b, vhas_nul.16b, vhas_nul.16b | |
76 | fmov synd, dend | |
77 | cbz synd, L(loop) | |
03c8ce50 WD |
/* The match is in the chunk at the already-updated src.  Step src back by
   16 so that, as on the other exit, the matching chunk is at src + 16. */
78 | sub src, src, 16 |
/* On entry here vhas_nul still holds the full per-byte mask of the
   matching chunk, which starts at src + 16. */
79 | L(loop_end): | |
3c998069 | 80 | shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ |
a365ac45 AC |
/* result = src - srcin; the + 16 chunk offset is added further down. */
81 | sub result, src, srcin |
82 | fmov synd, dend | |
/* Little-endian only: reverse the bits so that clz below counts from the
   nibble of the lowest-addressed byte.
   NOTE(review): on big-endian the mask is presumably already in that
   order because of how ldr fills the register - confirm. */
83 | #ifndef __AARCH64EB__ | |
84 | rbit synd, synd | |
d542f8ed | 85 | #endif |
03c8ce50 | 86 | add result, result, 16 |
a365ac45 AC |
/* tmp / 4 is the byte index of the first NUL within the matching chunk. */
87 | clz tmp, synd |
88 | add result, result, tmp, lsr 2 | |
c435989f WD |
89 | ret |
90 | ||
436e4d5b SP |
91 | END (STRLEN) |
92 | weak_alias (STRLEN, strlen) | |
d542f8ed | 93 | libc_hidden_builtin_def (strlen) |