]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/strlen.S
po: Update translations
[thirdparty/glibc.git] / sysdeps / aarch64 / strlen.S
CommitLineData
dff8da6b 1/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
d542f8ed
MS
2
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
d542f8ed
MS
18
19#include <sysdep.h>
20
21/* Assumptions:
22 *
a365ac45
AC
23 * ARMv8-a, AArch64, Advanced SIMD.
24 * MTE compatible.
d542f8ed
MS
25 */
26
436e4d5b
SP
/* Name of the entry point.  Defaults to __strlen unless STRLEN was already
   defined before this point.  */
27#ifndef STRLEN
28# define STRLEN __strlen
29#endif
30
/* srcin and result are both x0: srcin is the string pointer on entry and
   result is the length returned.  Once result has been written srcin is
   no longer available.  */
d542f8ed 31#define srcin x0
a365ac45 32#define result x0
d542f8ed 33
/* src:   srcin rounded down to a 16-byte boundary, then advanced through
          the string by the main loop.
   synd:  64-bit syndrome mask copied out of the vector register.
   tmp:   scratch for the leading-zero count on the loop exit path.
   shift: srcin * 4, used as a register shift amount to drop the syndrome
          bits of bytes that precede the start of the string.  */
d542f8ed 34#define src x1
a365ac45
AC
35#define synd x2
36#define tmp x3
a365ac45
AC
37#define shift x4
38
/* data (q0) and vdata (v0) are two views of the same SIMD register: the
   q-form is used by ldr, the v-form by ld1 and the vector compares.
   vhas_nul holds the per-byte result of comparing the chunk against zero.
   vend (v2) receives the narrowed/reduced compare result; dend (d2) is its
   low 64 bits, which is what fmov transfers into synd.  */
39#define data q0
40#define vdata v0
41#define vhas_nul v1
3c998069
DK
42#define vend v2
43#define dend d2
a365ac45
AC
44
45/* Core algorithm:
03c8ce50
WD
46 Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
47 four bits per byte using the shrn instruction. Counting the trailing zeros
48 of that mask then identifies the first zero byte. */
a365ac45
AC
49
/* STRLEN: return in result (x0) the number of bytes preceding the first
   zero byte of the string whose address is passed in srcin (x0).

   All loads are 16 bytes wide from 16-byte aligned addresses.  The first
   chunk is the aligned chunk containing srcin; bytes in it that lie before
   srcin are masked out of the syndrome.  Subsequent chunks are scanned two
   per loop iteration.

   Registers written: x0-x4, v0-v2, nothing else.  */
50ENTRY (STRLEN)
/* NOTE(review): PTR_ARG is defined outside this file (sysdep.h is the only
   include); presumably it normalises pointer argument 0 -- confirm.  */
45b1e17e 51 PTR_ARG (0)
/* src = srcin with the low four bits cleared; load the aligned 16-byte
   chunk that contains the first byte of the string.  */
a365ac45 52 bic src, srcin, 15
a365ac45 53 ld1 {vdata.16b}, [src]
a365ac45
AC
/* Each byte of vhas_nul becomes all-ones where the data byte is zero.  */
54 cmeq vhas_nul.16b, vdata.16b, 0
/* shift = srcin * 4.  A register-specified lsr uses the amount modulo 64,
   so this is effectively 4 * (srcin & 15): four syndrome bits for every
   byte between src and srcin.  */
55 lsl shift, srcin, 2
/* Narrow each 16-bit lane to 8 bits after shifting right by 4, leaving
   four bits per original byte in the low 64 bits of vend.  */
3c998069 56 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
a365ac45
AC
57 fmov synd, dend
/* Discard the bits belonging to bytes before srcin.  If nothing is left
   there is no zero byte in the first chunk: enter the main loop.  */
58 lsr synd, synd, shift
59 cbz synd, L(loop)
60
/* rbit followed by clz counts trailing zeros of synd.  Dividing by four
   (four bits per byte) gives the index of the first zero byte relative to
   srcin, which is the length.  */
61 rbit synd, synd
62 clz result, synd
63 lsr result, result, 2
c435989f
WD
64 ret
65
a365ac45
AC
/* Align the loop entry to 32 bytes.  */
66 .p2align 5
67L(loop):
03c8ce50
WD
/* Check the chunk at src + 16 without updating src.  umaxp with both
   sources equal folds the 16 compare bytes into the low 64 bits of vend,
   so synd is non-zero exactly when the chunk contains a zero byte.  */
68 ldr data, [src, 16]
69 cmeq vhas_nul.16b, vdata.16b, 0
70 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
71 fmov synd, dend
72 cbnz synd, L(loop_end)
/* Pre-indexed load: src += 32, then check the chunk at the new src.  */
73 ldr data, [src, 32]!
a365ac45
AC
74 cmeq vhas_nul.16b, vdata.16b, 0
75 umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
76 fmov synd, dend
77 cbz synd, L(loop)
03c8ce50
WD
/* The zero byte is in the chunk at src.  Step src back by 16 so that, as
   on the other exit path, the chunk of interest is at src + 16.  */
78 sub src, src, 16
79L(loop_end):
/* vhas_nul still holds the compare result for the chunk at src + 16.
   Rebuild the four-bits-per-byte syndrome from it to locate the byte.  */
3c998069 80 shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
a365ac45
AC
/* result = src - srcin.  This overwrites x0, so srcin is dead from here.  */
81 sub result, src, srcin
82 fmov synd, dend
/* The bit reversal is assembled only when __AARCH64EB__ is not defined, so
   that the clz below counts from the lowest-addressed byte.
   NOTE(review): presumably the ldr q-register load already places the
   first byte at the most significant end on big-endian -- confirm.  */
83#ifndef __AARCH64EB__
84 rbit synd, synd
d542f8ed 85#endif
/* Account for the chunk being at src + 16.  */
03c8ce50 86 add result, result, 16
a365ac45
AC
/* Add the byte index within the chunk: zero-bit count / 4.  */
87 clz tmp, synd
88 add result, result, tmp, lsr 2
c435989f
WD
89 ret
90
436e4d5b
SP
91END (STRLEN)
/* NOTE(review): both macros below are defined outside this file.  Going by
   their names they export STRLEN under the public name strlen as a weak
   alias and add the hidden definition used for internal calls -- confirm.  */
92weak_alias (STRLEN, strlen)
d542f8ed 93libc_hidden_builtin_def (strlen)