]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/strnlen.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / aarch64 / strnlen.S
CommitLineData
4499bb3e
MS
1/* strnlen - calculate the length of a string with limit.
2
2b778ceb 3 Copyright (C) 2013-2021 Free Software Foundation, Inc.
4499bb3e
MS
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
5a82c748 19 <https://www.gnu.org/licenses/>. */
4499bb3e
MS
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64
26 */
27
28/* Arguments and results.  len shares x0 with srcin, so writing the
   result replaces the incoming string pointer.  */
29#define srcin x0
30#define len x0
31#define limit x1
32
33/* Locals and temporaries. */
34#define src x2
35#define data1 x3
36#define data2 x4
37#define data2a x5
38#define has_nul1 x6
39#define has_nul2 x7
40#define tmp1 x8
41#define tmp2 x9
42#define tmp3 x10
43#define tmp4 x11
44#define zeroones x12
45#define pos x13
46#define limit_wd x14
47
2911cb68
XZ
/* Vector register views: dataq/datav name register 2, while
   datab2/dataq2/datav2 name register 3.  */
48#define dataq q2
49#define datav v2
50#define datab2 b3
51#define dataq2 q3
52#define datav2 v3
4499bb3e
MS
/* 64-bit constants with the same byte value repeated eight times.  */
53#define REP8_01 0x0101010101010101
54#define REP8_7f 0x7f7f7f7f7f7f7f7f
55#define REP8_80 0x8080808080808080
56
/* __strnlen: length of the string at srcin, capped at limit.
   In:  x0 = srcin (string pointer), x1 = limit (maximum byte count).
   Out: x0 = len = MIN (string length, limit).  limit itself is returned
        when no NUL is found before the limit is used up, and when limit
        is zero.
   Scratch: x2-x14, v2, v3 and the condition flags.
   Data is always read as whole 16-byte blocks, starting at srcin rounded
   down to a multiple of 16.  The first block is tested with scalar
   arithmetic, every further block with a vector minimum.  */
57ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
45b1e17e
SN
58 PTR_ARG (0)
59 SIZE_ARG (1)
4499bb3e
MS
60 cbz limit, L(hit_limit) /* limit == 0: return before any load. */
61 mov zeroones, #REP8_01
62 bic src, srcin, #15 /* src = srcin rounded down to 16 bytes. */
63 ands tmp1, srcin, #15 /* tmp1 = offset of srcin inside that block. */
64 b.ne L(misaligned)
65 /* Number of 16-byte blocks (full or partial) covered by limit, minus one. */
66 sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
67 lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
68
69 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
70 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
71 can be done in parallel across the entire word. */
72 /* The inner loop deals with two Dwords at a time. This has a
73 slightly higher start-up cost, but we should win quite quickly,
74 especially on cores with a high number of issue slots per
75 cycle, as we get much better parallelism out of the operations. */
76
77 /* Start of critical section -- keep to one 64-byte cache line. */
2911cb68 78
4499bb3e
MS
79 ldp data1, data2, [src], #16
80L(realigned):
81 sub tmp1, data1, zeroones
82 orr tmp2, data1, #REP8_7f
83 sub tmp3, data2, zeroones
84 orr tmp4, data2, #REP8_7f
85 bic has_nul1, tmp1, tmp2
86 bic has_nul2, tmp3, tmp4
87 subs limit_wd, limit_wd, #1 /* Goes negative once the limit is used up. */
88 orr tmp1, has_nul1, has_nul2
89 ccmp tmp1, #0, #0, pl /* NZCV = 0000 (ne) if the limit is used up. */
90 b.eq L(loop) /* No NUL yet and blocks remain: keep scanning. */
91 /* End of critical section -- keep to one 64Byte cache line. */
92
93 orr tmp1, has_nul1, has_nul2
94 cbz tmp1, L(hit_limit) /* No null in final Qword. */
95
96 /* We know there's a null in the final Qword. The easiest thing
97 to do now is work out the length of the string and return
98 MIN (len, limit). */
99
100 sub len, src, srcin /* src is 16 past the block start, hence the two 8-byte steps back below. */
101 cbz has_nul1, L(nul_in_data2)
102#ifdef __AARCH64EB__
103 mov data2, data1
104#endif
105 sub len, len, #8 /* NUL is in data1: step back over data2. */
106 mov has_nul2, has_nul1
107L(nul_in_data2):
108#ifdef __AARCH64EB__
109 /* For big-endian, carry propagation (if the final byte in the
110 string is 0x01) means we cannot use has_nul directly. The
111 easiest way to get the correct byte is to byte-swap the data
112 and calculate the syndrome a second time. */
113 rev data2, data2
114 sub tmp1, data2, zeroones
115 orr tmp2, data2, #REP8_7f
116 bic has_nul2, tmp1, tmp2
117#endif
118 sub len, len, #8 /* len = offset of the selected Dword from srcin. */
119 rev has_nul2, has_nul2 /* Byte-reverse so clz locates the first NUL in memory order. */
120 clz pos, has_nul2
121 add len, len, pos, lsr #3 /* Bits to bytes. */
122 cmp len, limit
123 csel len, len, limit, ls /* Return the lower value. */
124 RET
125
2911cb68
XZ
/* Vector loop, unrolled twice, one 16-byte block per step.  uminv puts
   the smallest byte of the block in datab2, which is zero iff the block
   contains a NUL.  */
126L(loop):
127 ldr dataq, [src], #16
128 uminv datab2, datav.16b
129 mov tmp1, datav2.d[0]
130 subs limit_wd, limit_wd, #1
131 ccmp tmp1, #0, #4, pl /* NZCV = 0100 (eq) if the limit is used up. */
132 b.eq L(loop_end) /* Leave on a NUL or when the limit is used up. */
133 ldr dataq, [src], #16
134 uminv datab2, datav.16b
135 mov tmp1, datav2.d[0]
136 subs limit_wd, limit_wd, #1
137 ccmp tmp1, #0, #4, pl /* NZCV = 0100 (eq) if the limit is used up. */
138 b.ne L(loop) /* No NUL and blocks remain: go round again. */
139L(loop_end):
140 /* End of critical section -- keep to one 64Byte cache line. */
141
142 cbnz tmp1, L(hit_limit) /* No null in final Qword. */
143
144 /* We know there's a null in the final Qword. The easiest thing
145 to do now is work out the length of the string and return
146 MIN (len, limit). */
147
148#ifdef __AARCH64EB__
149 rev64 datav.16b, datav.16b
150#endif
151 /* Set each NUL byte to 0xff and every other byte to 0x00, move the
152 data into a pair of scalars and then compute the length from the
153 earliest NUL byte. */
154
155 cmeq datav.16b, datav.16b, #0
59b64f9c
LS
156#ifdef __AARCH64EB__
157 mov data1, datav.d[1]
158 mov data2, datav.d[0]
159#else
2911cb68
XZ
160 mov data1, datav.d[0]
161 mov data2, datav.d[1]
59b64f9c 162#endif
2911cb68
XZ
163 cmp data1, 0 /* These flags feed both csel instructions below. */
164 csel data1, data1, data2, ne /* Keep whichever Dword holds the first NUL. */
165 sub len, src, srcin
166 sub len, len, #16 /* len = offset of the final block from srcin. */
167 rev data1, data1
168 add tmp2, len, 8
169 clz tmp1, data1
170 csel len, len, tmp2, ne /* Add 8 when the NUL is in the second Dword. */
171 add len, len, tmp1, lsr 3 /* Bits to bytes. */
172 cmp len, limit
173 csel len, len, limit, ls /* Return the lower value. */
174 RET
175
4499bb3e
MS
176L(misaligned):
177 /* Deal with a partial first word.
178 We're doing two things in parallel here;
179 1) Calculate the number of words (but avoiding overflow if
180 limit is near ULONG_MAX) - to do this we need to work out
181 limit + tmp1 - 1 as a 65-bit value before shifting it;
182 2) Load and mask the initial data words - we force the bytes
183 before the ones we are interested in to 0xff - this ensures
184 early bytes will not hit any zero detection. */
185 sub limit_wd, limit, #1
186 neg tmp4, tmp1
187 cmp tmp1, #8 /* Flags are consumed by csinv and csel at the end. */
188
189 and tmp3, limit_wd, #15 /* tmp3 = (limit - 1) mod 16. */
190 lsr limit_wd, limit_wd, #4 /* Whole blocks in limit - 1. */
191 mov tmp2, #~0
192
193 ldp data1, data2, [src], #16
194 lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
195 add tmp3, tmp3, tmp1 /* Leftover bytes plus the start offset. */
196
197#ifdef __AARCH64EB__
198 /* Big-endian. Early bytes are at MSB. */
199 lsl tmp2, tmp2, tmp4 /* Shift count is tmp4 mod 64. */
200#else
201 /* Little-endian. Early bytes are at LSB. */
202 lsr tmp2, tmp2, tmp4 /* Shift count is tmp4 mod 64. */
203#endif
204 add limit_wd, limit_wd, tmp3, lsr #4 /* One more block if that sum reaches 16. */
205
206 orr data1, data1, tmp2
207 orr data2a, data2, tmp2
208
209 csinv data1, data1, xzr, le /* Offset > 8: all of data1 precedes srcin, force it to ones. */
210 csel data2, data2, data2a, le /* Offset <= 8: data2 is left unmasked. */
211 b L(realigned)
212
213L(hit_limit):
214 mov len, limit
215 RET
216END (__strnlen)
/* NOTE(review): libc_hidden_def and weak_alias are macros defined outside
   this file.  Presumably they publish __strnlen under the name strnlen as
   a weak alias and set up hidden internal definitions for both names.
   Confirm against the glibc symbol-handling macros.  */
17696087 217libc_hidden_def (__strnlen)
4499bb3e
MS
218weak_alias (__strnlen, strnlen)
219libc_hidden_def (strnlen)