]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/strcmp.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / aarch64 / strcmp.S
CommitLineData
/* Copyright (C) 2012-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
38fecb39
MS
18
/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */
24
#include <sysdep.h>

/* Byte-replicated constants used by the word-at-a-time NUL test:
   0x01 in every byte and 0x7f in every byte of a 64-bit word.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.
   Two registers are deliberately shared between names:
     x5 is both `diff' and `off1';
     x6 is both `syndrome' and `tmp'.
   Code using these aliases must not keep both names of a pair live
   at the same time.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* int strcmp (const char *s1, const char *s2)

   In:     x0 = src1, x1 = src2.
   Out:    x0 = result: the byte from src1 minus the byte from src2
	   (both zero-extended) at the first position where the strings
	   differ or where a NUL terminator is found.
   Uses:   x2-x10 (see the register aliases defined above) and NZCV.
	   No stack is used.

   Three paths, chosen from the low three bits of the pointers:
     - both 8-byte aligned: L(loop_aligned);
     - same misalignment:   L(mutual_align), then the aligned loop;
     - different alignment: L(misaligned8), which first aligns src1
       byte by byte and then runs L(loop_unaligned).

   ENTRY, END, L, PTR_ARG and libc_hidden_builtin_def are expected to
   come from <sysdep.h> and the glibc build; PTR_ARG presumably
   normalises a pointer argument for the target ABI -- confirm there.  */

ENTRY(strcmp)
	PTR_ARG (0)
	PTR_ARG (1)
	/* off2 = src2 - src1, so that [src1, off2] addresses src2 while
	   only src1 is advanced.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* tmp = misalignment of src1.  */
	tst	off2, 7			/* Do the alignments differ?  */
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

	/* Both strings are 8-byte aligned: compare one word per iteration.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8	/* Post-increment src1.  */
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was seen (Z set), compare the two words; otherwise
	   force NZCV to 0 so that the branch below is not taken.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul	/* Back to memory byte order.  */
#endif
	/* syndrome has bits set at differing bits and at NUL bytes.  */
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
	/* Common exit.  Expects data1, data2 and syndrome in memory byte
	   order.  On little-endian, byte-reverse them so that the earliest
	   byte becomes the most significant one.  */
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	/* Register shift amounts are taken modulo 64, so this leaves ones
	   only in the bytes that precede the start point.  */
	LS_FW	tmp, tmp, shift
	/* Force those bytes to 0xff in both words: they then compare equal
	   and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	/* Byte-by-byte compare until src1 reaches an 8-byte boundary.
	   Both pointers advance together, so off2 stays valid.  */
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)			/* NUL reached or bytes differ.  */
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is aligned, src2 is not.  Load the aligned word that holds
	   the start of src2 into data3 and test it for a NUL, ignoring the
	   bytes that precede src2.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set bit 0 of each byte that precedes src2 so that those bytes
	   are non-zero for the NUL test.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* off1 addresses the next aligned word of src2 relative to src1.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	/* data3 = aligned src2 word that the unaligned data2 load runs
	   into; data2 = the eight src2 bytes matching data1.  */
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Continue only if data3 holds no NUL and data1 == data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Keep only the NUL markers of data3 that fall inside the bytes
	   covered by data2, moved to the matching byte positions.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
	/* Otherwise the NUL lies in the part of data3 not yet compared.  */
L(tail):
	/* Build the remaining src2 bytes from the already loaded aligned
	   word data3 rather than loading from src2 again.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Exit from the byte loop: data1 and data2 each hold one
	   zero-extended byte.  */
	sub	result, data1, data2
	ret

END(strcmp)
libc_hidden_builtin_def (strcmp)