]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/strchrnul.S
cebfb2bbacf7a5ed7f1c5a89866d3ac0b85026d7
[thirdparty/glibc.git] / sysdeps / i386 / strchrnul.S
1 /* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR
2 or the final NUL byte.
3 For Intel 80x86, x>=3.
4 Copyright (C) 1994-2020 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Ulrich Drepper <drepper@gnu.org>
7 Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
8
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, see
21 <https://www.gnu.org/licenses/>. */
22
23 #include <sysdep.h>
24 #include "asm-syntax.h"
25
26 #define PARMS 4+4 /* space for 1 saved reg */
27 #define RTN PARMS
28 #define STR RTN
29 #define CHR STR+4
30
31 .text
32 ENTRY (__strchrnul)
33
34 pushl %edi /* Save callee-safe registers used here. */
35 cfi_adjust_cfa_offset (4)
36 cfi_rel_offset (edi, 0)
37
38 movl STR(%esp), %eax
39 movl CHR(%esp), %edx
40
41 /* At the moment %edx contains CHR. What we need for the
42 algorithm is CHR in all bytes of the dword. Avoid
43 operations on 16 bit words because these require an
44 prefix byte (and one more cycle). */
45 movb %dl, %dh /* now it is 0|0|c|c */
46 movl %edx, %ecx
47 shll $16, %edx /* now it is c|c|0|0 */
48 movw %cx, %dx /* and finally c|c|c|c */
49
50 /* Before we start with the main loop we process single bytes
51 until the source pointer is aligned. This has two reasons:
52 1. aligned 32-bit memory access is faster
53 and (more important)
54 2. we process in the main loop 32 bit in one step although
55 we don't know the end of the string. But accessing at
56 4-byte alignment guarantees that we never access illegal
57 memory if this would not also be done by the trivial
58 implementation (this is because all processor inherent
59 boundaries are multiples of 4. */
60
61 testb $3, %al /* correctly aligned ? */
62 jz L(11) /* yes => begin loop */
63 movb (%eax), %cl /* load byte in question (we need it twice) */
64 cmpb %cl, %dl /* compare byte */
65 je L(6) /* target found => return */
66 testb %cl, %cl /* is NUL? */
67 jz L(6) /* yes => return NULL */
68 incl %eax /* increment pointer */
69
70 testb $3, %al /* correctly aligned ? */
71 jz L(11) /* yes => begin loop */
72 movb (%eax), %cl /* load byte in question (we need it twice) */
73 cmpb %cl, %dl /* compare byte */
74 je L(6) /* target found => return */
75 testb %cl, %cl /* is NUL? */
76 jz L(6) /* yes => return NULL */
77 incl %eax /* increment pointer */
78
79 testb $3, %al /* correctly aligned ? */
80 jz L(11) /* yes => begin loop */
81 movb (%eax), %cl /* load byte in question (we need it twice) */
82 cmpb %cl, %dl /* compare byte */
83 je L(6) /* target found => return */
84 testb %cl, %cl /* is NUL? */
85 jz L(6) /* yes => return NULL */
86 incl %eax /* increment pointer */
87
88 /* No we have reached alignment. */
89 jmp L(11) /* begin loop */
90
91 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
92 change any of the hole bits of LONGWORD.
93
94 1) Is this safe? Will it catch all the zero bytes?
95 Suppose there is a byte with all zeros. Any carry bits
96 propagating from its left will fall into the hole at its
97 least significant bit and stop. Since there will be no
98 carry from its most significant bit, the LSB of the
99 byte to the left will be unchanged, and the zero will be
100 detected.
101
102 2) Is this worthwhile? Will it ignore everything except
103 zero bytes? Suppose every byte of LONGWORD has a bit set
104 somewhere. There will be a carry into bit 8. If bit 8
105 is set, this will carry into bit 16. If bit 8 is clear,
106 one of bits 9-15 must be set, so there will be a carry
107 into bit 16. Similarly, there will be a carry into bit
108 24. If one of bits 24-31 is set, there will be a carry
109 into bit 32 (=carry flag), so all of the hole bits will
110 be changed.
111
112 3) But wait! Aren't we looking for CHR, not zero?
113 Good point. So what we do is XOR LONGWORD with a longword,
114 each of whose bytes is CHR. This turns each byte that is CHR
115 into a zero. */
116
117 /* Each round the main loop processes 16 bytes. */
118
119 ALIGN(4)
120
121 L(1): addl $16, %eax /* adjust pointer for whole round */
122
123 L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
124 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
125 are now 0 */
126 movl $0xfefefeff, %edi /* magic value */
127 addl %ecx, %edi /* add the magic value to the word. We get
128 carry bits reported for each byte which
129 is *not* CHR */
130
131 /* According to the algorithm we had to reverse the effect of the
132 XOR first and then test the overflow bits. But because the
133 following XOR would destroy the carry flag and it would (in a
134 representation with more than 32 bits) not alter then last
135 overflow, we can now test this condition. If no carry is signaled
136 no overflow must have occurred in the last byte => it was 0. */
137 jnc L(7)
138
139 /* We are only interested in carry bits that change due to the
140 previous add, so remove original bits */
141 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
142
143 /* Now test for the other three overflow bits. */
144 orl $0xfefefeff, %edi /* set all non-carry bits */
145 incl %edi /* add 1: if one carry bit was *not* set
146 the addition will not result in 0. */
147
148 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
149 jnz L(7) /* found it => return pointer */
150
151 /* Now we made sure the dword does not contain the character we are
152 looking for. But because we deal with strings we have to check
153 for the end of string before testing the next dword. */
154
155 xorl %edx, %ecx /* restore original dword without reload */
156 movl $0xfefefeff, %edi /* magic value */
157 addl %ecx, %edi /* add the magic value to the word. We get
158 carry bits reported for each byte which
159 is *not* 0 */
160 jnc L(7) /* highest byte is NUL => return NULL */
161 xorl %ecx, %edi /* (word+magic)^word */
162 orl $0xfefefeff, %edi /* set all non-carry bits */
163 incl %edi /* add 1: if one carry bit was *not* set
164 the addition will not result in 0. */
165 jnz L(7) /* found NUL => return NULL */
166
167 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
168 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
169 are now 0 */
170 movl $0xfefefeff, %edi /* magic value */
171 addl %ecx, %edi /* add the magic value to the word. We get
172 carry bits reported for each byte which
173 is *not* CHR */
174 jnc L(71) /* highest byte is CHR => return pointer */
175 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
176 orl $0xfefefeff, %edi /* set all non-carry bits */
177 incl %edi /* add 1: if one carry bit was *not* set
178 the addition will not result in 0. */
179 jnz L(71) /* found it => return pointer */
180 xorl %edx, %ecx /* restore original dword without reload */
181 movl $0xfefefeff, %edi /* magic value */
182 addl %ecx, %edi /* add the magic value to the word. We get
183 carry bits reported for each byte which
184 is *not* 0 */
185 jnc L(71) /* highest byte is NUL => return NULL */
186 xorl %ecx, %edi /* (word+magic)^word */
187 orl $0xfefefeff, %edi /* set all non-carry bits */
188 incl %edi /* add 1: if one carry bit was *not* set
189 the addition will not result in 0. */
190 jnz L(71) /* found NUL => return NULL */
191
192 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
193 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
194 are now 0 */
195 movl $0xfefefeff, %edi /* magic value */
196 addl %ecx, %edi /* add the magic value to the word. We get
197 carry bits reported for each byte which
198 is *not* CHR */
199 jnc L(72) /* highest byte is CHR => return pointer */
200 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
201 orl $0xfefefeff, %edi /* set all non-carry bits */
202 incl %edi /* add 1: if one carry bit was *not* set
203 the addition will not result in 0. */
204 jnz L(72) /* found it => return pointer */
205 xorl %edx, %ecx /* restore original dword without reload */
206 movl $0xfefefeff, %edi /* magic value */
207 addl %ecx, %edi /* add the magic value to the word. We get
208 carry bits reported for each byte which
209 is *not* 0 */
210 jnc L(72) /* highest byte is NUL => return NULL */
211 xorl %ecx, %edi /* (word+magic)^word */
212 orl $0xfefefeff, %edi /* set all non-carry bits */
213 incl %edi /* add 1: if one carry bit was *not* set
214 the addition will not result in 0. */
215 jnz L(72) /* found NUL => return NULL */
216
217 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
218 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
219 are now 0 */
220 movl $0xfefefeff, %edi /* magic value */
221 addl %ecx, %edi /* add the magic value to the word. We get
222 carry bits reported for each byte which
223 is *not* CHR */
224 jnc L(73) /* highest byte is CHR => return pointer */
225 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
226 orl $0xfefefeff, %edi /* set all non-carry bits */
227 incl %edi /* add 1: if one carry bit was *not* set
228 the addition will not result in 0. */
229 jnz L(73) /* found it => return pointer */
230 xorl %edx, %ecx /* restore original dword without reload */
231 movl $0xfefefeff, %edi /* magic value */
232 addl %ecx, %edi /* add the magic value to the word. We get
233 carry bits reported for each byte which
234 is *not* 0 */
235 jnc L(73) /* highest byte is NUL => return NULL */
236 xorl %ecx, %edi /* (word+magic)^word */
237 orl $0xfefefeff, %edi /* set all non-carry bits */
238 incl %edi /* add 1: if one carry bit was *not* set
239 the addition will not result in 0. */
240 jz L(1) /* no NUL found => restart loop */
241
242 L(73): addl $4, %eax /* adjust pointer */
243 L(72): addl $4, %eax
244 L(71): addl $4, %eax
245
246 /* We now scan for the byte in which the character was matched.
247 But we have to take care of the case that a NUL char is
248 found before this in the dword. */
249
250 L(7): testb %cl, %cl /* is first byte CHR? */
251 jz L(6) /* yes => return pointer */
252 cmpb %dl, %cl /* is first byte NUL? */
253 je L(6) /* yes => return NULL */
254 incl %eax /* it's not in the first byte */
255
256 testb %ch, %ch /* is second byte CHR? */
257 jz L(6) /* yes => return pointer */
258 cmpb %dl, %ch /* is second byte NUL? */
259 je L(6) /* yes => return NULL? */
260 incl %eax /* it's not in the second byte */
261
262 shrl $16, %ecx /* make upper byte accessible */
263 testb %cl, %cl /* is third byte CHR? */
264 jz L(6) /* yes => return pointer */
265 cmpb %dl, %cl /* is third byte NUL? */
266 je L(6) /* yes => return NULL */
267
268 /* It must be in the fourth byte and it cannot be NUL. */
269 incl %eax
270
271 L(6): popl %edi /* restore saved register content */
272 cfi_adjust_cfa_offset (-4)
273 cfi_restore (edi)
274
275 ret
276 END (__strchrnul)
277
278 weak_alias (__strchrnul, strchrnul)