]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/memchr.S
2.5-18.1
[thirdparty/glibc.git] / sysdeps / i386 / memchr.S
CommitLineData
0ecb606c
JJ
1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
3 Copyright (C) 1994-1998, 2000, 2003, 2005 Free Software Foundation, Inc.
6d52618b
UD
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
6d52618b
UD
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
6d52618b
UD
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
14
15 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
6d52618b
UD
19
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 23 Lesser General Public License for more details.
6d52618b 24
41bdb6e2
AJ
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, write to the Free
27 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307 USA. */
8f5ca04b
RM
29
30#include <sysdep.h>
31#include "asm-syntax.h"
2fc08826 32#include "bp-sym.h"
3f02f778 33#include "bp-asm.h"
8f5ca04b 34
3f02f778
GM
35#define PARMS LINKAGE+8 /* space for 2 saved regs */
36#define RTN PARMS
37#define STR RTN+RTN_SIZE
38#define CHR STR+PTR_SIZE
39#define LEN CHR+4
8f5ca04b
RM
40
41 .text
abf70633 42ENTRY (BP_SYM (__memchr))
3f02f778
GM
43 ENTER
44
8f5ca04b
RM
45 /* Save callee-safe registers used in this function. */
46 pushl %esi
0ecb606c 47 cfi_adjust_cfa_offset (4)
8f5ca04b 48 pushl %edi
0ecb606c
JJ
49 cfi_adjust_cfa_offset (4)
50 cfi_rel_offset (edi, 0)
8f5ca04b
RM
51
52 /* Load parameters into registers. */
3f02f778
GM
53 movl STR(%esp), %eax /* str: pointer to memory block. */
54 movl CHR(%esp), %edx /* c: byte we are looking for. */
55 movl LEN(%esp), %esi /* len: length of memory block. */
0ecb606c 56 cfi_rel_offset (esi, 4)
53c06508 57 CHECK_BOUNDS_LOW (%eax, STR(%esp))
8f5ca04b
RM
58
59 /* If my must not test more than three characters test
60 them one by one. This is especially true for 0. */
61 cmpl $4, %esi
5929563f 62 jb L(3)
8f5ca04b 63
3f02f778
GM
64 /* At the moment %edx contains CHR. What we need for the
65 algorithm is CHR in all bytes of the dword. Avoid
8f5ca04b
RM
66 operations on 16 bit words because these require an
67 prefix byte (and one more cycle). */
68 movb %dl, %dh /* Now it is 0|0|c|c */
69 movl %edx, %ecx
70 shll $16, %edx /* Now c|c|0|0 */
71 movw %cx, %dx /* And finally c|c|c|c */
72
73 /* Better performance can be achieved if the word (32
74 bit) memory access is aligned on a four-byte-boundary.
75 So process first bytes one by one until boundary is
76 reached. Don't use a loop for better performance. */
77
50304ef0 78 testb $3, %al /* correctly aligned ? */
5929563f 79 je L(2) /* yes => begin loop */
8f5ca04b 80 cmpb %dl, (%eax) /* compare byte */
5929563f 81 je L(9) /* target found => return */
8f5ca04b
RM
82 incl %eax /* increment source pointer */
83 decl %esi /* decrement length counter */
5929563f 84 je L(4) /* len==0 => return NULL */
8f5ca04b 85
50304ef0 86 testb $3, %al /* correctly aligned ? */
5929563f 87 je L(2) /* yes => begin loop */
8f5ca04b 88 cmpb %dl, (%eax) /* compare byte */
5929563f 89 je L(9) /* target found => return */
8f5ca04b
RM
90 incl %eax /* increment source pointer */
91 decl %esi /* decrement length counter */
5929563f 92 je L(4) /* len==0 => return NULL */
8f5ca04b 93
50304ef0 94 testb $3, %al /* correctly aligned ? */
5929563f 95 je L(2) /* yes => begin loop */
8f5ca04b 96 cmpb %dl, (%eax) /* compare byte */
5929563f 97 je L(9) /* target found => return */
8f5ca04b
RM
98 incl %eax /* increment source pointer */
99 decl %esi /* decrement length counter */
100 /* no test for len==0 here, because this is done in the
101 loop head */
5929563f 102 jmp L(2)
8f5ca04b
RM
103
104 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
105 change any of the hole bits of LONGWORD.
106
107 1) Is this safe? Will it catch all the zero bytes?
108 Suppose there is a byte with all zeros. Any carry bits
109 propagating from its left will fall into the hole at its
110 least significant bit and stop. Since there will be no
111 carry from its most significant bit, the LSB of the
112 byte to the left will be unchanged, and the zero will be
113 detected.
114
115 2) Is this worthwhile? Will it ignore everything except
116 zero bytes? Suppose every byte of LONGWORD has a bit set
117 somewhere. There will be a carry into bit 8. If bit 8
118 is set, this will carry into bit 16. If bit 8 is clear,
119 one of bits 9-15 must be set, so there will be a carry
120 into bit 16. Similarly, there will be a carry into bit
121 24. If one of bits 24-31 is set, there will be a carry
122 into bit 32 (=carry flag), so all of the hole bits will
123 be changed.
124
3f02f778 125 3) But wait! Aren't we looking for CHR, not zero?
8f5ca04b 126 Good point. So what we do is XOR LONGWORD with a longword,
3f02f778 127 each of whose bytes is CHR. This turns each byte that is CHR
8f5ca04b
RM
128 into a zero. */
129
130
131 /* Each round the main loop processes 16 bytes. */
132
133 ALIGN (4)
134
5929563f 135L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
8f5ca04b
RM
136 movl $0xfefefeff, %edi /* magic value */
137 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
138 are now 0 */
139 addl %ecx, %edi /* add the magic value to the word. We get
140 carry bits reported for each byte which
141 is *not* 0 */
142
143 /* According to the algorithm we had to reverse the effect of the
144 XOR first and then test the overflow bits. But because the
145 following XOR would destroy the carry flag and it would (in a
146 representation with more than 32 bits) not alter then last
147 overflow, we can now test this condition. If no carry is signaled
6d52618b 148 no overflow must have occurred in the last byte => it was 0. */
5929563f 149 jnc L(8)
8f5ca04b
RM
150
151 /* We are only interested in carry bits that change due to the
152 previous add, so remove original bits */
153 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
154
155 /* Now test for the other three overflow bits. */
156 orl $0xfefefeff, %edi /* set all non-carry bits */
157 incl %edi /* add 1: if one carry bit was *not* set
158 the addition will not result in 0. */
159
3f02f778 160 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
5929563f 161 jnz L(8) /* found it => return pointer */
8f5ca04b
RM
162
163 /* This process is unfolded four times for better performance.
164 we don't increment the source pointer each time. Instead we
165 use offsets and increment by 16 in each run of the loop. But
166 before probing for the matching byte we need some extra code
167 (following LL(13) below). Even the len can be compared with
168 constants instead of decrementing each time. */
169
170 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
171 movl $0xfefefeff, %edi /* magic value */
172 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
173 are now 0 */
174 addl %ecx, %edi /* add the magic value to the word. We get
175 carry bits reported for each byte which
176 is *not* 0 */
3f02f778 177 jnc L(7) /* highest byte is CHR => return pointer */
8f5ca04b
RM
178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
179 orl $0xfefefeff, %edi /* set all non-carry bits */
180 incl %edi /* add 1: if one carry bit was *not* set
181 the addition will not result in 0. */
5929563f 182 jnz L(7) /* found it => return pointer */
8f5ca04b
RM
183
184 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
185 movl $0xfefefeff, %edi /* magic value */
186 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
187 are now 0 */
188 addl %ecx, %edi /* add the magic value to the word. We get
189 carry bits reported for each byte which
190 is *not* 0 */
3f02f778 191 jnc L(6) /* highest byte is CHR => return pointer */
8f5ca04b
RM
192 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
193 orl $0xfefefeff, %edi /* set all non-carry bits */
194 incl %edi /* add 1: if one carry bit was *not* set
195 the addition will not result in 0. */
5929563f 196 jnz L(6) /* found it => return pointer */
8f5ca04b
RM
197
198 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
199 movl $0xfefefeff, %edi /* magic value */
200 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
201 are now 0 */
202 addl %ecx, %edi /* add the magic value to the word. We get
203 carry bits reported for each byte which
204 is *not* 0 */
3f02f778 205 jnc L(5) /* highest byte is CHR => return pointer */
8f5ca04b
RM
206 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
207 orl $0xfefefeff, %edi /* set all non-carry bits */
208 incl %edi /* add 1: if one carry bit was *not* set
209 the addition will not result in 0. */
5929563f 210 jnz L(5) /* found it => return pointer */
8f5ca04b
RM
211
212 /* Adjust both counters for a full round, i.e. 16 bytes. */
213 addl $16, %eax
5929563f
UD
214L(2): subl $16, %esi
215 jae L(1) /* Still more than 16 bytes remaining */
8f5ca04b
RM
216
217 /* Process remaining bytes separately. */
218 cmpl $4-16, %esi /* rest < 4 bytes? */
5929563f 219 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
220
221 movl (%eax), %ecx /* get word (= 4 bytes) in question */
222 movl $0xfefefeff, %edi /* magic value */
223 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
224 are now 0 */
225 addl %ecx, %edi /* add the magic value to the word. We get
226 carry bits reported for each byte which
227 is *not* 0 */
3f02f778 228 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
229 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
230 orl $0xfefefeff, %edi /* set all non-carry bits */
231 incl %edi /* add 1: if one carry bit was *not* set
232 the addition will not result in 0. */
5929563f 233 jne L(8) /* found it => return pointer */
8f5ca04b
RM
234 addl $4, %eax /* adjust source pointer */
235
236 cmpl $8-16, %esi /* rest < 8 bytes? */
5929563f 237 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
238
239 movl (%eax), %ecx /* get word (= 4 bytes) in question */
240 movl $0xfefefeff, %edi /* magic value */
241 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
242 are now 0 */
243 addl %ecx, %edi /* add the magic value to the word. We get
244 carry bits reported for each byte which
245 is *not* 0 */
3f02f778 246 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
247 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
248 orl $0xfefefeff, %edi /* set all non-carry bits */
249 incl %edi /* add 1: if one carry bit was *not* set
250 the addition will not result in 0. */
5929563f 251 jne L(8) /* found it => return pointer */
8f5ca04b
RM
252 addl $4, %eax /* adjust source pointer */
253
254 cmpl $12-16, %esi /* rest < 12 bytes? */
5929563f 255 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
256
257 movl (%eax), %ecx /* get word (= 4 bytes) in question */
258 movl $0xfefefeff, %edi /* magic value */
259 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
260 are now 0 */
261 addl %ecx, %edi /* add the magic value to the word. We get
262 carry bits reported for each byte which
263 is *not* 0 */
3f02f778 264 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
265 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
266 orl $0xfefefeff, %edi /* set all non-carry bits */
267 incl %edi /* add 1: if one carry bit was *not* set
268 the addition will not result in 0. */
5929563f 269 jne L(8) /* found it => return pointer */
8f5ca04b
RM
270 addl $4, %eax /* adjust source pointer */
271
272 /* Check the remaining bytes one by one. */
5929563f
UD
273L(3): andl $3, %esi /* mask out uninteresting bytes */
274 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 275
3f02f778 276 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 277 je L(9) /* equal, than return pointer */
8f5ca04b
RM
278 incl %eax /* increment source pointer */
279 decl %esi /* decrement length */
5929563f 280 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 281
3f02f778 282 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 283 je L(9) /* equal, than return pointer */
8f5ca04b
RM
284 incl %eax /* increment source pointer */
285 decl %esi /* decrement length */
5929563f 286 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 287
3f02f778 288 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 289 je L(9) /* equal, than return pointer */
8f5ca04b 290
5929563f 291L(4): /* no byte found => return NULL */
8f5ca04b 292 xorl %eax, %eax
5929563f 293 jmp L(9)
8f5ca04b
RM
294
295 /* add missing source pointer increments */
5929563f
UD
296L(5): addl $4, %eax
297L(6): addl $4, %eax
298L(7): addl $4, %eax
8f5ca04b
RM
299
300 /* Test for the matching byte in the word. %ecx contains a NUL
301 char in the byte which originally was the byte we are looking
302 at. */
5929563f
UD
303L(8): testb %cl, %cl /* test first byte in dword */
304 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
305 incl %eax /* increment source pointer */
306
307 testb %ch, %ch /* test second byte in dword */
5929563f 308 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
309 incl %eax /* increment source pointer */
310
311 testl $0xff0000, %ecx /* test third byte in dword */
5929563f 312 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
313 incl %eax /* increment source pointer */
314
6d52618b 315 /* No further test needed we we know it is one of the four bytes. */
2fc08826
GM
316L(9):
317#if __BOUNDED_POINTERS__
53c06508 318 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
2fc08826
GM
319 /* If RTN pointer is phony, don't copy return value into it. */
320 movl RTN(%esp), %ecx
321 testl %ecx, %ecx
322 jz L(pop)
323 RETURN_BOUNDED_POINTER (STR(%esp))
324#endif
325L(pop): popl %edi /* pop saved registers */
0ecb606c
JJ
326 cfi_adjust_cfa_offset (-4)
327 cfi_restore (edi)
8f5ca04b 328 popl %esi
0ecb606c
JJ
329 cfi_adjust_cfa_offset (-4)
330 cfi_restore (esi)
8f5ca04b 331
3f02f778
GM
332 LEAVE
333 RET_PTR
abf70633
GM
334END (BP_SYM (__memchr))
335
336weak_alias (BP_SYM (__memchr), BP_SYM (memchr))
2ed5fd9a
GM
337#if !__BOUNDED_POINTERS__
338weak_alias (__memchr, __ubp_memchr)
339#endif
85dd1003 340libc_hidden_builtin_def (memchr)