]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/memchr.S
Update.
[thirdparty/glibc.git] / sysdeps / i386 / memchr.S
CommitLineData
3f02f778
GM
1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR less
2 than LEN.
6d52618b 3 For Intel 80x86, x>=3.
85dd1003 4 Copyright (C) 1994-1998, 2000, 2003 Free Software Foundation, Inc.
6d52618b
UD
5 This file is part of the GNU C Library.
6 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
7 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
6d52618b
UD
8 This version is developed using the same algorithm as the fast C
9 version which carries the following introduction:
6d52618b
UD
10 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
11 with help from Dan Sahlin (dan@sics.se) and
12 commentary by Jim Blandy (jimb@ai.mit.edu);
13 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
14 and implemented by Roland McGrath (roland@ai.mit.edu).
15
16 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
17 modify it under the terms of the GNU Lesser General Public
18 License as published by the Free Software Foundation; either
19 version 2.1 of the License, or (at your option) any later version.
6d52618b
UD
20
21 The GNU C Library is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 24 Lesser General Public License for more details.
6d52618b 25
41bdb6e2
AJ
26 You should have received a copy of the GNU Lesser General Public
27 License along with the GNU C Library; if not, write to the Free
28 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307 USA. */
8f5ca04b
RM
30
31#include <sysdep.h>
32#include "asm-syntax.h"
2fc08826 33#include "bp-sym.h"
3f02f778 34#include "bp-asm.h"
8f5ca04b 35
3f02f778
GM
36#define PARMS LINKAGE+8 /* space for 2 saved regs */
37#define RTN PARMS
38#define STR RTN+RTN_SIZE
39#define CHR STR+PTR_SIZE
40#define LEN CHR+4
8f5ca04b
RM
41
42 .text
abf70633 43ENTRY (BP_SYM (__memchr))
3f02f778
GM
44 ENTER
45
8f5ca04b
RM
46 /* Save callee-safe registers used in this function. */
47 pushl %esi
48 pushl %edi
49
50 /* Load parameters into registers. */
3f02f778
GM
51 movl STR(%esp), %eax /* str: pointer to memory block. */
52 movl CHR(%esp), %edx /* c: byte we are looking for. */
53 movl LEN(%esp), %esi /* len: length of memory block. */
53c06508 54 CHECK_BOUNDS_LOW (%eax, STR(%esp))
8f5ca04b
RM
55
56 /* If my must not test more than three characters test
57 them one by one. This is especially true for 0. */
58 cmpl $4, %esi
5929563f 59 jb L(3)
8f5ca04b 60
3f02f778
GM
61 /* At the moment %edx contains CHR. What we need for the
62 algorithm is CHR in all bytes of the dword. Avoid
8f5ca04b
RM
63 operations on 16 bit words because these require an
64 prefix byte (and one more cycle). */
65 movb %dl, %dh /* Now it is 0|0|c|c */
66 movl %edx, %ecx
67 shll $16, %edx /* Now c|c|0|0 */
68 movw %cx, %dx /* And finally c|c|c|c */
69
70 /* Better performance can be achieved if the word (32
71 bit) memory access is aligned on a four-byte-boundary.
72 So process first bytes one by one until boundary is
73 reached. Don't use a loop for better performance. */
74
50304ef0 75 testb $3, %al /* correctly aligned ? */
5929563f 76 je L(2) /* yes => begin loop */
8f5ca04b 77 cmpb %dl, (%eax) /* compare byte */
5929563f 78 je L(9) /* target found => return */
8f5ca04b
RM
79 incl %eax /* increment source pointer */
80 decl %esi /* decrement length counter */
5929563f 81 je L(4) /* len==0 => return NULL */
8f5ca04b 82
50304ef0 83 testb $3, %al /* correctly aligned ? */
5929563f 84 je L(2) /* yes => begin loop */
8f5ca04b 85 cmpb %dl, (%eax) /* compare byte */
5929563f 86 je L(9) /* target found => return */
8f5ca04b
RM
87 incl %eax /* increment source pointer */
88 decl %esi /* decrement length counter */
5929563f 89 je L(4) /* len==0 => return NULL */
8f5ca04b 90
50304ef0 91 testb $3, %al /* correctly aligned ? */
5929563f 92 je L(2) /* yes => begin loop */
8f5ca04b 93 cmpb %dl, (%eax) /* compare byte */
5929563f 94 je L(9) /* target found => return */
8f5ca04b
RM
95 incl %eax /* increment source pointer */
96 decl %esi /* decrement length counter */
97 /* no test for len==0 here, because this is done in the
98 loop head */
5929563f 99 jmp L(2)
8f5ca04b
RM
100
101 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
102 change any of the hole bits of LONGWORD.
103
104 1) Is this safe? Will it catch all the zero bytes?
105 Suppose there is a byte with all zeros. Any carry bits
106 propagating from its left will fall into the hole at its
107 least significant bit and stop. Since there will be no
108 carry from its most significant bit, the LSB of the
109 byte to the left will be unchanged, and the zero will be
110 detected.
111
112 2) Is this worthwhile? Will it ignore everything except
113 zero bytes? Suppose every byte of LONGWORD has a bit set
114 somewhere. There will be a carry into bit 8. If bit 8
115 is set, this will carry into bit 16. If bit 8 is clear,
116 one of bits 9-15 must be set, so there will be a carry
117 into bit 16. Similarly, there will be a carry into bit
118 24. If one of bits 24-31 is set, there will be a carry
119 into bit 32 (=carry flag), so all of the hole bits will
120 be changed.
121
3f02f778 122 3) But wait! Aren't we looking for CHR, not zero?
8f5ca04b 123 Good point. So what we do is XOR LONGWORD with a longword,
3f02f778 124 each of whose bytes is CHR. This turns each byte that is CHR
8f5ca04b
RM
125 into a zero. */
126
127
128 /* Each round the main loop processes 16 bytes. */
129
130 ALIGN (4)
131
5929563f 132L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
8f5ca04b
RM
133 movl $0xfefefeff, %edi /* magic value */
134 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
135 are now 0 */
136 addl %ecx, %edi /* add the magic value to the word. We get
137 carry bits reported for each byte which
138 is *not* 0 */
139
140 /* According to the algorithm we had to reverse the effect of the
141 XOR first and then test the overflow bits. But because the
142 following XOR would destroy the carry flag and it would (in a
143 representation with more than 32 bits) not alter then last
144 overflow, we can now test this condition. If no carry is signaled
6d52618b 145 no overflow must have occurred in the last byte => it was 0. */
5929563f 146 jnc L(8)
8f5ca04b
RM
147
148 /* We are only interested in carry bits that change due to the
149 previous add, so remove original bits */
150 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
151
152 /* Now test for the other three overflow bits. */
153 orl $0xfefefeff, %edi /* set all non-carry bits */
154 incl %edi /* add 1: if one carry bit was *not* set
155 the addition will not result in 0. */
156
3f02f778 157 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
5929563f 158 jnz L(8) /* found it => return pointer */
8f5ca04b
RM
159
160 /* This process is unfolded four times for better performance.
161 we don't increment the source pointer each time. Instead we
162 use offsets and increment by 16 in each run of the loop. But
163 before probing for the matching byte we need some extra code
164 (following LL(13) below). Even the len can be compared with
165 constants instead of decrementing each time. */
166
167 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
168 movl $0xfefefeff, %edi /* magic value */
169 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
170 are now 0 */
171 addl %ecx, %edi /* add the magic value to the word. We get
172 carry bits reported for each byte which
173 is *not* 0 */
3f02f778 174 jnc L(7) /* highest byte is CHR => return pointer */
8f5ca04b
RM
175 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
176 orl $0xfefefeff, %edi /* set all non-carry bits */
177 incl %edi /* add 1: if one carry bit was *not* set
178 the addition will not result in 0. */
5929563f 179 jnz L(7) /* found it => return pointer */
8f5ca04b
RM
180
181 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
182 movl $0xfefefeff, %edi /* magic value */
183 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
184 are now 0 */
185 addl %ecx, %edi /* add the magic value to the word. We get
186 carry bits reported for each byte which
187 is *not* 0 */
3f02f778 188 jnc L(6) /* highest byte is CHR => return pointer */
8f5ca04b
RM
189 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
190 orl $0xfefefeff, %edi /* set all non-carry bits */
191 incl %edi /* add 1: if one carry bit was *not* set
192 the addition will not result in 0. */
5929563f 193 jnz L(6) /* found it => return pointer */
8f5ca04b
RM
194
195 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
196 movl $0xfefefeff, %edi /* magic value */
197 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
198 are now 0 */
199 addl %ecx, %edi /* add the magic value to the word. We get
200 carry bits reported for each byte which
201 is *not* 0 */
3f02f778 202 jnc L(5) /* highest byte is CHR => return pointer */
8f5ca04b
RM
203 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
204 orl $0xfefefeff, %edi /* set all non-carry bits */
205 incl %edi /* add 1: if one carry bit was *not* set
206 the addition will not result in 0. */
5929563f 207 jnz L(5) /* found it => return pointer */
8f5ca04b
RM
208
209 /* Adjust both counters for a full round, i.e. 16 bytes. */
210 addl $16, %eax
5929563f
UD
211L(2): subl $16, %esi
212 jae L(1) /* Still more than 16 bytes remaining */
8f5ca04b
RM
213
214 /* Process remaining bytes separately. */
215 cmpl $4-16, %esi /* rest < 4 bytes? */
5929563f 216 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
217
218 movl (%eax), %ecx /* get word (= 4 bytes) in question */
219 movl $0xfefefeff, %edi /* magic value */
220 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
221 are now 0 */
222 addl %ecx, %edi /* add the magic value to the word. We get
223 carry bits reported for each byte which
224 is *not* 0 */
3f02f778 225 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
226 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
227 orl $0xfefefeff, %edi /* set all non-carry bits */
228 incl %edi /* add 1: if one carry bit was *not* set
229 the addition will not result in 0. */
5929563f 230 jne L(8) /* found it => return pointer */
8f5ca04b
RM
231 addl $4, %eax /* adjust source pointer */
232
233 cmpl $8-16, %esi /* rest < 8 bytes? */
5929563f 234 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
235
236 movl (%eax), %ecx /* get word (= 4 bytes) in question */
237 movl $0xfefefeff, %edi /* magic value */
238 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
239 are now 0 */
240 addl %ecx, %edi /* add the magic value to the word. We get
241 carry bits reported for each byte which
242 is *not* 0 */
3f02f778 243 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
244 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
245 orl $0xfefefeff, %edi /* set all non-carry bits */
246 incl %edi /* add 1: if one carry bit was *not* set
247 the addition will not result in 0. */
5929563f 248 jne L(8) /* found it => return pointer */
8f5ca04b
RM
249 addl $4, %eax /* adjust source pointer */
250
251 cmpl $12-16, %esi /* rest < 12 bytes? */
5929563f 252 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
253
254 movl (%eax), %ecx /* get word (= 4 bytes) in question */
255 movl $0xfefefeff, %edi /* magic value */
256 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
257 are now 0 */
258 addl %ecx, %edi /* add the magic value to the word. We get
259 carry bits reported for each byte which
260 is *not* 0 */
3f02f778 261 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
262 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
263 orl $0xfefefeff, %edi /* set all non-carry bits */
264 incl %edi /* add 1: if one carry bit was *not* set
265 the addition will not result in 0. */
5929563f 266 jne L(8) /* found it => return pointer */
8f5ca04b
RM
267 addl $4, %eax /* adjust source pointer */
268
269 /* Check the remaining bytes one by one. */
5929563f
UD
270L(3): andl $3, %esi /* mask out uninteresting bytes */
271 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 272
3f02f778 273 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 274 je L(9) /* equal, than return pointer */
8f5ca04b
RM
275 incl %eax /* increment source pointer */
276 decl %esi /* decrement length */
5929563f 277 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 278
3f02f778 279 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 280 je L(9) /* equal, than return pointer */
8f5ca04b
RM
281 incl %eax /* increment source pointer */
282 decl %esi /* decrement length */
5929563f 283 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 284
3f02f778 285 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 286 je L(9) /* equal, than return pointer */
8f5ca04b 287
5929563f 288L(4): /* no byte found => return NULL */
8f5ca04b 289 xorl %eax, %eax
5929563f 290 jmp L(9)
8f5ca04b
RM
291
292 /* add missing source pointer increments */
5929563f
UD
293L(5): addl $4, %eax
294L(6): addl $4, %eax
295L(7): addl $4, %eax
8f5ca04b
RM
296
297 /* Test for the matching byte in the word. %ecx contains a NUL
298 char in the byte which originally was the byte we are looking
299 at. */
5929563f
UD
300L(8): testb %cl, %cl /* test first byte in dword */
301 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
302 incl %eax /* increment source pointer */
303
304 testb %ch, %ch /* test second byte in dword */
5929563f 305 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
306 incl %eax /* increment source pointer */
307
308 testl $0xff0000, %ecx /* test third byte in dword */
5929563f 309 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
310 incl %eax /* increment source pointer */
311
6d52618b 312 /* No further test needed we we know it is one of the four bytes. */
2fc08826
GM
313L(9):
314#if __BOUNDED_POINTERS__
53c06508 315 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
2fc08826
GM
316 /* If RTN pointer is phony, don't copy return value into it. */
317 movl RTN(%esp), %ecx
318 testl %ecx, %ecx
319 jz L(pop)
320 RETURN_BOUNDED_POINTER (STR(%esp))
321#endif
322L(pop): popl %edi /* pop saved registers */
8f5ca04b
RM
323 popl %esi
324
3f02f778
GM
325 LEAVE
326 RET_PTR
abf70633
GM
327END (BP_SYM (__memchr))
328
329weak_alias (BP_SYM (__memchr), BP_SYM (memchr))
2ed5fd9a
GM
330#if !__BOUNDED_POINTERS__
331weak_alias (__memchr, __ubp_memchr)
332#endif
85dd1003 333libc_hidden_builtin_def (memchr)