]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/memchr.S
Remove CHECK_STRING, CHECK_STRING_NULL_OK and __ubp_memchr.
[thirdparty/glibc.git] / sysdeps / i386 / memchr.S
CommitLineData
1ad9da69
UD
1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
568035b7 3 Copyright (C) 1994-2013 Free Software Foundation, Inc.
6d52618b
UD
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
6d52618b
UD
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
6d52618b
UD
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
14
15 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
6d52618b
UD
19
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 23 Lesser General Public License for more details.
6d52618b 24
41bdb6e2 25 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
26 License along with the GNU C Library; if not, see
27 <http://www.gnu.org/licenses/>. */
8f5ca04b
RM
28
29#include <sysdep.h>
30#include "asm-syntax.h"
2fc08826 31#include "bp-sym.h"
3f02f778 32#include "bp-asm.h"
8f5ca04b 33
3f02f778
GM
34#define PARMS LINKAGE+8 /* space for 2 saved regs */
35#define RTN PARMS
36#define STR RTN+RTN_SIZE
37#define CHR STR+PTR_SIZE
38#define LEN CHR+4
8f5ca04b
RM
39
40 .text
abf70633 41ENTRY (BP_SYM (__memchr))
3f02f778
GM
42 ENTER
43
8f5ca04b
RM
44 /* Save callee-safe registers used in this function. */
45 pushl %esi
1ad9da69 46 cfi_adjust_cfa_offset (4)
8f5ca04b 47 pushl %edi
1ad9da69
UD
48 cfi_adjust_cfa_offset (4)
49 cfi_rel_offset (edi, 0)
8f5ca04b
RM
50
51 /* Load parameters into registers. */
3f02f778
GM
52 movl STR(%esp), %eax /* str: pointer to memory block. */
53 movl CHR(%esp), %edx /* c: byte we are looking for. */
54 movl LEN(%esp), %esi /* len: length of memory block. */
1ad9da69 55 cfi_rel_offset (esi, 4)
53c06508 56 CHECK_BOUNDS_LOW (%eax, STR(%esp))
8f5ca04b
RM
57
58 /* If my must not test more than three characters test
59 them one by one. This is especially true for 0. */
60 cmpl $4, %esi
5929563f 61 jb L(3)
8f5ca04b 62
3f02f778
GM
63 /* At the moment %edx contains CHR. What we need for the
64 algorithm is CHR in all bytes of the dword. Avoid
8f5ca04b
RM
65 operations on 16 bit words because these require an
66 prefix byte (and one more cycle). */
67 movb %dl, %dh /* Now it is 0|0|c|c */
68 movl %edx, %ecx
69 shll $16, %edx /* Now c|c|0|0 */
70 movw %cx, %dx /* And finally c|c|c|c */
71
72 /* Better performance can be achieved if the word (32
73 bit) memory access is aligned on a four-byte-boundary.
74 So process first bytes one by one until boundary is
75 reached. Don't use a loop for better performance. */
76
50304ef0 77 testb $3, %al /* correctly aligned ? */
5929563f 78 je L(2) /* yes => begin loop */
8f5ca04b 79 cmpb %dl, (%eax) /* compare byte */
5929563f 80 je L(9) /* target found => return */
8f5ca04b
RM
81 incl %eax /* increment source pointer */
82 decl %esi /* decrement length counter */
5929563f 83 je L(4) /* len==0 => return NULL */
8f5ca04b 84
50304ef0 85 testb $3, %al /* correctly aligned ? */
5929563f 86 je L(2) /* yes => begin loop */
8f5ca04b 87 cmpb %dl, (%eax) /* compare byte */
5929563f 88 je L(9) /* target found => return */
8f5ca04b
RM
89 incl %eax /* increment source pointer */
90 decl %esi /* decrement length counter */
5929563f 91 je L(4) /* len==0 => return NULL */
8f5ca04b 92
50304ef0 93 testb $3, %al /* correctly aligned ? */
5929563f 94 je L(2) /* yes => begin loop */
8f5ca04b 95 cmpb %dl, (%eax) /* compare byte */
5929563f 96 je L(9) /* target found => return */
8f5ca04b
RM
97 incl %eax /* increment source pointer */
98 decl %esi /* decrement length counter */
99 /* no test for len==0 here, because this is done in the
100 loop head */
5929563f 101 jmp L(2)
8f5ca04b
RM
102
103 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
104 change any of the hole bits of LONGWORD.
105
106 1) Is this safe? Will it catch all the zero bytes?
107 Suppose there is a byte with all zeros. Any carry bits
108 propagating from its left will fall into the hole at its
109 least significant bit and stop. Since there will be no
110 carry from its most significant bit, the LSB of the
111 byte to the left will be unchanged, and the zero will be
112 detected.
113
114 2) Is this worthwhile? Will it ignore everything except
115 zero bytes? Suppose every byte of LONGWORD has a bit set
116 somewhere. There will be a carry into bit 8. If bit 8
117 is set, this will carry into bit 16. If bit 8 is clear,
118 one of bits 9-15 must be set, so there will be a carry
119 into bit 16. Similarly, there will be a carry into bit
120 24. If one of bits 24-31 is set, there will be a carry
121 into bit 32 (=carry flag), so all of the hole bits will
122 be changed.
123
3f02f778 124 3) But wait! Aren't we looking for CHR, not zero?
8f5ca04b 125 Good point. So what we do is XOR LONGWORD with a longword,
3f02f778 126 each of whose bytes is CHR. This turns each byte that is CHR
8f5ca04b
RM
127 into a zero. */
128
129
130 /* Each round the main loop processes 16 bytes. */
131
132 ALIGN (4)
133
5929563f 134L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
8f5ca04b
RM
135 movl $0xfefefeff, %edi /* magic value */
136 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
137 are now 0 */
138 addl %ecx, %edi /* add the magic value to the word. We get
139 carry bits reported for each byte which
140 is *not* 0 */
141
142 /* According to the algorithm we had to reverse the effect of the
143 XOR first and then test the overflow bits. But because the
144 following XOR would destroy the carry flag and it would (in a
145 representation with more than 32 bits) not alter then last
146 overflow, we can now test this condition. If no carry is signaled
6d52618b 147 no overflow must have occurred in the last byte => it was 0. */
5929563f 148 jnc L(8)
8f5ca04b
RM
149
150 /* We are only interested in carry bits that change due to the
151 previous add, so remove original bits */
152 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
153
154 /* Now test for the other three overflow bits. */
155 orl $0xfefefeff, %edi /* set all non-carry bits */
156 incl %edi /* add 1: if one carry bit was *not* set
157 the addition will not result in 0. */
158
3f02f778 159 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
5929563f 160 jnz L(8) /* found it => return pointer */
8f5ca04b
RM
161
162 /* This process is unfolded four times for better performance.
163 we don't increment the source pointer each time. Instead we
164 use offsets and increment by 16 in each run of the loop. But
165 before probing for the matching byte we need some extra code
166 (following LL(13) below). Even the len can be compared with
167 constants instead of decrementing each time. */
168
169 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
170 movl $0xfefefeff, %edi /* magic value */
171 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
172 are now 0 */
173 addl %ecx, %edi /* add the magic value to the word. We get
174 carry bits reported for each byte which
175 is *not* 0 */
3f02f778 176 jnc L(7) /* highest byte is CHR => return pointer */
8f5ca04b
RM
177 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
178 orl $0xfefefeff, %edi /* set all non-carry bits */
179 incl %edi /* add 1: if one carry bit was *not* set
180 the addition will not result in 0. */
5929563f 181 jnz L(7) /* found it => return pointer */
8f5ca04b
RM
182
183 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
184 movl $0xfefefeff, %edi /* magic value */
185 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
186 are now 0 */
187 addl %ecx, %edi /* add the magic value to the word. We get
188 carry bits reported for each byte which
189 is *not* 0 */
3f02f778 190 jnc L(6) /* highest byte is CHR => return pointer */
8f5ca04b
RM
191 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
192 orl $0xfefefeff, %edi /* set all non-carry bits */
193 incl %edi /* add 1: if one carry bit was *not* set
194 the addition will not result in 0. */
5929563f 195 jnz L(6) /* found it => return pointer */
8f5ca04b
RM
196
197 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
198 movl $0xfefefeff, %edi /* magic value */
199 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
200 are now 0 */
201 addl %ecx, %edi /* add the magic value to the word. We get
202 carry bits reported for each byte which
203 is *not* 0 */
3f02f778 204 jnc L(5) /* highest byte is CHR => return pointer */
8f5ca04b
RM
205 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
206 orl $0xfefefeff, %edi /* set all non-carry bits */
207 incl %edi /* add 1: if one carry bit was *not* set
208 the addition will not result in 0. */
5929563f 209 jnz L(5) /* found it => return pointer */
8f5ca04b
RM
210
211 /* Adjust both counters for a full round, i.e. 16 bytes. */
212 addl $16, %eax
5929563f
UD
213L(2): subl $16, %esi
214 jae L(1) /* Still more than 16 bytes remaining */
8f5ca04b
RM
215
216 /* Process remaining bytes separately. */
217 cmpl $4-16, %esi /* rest < 4 bytes? */
5929563f 218 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
219
220 movl (%eax), %ecx /* get word (= 4 bytes) in question */
221 movl $0xfefefeff, %edi /* magic value */
222 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
223 are now 0 */
224 addl %ecx, %edi /* add the magic value to the word. We get
225 carry bits reported for each byte which
226 is *not* 0 */
3f02f778 227 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
228 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
229 orl $0xfefefeff, %edi /* set all non-carry bits */
230 incl %edi /* add 1: if one carry bit was *not* set
231 the addition will not result in 0. */
5929563f 232 jne L(8) /* found it => return pointer */
8f5ca04b
RM
233 addl $4, %eax /* adjust source pointer */
234
235 cmpl $8-16, %esi /* rest < 8 bytes? */
5929563f 236 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
237
238 movl (%eax), %ecx /* get word (= 4 bytes) in question */
239 movl $0xfefefeff, %edi /* magic value */
240 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
241 are now 0 */
242 addl %ecx, %edi /* add the magic value to the word. We get
243 carry bits reported for each byte which
244 is *not* 0 */
3f02f778 245 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
246 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
247 orl $0xfefefeff, %edi /* set all non-carry bits */
248 incl %edi /* add 1: if one carry bit was *not* set
249 the addition will not result in 0. */
5929563f 250 jne L(8) /* found it => return pointer */
8f5ca04b
RM
251 addl $4, %eax /* adjust source pointer */
252
253 cmpl $12-16, %esi /* rest < 12 bytes? */
5929563f 254 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
255
256 movl (%eax), %ecx /* get word (= 4 bytes) in question */
257 movl $0xfefefeff, %edi /* magic value */
258 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
259 are now 0 */
260 addl %ecx, %edi /* add the magic value to the word. We get
261 carry bits reported for each byte which
262 is *not* 0 */
3f02f778 263 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
264 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
265 orl $0xfefefeff, %edi /* set all non-carry bits */
266 incl %edi /* add 1: if one carry bit was *not* set
267 the addition will not result in 0. */
5929563f 268 jne L(8) /* found it => return pointer */
8f5ca04b
RM
269 addl $4, %eax /* adjust source pointer */
270
271 /* Check the remaining bytes one by one. */
5929563f
UD
272L(3): andl $3, %esi /* mask out uninteresting bytes */
273 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 274
3f02f778 275 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 276 je L(9) /* equal, than return pointer */
8f5ca04b
RM
277 incl %eax /* increment source pointer */
278 decl %esi /* decrement length */
5929563f 279 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 280
3f02f778 281 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 282 je L(9) /* equal, than return pointer */
8f5ca04b
RM
283 incl %eax /* increment source pointer */
284 decl %esi /* decrement length */
5929563f 285 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 286
3f02f778 287 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 288 je L(9) /* equal, than return pointer */
8f5ca04b 289
5929563f 290L(4): /* no byte found => return NULL */
8f5ca04b 291 xorl %eax, %eax
5929563f 292 jmp L(9)
8f5ca04b
RM
293
294 /* add missing source pointer increments */
5929563f
UD
295L(5): addl $4, %eax
296L(6): addl $4, %eax
297L(7): addl $4, %eax
8f5ca04b
RM
298
299 /* Test for the matching byte in the word. %ecx contains a NUL
300 char in the byte which originally was the byte we are looking
301 at. */
5929563f
UD
302L(8): testb %cl, %cl /* test first byte in dword */
303 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
304 incl %eax /* increment source pointer */
305
306 testb %ch, %ch /* test second byte in dword */
5929563f 307 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
308 incl %eax /* increment source pointer */
309
310 testl $0xff0000, %ecx /* test third byte in dword */
5929563f 311 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
312 incl %eax /* increment source pointer */
313
6d52618b 314 /* No further test needed we we know it is one of the four bytes. */
2fc08826
GM
315L(9):
316#if __BOUNDED_POINTERS__
53c06508 317 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
2fc08826
GM
318 /* If RTN pointer is phony, don't copy return value into it. */
319 movl RTN(%esp), %ecx
320 testl %ecx, %ecx
321 jz L(pop)
322 RETURN_BOUNDED_POINTER (STR(%esp))
323#endif
324L(pop): popl %edi /* pop saved registers */
1ad9da69
UD
325 cfi_adjust_cfa_offset (-4)
326 cfi_restore (edi)
8f5ca04b 327 popl %esi
1ad9da69
UD
328 cfi_adjust_cfa_offset (-4)
329 cfi_restore (esi)
8f5ca04b 330
3f02f778
GM
331 LEAVE
332 RET_PTR
abf70633
GM
333END (BP_SYM (__memchr))
334
335weak_alias (BP_SYM (__memchr), BP_SYM (memchr))
85dd1003 336libc_hidden_builtin_def (memchr)