]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/memchr.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / memchr.S
CommitLineData
1ad9da69
UD
1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
2 less than LEN. For Intel 80x86, x>=3.
688903eb 3 Copyright (C) 1994-2018 Free Software Foundation, Inc.
6d52618b
UD
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
6d52618b
UD
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
6d52618b
UD
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
14
15 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
6d52618b
UD
19
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 23 Lesser General Public License for more details.
6d52618b 24
41bdb6e2 25 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
26 License along with the GNU C Library; if not, see
27 <http://www.gnu.org/licenses/>. */
8f5ca04b
RM
28
29#include <sysdep.h>
30#include "asm-syntax.h"
31
2366713d 32#define PARMS 4+8 /* space for 2 saved regs */
3f02f778 33#define RTN PARMS
2366713d
JM
34#define STR RTN
35#define CHR STR+4
3f02f778 36#define LEN CHR+4
8f5ca04b
RM
37
38 .text
2366713d 39ENTRY (__memchr)
3f02f778 40
8f5ca04b
RM
41 /* Save callee-safe registers used in this function. */
42 pushl %esi
1ad9da69 43 cfi_adjust_cfa_offset (4)
8f5ca04b 44 pushl %edi
1ad9da69
UD
45 cfi_adjust_cfa_offset (4)
46 cfi_rel_offset (edi, 0)
8f5ca04b
RM
47
48 /* Load parameters into registers. */
3f02f778
GM
49 movl STR(%esp), %eax /* str: pointer to memory block. */
50 movl CHR(%esp), %edx /* c: byte we are looking for. */
51 movl LEN(%esp), %esi /* len: length of memory block. */
1ad9da69 52 cfi_rel_offset (esi, 4)
8f5ca04b
RM
53
54 /* If my must not test more than three characters test
55 them one by one. This is especially true for 0. */
56 cmpl $4, %esi
5929563f 57 jb L(3)
8f5ca04b 58
3f02f778
GM
59 /* At the moment %edx contains CHR. What we need for the
60 algorithm is CHR in all bytes of the dword. Avoid
8f5ca04b
RM
61 operations on 16 bit words because these require an
62 prefix byte (and one more cycle). */
63 movb %dl, %dh /* Now it is 0|0|c|c */
64 movl %edx, %ecx
65 shll $16, %edx /* Now c|c|0|0 */
66 movw %cx, %dx /* And finally c|c|c|c */
67
68 /* Better performance can be achieved if the word (32
69 bit) memory access is aligned on a four-byte-boundary.
70 So process first bytes one by one until boundary is
71 reached. Don't use a loop for better performance. */
72
50304ef0 73 testb $3, %al /* correctly aligned ? */
5929563f 74 je L(2) /* yes => begin loop */
8f5ca04b 75 cmpb %dl, (%eax) /* compare byte */
5929563f 76 je L(9) /* target found => return */
8f5ca04b
RM
77 incl %eax /* increment source pointer */
78 decl %esi /* decrement length counter */
5929563f 79 je L(4) /* len==0 => return NULL */
8f5ca04b 80
50304ef0 81 testb $3, %al /* correctly aligned ? */
5929563f 82 je L(2) /* yes => begin loop */
8f5ca04b 83 cmpb %dl, (%eax) /* compare byte */
5929563f 84 je L(9) /* target found => return */
8f5ca04b
RM
85 incl %eax /* increment source pointer */
86 decl %esi /* decrement length counter */
5929563f 87 je L(4) /* len==0 => return NULL */
8f5ca04b 88
50304ef0 89 testb $3, %al /* correctly aligned ? */
5929563f 90 je L(2) /* yes => begin loop */
8f5ca04b 91 cmpb %dl, (%eax) /* compare byte */
5929563f 92 je L(9) /* target found => return */
8f5ca04b
RM
93 incl %eax /* increment source pointer */
94 decl %esi /* decrement length counter */
95 /* no test for len==0 here, because this is done in the
96 loop head */
5929563f 97 jmp L(2)
8f5ca04b
RM
98
99 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
100 change any of the hole bits of LONGWORD.
101
102 1) Is this safe? Will it catch all the zero bytes?
103 Suppose there is a byte with all zeros. Any carry bits
104 propagating from its left will fall into the hole at its
105 least significant bit and stop. Since there will be no
106 carry from its most significant bit, the LSB of the
107 byte to the left will be unchanged, and the zero will be
108 detected.
109
110 2) Is this worthwhile? Will it ignore everything except
111 zero bytes? Suppose every byte of LONGWORD has a bit set
112 somewhere. There will be a carry into bit 8. If bit 8
113 is set, this will carry into bit 16. If bit 8 is clear,
114 one of bits 9-15 must be set, so there will be a carry
115 into bit 16. Similarly, there will be a carry into bit
116 24. If one of bits 24-31 is set, there will be a carry
117 into bit 32 (=carry flag), so all of the hole bits will
118 be changed.
119
3f02f778 120 3) But wait! Aren't we looking for CHR, not zero?
8f5ca04b 121 Good point. So what we do is XOR LONGWORD with a longword,
3f02f778 122 each of whose bytes is CHR. This turns each byte that is CHR
8f5ca04b
RM
123 into a zero. */
124
125
126 /* Each round the main loop processes 16 bytes. */
127
128 ALIGN (4)
129
5929563f 130L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
8f5ca04b
RM
131 movl $0xfefefeff, %edi /* magic value */
132 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
133 are now 0 */
134 addl %ecx, %edi /* add the magic value to the word. We get
135 carry bits reported for each byte which
136 is *not* 0 */
137
138 /* According to the algorithm we had to reverse the effect of the
139 XOR first and then test the overflow bits. But because the
140 following XOR would destroy the carry flag and it would (in a
141 representation with more than 32 bits) not alter then last
142 overflow, we can now test this condition. If no carry is signaled
6d52618b 143 no overflow must have occurred in the last byte => it was 0. */
5929563f 144 jnc L(8)
8f5ca04b
RM
145
146 /* We are only interested in carry bits that change due to the
147 previous add, so remove original bits */
148 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
149
150 /* Now test for the other three overflow bits. */
151 orl $0xfefefeff, %edi /* set all non-carry bits */
152 incl %edi /* add 1: if one carry bit was *not* set
153 the addition will not result in 0. */
154
3f02f778 155 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
5929563f 156 jnz L(8) /* found it => return pointer */
8f5ca04b
RM
157
158 /* This process is unfolded four times for better performance.
159 we don't increment the source pointer each time. Instead we
160 use offsets and increment by 16 in each run of the loop. But
161 before probing for the matching byte we need some extra code
162 (following LL(13) below). Even the len can be compared with
163 constants instead of decrementing each time. */
164
165 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
166 movl $0xfefefeff, %edi /* magic value */
167 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
168 are now 0 */
169 addl %ecx, %edi /* add the magic value to the word. We get
170 carry bits reported for each byte which
171 is *not* 0 */
3f02f778 172 jnc L(7) /* highest byte is CHR => return pointer */
8f5ca04b
RM
173 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
174 orl $0xfefefeff, %edi /* set all non-carry bits */
175 incl %edi /* add 1: if one carry bit was *not* set
176 the addition will not result in 0. */
5929563f 177 jnz L(7) /* found it => return pointer */
8f5ca04b
RM
178
179 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
180 movl $0xfefefeff, %edi /* magic value */
181 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
182 are now 0 */
183 addl %ecx, %edi /* add the magic value to the word. We get
184 carry bits reported for each byte which
185 is *not* 0 */
3f02f778 186 jnc L(6) /* highest byte is CHR => return pointer */
8f5ca04b
RM
187 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
188 orl $0xfefefeff, %edi /* set all non-carry bits */
189 incl %edi /* add 1: if one carry bit was *not* set
190 the addition will not result in 0. */
5929563f 191 jnz L(6) /* found it => return pointer */
8f5ca04b
RM
192
193 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
194 movl $0xfefefeff, %edi /* magic value */
195 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
196 are now 0 */
197 addl %ecx, %edi /* add the magic value to the word. We get
198 carry bits reported for each byte which
199 is *not* 0 */
3f02f778 200 jnc L(5) /* highest byte is CHR => return pointer */
8f5ca04b
RM
201 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
202 orl $0xfefefeff, %edi /* set all non-carry bits */
203 incl %edi /* add 1: if one carry bit was *not* set
204 the addition will not result in 0. */
5929563f 205 jnz L(5) /* found it => return pointer */
8f5ca04b
RM
206
207 /* Adjust both counters for a full round, i.e. 16 bytes. */
208 addl $16, %eax
5929563f
UD
209L(2): subl $16, %esi
210 jae L(1) /* Still more than 16 bytes remaining */
8f5ca04b
RM
211
212 /* Process remaining bytes separately. */
213 cmpl $4-16, %esi /* rest < 4 bytes? */
5929563f 214 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
215
216 movl (%eax), %ecx /* get word (= 4 bytes) in question */
217 movl $0xfefefeff, %edi /* magic value */
218 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
219 are now 0 */
220 addl %ecx, %edi /* add the magic value to the word. We get
221 carry bits reported for each byte which
222 is *not* 0 */
3f02f778 223 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
224 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
225 orl $0xfefefeff, %edi /* set all non-carry bits */
226 incl %edi /* add 1: if one carry bit was *not* set
227 the addition will not result in 0. */
5929563f 228 jne L(8) /* found it => return pointer */
8f5ca04b
RM
229 addl $4, %eax /* adjust source pointer */
230
231 cmpl $8-16, %esi /* rest < 8 bytes? */
5929563f 232 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
233
234 movl (%eax), %ecx /* get word (= 4 bytes) in question */
235 movl $0xfefefeff, %edi /* magic value */
236 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
237 are now 0 */
238 addl %ecx, %edi /* add the magic value to the word. We get
239 carry bits reported for each byte which
240 is *not* 0 */
3f02f778 241 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
242 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
243 orl $0xfefefeff, %edi /* set all non-carry bits */
244 incl %edi /* add 1: if one carry bit was *not* set
245 the addition will not result in 0. */
5929563f 246 jne L(8) /* found it => return pointer */
8f5ca04b
RM
247 addl $4, %eax /* adjust source pointer */
248
249 cmpl $12-16, %esi /* rest < 12 bytes? */
5929563f 250 jb L(3) /* yes, than test byte by byte */
8f5ca04b
RM
251
252 movl (%eax), %ecx /* get word (= 4 bytes) in question */
253 movl $0xfefefeff, %edi /* magic value */
254 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
255 are now 0 */
256 addl %ecx, %edi /* add the magic value to the word. We get
257 carry bits reported for each byte which
258 is *not* 0 */
3f02f778 259 jnc L(8) /* highest byte is CHR => return pointer */
8f5ca04b
RM
260 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
261 orl $0xfefefeff, %edi /* set all non-carry bits */
262 incl %edi /* add 1: if one carry bit was *not* set
263 the addition will not result in 0. */
5929563f 264 jne L(8) /* found it => return pointer */
8f5ca04b
RM
265 addl $4, %eax /* adjust source pointer */
266
267 /* Check the remaining bytes one by one. */
5929563f
UD
268L(3): andl $3, %esi /* mask out uninteresting bytes */
269 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 270
3f02f778 271 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 272 je L(9) /* equal, than return pointer */
8f5ca04b
RM
273 incl %eax /* increment source pointer */
274 decl %esi /* decrement length */
5929563f 275 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 276
3f02f778 277 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 278 je L(9) /* equal, than return pointer */
8f5ca04b
RM
279 incl %eax /* increment source pointer */
280 decl %esi /* decrement length */
5929563f 281 jz L(4) /* no remaining bytes => return NULL */
8f5ca04b 282
3f02f778 283 cmpb %dl, (%eax) /* compare byte with CHR */
5929563f 284 je L(9) /* equal, than return pointer */
8f5ca04b 285
5929563f 286L(4): /* no byte found => return NULL */
8f5ca04b 287 xorl %eax, %eax
5929563f 288 jmp L(9)
8f5ca04b
RM
289
290 /* add missing source pointer increments */
5929563f
UD
291L(5): addl $4, %eax
292L(6): addl $4, %eax
293L(7): addl $4, %eax
8f5ca04b
RM
294
295 /* Test for the matching byte in the word. %ecx contains a NUL
296 char in the byte which originally was the byte we are looking
297 at. */
5929563f
UD
298L(8): testb %cl, %cl /* test first byte in dword */
299 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
300 incl %eax /* increment source pointer */
301
302 testb %ch, %ch /* test second byte in dword */
5929563f 303 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
304 incl %eax /* increment source pointer */
305
306 testl $0xff0000, %ecx /* test third byte in dword */
5929563f 307 jz L(9) /* if zero => return pointer */
8f5ca04b
RM
308 incl %eax /* increment source pointer */
309
6d52618b 310 /* No further test needed we we know it is one of the four bytes. */
92945b52 311L(9): popl %edi /* pop saved registers */
1ad9da69
UD
312 cfi_adjust_cfa_offset (-4)
313 cfi_restore (edi)
8f5ca04b 314 popl %esi
1ad9da69
UD
315 cfi_adjust_cfa_offset (-4)
316 cfi_restore (esi)
8f5ca04b 317
2366713d
JM
318 ret
319END (__memchr)
abf70633 320
2366713d 321weak_alias (__memchr, memchr)
85dd1003 322libc_hidden_builtin_def (memchr)