]>
Commit | Line | Data |
---|---|---|
0ecb606c JJ |
1 | /* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR |
2 | less than LEN. For Intel 80x86, x>=3. | |
3 | Copyright (C) 1994-1998, 2000, 2003, 2005 Free Software Foundation, Inc. | |
6d52618b UD |
4 | This file is part of the GNU C Library. |
5 | Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> | |
6 | Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> | |
6d52618b UD |
7 | This version is developed using the same algorithm as the fast C |
8 | version which carries the following introduction: | |
6d52618b UD |
9 | Based on strlen implementation by Torbjorn Granlund (tege@sics.se), |
10 | with help from Dan Sahlin (dan@sics.se) and | |
11 | commentary by Jim Blandy (jimb@ai.mit.edu); | |
12 | adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), | |
13 | and implemented by Roland McGrath (roland@ai.mit.edu). | |
14 | ||
15 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
16 | modify it under the terms of the GNU Lesser General Public |
17 | License as published by the Free Software Foundation; either | |
18 | version 2.1 of the License, or (at your option) any later version. | |
6d52618b UD |
19 | |
20 | The GNU C Library is distributed in the hope that it will be useful, | |
21 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 23 | Lesser General Public License for more details. |
6d52618b | 24 | |
41bdb6e2 AJ |
25 | You should have received a copy of the GNU Lesser General Public |
26 | License along with the GNU C Library; if not, write to the Free | |
27 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
28 | 02111-1307 USA. */ | |
8f5ca04b RM |
29 | |
30 | #include <sysdep.h> | |
31 | #include "asm-syntax.h" | |
2fc08826 | 32 | #include "bp-sym.h" |
3f02f778 | 33 | #include "bp-asm.h" |
8f5ca04b | 34 | |
3f02f778 GM |
35 | #define PARMS LINKAGE+8 /* space for 2 saved regs */ |
36 | #define RTN PARMS | |
37 | #define STR RTN+RTN_SIZE | |
38 | #define CHR STR+PTR_SIZE | |
39 | #define LEN CHR+4 | |
8f5ca04b RM |
40 | |
41 | .text | |
abf70633 | 42 | ENTRY (BP_SYM (__memchr)) |
3f02f778 GM |
43 | ENTER |
44 | ||
8f5ca04b RM |
45 | /* Save callee-safe registers used in this function. */ |
46 | pushl %esi | |
0ecb606c | 47 | cfi_adjust_cfa_offset (4) |
8f5ca04b | 48 | pushl %edi |
0ecb606c JJ |
49 | cfi_adjust_cfa_offset (4) |
50 | cfi_rel_offset (edi, 0) | |
8f5ca04b RM |
51 | |
52 | /* Load parameters into registers. */ | |
3f02f778 GM |
53 | movl STR(%esp), %eax /* str: pointer to memory block. */ |
54 | movl CHR(%esp), %edx /* c: byte we are looking for. */ | |
55 | movl LEN(%esp), %esi /* len: length of memory block. */ | |
0ecb606c | 56 | cfi_rel_offset (esi, 4) |
53c06508 | 57 | CHECK_BOUNDS_LOW (%eax, STR(%esp)) |
8f5ca04b RM |
58 | |
59 | /* If my must not test more than three characters test | |
60 | them one by one. This is especially true for 0. */ | |
61 | cmpl $4, %esi | |
5929563f | 62 | jb L(3) |
8f5ca04b | 63 | |
3f02f778 GM |
64 | /* At the moment %edx contains CHR. What we need for the |
65 | algorithm is CHR in all bytes of the dword. Avoid | |
8f5ca04b RM |
66 | operations on 16 bit words because these require an |
67 | prefix byte (and one more cycle). */ | |
68 | movb %dl, %dh /* Now it is 0|0|c|c */ | |
69 | movl %edx, %ecx | |
70 | shll $16, %edx /* Now c|c|0|0 */ | |
71 | movw %cx, %dx /* And finally c|c|c|c */ | |
72 | ||
73 | /* Better performance can be achieved if the word (32 | |
74 | bit) memory access is aligned on a four-byte-boundary. | |
75 | So process first bytes one by one until boundary is | |
76 | reached. Don't use a loop for better performance. */ | |
77 | ||
50304ef0 | 78 | testb $3, %al /* correctly aligned ? */ |
5929563f | 79 | je L(2) /* yes => begin loop */ |
8f5ca04b | 80 | cmpb %dl, (%eax) /* compare byte */ |
5929563f | 81 | je L(9) /* target found => return */ |
8f5ca04b RM |
82 | incl %eax /* increment source pointer */ |
83 | decl %esi /* decrement length counter */ | |
5929563f | 84 | je L(4) /* len==0 => return NULL */ |
8f5ca04b | 85 | |
50304ef0 | 86 | testb $3, %al /* correctly aligned ? */ |
5929563f | 87 | je L(2) /* yes => begin loop */ |
8f5ca04b | 88 | cmpb %dl, (%eax) /* compare byte */ |
5929563f | 89 | je L(9) /* target found => return */ |
8f5ca04b RM |
90 | incl %eax /* increment source pointer */ |
91 | decl %esi /* decrement length counter */ | |
5929563f | 92 | je L(4) /* len==0 => return NULL */ |
8f5ca04b | 93 | |
50304ef0 | 94 | testb $3, %al /* correctly aligned ? */ |
5929563f | 95 | je L(2) /* yes => begin loop */ |
8f5ca04b | 96 | cmpb %dl, (%eax) /* compare byte */ |
5929563f | 97 | je L(9) /* target found => return */ |
8f5ca04b RM |
98 | incl %eax /* increment source pointer */ |
99 | decl %esi /* decrement length counter */ | |
100 | /* no test for len==0 here, because this is done in the | |
101 | loop head */ | |
5929563f | 102 | jmp L(2) |
8f5ca04b RM |
103 | |
104 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to | |
105 | change any of the hole bits of LONGWORD. | |
106 | ||
107 | 1) Is this safe? Will it catch all the zero bytes? | |
108 | Suppose there is a byte with all zeros. Any carry bits | |
109 | propagating from its left will fall into the hole at its | |
110 | least significant bit and stop. Since there will be no | |
111 | carry from its most significant bit, the LSB of the | |
112 | byte to the left will be unchanged, and the zero will be | |
113 | detected. | |
114 | ||
115 | 2) Is this worthwhile? Will it ignore everything except | |
116 | zero bytes? Suppose every byte of LONGWORD has a bit set | |
117 | somewhere. There will be a carry into bit 8. If bit 8 | |
118 | is set, this will carry into bit 16. If bit 8 is clear, | |
119 | one of bits 9-15 must be set, so there will be a carry | |
120 | into bit 16. Similarly, there will be a carry into bit | |
121 | 24. If one of bits 24-31 is set, there will be a carry | |
122 | into bit 32 (=carry flag), so all of the hole bits will | |
123 | be changed. | |
124 | ||
3f02f778 | 125 | 3) But wait! Aren't we looking for CHR, not zero? |
8f5ca04b | 126 | Good point. So what we do is XOR LONGWORD with a longword, |
3f02f778 | 127 | each of whose bytes is CHR. This turns each byte that is CHR |
8f5ca04b RM |
128 | into a zero. */ |
129 | ||
130 | ||
131 | /* Each round the main loop processes 16 bytes. */ | |
132 | ||
133 | ALIGN (4) | |
134 | ||
5929563f | 135 | L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ |
8f5ca04b RM |
136 | movl $0xfefefeff, %edi /* magic value */ |
137 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
138 | are now 0 */ | |
139 | addl %ecx, %edi /* add the magic value to the word. We get | |
140 | carry bits reported for each byte which | |
141 | is *not* 0 */ | |
142 | ||
143 | /* According to the algorithm we had to reverse the effect of the | |
144 | XOR first and then test the overflow bits. But because the | |
145 | following XOR would destroy the carry flag and it would (in a | |
146 | representation with more than 32 bits) not alter then last | |
147 | overflow, we can now test this condition. If no carry is signaled | |
6d52618b | 148 | no overflow must have occurred in the last byte => it was 0. */ |
5929563f | 149 | jnc L(8) |
8f5ca04b RM |
150 | |
151 | /* We are only interested in carry bits that change due to the | |
152 | previous add, so remove original bits */ | |
153 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ | |
154 | ||
155 | /* Now test for the other three overflow bits. */ | |
156 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
157 | incl %edi /* add 1: if one carry bit was *not* set | |
158 | the addition will not result in 0. */ | |
159 | ||
3f02f778 | 160 | /* If at least one byte of the word is CHR we don't get 0 in %edi. */ |
5929563f | 161 | jnz L(8) /* found it => return pointer */ |
8f5ca04b RM |
162 | |
163 | /* This process is unfolded four times for better performance. | |
164 | we don't increment the source pointer each time. Instead we | |
165 | use offsets and increment by 16 in each run of the loop. But | |
166 | before probing for the matching byte we need some extra code | |
167 | (following LL(13) below). Even the len can be compared with | |
168 | constants instead of decrementing each time. */ | |
169 | ||
170 | movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ | |
171 | movl $0xfefefeff, %edi /* magic value */ | |
172 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
173 | are now 0 */ | |
174 | addl %ecx, %edi /* add the magic value to the word. We get | |
175 | carry bits reported for each byte which | |
176 | is *not* 0 */ | |
3f02f778 | 177 | jnc L(7) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
178 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
179 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
180 | incl %edi /* add 1: if one carry bit was *not* set | |
181 | the addition will not result in 0. */ | |
5929563f | 182 | jnz L(7) /* found it => return pointer */ |
8f5ca04b RM |
183 | |
184 | movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ | |
185 | movl $0xfefefeff, %edi /* magic value */ | |
186 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
187 | are now 0 */ | |
188 | addl %ecx, %edi /* add the magic value to the word. We get | |
189 | carry bits reported for each byte which | |
190 | is *not* 0 */ | |
3f02f778 | 191 | jnc L(6) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
192 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
193 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
194 | incl %edi /* add 1: if one carry bit was *not* set | |
195 | the addition will not result in 0. */ | |
5929563f | 196 | jnz L(6) /* found it => return pointer */ |
8f5ca04b RM |
197 | |
198 | movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ | |
199 | movl $0xfefefeff, %edi /* magic value */ | |
200 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
201 | are now 0 */ | |
202 | addl %ecx, %edi /* add the magic value to the word. We get | |
203 | carry bits reported for each byte which | |
204 | is *not* 0 */ | |
3f02f778 | 205 | jnc L(5) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
206 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
207 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
208 | incl %edi /* add 1: if one carry bit was *not* set | |
209 | the addition will not result in 0. */ | |
5929563f | 210 | jnz L(5) /* found it => return pointer */ |
8f5ca04b RM |
211 | |
212 | /* Adjust both counters for a full round, i.e. 16 bytes. */ | |
213 | addl $16, %eax | |
5929563f UD |
214 | L(2): subl $16, %esi |
215 | jae L(1) /* Still more than 16 bytes remaining */ | |
8f5ca04b RM |
216 | |
217 | /* Process remaining bytes separately. */ | |
218 | cmpl $4-16, %esi /* rest < 4 bytes? */ | |
5929563f | 219 | jb L(3) /* yes, than test byte by byte */ |
8f5ca04b RM |
220 | |
221 | movl (%eax), %ecx /* get word (= 4 bytes) in question */ | |
222 | movl $0xfefefeff, %edi /* magic value */ | |
223 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
224 | are now 0 */ | |
225 | addl %ecx, %edi /* add the magic value to the word. We get | |
226 | carry bits reported for each byte which | |
227 | is *not* 0 */ | |
3f02f778 | 228 | jnc L(8) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
229 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
230 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
231 | incl %edi /* add 1: if one carry bit was *not* set | |
232 | the addition will not result in 0. */ | |
5929563f | 233 | jne L(8) /* found it => return pointer */ |
8f5ca04b RM |
234 | addl $4, %eax /* adjust source pointer */ |
235 | ||
236 | cmpl $8-16, %esi /* rest < 8 bytes? */ | |
5929563f | 237 | jb L(3) /* yes, than test byte by byte */ |
8f5ca04b RM |
238 | |
239 | movl (%eax), %ecx /* get word (= 4 bytes) in question */ | |
240 | movl $0xfefefeff, %edi /* magic value */ | |
241 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
242 | are now 0 */ | |
243 | addl %ecx, %edi /* add the magic value to the word. We get | |
244 | carry bits reported for each byte which | |
245 | is *not* 0 */ | |
3f02f778 | 246 | jnc L(8) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
247 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
248 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
249 | incl %edi /* add 1: if one carry bit was *not* set | |
250 | the addition will not result in 0. */ | |
5929563f | 251 | jne L(8) /* found it => return pointer */ |
8f5ca04b RM |
252 | addl $4, %eax /* adjust source pointer */ |
253 | ||
254 | cmpl $12-16, %esi /* rest < 12 bytes? */ | |
5929563f | 255 | jb L(3) /* yes, than test byte by byte */ |
8f5ca04b RM |
256 | |
257 | movl (%eax), %ecx /* get word (= 4 bytes) in question */ | |
258 | movl $0xfefefeff, %edi /* magic value */ | |
259 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
260 | are now 0 */ | |
261 | addl %ecx, %edi /* add the magic value to the word. We get | |
262 | carry bits reported for each byte which | |
263 | is *not* 0 */ | |
3f02f778 | 264 | jnc L(8) /* highest byte is CHR => return pointer */ |
8f5ca04b RM |
265 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
266 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
267 | incl %edi /* add 1: if one carry bit was *not* set | |
268 | the addition will not result in 0. */ | |
5929563f | 269 | jne L(8) /* found it => return pointer */ |
8f5ca04b RM |
270 | addl $4, %eax /* adjust source pointer */ |
271 | ||
272 | /* Check the remaining bytes one by one. */ | |
5929563f UD |
273 | L(3): andl $3, %esi /* mask out uninteresting bytes */ |
274 | jz L(4) /* no remaining bytes => return NULL */ | |
8f5ca04b | 275 | |
3f02f778 | 276 | cmpb %dl, (%eax) /* compare byte with CHR */ |
5929563f | 277 | je L(9) /* equal, than return pointer */ |
8f5ca04b RM |
278 | incl %eax /* increment source pointer */ |
279 | decl %esi /* decrement length */ | |
5929563f | 280 | jz L(4) /* no remaining bytes => return NULL */ |
8f5ca04b | 281 | |
3f02f778 | 282 | cmpb %dl, (%eax) /* compare byte with CHR */ |
5929563f | 283 | je L(9) /* equal, than return pointer */ |
8f5ca04b RM |
284 | incl %eax /* increment source pointer */ |
285 | decl %esi /* decrement length */ | |
5929563f | 286 | jz L(4) /* no remaining bytes => return NULL */ |
8f5ca04b | 287 | |
3f02f778 | 288 | cmpb %dl, (%eax) /* compare byte with CHR */ |
5929563f | 289 | je L(9) /* equal, than return pointer */ |
8f5ca04b | 290 | |
5929563f | 291 | L(4): /* no byte found => return NULL */ |
8f5ca04b | 292 | xorl %eax, %eax |
5929563f | 293 | jmp L(9) |
8f5ca04b RM |
294 | |
295 | /* add missing source pointer increments */ | |
5929563f UD |
296 | L(5): addl $4, %eax |
297 | L(6): addl $4, %eax | |
298 | L(7): addl $4, %eax | |
8f5ca04b RM |
299 | |
300 | /* Test for the matching byte in the word. %ecx contains a NUL | |
301 | char in the byte which originally was the byte we are looking | |
302 | at. */ | |
5929563f UD |
303 | L(8): testb %cl, %cl /* test first byte in dword */ |
304 | jz L(9) /* if zero => return pointer */ | |
8f5ca04b RM |
305 | incl %eax /* increment source pointer */ |
306 | ||
307 | testb %ch, %ch /* test second byte in dword */ | |
5929563f | 308 | jz L(9) /* if zero => return pointer */ |
8f5ca04b RM |
309 | incl %eax /* increment source pointer */ |
310 | ||
311 | testl $0xff0000, %ecx /* test third byte in dword */ | |
5929563f | 312 | jz L(9) /* if zero => return pointer */ |
8f5ca04b RM |
313 | incl %eax /* increment source pointer */ |
314 | ||
6d52618b | 315 | /* No further test needed we we know it is one of the four bytes. */ |
2fc08826 GM |
316 | L(9): |
317 | #if __BOUNDED_POINTERS__ | |
53c06508 | 318 | CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb) |
2fc08826 GM |
319 | /* If RTN pointer is phony, don't copy return value into it. */ |
320 | movl RTN(%esp), %ecx | |
321 | testl %ecx, %ecx | |
322 | jz L(pop) | |
323 | RETURN_BOUNDED_POINTER (STR(%esp)) | |
324 | #endif | |
325 | L(pop): popl %edi /* pop saved registers */ | |
0ecb606c JJ |
326 | cfi_adjust_cfa_offset (-4) |
327 | cfi_restore (edi) | |
8f5ca04b | 328 | popl %esi |
0ecb606c JJ |
329 | cfi_adjust_cfa_offset (-4) |
330 | cfi_restore (esi) | |
8f5ca04b | 331 | |
3f02f778 GM |
332 | LEAVE |
333 | RET_PTR | |
abf70633 GM |
334 | END (BP_SYM (__memchr)) |
335 | ||
336 | weak_alias (BP_SYM (__memchr), BP_SYM (memchr)) | |
2ed5fd9a GM |
337 | #if !__BOUNDED_POINTERS__ |
338 | weak_alias (__memchr, __ubp_memchr) | |
339 | #endif | |
85dd1003 | 340 | libc_hidden_builtin_def (memchr) |