]>
Commit | Line | Data |
---|---|---|
5929563f | 1 | /* Find character CH in a NUL terminated string. |
6d52618b | 2 | Highly optimized version for ix85, x>=5. |
0ecb606c | 3 | Copyright (C) 1995,1996,1997,2000,2003,2005 Free Software Foundation, Inc. |
6d52618b UD |
4 | This file is part of the GNU C Library. |
5 | Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
6d52618b UD |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 15 | Lesser General Public License for more details. |
6d52618b | 16 | |
41bdb6e2 AJ |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, write to the Free | |
19 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
20 | 02111-1307 USA. */ | |
8f5ca04b RM |
21 | |
22 | #include <sysdep.h> | |
5929563f | 23 | #include "asm-syntax.h" |
2fc08826 | 24 | #include "bp-sym.h" |
3f02f778 | 25 | #include "bp-asm.h" |
8f5ca04b RM |
26 | |
27 | /* This version is especially optimized for the i586 (and following?) | |
28 | processors. This is mainly done by using the two pipelines. The | |
29 | version optimized for i486 is weak in this aspect because to get | |
6d52618b | 30 | as much parallelism we have to execute some *more* instructions. |
8f5ca04b RM |
31 | |
32 | The code below is structured to reflect the pairing of the instructions | |
33 | as *I think* it is. I have no processor data book to verify this. | |
34 | If you find something you think is incorrect let me know. */ | |
35 | ||
36 | ||
37 | /* The magic value which is used throughout in the whole code. */ | |
38 | #define magic 0xfefefeff | |
39 | ||
3f02f778 GM |
40 | #define PARMS LINKAGE+16 /* space for 4 saved regs */ |
41 | #define RTN PARMS | |
42 | #define STR RTN+RTN_SIZE | |
43 | #define CHR STR+PTR_SIZE | |
8f5ca04b RM |
44 | |
45 | .text | |
2fc08826 | 46 | ENTRY (BP_SYM (strchr)) |
3f02f778 GM |
47 | ENTER |
48 | ||
8f5ca04b | 49 | pushl %edi /* Save callee-safe registers. */ |
0ecb606c | 50 | cfi_adjust_cfa_offset (-4) |
8f5ca04b | 51 | pushl %esi |
0ecb606c | 52 | cfi_adjust_cfa_offset (-4) |
8f5ca04b RM |
53 | |
54 | pushl %ebx | |
0ecb606c | 55 | cfi_adjust_cfa_offset (-4) |
8f5ca04b | 56 | pushl %ebp |
0ecb606c | 57 | cfi_adjust_cfa_offset (-4) |
8f5ca04b | 58 | |
3f02f778 GM |
59 | movl STR(%esp), %eax |
60 | movl CHR(%esp), %edx | |
2fc08826 | 61 | CHECK_BOUNDS_LOW (%eax, STR(%esp)) |
8f5ca04b RM |
62 | |
63 | movl %eax, %edi /* duplicate string pointer for later */ | |
0ecb606c | 64 | cfi_rel_offset (edi, 12) |
8f5ca04b RM |
65 | xorl %ecx, %ecx /* clear %ecx */ |
66 | ||
67 | /* At the moment %edx contains C. What we need for the | |
68 | algorithm is C in all bytes of the dword. Avoid | |
69 | operations on 16 bit words because these require an | |
70 | prefix byte (and one more cycle). */ | |
71 | movb %dl, %dh /* now it is 0|0|c|c */ | |
72 | movb %dl, %cl /* we construct the lower half in %ecx */ | |
73 | ||
74 | shll $16, %edx /* now %edx is c|c|0|0 */ | |
75 | movb %cl, %ch /* now %ecx is 0|0|c|c */ | |
76 | ||
77 | orl %ecx, %edx /* and finally c|c|c|c */ | |
78 | andl $3, %edi /* mask alignment bits */ | |
79 | ||
5929563f | 80 | jz L(11) /* alignment is 0 => start loop */ |
8f5ca04b | 81 | |
11336c16 | 82 | movb %dl, %cl /* 0 is needed below */ |
5929563f | 83 | jp L(0) /* exactly two bits set */ |
8f5ca04b | 84 | |
11336c16 | 85 | xorb (%eax), %cl /* is byte the one we are looking for? */ |
5929563f | 86 | jz L(2) /* yes => return pointer */ |
8f5ca04b | 87 | |
11336c16 | 88 | xorb %dl, %cl /* load single byte and test for NUL */ |
5929563f | 89 | je L(3) /* yes => return NULL */ |
8f5ca04b | 90 | |
11336c16 UD |
91 | movb 1(%eax), %cl /* load single byte */ |
92 | incl %eax | |
8f5ca04b | 93 | |
8f5ca04b | 94 | cmpb %cl, %dl /* is byte == C? */ |
5929563f | 95 | je L(2) /* aligned => return pointer */ |
8f5ca04b | 96 | |
3e2ee727 | 97 | cmpb $0, %cl /* is byte NUL? */ |
5929563f | 98 | je L(3) /* yes => return NULL */ |
8f5ca04b | 99 | |
11336c16 UD |
100 | incl %eax |
101 | decl %edi | |
102 | ||
5929563f | 103 | jne L(11) |
11336c16 | 104 | |
5929563f | 105 | L(0): movb (%eax), %cl /* load single byte */ |
8f5ca04b | 106 | |
8f5ca04b | 107 | cmpb %cl, %dl /* is byte == C? */ |
5929563f | 108 | je L(2) /* aligned => return pointer */ |
8f5ca04b | 109 | |
3e2ee727 | 110 | cmpb $0, %cl /* is byte NUL? */ |
5929563f | 111 | je L(3) /* yes => return NULL */ |
8f5ca04b RM |
112 | |
113 | incl %eax /* increment pointer */ | |
114 | ||
0ecb606c JJ |
115 | cfi_rel_offset (esi, 8) |
116 | cfi_rel_offset (ebx, 4) | |
117 | cfi_rel_offset (ebp, 0) | |
118 | ||
8f5ca04b RM |
119 | /* The following code is the preparation for the loop. The |
120 | four instruction up to `L1' will not be executed in the loop | |
121 | because the same code is found at the end of the loop, but | |
122 | there it is executed in parallel with other instructions. */ | |
5929563f | 123 | L(11): movl (%eax), %ecx |
8f5ca04b RM |
124 | movl $magic, %ebp |
125 | ||
126 | movl $magic, %edi | |
127 | addl %ecx, %ebp | |
128 | ||
129 | /* The main loop: it looks complex and indeed it is. I would | |
130 | love to say `it was hard to write, so it should he hard to | |
131 | read' but I will give some more hints. To fully understand | |
132 | this code you should first take a look at the i486 version. | |
133 | The basic algorithm is the same, but here the code organized | |
134 | in a way which permits to use both pipelines all the time. | |
135 | ||
136 | I tried to make it a bit more understandable by indenting | |
137 | the code according to stage in the algorithm. It goes as | |
138 | follows: | |
139 | check for 0 in 1st word | |
140 | check for C in 1st word | |
141 | check for 0 in 2nd word | |
142 | check for C in 2nd word | |
143 | check for 0 in 3rd word | |
144 | check for C in 3rd word | |
145 | check for 0 in 4th word | |
146 | check for C in 4th word | |
147 | ||
148 | Please note that doing the test for NUL before the test for | |
149 | C allows us to overlap the test for 0 in the next word with | |
150 | the test for C. */ | |
151 | ||
5929563f | 152 | L(1): xorl %ecx, %ebp /* (word^magic) */ |
8f5ca04b RM |
153 | addl %ecx, %edi /* add magic word */ |
154 | ||
155 | leal 4(%eax), %eax /* increment pointer */ | |
5929563f | 156 | jnc L(4) /* previous addl caused overflow? */ |
8f5ca04b RM |
157 | |
158 | movl %ecx, %ebx /* duplicate original word */ | |
159 | orl $magic, %ebp /* (word^magic)|magic */ | |
160 | ||
161 | addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */ | |
5929563f | 162 | jne L(4) /* yes => we found word with NUL */ |
8f5ca04b RM |
163 | |
164 | movl $magic, %esi /* load magic value */ | |
165 | xorl %edx, %ebx /* clear words which are C */ | |
166 | ||
167 | movl (%eax), %ecx | |
168 | addl %ebx, %esi /* (word+magic) */ | |
169 | ||
170 | movl $magic, %edi | |
5929563f | 171 | jnc L(5) /* previous addl caused overflow? */ |
8f5ca04b RM |
172 | |
173 | movl %edi, %ebp | |
174 | xorl %ebx, %esi /* (word+magic)^word */ | |
175 | ||
176 | addl %ecx, %ebp | |
177 | orl $magic, %esi /* ((word+magic)^word)|magic */ | |
178 | ||
179 | addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/ | |
5929563f | 180 | jne L(5) /* yes => we found word with C */ |
8f5ca04b RM |
181 | |
182 | xorl %ecx, %ebp | |
183 | addl %ecx, %edi | |
184 | ||
185 | leal 4(%eax), %eax | |
5929563f | 186 | jnc L(4) |
8f5ca04b RM |
187 | |
188 | movl %ecx, %ebx | |
189 | orl $magic, %ebp | |
190 | ||
191 | addl $1, %ebp | |
5929563f | 192 | jne L(4) |
8f5ca04b RM |
193 | |
194 | movl $magic, %esi | |
195 | xorl %edx, %ebx | |
196 | ||
197 | movl (%eax), %ecx | |
198 | addl %ebx, %esi | |
199 | ||
200 | movl $magic, %edi | |
5929563f | 201 | jnc L(5) |
8f5ca04b RM |
202 | |
203 | movl %edi, %ebp | |
204 | xorl %ebx, %esi | |
205 | ||
206 | addl %ecx, %ebp | |
207 | orl $magic, %esi | |
208 | ||
209 | addl $1, %esi | |
5929563f | 210 | jne L(5) |
8f5ca04b RM |
211 | |
212 | xorl %ecx, %ebp | |
213 | addl %ecx, %edi | |
214 | ||
215 | leal 4(%eax), %eax | |
5929563f | 216 | jnc L(4) |
8f5ca04b RM |
217 | |
218 | movl %ecx, %ebx | |
219 | orl $magic, %ebp | |
220 | ||
221 | addl $1, %ebp | |
5929563f | 222 | jne L(4) |
8f5ca04b RM |
223 | |
224 | movl $magic, %esi | |
225 | xorl %edx, %ebx | |
226 | ||
227 | movl (%eax), %ecx | |
228 | addl %ebx, %esi | |
229 | ||
230 | movl $magic, %edi | |
5929563f | 231 | jnc L(5) |
8f5ca04b RM |
232 | |
233 | movl %edi, %ebp | |
234 | xorl %ebx, %esi | |
235 | ||
236 | addl %ecx, %ebp | |
237 | orl $magic, %esi | |
238 | ||
239 | addl $1, %esi | |
5929563f | 240 | jne L(5) |
8f5ca04b RM |
241 | |
242 | xorl %ecx, %ebp | |
243 | addl %ecx, %edi | |
244 | ||
245 | leal 4(%eax), %eax | |
5929563f | 246 | jnc L(4) |
8f5ca04b RM |
247 | |
248 | movl %ecx, %ebx | |
249 | orl $magic, %ebp | |
250 | ||
251 | addl $1, %ebp | |
5929563f | 252 | jne L(4) |
8f5ca04b RM |
253 | |
254 | movl $magic, %esi | |
255 | xorl %edx, %ebx | |
256 | ||
257 | movl (%eax), %ecx | |
258 | addl %ebx, %esi | |
259 | ||
260 | movl $magic, %edi | |
5929563f | 261 | jnc L(5) |
8f5ca04b RM |
262 | |
263 | movl %edi, %ebp | |
264 | xorl %ebx, %esi | |
265 | ||
266 | addl %ecx, %ebp | |
267 | orl $magic, %esi | |
268 | ||
269 | addl $1, %esi | |
270 | ||
5929563f | 271 | je L(1) |
8f5ca04b RM |
272 | |
273 | /* We know there is no NUL byte but a C byte in the word. | |
274 | %ebx contains NUL in this particular byte. */ | |
5929563f | 275 | L(5): subl $4, %eax /* adjust pointer */ |
8f5ca04b RM |
276 | testb %bl, %bl /* first byte == C? */ |
277 | ||
5929563f | 278 | jz L(2) /* yes => return pointer */ |
8f5ca04b RM |
279 | |
280 | incl %eax /* increment pointer */ | |
281 | testb %bh, %bh /* second byte == C? */ | |
282 | ||
5929563f | 283 | jz L(2) /* yes => return pointer */ |
8f5ca04b RM |
284 | |
285 | shrl $16, %ebx /* make upper bytes accessible */ | |
286 | incl %eax /* increment pointer */ | |
287 | ||
288 | cmp $0, %bl /* third byte == C */ | |
5929563f | 289 | je L(2) /* yes => return pointer */ |
8f5ca04b RM |
290 | |
291 | incl %eax /* increment pointer */ | |
292 | ||
2fc08826 GM |
293 | L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb) |
294 | RETURN_BOUNDED_POINTER (STR(%esp)) | |
0ecb606c JJ |
295 | L(out): popl %ebp /* restore saved registers */ |
296 | cfi_adjust_cfa_offset (-4) | |
297 | cfi_restore (ebp) | |
8f5ca04b | 298 | popl %ebx |
0ecb606c JJ |
299 | cfi_adjust_cfa_offset (-4) |
300 | cfi_restore (ebx) | |
8f5ca04b RM |
301 | |
302 | popl %esi | |
0ecb606c JJ |
303 | cfi_adjust_cfa_offset (-4) |
304 | cfi_restore (esi) | |
8f5ca04b | 305 | popl %edi |
0ecb606c JJ |
306 | cfi_adjust_cfa_offset (-4) |
307 | cfi_restore (edi) | |
8f5ca04b | 308 | |
3f02f778 GM |
309 | LEAVE |
310 | RET_PTR | |
8f5ca04b | 311 | |
0ecb606c JJ |
312 | cfi_adjust_cfa_offset (16) |
313 | cfi_rel_offset (edi, 12) | |
314 | cfi_rel_offset (esi, 8) | |
315 | cfi_rel_offset (ebx, 4) | |
316 | cfi_rel_offset (ebp, 0) | |
8f5ca04b RM |
317 | /* We know there is a NUL byte in the word. But we have to test |
318 | whether there is an C byte before it in the word. */ | |
5929563f | 319 | L(4): subl $4, %eax /* adjust pointer */ |
8f5ca04b RM |
320 | cmpb %dl, %cl /* first byte == C? */ |
321 | ||
5929563f | 322 | je L(2) /* yes => return pointer */ |
8f5ca04b RM |
323 | |
324 | cmpb $0, %cl /* first byte == NUL? */ | |
5929563f | 325 | je L(3) /* yes => return NULL */ |
8f5ca04b RM |
326 | |
327 | incl %eax /* increment pointer */ | |
328 | ||
329 | cmpb %dl, %ch /* second byte == C? */ | |
5929563f | 330 | je L(2) /* yes => return pointer */ |
8f5ca04b RM |
331 | |
332 | cmpb $0, %ch /* second byte == NUL? */ | |
5929563f | 333 | je L(3) /* yes => return NULL */ |
8f5ca04b RM |
334 | |
335 | shrl $16, %ecx /* make upper bytes accessible */ | |
336 | incl %eax /* increment pointer */ | |
337 | ||
338 | cmpb %dl, %cl /* third byte == C? */ | |
5929563f | 339 | je L(2) /* yes => return pointer */ |
8f5ca04b RM |
340 | |
341 | cmpb $0, %cl /* third byte == NUL? */ | |
5929563f | 342 | je L(3) /* yes => return NULL */ |
8f5ca04b RM |
343 | |
344 | incl %eax /* increment pointer */ | |
345 | ||
346 | /* The test four the fourth byte is necessary! */ | |
347 | cmpb %dl, %ch /* fourth byte == C? */ | |
5929563f | 348 | je L(2) /* yes => return pointer */ |
8f5ca04b | 349 | |
2fc08826 GM |
350 | L(3): xorl %eax, %eax |
351 | RETURN_NULL_BOUNDED_POINTER | |
0ecb606c | 352 | jmp L(out) |
2fc08826 | 353 | END (BP_SYM (strchr)) |
8f5ca04b RM |
354 | |
355 | #undef index | |
2fc08826 | 356 | weak_alias (BP_SYM (strchr), BP_SYM (index)) |
85dd1003 | 357 | libc_hidden_builtin_def (strchr) |