]>
Commit | Line | Data |
---|---|---|
8f5ca04b RM |
1 | /* strchr -- find character CH in a NUL terminated string. |
2 | Highly optimized version for ix85, x>=5. | |
da74e902 | 3 | Copyright (C) 1995, 1996 Free Software Foundation, Inc. |
8f5ca04b RM |
4 | This file is part of the GNU C Library. |
5 | Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. | |
6 | ||
7 | The GNU C Library is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU Library General Public License as | |
9 | published by the Free Software Foundation; either version 2 of the | |
10 | License, or (at your option) any later version. | |
11 | ||
12 | The GNU C Library is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Library General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Library General Public | |
18 | License along with the GNU C Library; see the file COPYING.LIB. If | |
19 | not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #include <sysdep.h> | |
23 | ||
24 | /* This version is especially optimized for the i586 (and following?) | |
25 | processors. This is mainly done by using the two pipelines. The | |
26 | version optimized for i486 is weak in this aspect because to get | |
27 | as much parallelism we have to executs some *more* instructions. | |
28 | ||
29 | The code below is structured to reflect the pairing of the instructions | |
30 | as *I think* it is. I have no processor data book to verify this. | |
31 | If you find something you think is incorrect let me know. */ | |
32 | ||
33 | ||
34 | /* The magic value which is used throughout in the whole code. */ | |
35 | #define magic 0xfefefeff | |
36 | ||
37 | /* | |
38 | INPUT PARAMETERS: | |
39 | str (sp + 4) | |
40 | ch (sp + 8) | |
41 | */ | |
42 | ||
43 | .text | |
44 | ENTRY (strchr) | |
45 | pushl %edi /* Save callee-safe registers. */ | |
46 | pushl %esi | |
47 | ||
48 | pushl %ebx | |
49 | pushl %ebp | |
50 | ||
51 | movl 20(%esp), %eax /* get string pointer */ | |
52 | movl 24(%esp), %edx /* get character we are looking for */ | |
53 | ||
54 | movl %eax, %edi /* duplicate string pointer for later */ | |
55 | xorl %ecx, %ecx /* clear %ecx */ | |
56 | ||
57 | /* At the moment %edx contains C. What we need for the | |
58 | algorithm is C in all bytes of the dword. Avoid | |
59 | operations on 16 bit words because these require an | |
60 | prefix byte (and one more cycle). */ | |
61 | movb %dl, %dh /* now it is 0|0|c|c */ | |
62 | movb %dl, %cl /* we construct the lower half in %ecx */ | |
63 | ||
64 | shll $16, %edx /* now %edx is c|c|0|0 */ | |
65 | movb %cl, %ch /* now %ecx is 0|0|c|c */ | |
66 | ||
67 | orl %ecx, %edx /* and finally c|c|c|c */ | |
68 | andl $3, %edi /* mask alignment bits */ | |
69 | ||
70 | jz L11 /* alignment is 0 => start loop */ | |
71 | ||
11336c16 UD |
72 | movb %dl, %cl /* 0 is needed below */ |
73 | jp L0 /* exactly two bits set */ | |
8f5ca04b | 74 | |
11336c16 UD |
75 | xorb (%eax), %cl /* is byte the one we are looking for? */ |
76 | jz L2 /* yes => return pointer */ | |
8f5ca04b | 77 | |
11336c16 | 78 | xorb %dl, %cl /* load single byte and test for NUL */ |
8f5ca04b RM |
79 | je L3 /* yes => return NULL */ |
80 | ||
11336c16 UD |
81 | movb 1(%eax), %cl /* load single byte */ |
82 | incl %eax | |
8f5ca04b | 83 | |
8f5ca04b | 84 | cmpb %cl, %dl /* is byte == C? */ |
8f5ca04b RM |
85 | je L2 /* aligned => return pointer */ |
86 | ||
3e2ee727 | 87 | cmpb $0, %cl /* is byte NUL? */ |
8f5ca04b RM |
88 | je L3 /* yes => return NULL */ |
89 | ||
11336c16 UD |
90 | incl %eax |
91 | decl %edi | |
92 | ||
93 | jne L11 | |
94 | ||
95 | L0: movb (%eax), %cl /* load single byte */ | |
8f5ca04b | 96 | |
8f5ca04b | 97 | cmpb %cl, %dl /* is byte == C? */ |
8f5ca04b RM |
98 | je L2 /* aligned => return pointer */ |
99 | ||
3e2ee727 | 100 | cmpb $0, %cl /* is byte NUL? */ |
8f5ca04b RM |
101 | je L3 /* yes => return NULL */ |
102 | ||
103 | incl %eax /* increment pointer */ | |
104 | ||
105 | /* The following code is the preparation for the loop. The | |
106 | four instruction up to `L1' will not be executed in the loop | |
107 | because the same code is found at the end of the loop, but | |
108 | there it is executed in parallel with other instructions. */ | |
109 | L11: movl (%eax), %ecx | |
110 | movl $magic, %ebp | |
111 | ||
112 | movl $magic, %edi | |
113 | addl %ecx, %ebp | |
114 | ||
115 | /* The main loop: it looks complex and indeed it is. I would | |
116 | love to say `it was hard to write, so it should he hard to | |
117 | read' but I will give some more hints. To fully understand | |
118 | this code you should first take a look at the i486 version. | |
119 | The basic algorithm is the same, but here the code organized | |
120 | in a way which permits to use both pipelines all the time. | |
121 | ||
122 | I tried to make it a bit more understandable by indenting | |
123 | the code according to stage in the algorithm. It goes as | |
124 | follows: | |
125 | check for 0 in 1st word | |
126 | check for C in 1st word | |
127 | check for 0 in 2nd word | |
128 | check for C in 2nd word | |
129 | check for 0 in 3rd word | |
130 | check for C in 3rd word | |
131 | check for 0 in 4th word | |
132 | check for C in 4th word | |
133 | ||
134 | Please note that doing the test for NUL before the test for | |
135 | C allows us to overlap the test for 0 in the next word with | |
136 | the test for C. */ | |
137 | ||
138 | L1: xorl %ecx, %ebp /* (word^magic) */ | |
139 | addl %ecx, %edi /* add magic word */ | |
140 | ||
141 | leal 4(%eax), %eax /* increment pointer */ | |
142 | jnc L4 /* previous addl caused overflow? */ | |
143 | ||
144 | movl %ecx, %ebx /* duplicate original word */ | |
145 | orl $magic, %ebp /* (word^magic)|magic */ | |
146 | ||
147 | addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */ | |
148 | jne L4 /* yes => we found word with NUL */ | |
149 | ||
150 | movl $magic, %esi /* load magic value */ | |
151 | xorl %edx, %ebx /* clear words which are C */ | |
152 | ||
153 | movl (%eax), %ecx | |
154 | addl %ebx, %esi /* (word+magic) */ | |
155 | ||
156 | movl $magic, %edi | |
157 | jnc L5 /* previous addl caused overflow? */ | |
158 | ||
159 | movl %edi, %ebp | |
160 | xorl %ebx, %esi /* (word+magic)^word */ | |
161 | ||
162 | addl %ecx, %ebp | |
163 | orl $magic, %esi /* ((word+magic)^word)|magic */ | |
164 | ||
165 | addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/ | |
166 | jne L5 /* yes => we found word with C */ | |
167 | ||
168 | xorl %ecx, %ebp | |
169 | addl %ecx, %edi | |
170 | ||
171 | leal 4(%eax), %eax | |
172 | jnc L4 | |
173 | ||
174 | movl %ecx, %ebx | |
175 | orl $magic, %ebp | |
176 | ||
177 | addl $1, %ebp | |
178 | jne L4 | |
179 | ||
180 | movl $magic, %esi | |
181 | xorl %edx, %ebx | |
182 | ||
183 | movl (%eax), %ecx | |
184 | addl %ebx, %esi | |
185 | ||
186 | movl $magic, %edi | |
187 | jnc L5 | |
188 | ||
189 | movl %edi, %ebp | |
190 | xorl %ebx, %esi | |
191 | ||
192 | addl %ecx, %ebp | |
193 | orl $magic, %esi | |
194 | ||
195 | addl $1, %esi | |
196 | jne L5 | |
197 | ||
198 | xorl %ecx, %ebp | |
199 | addl %ecx, %edi | |
200 | ||
201 | leal 4(%eax), %eax | |
202 | jnc L4 | |
203 | ||
204 | movl %ecx, %ebx | |
205 | orl $magic, %ebp | |
206 | ||
207 | addl $1, %ebp | |
208 | jne L4 | |
209 | ||
210 | movl $magic, %esi | |
211 | xorl %edx, %ebx | |
212 | ||
213 | movl (%eax), %ecx | |
214 | addl %ebx, %esi | |
215 | ||
216 | movl $magic, %edi | |
217 | jnc L5 | |
218 | ||
219 | movl %edi, %ebp | |
220 | xorl %ebx, %esi | |
221 | ||
222 | addl %ecx, %ebp | |
223 | orl $magic, %esi | |
224 | ||
225 | addl $1, %esi | |
226 | jne L5 | |
227 | ||
228 | xorl %ecx, %ebp | |
229 | addl %ecx, %edi | |
230 | ||
231 | leal 4(%eax), %eax | |
232 | jnc L4 | |
233 | ||
234 | movl %ecx, %ebx | |
235 | orl $magic, %ebp | |
236 | ||
237 | addl $1, %ebp | |
238 | jne L4 | |
239 | ||
240 | movl $magic, %esi | |
241 | xorl %edx, %ebx | |
242 | ||
243 | movl (%eax), %ecx | |
244 | addl %ebx, %esi | |
245 | ||
246 | movl $magic, %edi | |
247 | jnc L5 | |
248 | ||
249 | movl %edi, %ebp | |
250 | xorl %ebx, %esi | |
251 | ||
252 | addl %ecx, %ebp | |
253 | orl $magic, %esi | |
254 | ||
255 | addl $1, %esi | |
256 | ||
257 | je L1 | |
258 | ||
259 | /* We know there is no NUL byte but a C byte in the word. | |
260 | %ebx contains NUL in this particular byte. */ | |
261 | L5: subl $4, %eax /* adjust pointer */ | |
262 | testb %bl, %bl /* first byte == C? */ | |
263 | ||
264 | jz L2 /* yes => return pointer */ | |
265 | ||
266 | incl %eax /* increment pointer */ | |
267 | testb %bh, %bh /* second byte == C? */ | |
268 | ||
269 | jz L2 /* yes => return pointer */ | |
270 | ||
271 | shrl $16, %ebx /* make upper bytes accessible */ | |
272 | incl %eax /* increment pointer */ | |
273 | ||
274 | cmp $0, %bl /* third byte == C */ | |
275 | je L2 /* yes => return pointer */ | |
276 | ||
277 | incl %eax /* increment pointer */ | |
278 | ||
279 | L2: popl %ebp /* restore saved registers */ | |
280 | popl %ebx | |
281 | ||
282 | popl %esi | |
283 | popl %edi | |
284 | ||
285 | ret | |
286 | ||
287 | /* We know there is a NUL byte in the word. But we have to test | |
288 | whether there is an C byte before it in the word. */ | |
289 | L4: subl $4, %eax /* adjust pointer */ | |
290 | cmpb %dl, %cl /* first byte == C? */ | |
291 | ||
292 | je L2 /* yes => return pointer */ | |
293 | ||
294 | cmpb $0, %cl /* first byte == NUL? */ | |
295 | je L3 /* yes => return NULL */ | |
296 | ||
297 | incl %eax /* increment pointer */ | |
298 | ||
299 | cmpb %dl, %ch /* second byte == C? */ | |
300 | je L2 /* yes => return pointer */ | |
301 | ||
302 | cmpb $0, %ch /* second byte == NUL? */ | |
303 | je L3 /* yes => return NULL */ | |
304 | ||
305 | shrl $16, %ecx /* make upper bytes accessible */ | |
306 | incl %eax /* increment pointer */ | |
307 | ||
308 | cmpb %dl, %cl /* third byte == C? */ | |
309 | je L2 /* yes => return pointer */ | |
310 | ||
311 | cmpb $0, %cl /* third byte == NUL? */ | |
312 | je L3 /* yes => return NULL */ | |
313 | ||
314 | incl %eax /* increment pointer */ | |
315 | ||
316 | /* The test four the fourth byte is necessary! */ | |
317 | cmpb %dl, %ch /* fourth byte == C? */ | |
318 | je L2 /* yes => return pointer */ | |
319 | ||
320 | L3: xorl %eax, %eax /* set return value = NULL */ | |
321 | ||
322 | popl %ebp /* restore saved registers */ | |
323 | popl %ebx | |
324 | ||
325 | popl %esi | |
326 | popl %edi | |
327 | ||
328 | ret | |
6ed0492f | 329 | END (strchr) |
8f5ca04b RM |
330 | |
331 | #undef index | |
332 | weak_alias (strchr, index) |