]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i586/strchr.S
update from main archvie 961022
[thirdparty/glibc.git] / sysdeps / i386 / i586 / strchr.S
CommitLineData
8f5ca04b
RM
1/* strchr -- find character CH in a NUL terminated string.
2Highly optimized version for ix85, x>=5.
da74e902 3Copyright (C) 1995, 1996 Free Software Foundation, Inc.
8f5ca04b
RM
4This file is part of the GNU C Library.
5Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
6
7The GNU C Library is free software; you can redistribute it and/or
8modify it under the terms of the GNU Library General Public License as
9published by the Free Software Foundation; either version 2 of the
10License, or (at your option) any later version.
11
12The GNU C Library is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15Library General Public License for more details.
16
17You should have received a copy of the GNU Library General Public
18License along with the GNU C Library; see the file COPYING.LIB. If
19not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
21
22#include <sysdep.h>
23
24/* This version is especially optimized for the i586 (and following?)
25 processors. This is mainly done by using the two pipelines. The
26 version optimized for i486 is weak in this aspect because to get
27 as much parallelism we have to executs some *more* instructions.
28
29 The code below is structured to reflect the pairing of the instructions
30 as *I think* it is. I have no processor data book to verify this.
31 If you find something you think is incorrect let me know. */
32
33
34/* The magic value which is used throughout in the whole code. */
35#define magic 0xfefefeff
36
37/*
38 INPUT PARAMETERS:
39 str (sp + 4)
40 ch (sp + 8)
41*/
42
43 .text
44ENTRY (strchr)
45 pushl %edi /* Save callee-safe registers. */
46 pushl %esi
47
48 pushl %ebx
49 pushl %ebp
50
51 movl 20(%esp), %eax /* get string pointer */
52 movl 24(%esp), %edx /* get character we are looking for */
53
54 movl %eax, %edi /* duplicate string pointer for later */
55 xorl %ecx, %ecx /* clear %ecx */
56
57 /* At the moment %edx contains C. What we need for the
58 algorithm is C in all bytes of the dword. Avoid
59 operations on 16 bit words because these require an
60 prefix byte (and one more cycle). */
61 movb %dl, %dh /* now it is 0|0|c|c */
62 movb %dl, %cl /* we construct the lower half in %ecx */
63
64 shll $16, %edx /* now %edx is c|c|0|0 */
65 movb %cl, %ch /* now %ecx is 0|0|c|c */
66
67 orl %ecx, %edx /* and finally c|c|c|c */
68 andl $3, %edi /* mask alignment bits */
69
70 jz L11 /* alignment is 0 => start loop */
71
11336c16
UD
72 movb %dl, %cl /* 0 is needed below */
73 jp L0 /* exactly two bits set */
8f5ca04b 74
11336c16
UD
75 xorb (%eax), %cl /* is byte the one we are looking for? */
76 jz L2 /* yes => return pointer */
8f5ca04b 77
11336c16 78 xorb %dl, %cl /* load single byte and test for NUL */
8f5ca04b
RM
79 je L3 /* yes => return NULL */
80
11336c16
UD
81 movb 1(%eax), %cl /* load single byte */
82 incl %eax
8f5ca04b 83
8f5ca04b 84 cmpb %cl, %dl /* is byte == C? */
8f5ca04b
RM
85 je L2 /* aligned => return pointer */
86
3e2ee727 87 cmpb $0, %cl /* is byte NUL? */
8f5ca04b
RM
88 je L3 /* yes => return NULL */
89
11336c16
UD
90 incl %eax
91 decl %edi
92
93 jne L11
94
95L0: movb (%eax), %cl /* load single byte */
8f5ca04b 96
8f5ca04b 97 cmpb %cl, %dl /* is byte == C? */
8f5ca04b
RM
98 je L2 /* aligned => return pointer */
99
3e2ee727 100 cmpb $0, %cl /* is byte NUL? */
8f5ca04b
RM
101 je L3 /* yes => return NULL */
102
103 incl %eax /* increment pointer */
104
105 /* The following code is the preparation for the loop. The
106 four instruction up to `L1' will not be executed in the loop
107 because the same code is found at the end of the loop, but
108 there it is executed in parallel with other instructions. */
109L11: movl (%eax), %ecx
110 movl $magic, %ebp
111
112 movl $magic, %edi
113 addl %ecx, %ebp
114
115 /* The main loop: it looks complex and indeed it is. I would
116 love to say `it was hard to write, so it should he hard to
117 read' but I will give some more hints. To fully understand
118 this code you should first take a look at the i486 version.
119 The basic algorithm is the same, but here the code organized
120 in a way which permits to use both pipelines all the time.
121
122 I tried to make it a bit more understandable by indenting
123 the code according to stage in the algorithm. It goes as
124 follows:
125 check for 0 in 1st word
126 check for C in 1st word
127 check for 0 in 2nd word
128 check for C in 2nd word
129 check for 0 in 3rd word
130 check for C in 3rd word
131 check for 0 in 4th word
132 check for C in 4th word
133
134 Please note that doing the test for NUL before the test for
135 C allows us to overlap the test for 0 in the next word with
136 the test for C. */
137
138L1: xorl %ecx, %ebp /* (word^magic) */
139 addl %ecx, %edi /* add magic word */
140
141 leal 4(%eax), %eax /* increment pointer */
142 jnc L4 /* previous addl caused overflow? */
143
144 movl %ecx, %ebx /* duplicate original word */
145 orl $magic, %ebp /* (word^magic)|magic */
146
147 addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */
148 jne L4 /* yes => we found word with NUL */
149
150 movl $magic, %esi /* load magic value */
151 xorl %edx, %ebx /* clear words which are C */
152
153 movl (%eax), %ecx
154 addl %ebx, %esi /* (word+magic) */
155
156 movl $magic, %edi
157 jnc L5 /* previous addl caused overflow? */
158
159 movl %edi, %ebp
160 xorl %ebx, %esi /* (word+magic)^word */
161
162 addl %ecx, %ebp
163 orl $magic, %esi /* ((word+magic)^word)|magic */
164
165 addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/
166 jne L5 /* yes => we found word with C */
167
168 xorl %ecx, %ebp
169 addl %ecx, %edi
170
171 leal 4(%eax), %eax
172 jnc L4
173
174 movl %ecx, %ebx
175 orl $magic, %ebp
176
177 addl $1, %ebp
178 jne L4
179
180 movl $magic, %esi
181 xorl %edx, %ebx
182
183 movl (%eax), %ecx
184 addl %ebx, %esi
185
186 movl $magic, %edi
187 jnc L5
188
189 movl %edi, %ebp
190 xorl %ebx, %esi
191
192 addl %ecx, %ebp
193 orl $magic, %esi
194
195 addl $1, %esi
196 jne L5
197
198 xorl %ecx, %ebp
199 addl %ecx, %edi
200
201 leal 4(%eax), %eax
202 jnc L4
203
204 movl %ecx, %ebx
205 orl $magic, %ebp
206
207 addl $1, %ebp
208 jne L4
209
210 movl $magic, %esi
211 xorl %edx, %ebx
212
213 movl (%eax), %ecx
214 addl %ebx, %esi
215
216 movl $magic, %edi
217 jnc L5
218
219 movl %edi, %ebp
220 xorl %ebx, %esi
221
222 addl %ecx, %ebp
223 orl $magic, %esi
224
225 addl $1, %esi
226 jne L5
227
228 xorl %ecx, %ebp
229 addl %ecx, %edi
230
231 leal 4(%eax), %eax
232 jnc L4
233
234 movl %ecx, %ebx
235 orl $magic, %ebp
236
237 addl $1, %ebp
238 jne L4
239
240 movl $magic, %esi
241 xorl %edx, %ebx
242
243 movl (%eax), %ecx
244 addl %ebx, %esi
245
246 movl $magic, %edi
247 jnc L5
248
249 movl %edi, %ebp
250 xorl %ebx, %esi
251
252 addl %ecx, %ebp
253 orl $magic, %esi
254
255 addl $1, %esi
256
257 je L1
258
259 /* We know there is no NUL byte but a C byte in the word.
260 %ebx contains NUL in this particular byte. */
261L5: subl $4, %eax /* adjust pointer */
262 testb %bl, %bl /* first byte == C? */
263
264 jz L2 /* yes => return pointer */
265
266 incl %eax /* increment pointer */
267 testb %bh, %bh /* second byte == C? */
268
269 jz L2 /* yes => return pointer */
270
271 shrl $16, %ebx /* make upper bytes accessible */
272 incl %eax /* increment pointer */
273
274 cmp $0, %bl /* third byte == C */
275 je L2 /* yes => return pointer */
276
277 incl %eax /* increment pointer */
278
279L2: popl %ebp /* restore saved registers */
280 popl %ebx
281
282 popl %esi
283 popl %edi
284
285 ret
286
287 /* We know there is a NUL byte in the word. But we have to test
288 whether there is an C byte before it in the word. */
289L4: subl $4, %eax /* adjust pointer */
290 cmpb %dl, %cl /* first byte == C? */
291
292 je L2 /* yes => return pointer */
293
294 cmpb $0, %cl /* first byte == NUL? */
295 je L3 /* yes => return NULL */
296
297 incl %eax /* increment pointer */
298
299 cmpb %dl, %ch /* second byte == C? */
300 je L2 /* yes => return pointer */
301
302 cmpb $0, %ch /* second byte == NUL? */
303 je L3 /* yes => return NULL */
304
305 shrl $16, %ecx /* make upper bytes accessible */
306 incl %eax /* increment pointer */
307
308 cmpb %dl, %cl /* third byte == C? */
309 je L2 /* yes => return pointer */
310
311 cmpb $0, %cl /* third byte == NUL? */
312 je L3 /* yes => return NULL */
313
314 incl %eax /* increment pointer */
315
316 /* The test four the fourth byte is necessary! */
317 cmpb %dl, %ch /* fourth byte == C? */
318 je L2 /* yes => return pointer */
319
320L3: xorl %eax, %eax /* set return value = NULL */
321
322 popl %ebp /* restore saved registers */
323 popl %ebx
324
325 popl %esi
326 popl %edi
327
328 ret
6ed0492f 329END (strchr)
8f5ca04b
RM
330
331#undef index
332weak_alias (strchr, index)