]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/strchr.S
Update copyright notices with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / strchr.S
1 /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
2 For Intel 80x86, x>=3.
3 Copyright (C) 1994-2013 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sysdep.h>
23 #include "asm-syntax.h"
24 #include "bp-sym.h"
25 #include "bp-asm.h"
26
27 #define PARMS LINKAGE+4 /* space for 1 saved reg */
28 #define RTN PARMS
29 #define STR RTN+RTN_SIZE
30 #define CHR STR+PTR_SIZE
31
32 .text
33 ENTRY (BP_SYM (strchr))
34 ENTER
35
36 pushl %edi /* Save callee-safe registers used here. */
37 cfi_adjust_cfa_offset (4)
38 cfi_rel_offset (edi, 0)
39 movl STR(%esp), %eax
40 movl CHR(%esp), %edx
41 CHECK_BOUNDS_LOW (%eax, STR(%esp))
42
43 /* At the moment %edx contains C. What we need for the
44 algorithm is C in all bytes of the dword. Avoid
45 operations on 16 bit words because these require an
46 prefix byte (and one more cycle). */
47 movb %dl, %dh /* now it is 0|0|c|c */
48 movl %edx, %ecx
49 shll $16, %edx /* now it is c|c|0|0 */
50 movw %cx, %dx /* and finally c|c|c|c */
51
52 /* Before we start with the main loop we process single bytes
53 until the source pointer is aligned. This has two reasons:
54 1. aligned 32-bit memory access is faster
55 and (more important)
56 2. we process in the main loop 32 bit in one step although
57 we don't know the end of the string. But accessing at
58 4-byte alignment guarantees that we never access illegal
59 memory if this would not also be done by the trivial
60 implementation (this is because all processor inherent
61 boundaries are multiples of 4. */
62
63 testb $3, %al /* correctly aligned ? */
64 jz L(11) /* yes => begin loop */
65 movb (%eax), %cl /* load byte in question (we need it twice) */
66 cmpb %cl, %dl /* compare byte */
67 je L(6) /* target found => return */
68 testb %cl, %cl /* is NUL? */
69 jz L(2) /* yes => return NULL */
70 incl %eax /* increment pointer */
71
72 testb $3, %al /* correctly aligned ? */
73 jz L(11) /* yes => begin loop */
74 movb (%eax), %cl /* load byte in question (we need it twice) */
75 cmpb %cl, %dl /* compare byte */
76 je L(6) /* target found => return */
77 testb %cl, %cl /* is NUL? */
78 jz L(2) /* yes => return NULL */
79 incl %eax /* increment pointer */
80
81 testb $3, %al /* correctly aligned ? */
82 jz L(11) /* yes => begin loop */
83 movb (%eax), %cl /* load byte in question (we need it twice) */
84 cmpb %cl, %dl /* compare byte */
85 je L(6) /* target found => return */
86 testb %cl, %cl /* is NUL? */
87 jz L(2) /* yes => return NULL */
88 incl %eax /* increment pointer */
89
90 /* No we have reached alignment. */
91 jmp L(11) /* begin loop */
92
93 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
94 change any of the hole bits of LONGWORD.
95
96 1) Is this safe? Will it catch all the zero bytes?
97 Suppose there is a byte with all zeros. Any carry bits
98 propagating from its left will fall into the hole at its
99 least significant bit and stop. Since there will be no
100 carry from its most significant bit, the LSB of the
101 byte to the left will be unchanged, and the zero will be
102 detected.
103
104 2) Is this worthwhile? Will it ignore everything except
105 zero bytes? Suppose every byte of LONGWORD has a bit set
106 somewhere. There will be a carry into bit 8. If bit 8
107 is set, this will carry into bit 16. If bit 8 is clear,
108 one of bits 9-15 must be set, so there will be a carry
109 into bit 16. Similarly, there will be a carry into bit
110 24. If one of bits 24-31 is set, there will be a carry
111 into bit 32 (=carry flag), so all of the hole bits will
112 be changed.
113
114 3) But wait! Aren't we looking for C, not zero?
115 Good point. So what we do is XOR LONGWORD with a longword,
116 each of whose bytes is C. This turns each byte that is C
117 into a zero. */
118
119 /* Each round the main loop processes 16 bytes. */
120
121 ALIGN(4)
122
123 L(1): addl $16, %eax /* adjust pointer for whole round */
124
125 L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
126 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
127 are now 0 */
128 movl $0xfefefeff, %edi /* magic value */
129 addl %ecx, %edi /* add the magic value to the word. We get
130 carry bits reported for each byte which
131 is *not* C */
132
133 /* According to the algorithm we had to reverse the effect of the
134 XOR first and then test the overflow bits. But because the
135 following XOR would destroy the carry flag and it would (in a
136 representation with more than 32 bits) not alter then last
137 overflow, we can now test this condition. If no carry is signaled
138 no overflow must have occurred in the last byte => it was 0. */
139 jnc L(7)
140
141 /* We are only interested in carry bits that change due to the
142 previous add, so remove original bits */
143 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
144
145 /* Now test for the other three overflow bits. */
146 orl $0xfefefeff, %edi /* set all non-carry bits */
147 incl %edi /* add 1: if one carry bit was *not* set
148 the addition will not result in 0. */
149
150 /* If at least one byte of the word is C we don't get 0 in %edi. */
151 jnz L(7) /* found it => return pointer */
152
153 /* Now we made sure the dword does not contain the character we are
154 looking for. But because we deal with strings we have to check
155 for the end of string before testing the next dword. */
156
157 xorl %edx, %ecx /* restore original dword without reload */
158 movl $0xfefefeff, %edi /* magic value */
159 addl %ecx, %edi /* add the magic value to the word. We get
160 carry bits reported for each byte which
161 is *not* 0 */
162 jnc L(2) /* highest byte is NUL => return NULL */
163 xorl %ecx, %edi /* (word+magic)^word */
164 orl $0xfefefeff, %edi /* set all non-carry bits */
165 incl %edi /* add 1: if one carry bit was *not* set
166 the addition will not result in 0. */
167 jnz L(2) /* found NUL => return NULL */
168
169 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
170 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
171 are now 0 */
172 movl $0xfefefeff, %edi /* magic value */
173 addl %ecx, %edi /* add the magic value to the word. We get
174 carry bits reported for each byte which
175 is *not* C */
176 jnc L(71) /* highest byte is C => return pointer */
177 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
178 orl $0xfefefeff, %edi /* set all non-carry bits */
179 incl %edi /* add 1: if one carry bit was *not* set
180 the addition will not result in 0. */
181 jnz L(71) /* found it => return pointer */
182 xorl %edx, %ecx /* restore original dword without reload */
183 movl $0xfefefeff, %edi /* magic value */
184 addl %ecx, %edi /* add the magic value to the word. We get
185 carry bits reported for each byte which
186 is *not* 0 */
187 jnc L(2) /* highest byte is NUL => return NULL */
188 xorl %ecx, %edi /* (word+magic)^word */
189 orl $0xfefefeff, %edi /* set all non-carry bits */
190 incl %edi /* add 1: if one carry bit was *not* set
191 the addition will not result in 0. */
192 jnz L(2) /* found NUL => return NULL */
193
194 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
195 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
196 are now 0 */
197 movl $0xfefefeff, %edi /* magic value */
198 addl %ecx, %edi /* add the magic value to the word. We get
199 carry bits reported for each byte which
200 is *not* C */
201 jnc L(72) /* highest byte is C => return pointer */
202 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
203 orl $0xfefefeff, %edi /* set all non-carry bits */
204 incl %edi /* add 1: if one carry bit was *not* set
205 the addition will not result in 0. */
206 jnz L(72) /* found it => return pointer */
207 xorl %edx, %ecx /* restore original dword without reload */
208 movl $0xfefefeff, %edi /* magic value */
209 addl %ecx, %edi /* add the magic value to the word. We get
210 carry bits reported for each byte which
211 is *not* 0 */
212 jnc L(2) /* highest byte is NUL => return NULL */
213 xorl %ecx, %edi /* (word+magic)^word */
214 orl $0xfefefeff, %edi /* set all non-carry bits */
215 incl %edi /* add 1: if one carry bit was *not* set
216 the addition will not result in 0. */
217 jnz L(2) /* found NUL => return NULL */
218
219 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
220 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
221 are now 0 */
222 movl $0xfefefeff, %edi /* magic value */
223 addl %ecx, %edi /* add the magic value to the word. We get
224 carry bits reported for each byte which
225 is *not* C */
226 jnc L(73) /* highest byte is C => return pointer */
227 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
228 orl $0xfefefeff, %edi /* set all non-carry bits */
229 incl %edi /* add 1: if one carry bit was *not* set
230 the addition will not result in 0. */
231 jnz L(73) /* found it => return pointer */
232 xorl %edx, %ecx /* restore original dword without reload */
233 movl $0xfefefeff, %edi /* magic value */
234 addl %ecx, %edi /* add the magic value to the word. We get
235 carry bits reported for each byte which
236 is *not* 0 */
237 jnc L(2) /* highest byte is NUL => return NULL */
238 xorl %ecx, %edi /* (word+magic)^word */
239 orl $0xfefefeff, %edi /* set all non-carry bits */
240 incl %edi /* add 1: if one carry bit was *not* set
241 the addition will not result in 0. */
242 jz L(1) /* no NUL found => restart loop */
243
244 L(2): /* Return NULL. */
245 xorl %eax, %eax
246 RETURN_NULL_BOUNDED_POINTER
247 popl %edi /* restore saved register content */
248 cfi_adjust_cfa_offset (-4)
249 cfi_restore (edi)
250
251 LEAVE
252 RET_PTR
253
254 cfi_adjust_cfa_offset (4)
255 cfi_rel_offset (edi, 0)
256 L(73): addl $4, %eax /* adjust pointer */
257 L(72): addl $4, %eax
258 L(71): addl $4, %eax
259
260 /* We now scan for the byte in which the character was matched.
261 But we have to take care of the case that a NUL char is
262 found before this in the dword. Note that we XORed %ecx
263 with the byte we're looking for, therefore the tests below look
264 reversed. */
265
266 L(7): testb %cl, %cl /* is first byte C? */
267 jz L(6) /* yes => return pointer */
268 cmpb %dl, %cl /* is first byte NUL? */
269 je L(2) /* yes => return NULL */
270 incl %eax /* it's not in the first byte */
271
272 testb %ch, %ch /* is second byte C? */
273 jz L(6) /* yes => return pointer */
274 cmpb %dl, %ch /* is second byte NUL? */
275 je L(2) /* yes => return NULL? */
276 incl %eax /* it's not in the second byte */
277
278 shrl $16, %ecx /* make upper byte accessible */
279 testb %cl, %cl /* is third byte C? */
280 jz L(6) /* yes => return pointer */
281 cmpb %dl, %cl /* is third byte NUL? */
282 je L(2) /* yes => return NULL */
283
284 /* It must be in the fourth byte and it cannot be NUL. */
285 incl %eax
286
287 L(6):
288 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
289 RETURN_BOUNDED_POINTER (STR(%esp))
290 popl %edi /* restore saved register content */
291 cfi_adjust_cfa_offset (-4)
292 cfi_restore (edi)
293
294 LEAVE
295 RET_PTR
296 END (BP_SYM (strchr))
297
298 weak_alias (BP_SYM (strchr), BP_SYM (index))
299 libc_hidden_builtin_def (strchr)