]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/strchrnul.S
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / sysdeps / i386 / strchrnul.S
1 /* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR
2 or the final NUL byte.
3 For Intel 80x86, x>=3.
4 Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Ulrich Drepper <drepper@gnu.org>
7 Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
8
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 02111-1307 USA. */
23
24 #include <sysdep.h>
25 #include "asm-syntax.h"
26 #include "bp-sym.h"
27 #include "bp-asm.h"
28
29 #define PARMS LINKAGE+4 /* space for 1 saved reg */
30 #define RTN PARMS
31 #define STR RTN+RTN_SIZE
32 #define CHR STR+PTR_SIZE
33
34 .text
35 ENTRY (BP_SYM (__strchrnul))
36 ENTER
37
38 pushl %edi /* Save callee-safe registers used here. */
39
40 movl STR(%esp), %eax
41 movl CHR(%esp), %edx
42 CHECK_BOUNDS_LOW (%eax, STR(%esp))
43
44 /* At the moment %edx contains CHR. What we need for the
45 algorithm is CHR in all bytes of the dword. Avoid
46 operations on 16 bit words because these require an
47 prefix byte (and one more cycle). */
48 movb %dl, %dh /* now it is 0|0|c|c */
49 movl %edx, %ecx
50 shll $16, %edx /* now it is c|c|0|0 */
51 movw %cx, %dx /* and finally c|c|c|c */
52
53 /* Before we start with the main loop we process single bytes
54 until the source pointer is aligned. This has two reasons:
55 1. aligned 32-bit memory access is faster
56 and (more important)
57 2. we process in the main loop 32 bit in one step although
58 we don't know the end of the string. But accessing at
59 4-byte alignment guarantees that we never access illegal
60 memory if this would not also be done by the trivial
61 implementation (this is because all processor inherent
62 boundaries are multiples of 4. */
63
64 testb $3, %al /* correctly aligned ? */
65 jz L(11) /* yes => begin loop */
66 movb (%eax), %cl /* load byte in question (we need it twice) */
67 cmpb %cl, %dl /* compare byte */
68 je L(6) /* target found => return */
69 testb %cl, %cl /* is NUL? */
70 jz L(6) /* yes => return NULL */
71 incl %eax /* increment pointer */
72
73 testb $3, %al /* correctly aligned ? */
74 jz L(11) /* yes => begin loop */
75 movb (%eax), %cl /* load byte in question (we need it twice) */
76 cmpb %cl, %dl /* compare byte */
77 je L(6) /* target found => return */
78 testb %cl, %cl /* is NUL? */
79 jz L(6) /* yes => return NULL */
80 incl %eax /* increment pointer */
81
82 testb $3, %al /* correctly aligned ? */
83 jz L(11) /* yes => begin loop */
84 movb (%eax), %cl /* load byte in question (we need it twice) */
85 cmpb %cl, %dl /* compare byte */
86 je L(6) /* target found => return */
87 testb %cl, %cl /* is NUL? */
88 jz L(6) /* yes => return NULL */
89 incl %eax /* increment pointer */
90
91 /* No we have reached alignment. */
92 jmp L(11) /* begin loop */
93
94 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
95 change any of the hole bits of LONGWORD.
96
97 1) Is this safe? Will it catch all the zero bytes?
98 Suppose there is a byte with all zeros. Any carry bits
99 propagating from its left will fall into the hole at its
100 least significant bit and stop. Since there will be no
101 carry from its most significant bit, the LSB of the
102 byte to the left will be unchanged, and the zero will be
103 detected.
104
105 2) Is this worthwhile? Will it ignore everything except
106 zero bytes? Suppose every byte of LONGWORD has a bit set
107 somewhere. There will be a carry into bit 8. If bit 8
108 is set, this will carry into bit 16. If bit 8 is clear,
109 one of bits 9-15 must be set, so there will be a carry
110 into bit 16. Similarly, there will be a carry into bit
111 24. If one of bits 24-31 is set, there will be a carry
112 into bit 32 (=carry flag), so all of the hole bits will
113 be changed.
114
115 3) But wait! Aren't we looking for CHR, not zero?
116 Good point. So what we do is XOR LONGWORD with a longword,
117 each of whose bytes is CHR. This turns each byte that is CHR
118 into a zero. */
119
120 /* Each round the main loop processes 16 bytes. */
121
122 ALIGN(4)
123
124 L(1): addl $16, %eax /* adjust pointer for whole round */
125
126 L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
127 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
128 are now 0 */
129 movl $0xfefefeff, %edi /* magic value */
130 addl %ecx, %edi /* add the magic value to the word. We get
131 carry bits reported for each byte which
132 is *not* CHR */
133
134 /* According to the algorithm we had to reverse the effect of the
135 XOR first and then test the overflow bits. But because the
136 following XOR would destroy the carry flag and it would (in a
137 representation with more than 32 bits) not alter then last
138 overflow, we can now test this condition. If no carry is signaled
139 no overflow must have occurred in the last byte => it was 0. */
140 jnc L(7)
141
142 /* We are only interested in carry bits that change due to the
143 previous add, so remove original bits */
144 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
145
146 /* Now test for the other three overflow bits. */
147 orl $0xfefefeff, %edi /* set all non-carry bits */
148 incl %edi /* add 1: if one carry bit was *not* set
149 the addition will not result in 0. */
150
151 /* If at least one byte of the word is CHR we don't get 0 in %edi. */
152 jnz L(7) /* found it => return pointer */
153
154 /* Now we made sure the dword does not contain the character we are
155 looking for. But because we deal with strings we have to check
156 for the end of string before testing the next dword. */
157
158 xorl %edx, %ecx /* restore original dword without reload */
159 movl $0xfefefeff, %edi /* magic value */
160 addl %ecx, %edi /* add the magic value to the word. We get
161 carry bits reported for each byte which
162 is *not* 0 */
163 jnc L(7) /* highest byte is NUL => return NULL */
164 xorl %ecx, %edi /* (word+magic)^word */
165 orl $0xfefefeff, %edi /* set all non-carry bits */
166 incl %edi /* add 1: if one carry bit was *not* set
167 the addition will not result in 0. */
168 jnz L(7) /* found NUL => return NULL */
169
170 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
171 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
172 are now 0 */
173 movl $0xfefefeff, %edi /* magic value */
174 addl %ecx, %edi /* add the magic value to the word. We get
175 carry bits reported for each byte which
176 is *not* CHR */
177 jnc L(71) /* highest byte is CHR => return pointer */
178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
179 orl $0xfefefeff, %edi /* set all non-carry bits */
180 incl %edi /* add 1: if one carry bit was *not* set
181 the addition will not result in 0. */
182 jnz L(71) /* found it => return pointer */
183 xorl %edx, %ecx /* restore original dword without reload */
184 movl $0xfefefeff, %edi /* magic value */
185 addl %ecx, %edi /* add the magic value to the word. We get
186 carry bits reported for each byte which
187 is *not* 0 */
188 jnc L(71) /* highest byte is NUL => return NULL */
189 xorl %ecx, %edi /* (word+magic)^word */
190 orl $0xfefefeff, %edi /* set all non-carry bits */
191 incl %edi /* add 1: if one carry bit was *not* set
192 the addition will not result in 0. */
193 jnz L(71) /* found NUL => return NULL */
194
195 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
196 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
197 are now 0 */
198 movl $0xfefefeff, %edi /* magic value */
199 addl %ecx, %edi /* add the magic value to the word. We get
200 carry bits reported for each byte which
201 is *not* CHR */
202 jnc L(72) /* highest byte is CHR => return pointer */
203 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
204 orl $0xfefefeff, %edi /* set all non-carry bits */
205 incl %edi /* add 1: if one carry bit was *not* set
206 the addition will not result in 0. */
207 jnz L(72) /* found it => return pointer */
208 xorl %edx, %ecx /* restore original dword without reload */
209 movl $0xfefefeff, %edi /* magic value */
210 addl %ecx, %edi /* add the magic value to the word. We get
211 carry bits reported for each byte which
212 is *not* 0 */
213 jnc L(72) /* highest byte is NUL => return NULL */
214 xorl %ecx, %edi /* (word+magic)^word */
215 orl $0xfefefeff, %edi /* set all non-carry bits */
216 incl %edi /* add 1: if one carry bit was *not* set
217 the addition will not result in 0. */
218 jnz L(72) /* found NUL => return NULL */
219
220 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
221 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
222 are now 0 */
223 movl $0xfefefeff, %edi /* magic value */
224 addl %ecx, %edi /* add the magic value to the word. We get
225 carry bits reported for each byte which
226 is *not* CHR */
227 jnc L(73) /* highest byte is CHR => return pointer */
228 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
229 orl $0xfefefeff, %edi /* set all non-carry bits */
230 incl %edi /* add 1: if one carry bit was *not* set
231 the addition will not result in 0. */
232 jnz L(73) /* found it => return pointer */
233 xorl %edx, %ecx /* restore original dword without reload */
234 movl $0xfefefeff, %edi /* magic value */
235 addl %ecx, %edi /* add the magic value to the word. We get
236 carry bits reported for each byte which
237 is *not* 0 */
238 jnc L(73) /* highest byte is NUL => return NULL */
239 xorl %ecx, %edi /* (word+magic)^word */
240 orl $0xfefefeff, %edi /* set all non-carry bits */
241 incl %edi /* add 1: if one carry bit was *not* set
242 the addition will not result in 0. */
243 jz L(1) /* no NUL found => restart loop */
244
245 L(73): addl $4, %eax /* adjust pointer */
246 L(72): addl $4, %eax
247 L(71): addl $4, %eax
248
249 /* We now scan for the byte in which the character was matched.
250 But we have to take care of the case that a NUL char is
251 found before this in the dword. */
252
253 L(7): testb %cl, %cl /* is first byte CHR? */
254 jz L(6) /* yes => return pointer */
255 cmpb %dl, %cl /* is first byte NUL? */
256 je L(6) /* yes => return NULL */
257 incl %eax /* it's not in the first byte */
258
259 testb %ch, %ch /* is second byte CHR? */
260 jz L(6) /* yes => return pointer */
261 cmpb %dl, %ch /* is second byte NUL? */
262 je L(6) /* yes => return NULL? */
263 incl %eax /* it's not in the second byte */
264
265 shrl $16, %ecx /* make upper byte accessible */
266 testb %cl, %cl /* is third byte CHR? */
267 jz L(6) /* yes => return pointer */
268 cmpb %dl, %cl /* is third byte NUL? */
269 je L(6) /* yes => return NULL */
270
271 /* It must be in the fourth byte and it cannot be NUL. */
272 incl %eax
273
274 L(6): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
275 RETURN_BOUNDED_POINTER (STR(%esp))
276 popl %edi /* restore saved register content */
277
278 LEAVE
279 RET_PTR
280 END (BP_SYM (__strchrnul))
281
282 weak_alias (BP_SYM (__strchrnul), BP_SYM (strchrnul))