]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/rawmemchr.S
2.5-18.1
[thirdparty/glibc.git] / sysdeps / i386 / rawmemchr.S
CommitLineData
482eec0d
UD
1/* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
2 For Intel 80x86, x>=3.
0ecb606c 3 Copyright (C) 1994-2000,2002,2005 Free Software Foundation, Inc.
482eec0d
UD
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
6 Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
482eec0d
UD
7 This version is developed using the same algorithm as the fast C
8 version which carries the following introduction:
482eec0d
UD
9 Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
10 with help from Dan Sahlin (dan@sics.se) and
11 commentary by Jim Blandy (jimb@ai.mit.edu);
12 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
13 and implemented by Roland McGrath (roland@ai.mit.edu).
14
15 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2.1 of the License, or (at your option) any later version.
482eec0d
UD
19
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 23 Lesser General Public License for more details.
482eec0d 24
41bdb6e2
AJ
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, write to the Free
27 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307 USA. */
482eec0d
UD
29
30#include <sysdep.h>
31#include "asm-syntax.h"
2fc08826 32#include "bp-sym.h"
3f02f778 33#include "bp-asm.h"
482eec0d 34
3f02f778
GM
35#define PARMS LINKAGE+4 /* space for 1 saved reg */
36#define RTN PARMS
37#define STR RTN+RTN_SIZE
38#define CHR STR+PTR_SIZE
482eec0d
UD
39
40 .text
2fc08826 41ENTRY (BP_SYM (__rawmemchr))
3f02f778
GM
42 ENTER
43
482eec0d
UD
44 /* Save callee-safe register used in this function. */
45 pushl %edi
0ecb606c
JJ
46 cfi_adjust_cfa_offset (4)
47 cfi_rel_offset (edi, 0)
482eec0d
UD
48
49 /* Load parameters into registers. */
3f02f778
GM
50 movl STR(%esp), %eax
51 movl CHR(%esp), %edx
2fc08826 52 CHECK_BOUNDS_LOW (%eax, STR(%esp))
482eec0d
UD
53
54 /* At the moment %edx contains C. What we need for the
55 algorithm is C in all bytes of the dword. Avoid
56 operations on 16 bit words because these require an
57 prefix byte (and one more cycle). */
58 movb %dl, %dh /* Now it is 0|0|c|c */
59 movl %edx, %ecx
60 shll $16, %edx /* Now c|c|0|0 */
61 movw %cx, %dx /* And finally c|c|c|c */
62
63 /* Better performance can be achieved if the word (32
64 bit) memory access is aligned on a four-byte-boundary.
65 So process first bytes one by one until boundary is
66 reached. Don't use a loop for better performance. */
67
68 testb $3, %al /* correctly aligned ? */
69 je L(1) /* yes => begin loop */
70 cmpb %dl, (%eax) /* compare byte */
71 je L(9) /* target found => return */
72 incl %eax /* increment source pointer */
73
74 testb $3, %al /* correctly aligned ? */
75 je L(1) /* yes => begin loop */
76 cmpb %dl, (%eax) /* compare byte */
77 je L(9) /* target found => return */
78 incl %eax /* increment source pointer */
79
80 testb $3, %al /* correctly aligned ? */
81 je L(1) /* yes => begin loop */
82 cmpb %dl, (%eax) /* compare byte */
83 je L(9) /* target found => return */
84 incl %eax /* increment source pointer */
85
86 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
87 change any of the hole bits of LONGWORD.
88
89 1) Is this safe? Will it catch all the zero bytes?
90 Suppose there is a byte with all zeros. Any carry bits
91 propagating from its left will fall into the hole at its
92 least significant bit and stop. Since there will be no
93 carry from its most significant bit, the LSB of the
94 byte to the left will be unchanged, and the zero will be
95 detected.
96
97 2) Is this worthwhile? Will it ignore everything except
98 zero bytes? Suppose every byte of LONGWORD has a bit set
99 somewhere. There will be a carry into bit 8. If bit 8
100 is set, this will carry into bit 16. If bit 8 is clear,
101 one of bits 9-15 must be set, so there will be a carry
102 into bit 16. Similarly, there will be a carry into bit
103 24. If one of bits 24-31 is set, there will be a carry
104 into bit 32 (=carry flag), so all of the hole bits will
105 be changed.
106
107 3) But wait! Aren't we looking for C, not zero?
108 Good point. So what we do is XOR LONGWORD with a longword,
109 each of whose bytes is C. This turns each byte that is C
110 into a zero. */
111
112
113 /* Each round the main loop processes 16 bytes. */
114 ALIGN (4)
115
116L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
117 movl $0xfefefeff, %edi /* magic value */
118 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
119 are now 0 */
120 addl %ecx, %edi /* add the magic value to the word. We get
121 carry bits reported for each byte which
122 is *not* 0 */
123
124 /* According to the algorithm we had to reverse the effect of the
125 XOR first and then test the overflow bits. But because the
126 following XOR would destroy the carry flag and it would (in a
127 representation with more than 32 bits) not alter then last
128 overflow, we can now test this condition. If no carry is signaled
129 no overflow must have occurred in the last byte => it was 0. */
130 jnc L(8)
131
132 /* We are only interested in carry bits that change due to the
133 previous add, so remove original bits */
134 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
135
136 /* Now test for the other three overflow bits. */
137 orl $0xfefefeff, %edi /* set all non-carry bits */
138 incl %edi /* add 1: if one carry bit was *not* set
139 the addition will not result in 0. */
140
141 /* If at least one byte of the word is C we don't get 0 in %edi. */
142 jnz L(8) /* found it => return pointer */
143
144 /* This process is unfolded four times for better performance.
145 we don't increment the source pointer each time. Instead we
146 use offsets and increment by 16 in each run of the loop. But
147 before probing for the matching byte we need some extra code
148 (following LL(13) below). Even the len can be compared with
149 constants instead of decrementing each time. */
150
151 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
152 movl $0xfefefeff, %edi /* magic value */
153 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
154 are now 0 */
155 addl %ecx, %edi /* add the magic value to the word. We get
156 carry bits reported for each byte which
157 is *not* 0 */
158 jnc L(7) /* highest byte is C => return pointer */
159 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
160 orl $0xfefefeff, %edi /* set all non-carry bits */
161 incl %edi /* add 1: if one carry bit was *not* set
162 the addition will not result in 0. */
163 jnz L(7) /* found it => return pointer */
164
165 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
166 movl $0xfefefeff, %edi /* magic value */
167 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
168 are now 0 */
169 addl %ecx, %edi /* add the magic value to the word. We get
170 carry bits reported for each byte which
171 is *not* 0 */
172 jnc L(6) /* highest byte is C => return pointer */
173 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
174 orl $0xfefefeff, %edi /* set all non-carry bits */
175 incl %edi /* add 1: if one carry bit was *not* set
176 the addition will not result in 0. */
177 jnz L(6) /* found it => return pointer */
178
179 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
180 movl $0xfefefeff, %edi /* magic value */
181 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
182 are now 0 */
183 addl %ecx, %edi /* add the magic value to the word. We get
184 carry bits reported for each byte which
185 is *not* 0 */
186 jnc L(5) /* highest byte is C => return pointer */
187 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
188 orl $0xfefefeff, %edi /* set all non-carry bits */
189 incl %edi /* add 1: if one carry bit was *not* set
190 the addition will not result in 0. */
191 jnz L(5) /* found it => return pointer */
192
193 /* Adjust both counters for a full round, i.e. 16 bytes. */
194 addl $16, %eax
195 jmp L(1)
196 /* add missing source pointer increments */
197L(5): addl $4, %eax
198L(6): addl $4, %eax
199L(7): addl $4, %eax
200
201 /* Test for the matching byte in the word. %ecx contains a NUL
202 char in the byte which originally was the byte we are looking
203 at. */
204L(8): testb %cl, %cl /* test first byte in dword */
205 jz L(9) /* if zero => return pointer */
206 incl %eax /* increment source pointer */
207
208 testb %ch, %ch /* test second byte in dword */
209 jz L(9) /* if zero => return pointer */
210 incl %eax /* increment source pointer */
211
212 testl $0xff0000, %ecx /* test third byte in dword */
213 jz L(9) /* if zero => return pointer */
214 incl %eax /* increment source pointer */
215
216 /* No further test needed we we know it is one of the four bytes. */
217
2fc08826
GM
218L(9):
219 CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
220 RETURN_BOUNDED_POINTER (STR(%esp))
221 popl %edi /* pop saved register */
0ecb606c
JJ
222 cfi_adjust_cfa_offset (-4)
223 cfi_restore (edi)
482eec0d 224
3f02f778 225 LEAVE
2fc08826
GM
226 RET_PTR
227END (BP_SYM (__rawmemchr))
228
37ba7d66 229libc_hidden_def (BP_SYM (__rawmemchr))
2fc08826 230weak_alias (BP_SYM (__rawmemchr), BP_SYM (rawmemchr))