]>
Commit | Line | Data |
---|---|---|
482eec0d UD |
1 | /* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR. |
2 | For Intel 80x86, x>=3. | |
0ecb606c | 3 | Copyright (C) 1994-2000,2002,2005 Free Software Foundation, Inc. |
482eec0d UD |
4 | This file is part of the GNU C Library. |
5 | Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> | |
6 | Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> | |
482eec0d UD |
7 | This version is developed using the same algorithm as the fast C |
8 | version which carries the following introduction: | |
482eec0d UD |
9 | Based on strlen implementation by Torbjorn Granlund (tege@sics.se), |
10 | with help from Dan Sahlin (dan@sics.se) and | |
11 | commentary by Jim Blandy (jimb@ai.mit.edu); | |
12 | adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), | |
13 | and implemented by Roland McGrath (roland@ai.mit.edu). | |
14 | ||
15 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
16 | modify it under the terms of the GNU Lesser General Public |
17 | License as published by the Free Software Foundation; either | |
18 | version 2.1 of the License, or (at your option) any later version. | |
482eec0d UD |
19 | |
20 | The GNU C Library is distributed in the hope that it will be useful, | |
21 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 23 | Lesser General Public License for more details. |
482eec0d | 24 | |
41bdb6e2 AJ |
25 | You should have received a copy of the GNU Lesser General Public |
26 | License along with the GNU C Library; if not, write to the Free | |
27 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
28 | 02111-1307 USA. */ | |
482eec0d UD |
29 | |
30 | #include <sysdep.h> | |
31 | #include "asm-syntax.h" | |
2fc08826 | 32 | #include "bp-sym.h" |
3f02f778 | 33 | #include "bp-asm.h" |
482eec0d | 34 | |
3f02f778 GM |
35 | #define PARMS LINKAGE+4 /* space for 1 saved reg */ |
36 | #define RTN PARMS | |
37 | #define STR RTN+RTN_SIZE | |
38 | #define CHR STR+PTR_SIZE | |
482eec0d UD |
39 | |
40 | .text | |
2fc08826 | 41 | ENTRY (BP_SYM (__rawmemchr)) |
3f02f778 GM |
42 | ENTER |
43 | ||
482eec0d UD |
44 | /* Save callee-safe register used in this function. */ |
45 | pushl %edi | |
0ecb606c JJ |
46 | cfi_adjust_cfa_offset (4) |
47 | cfi_rel_offset (edi, 0) | |
482eec0d UD |
48 | |
49 | /* Load parameters into registers. */ | |
3f02f778 GM |
50 | movl STR(%esp), %eax |
51 | movl CHR(%esp), %edx | |
2fc08826 | 52 | CHECK_BOUNDS_LOW (%eax, STR(%esp)) |
482eec0d UD |
53 | |
54 | /* At the moment %edx contains C. What we need for the | |
55 | algorithm is C in all bytes of the dword. Avoid | |
56 | operations on 16 bit words because these require an | |
57 | prefix byte (and one more cycle). */ | |
58 | movb %dl, %dh /* Now it is 0|0|c|c */ | |
59 | movl %edx, %ecx | |
60 | shll $16, %edx /* Now c|c|0|0 */ | |
61 | movw %cx, %dx /* And finally c|c|c|c */ | |
62 | ||
63 | /* Better performance can be achieved if the word (32 | |
64 | bit) memory access is aligned on a four-byte-boundary. | |
65 | So process first bytes one by one until boundary is | |
66 | reached. Don't use a loop for better performance. */ | |
67 | ||
68 | testb $3, %al /* correctly aligned ? */ | |
69 | je L(1) /* yes => begin loop */ | |
70 | cmpb %dl, (%eax) /* compare byte */ | |
71 | je L(9) /* target found => return */ | |
72 | incl %eax /* increment source pointer */ | |
73 | ||
74 | testb $3, %al /* correctly aligned ? */ | |
75 | je L(1) /* yes => begin loop */ | |
76 | cmpb %dl, (%eax) /* compare byte */ | |
77 | je L(9) /* target found => return */ | |
78 | incl %eax /* increment source pointer */ | |
79 | ||
80 | testb $3, %al /* correctly aligned ? */ | |
81 | je L(1) /* yes => begin loop */ | |
82 | cmpb %dl, (%eax) /* compare byte */ | |
83 | je L(9) /* target found => return */ | |
84 | incl %eax /* increment source pointer */ | |
85 | ||
86 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to | |
87 | change any of the hole bits of LONGWORD. | |
88 | ||
89 | 1) Is this safe? Will it catch all the zero bytes? | |
90 | Suppose there is a byte with all zeros. Any carry bits | |
91 | propagating from its left will fall into the hole at its | |
92 | least significant bit and stop. Since there will be no | |
93 | carry from its most significant bit, the LSB of the | |
94 | byte to the left will be unchanged, and the zero will be | |
95 | detected. | |
96 | ||
97 | 2) Is this worthwhile? Will it ignore everything except | |
98 | zero bytes? Suppose every byte of LONGWORD has a bit set | |
99 | somewhere. There will be a carry into bit 8. If bit 8 | |
100 | is set, this will carry into bit 16. If bit 8 is clear, | |
101 | one of bits 9-15 must be set, so there will be a carry | |
102 | into bit 16. Similarly, there will be a carry into bit | |
103 | 24. If one of bits 24-31 is set, there will be a carry | |
104 | into bit 32 (=carry flag), so all of the hole bits will | |
105 | be changed. | |
106 | ||
107 | 3) But wait! Aren't we looking for C, not zero? | |
108 | Good point. So what we do is XOR LONGWORD with a longword, | |
109 | each of whose bytes is C. This turns each byte that is C | |
110 | into a zero. */ | |
111 | ||
112 | ||
113 | /* Each round the main loop processes 16 bytes. */ | |
114 | ALIGN (4) | |
115 | ||
116 | L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ | |
117 | movl $0xfefefeff, %edi /* magic value */ | |
118 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
119 | are now 0 */ | |
120 | addl %ecx, %edi /* add the magic value to the word. We get | |
121 | carry bits reported for each byte which | |
122 | is *not* 0 */ | |
123 | ||
124 | /* According to the algorithm we had to reverse the effect of the | |
125 | XOR first and then test the overflow bits. But because the | |
126 | following XOR would destroy the carry flag and it would (in a | |
127 | representation with more than 32 bits) not alter then last | |
128 | overflow, we can now test this condition. If no carry is signaled | |
129 | no overflow must have occurred in the last byte => it was 0. */ | |
130 | jnc L(8) | |
131 | ||
132 | /* We are only interested in carry bits that change due to the | |
133 | previous add, so remove original bits */ | |
134 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ | |
135 | ||
136 | /* Now test for the other three overflow bits. */ | |
137 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
138 | incl %edi /* add 1: if one carry bit was *not* set | |
139 | the addition will not result in 0. */ | |
140 | ||
141 | /* If at least one byte of the word is C we don't get 0 in %edi. */ | |
142 | jnz L(8) /* found it => return pointer */ | |
143 | ||
144 | /* This process is unfolded four times for better performance. | |
145 | we don't increment the source pointer each time. Instead we | |
146 | use offsets and increment by 16 in each run of the loop. But | |
147 | before probing for the matching byte we need some extra code | |
148 | (following LL(13) below). Even the len can be compared with | |
149 | constants instead of decrementing each time. */ | |
150 | ||
151 | movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ | |
152 | movl $0xfefefeff, %edi /* magic value */ | |
153 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
154 | are now 0 */ | |
155 | addl %ecx, %edi /* add the magic value to the word. We get | |
156 | carry bits reported for each byte which | |
157 | is *not* 0 */ | |
158 | jnc L(7) /* highest byte is C => return pointer */ | |
159 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ | |
160 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
161 | incl %edi /* add 1: if one carry bit was *not* set | |
162 | the addition will not result in 0. */ | |
163 | jnz L(7) /* found it => return pointer */ | |
164 | ||
165 | movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ | |
166 | movl $0xfefefeff, %edi /* magic value */ | |
167 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
168 | are now 0 */ | |
169 | addl %ecx, %edi /* add the magic value to the word. We get | |
170 | carry bits reported for each byte which | |
171 | is *not* 0 */ | |
172 | jnc L(6) /* highest byte is C => return pointer */ | |
173 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ | |
174 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
175 | incl %edi /* add 1: if one carry bit was *not* set | |
176 | the addition will not result in 0. */ | |
177 | jnz L(6) /* found it => return pointer */ | |
178 | ||
179 | movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ | |
180 | movl $0xfefefeff, %edi /* magic value */ | |
181 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c | |
182 | are now 0 */ | |
183 | addl %ecx, %edi /* add the magic value to the word. We get | |
184 | carry bits reported for each byte which | |
185 | is *not* 0 */ | |
186 | jnc L(5) /* highest byte is C => return pointer */ | |
187 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ | |
188 | orl $0xfefefeff, %edi /* set all non-carry bits */ | |
189 | incl %edi /* add 1: if one carry bit was *not* set | |
190 | the addition will not result in 0. */ | |
191 | jnz L(5) /* found it => return pointer */ | |
192 | ||
193 | /* Adjust both counters for a full round, i.e. 16 bytes. */ | |
194 | addl $16, %eax | |
195 | jmp L(1) | |
196 | /* add missing source pointer increments */ | |
197 | L(5): addl $4, %eax | |
198 | L(6): addl $4, %eax | |
199 | L(7): addl $4, %eax | |
200 | ||
201 | /* Test for the matching byte in the word. %ecx contains a NUL | |
202 | char in the byte which originally was the byte we are looking | |
203 | at. */ | |
204 | L(8): testb %cl, %cl /* test first byte in dword */ | |
205 | jz L(9) /* if zero => return pointer */ | |
206 | incl %eax /* increment source pointer */ | |
207 | ||
208 | testb %ch, %ch /* test second byte in dword */ | |
209 | jz L(9) /* if zero => return pointer */ | |
210 | incl %eax /* increment source pointer */ | |
211 | ||
212 | testl $0xff0000, %ecx /* test third byte in dword */ | |
213 | jz L(9) /* if zero => return pointer */ | |
214 | incl %eax /* increment source pointer */ | |
215 | ||
216 | /* No further test needed we we know it is one of the four bytes. */ | |
217 | ||
2fc08826 GM |
218 | L(9): |
219 | CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb) | |
220 | RETURN_BOUNDED_POINTER (STR(%esp)) | |
221 | popl %edi /* pop saved register */ | |
0ecb606c JJ |
222 | cfi_adjust_cfa_offset (-4) |
223 | cfi_restore (edi) | |
482eec0d | 224 | |
3f02f778 | 225 | LEAVE |
2fc08826 GM |
226 | RET_PTR |
227 | END (BP_SYM (__rawmemchr)) | |
228 | ||
37ba7d66 | 229 | libc_hidden_def (BP_SYM (__rawmemchr)) |
2fc08826 | 230 | weak_alias (BP_SYM (__rawmemchr), BP_SYM (rawmemchr)) |