]>
Commit | Line | Data |
---|---|---|
16d2ea4c UD |
1 | /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. |
2 | For AMD x86-64. | |
d4697bc9 | 3 | Copyright (C) 2002-2014 Free Software Foundation, Inc. |
16d2ea4c UD |
4 | This file is part of the GNU C Library. |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
16d2ea4c UD |
19 | |
20 | #include <sysdep.h> | |
21 | #include "asm-syntax.h" | |
16d2ea4c UD |
22 | |
23 | ||
24 | .text | |
29691210 | 25 | ENTRY (strchr) |
16d2ea4c UD |
26 | |
27 | /* Before we start with the main loop we process single bytes | |
28 | until the source pointer is aligned. This has two reasons: | |
29 | 1. aligned 64-bit memory access is faster | |
30 | and (more important) | |
31 | 2. we process in the main loop 64 bit in one step although | |
32 | we don't know the end of the string. But accessing at | |
33 | 8-byte alignment guarantees that we never access illegal | |
34 | memory if this would not also be done by the trivial | |
35 | implementation (this is because all processor inherent | |
36 | boundaries are multiples of 8). */ | |
37 | ||
38 | movq %rdi, %rdx | |
39 | andl $7, %edx /* Mask alignment bits */ | |
40 | movq %rdi, %rax /* duplicate destination. */ | |
41 | jz 1f /* aligned => start loop */ | |
42 | neg %edx | |
43 | addl $8, %edx /* Align to 8 bytes. */ | |
44 | ||
45 | /* Search the first bytes directly. */ | |
46 | 0: movb (%rax), %cl /* load byte */ | |
47 | cmpb %cl,%sil /* compare byte. */ | |
48 | je 6f /* target found */ | |
49 | testb %cl,%cl /* is byte NUL? */ | |
50 | je 7f /* yes => return NULL */ | |
51 | incq %rax /* increment pointer */ | |
52 | decl %edx | |
53 | jnz 0b | |
54 | ||
55 | ||
56 | 1: | |
57 | /* At the moment %rsi contains C. What we need for the | |
58 | algorithm is C in all bytes of the register. Avoid | |
59 | operations on 16 bit words because these require an | |
60 | prefix byte (and one more cycle). */ | |
61 | /* Populate 8 bit data to full 64-bit. */ | |
62 | movabs $0x0101010101010101,%r9 | |
63 | movzbl %sil,%edx | |
64 | imul %rdx,%r9 | |
65 | ||
66 | movq $0xfefefefefefefeff, %r8 /* Save magic. */ | |
67 | ||
68 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to | |
69 | change any of the hole bits of LONGWORD. | |
70 | ||
71 | 1) Is this safe? Will it catch all the zero bytes? | |
72 | Suppose there is a byte with all zeros. Any carry bits | |
73 | propagating from its left will fall into the hole at its | |
74 | least significant bit and stop. Since there will be no | |
75 | carry from its most significant bit, the LSB of the | |
76 | byte to the left will be unchanged, and the zero will be | |
77 | detected. | |
78 | ||
79 | 2) Is this worthwhile? Will it ignore everything except | |
80 | zero bytes? Suppose every byte of QUARDWORD has a bit set | |
81 | somewhere. There will be a carry into bit 8. If bit 8 | |
82 | is set, this will carry into bit 16. If bit 8 is clear, | |
83 | one of bits 9-15 must be set, so there will be a carry | |
84 | into bit 16. Similarly, there will be a carry into bit | |
85 | 24 tec.. If one of bits 54-63 is set, there will be a carry | |
86 | into bit 64 (=carry flag), so all of the hole bits will | |
87 | be changed. | |
88 | ||
89 | 3) But wait! Aren't we looking for C, not zero? | |
90 | Good point. So what we do is XOR LONGWORD with a longword, | |
91 | each of whose bytes is C. This turns each byte that is C | |
92 | into a zero. */ | |
93 | ||
94 | .p2align 4 | |
95 | 4: | |
96 | /* Main Loop is unrolled 4 times. */ | |
97 | /* First unroll. */ | |
98 | movq (%rax), %rcx /* get double word (= 8 bytes) in question */ | |
99 | addq $8,%rax /* adjust pointer for next word */ | |
100 | movq %r8, %rdx /* magic value */ | |
101 | xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c | |
102 | are now 0 */ | |
103 | addq %rcx, %rdx /* add the magic value to the word. We get | |
104 | carry bits reported for each byte which | |
105 | is *not* 0 */ | |
106 | jnc 3f /* highest byte is NUL => return pointer */ | |
107 | xorq %rcx, %rdx /* (word+magic)^word */ | |
108 | orq %r8, %rdx /* set all non-carry bits */ | |
109 | incq %rdx /* add 1: if one carry bit was *not* set | |
110 | the addition will not result in 0. */ | |
111 | jnz 3f /* found c => return pointer */ | |
112 | ||
113 | /* The quadword we looked at does not contain the value we're looking | |
114 | for. Let's search now whether we have reached the end of the | |
115 | string. */ | |
116 | xorq %r9, %rcx /* restore original dword without reload */ | |
117 | movq %r8, %rdx /* magic value */ | |
118 | addq %rcx, %rdx /* add the magic value to the word. We get | |
119 | carry bits reported for each byte which | |
120 | is *not* 0 */ | |
121 | jnc 7f /* highest byte is NUL => return NULL */ | |
122 | xorq %rcx, %rdx /* (word+magic)^word */ | |
123 | orq %r8, %rdx /* set all non-carry bits */ | |
124 | incq %rdx /* add 1: if one carry bit was *not* set | |
125 | the addition will not result in 0. */ | |
126 | jnz 7f /* found NUL => return NULL */ | |
127 | ||
128 | /* Second unroll. */ | |
129 | movq (%rax), %rcx /* get double word (= 8 bytes) in question */ | |
130 | addq $8,%rax /* adjust pointer for next word */ | |
131 | movq %r8, %rdx /* magic value */ | |
132 | xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c | |
133 | are now 0 */ | |
134 | addq %rcx, %rdx /* add the magic value to the word. We get | |
135 | carry bits reported for each byte which | |
136 | is *not* 0 */ | |
137 | jnc 3f /* highest byte is NUL => return pointer */ | |
138 | xorq %rcx, %rdx /* (word+magic)^word */ | |
139 | orq %r8, %rdx /* set all non-carry bits */ | |
140 | incq %rdx /* add 1: if one carry bit was *not* set | |
141 | the addition will not result in 0. */ | |
142 | jnz 3f /* found c => return pointer */ | |
143 | ||
144 | /* The quadword we looked at does not contain the value we're looking | |
145 | for. Let's search now whether we have reached the end of the | |
146 | string. */ | |
147 | xorq %r9, %rcx /* restore original dword without reload */ | |
148 | movq %r8, %rdx /* magic value */ | |
149 | addq %rcx, %rdx /* add the magic value to the word. We get | |
150 | carry bits reported for each byte which | |
151 | is *not* 0 */ | |
152 | jnc 7f /* highest byte is NUL => return NULL */ | |
153 | xorq %rcx, %rdx /* (word+magic)^word */ | |
154 | orq %r8, %rdx /* set all non-carry bits */ | |
155 | incq %rdx /* add 1: if one carry bit was *not* set | |
156 | the addition will not result in 0. */ | |
157 | jnz 7f /* found NUL => return NULL */ | |
158 | /* Third unroll. */ | |
159 | movq (%rax), %rcx /* get double word (= 8 bytes) in question */ | |
160 | addq $8,%rax /* adjust pointer for next word */ | |
161 | movq %r8, %rdx /* magic value */ | |
162 | xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c | |
163 | are now 0 */ | |
164 | addq %rcx, %rdx /* add the magic value to the word. We get | |
165 | carry bits reported for each byte which | |
166 | is *not* 0 */ | |
167 | jnc 3f /* highest byte is NUL => return pointer */ | |
168 | xorq %rcx, %rdx /* (word+magic)^word */ | |
169 | orq %r8, %rdx /* set all non-carry bits */ | |
170 | incq %rdx /* add 1: if one carry bit was *not* set | |
171 | the addition will not result in 0. */ | |
172 | jnz 3f /* found c => return pointer */ | |
173 | ||
174 | /* The quadword we looked at does not contain the value we're looking | |
175 | for. Let's search now whether we have reached the end of the | |
176 | string. */ | |
177 | xorq %r9, %rcx /* restore original dword without reload */ | |
178 | movq %r8, %rdx /* magic value */ | |
179 | addq %rcx, %rdx /* add the magic value to the word. We get | |
180 | carry bits reported for each byte which | |
181 | is *not* 0 */ | |
182 | jnc 7f /* highest byte is NUL => return NULL */ | |
183 | xorq %rcx, %rdx /* (word+magic)^word */ | |
184 | orq %r8, %rdx /* set all non-carry bits */ | |
185 | incq %rdx /* add 1: if one carry bit was *not* set | |
186 | the addition will not result in 0. */ | |
187 | jnz 7f /* found NUL => return NULL */ | |
188 | /* Fourth unroll. */ | |
189 | movq (%rax), %rcx /* get double word (= 8 bytes) in question */ | |
190 | addq $8,%rax /* adjust pointer for next word */ | |
191 | movq %r8, %rdx /* magic value */ | |
192 | xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c | |
193 | are now 0 */ | |
194 | addq %rcx, %rdx /* add the magic value to the word. We get | |
195 | carry bits reported for each byte which | |
196 | is *not* 0 */ | |
197 | jnc 3f /* highest byte is NUL => return pointer */ | |
198 | xorq %rcx, %rdx /* (word+magic)^word */ | |
199 | orq %r8, %rdx /* set all non-carry bits */ | |
200 | incq %rdx /* add 1: if one carry bit was *not* set | |
201 | the addition will not result in 0. */ | |
202 | jnz 3f /* found c => return pointer */ | |
203 | ||
204 | /* The quadword we looked at does not contain the value we're looking | |
205 | for. Let's search now whether we have reached the end of the | |
206 | string. */ | |
207 | xorq %r9, %rcx /* restore original dword without reload */ | |
208 | movq %r8, %rdx /* magic value */ | |
209 | addq %rcx, %rdx /* add the magic value to the word. We get | |
210 | carry bits reported for each byte which | |
211 | is *not* 0 */ | |
212 | jnc 7f /* highest byte is NUL => return NULL */ | |
213 | xorq %rcx, %rdx /* (word+magic)^word */ | |
214 | orq %r8, %rdx /* set all non-carry bits */ | |
215 | incq %rdx /* add 1: if one carry bit was *not* set | |
216 | the addition will not result in 0. */ | |
217 | jz 4b /* no NUL found => restart loop */ | |
218 | ||
219 | ||
220 | 7: /* Return NULL. */ | |
221 | xorl %eax, %eax | |
222 | retq | |
223 | ||
224 | ||
225 | /* We now scan for the byte in which the character was matched. | |
226 | But we have to take care of the case that a NUL char is | |
227 | found before this in the dword. Note that we XORed %rcx | |
228 | with the byte we're looking for, therefore the tests below look | |
229 | reversed. */ | |
230 | ||
231 | ||
232 | .p2align 4 /* Align, it's a jump target. */ | |
233 | 3: movq %r9,%rdx /* move to %rdx so that we can access bytes */ | |
234 | subq $8,%rax /* correct pointer increment. */ | |
235 | testb %cl, %cl /* is first byte C? */ | |
236 | jz 6f /* yes => return pointer */ | |
237 | cmpb %dl, %cl /* is first byte NUL? */ | |
238 | je 7b /* yes => return NULL */ | |
239 | incq %rax /* increment pointer */ | |
240 | ||
241 | testb %ch, %ch /* is second byte C? */ | |
242 | jz 6f /* yes => return pointer */ | |
243 | cmpb %dl, %ch /* is second byte NUL? */ | |
244 | je 7b /* yes => return NULL? */ | |
245 | incq %rax /* increment pointer */ | |
246 | ||
247 | shrq $16, %rcx /* make upper bytes accessible */ | |
248 | testb %cl, %cl /* is third byte C? */ | |
249 | jz 6f /* yes => return pointer */ | |
250 | cmpb %dl, %cl /* is third byte NUL? */ | |
251 | je 7b /* yes => return NULL */ | |
252 | incq %rax /* increment pointer */ | |
253 | ||
254 | testb %ch, %ch /* is fourth byte C? */ | |
255 | jz 6f /* yes => return pointer */ | |
256 | cmpb %dl, %ch /* is fourth byte NUL? */ | |
257 | je 7b /* yes => return NULL? */ | |
258 | incq %rax /* increment pointer */ | |
259 | ||
260 | shrq $16, %rcx /* make upper bytes accessible */ | |
261 | testb %cl, %cl /* is fifth byte C? */ | |
262 | jz 6f /* yes => return pointer */ | |
263 | cmpb %dl, %cl /* is fifth byte NUL? */ | |
264 | je 7b /* yes => return NULL */ | |
265 | incq %rax /* increment pointer */ | |
266 | ||
267 | testb %ch, %ch /* is sixth byte C? */ | |
268 | jz 6f /* yes => return pointer */ | |
269 | cmpb %dl, %ch /* is sixth byte NUL? */ | |
270 | je 7b /* yes => return NULL? */ | |
271 | incq %rax /* increment pointer */ | |
272 | ||
273 | shrq $16, %rcx /* make upper bytes accessible */ | |
274 | testb %cl, %cl /* is seventh byte C? */ | |
275 | jz 6f /* yes => return pointer */ | |
276 | cmpb %dl, %cl /* is seventh byte NUL? */ | |
277 | je 7b /* yes => return NULL */ | |
278 | ||
279 | /* It must be in the eigth byte and it cannot be NUL. */ | |
280 | incq %rax | |
281 | ||
282 | 6: | |
283 | nop | |
284 | retq | |
29691210 | 285 | END (strchr) |
16d2ea4c | 286 | |
29691210 | 287 | weak_alias (strchr, index) |
16d2ea4c | 288 | libc_hidden_builtin_def (strchr) |