]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/rtld-strchr.S
984c868f1d35869a3c6912c5716dfe60dfab4358
[thirdparty/glibc.git] / sysdeps / x86_64 / rtld-strchr.S
1 /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
2 For AMD x86-64.
3 Copyright (C) 2002-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21 #include "asm-syntax.h"
22
23
24 .text
25 ENTRY (strchr)
26
27 /* Before we start with the main loop we process single bytes
28 until the source pointer is aligned. This has two reasons:
29 1. aligned 64-bit memory access is faster
30 and (more important)
31 2. we process in the main loop 64 bit in one step although
32 we don't know the end of the string. But accessing at
33 8-byte alignment guarantees that we never access illegal
34 memory if this would not also be done by the trivial
35 implementation (this is because all processor inherent
36 boundaries are multiples of 8). */
37
38 movq %rdi, %rdx
39 andl $7, %edx /* Mask alignment bits */
40 movq %rdi, %rax /* duplicate destination. */
41 jz 1f /* aligned => start loop */
42 neg %edx
43 addl $8, %edx /* Align to 8 bytes. */
44
45 /* Search the first bytes directly. */
46 0: movb (%rax), %cl /* load byte */
47 cmpb %cl,%sil /* compare byte. */
48 je 6f /* target found */
49 testb %cl,%cl /* is byte NUL? */
50 je 7f /* yes => return NULL */
51 incq %rax /* increment pointer */
52 decl %edx
53 jnz 0b
54
55
56 1:
57 /* At the moment %rsi contains C. What we need for the
58 algorithm is C in all bytes of the register. Avoid
59 operations on 16 bit words because these require an
60 prefix byte (and one more cycle). */
61 /* Populate 8 bit data to full 64-bit. */
62 movabs $0x0101010101010101,%r9
63 movzbl %sil,%edx
64 imul %rdx,%r9
65
66 movq $0xfefefefefefefeff, %r8 /* Save magic. */
67
68 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
69 change any of the hole bits of LONGWORD.
70
71 1) Is this safe? Will it catch all the zero bytes?
72 Suppose there is a byte with all zeros. Any carry bits
73 propagating from its left will fall into the hole at its
74 least significant bit and stop. Since there will be no
75 carry from its most significant bit, the LSB of the
76 byte to the left will be unchanged, and the zero will be
77 detected.
78
79 2) Is this worthwhile? Will it ignore everything except
80 zero bytes? Suppose every byte of QUARDWORD has a bit set
81 somewhere. There will be a carry into bit 8. If bit 8
82 is set, this will carry into bit 16. If bit 8 is clear,
83 one of bits 9-15 must be set, so there will be a carry
84 into bit 16. Similarly, there will be a carry into bit
85 24 tec.. If one of bits 54-63 is set, there will be a carry
86 into bit 64 (=carry flag), so all of the hole bits will
87 be changed.
88
89 3) But wait! Aren't we looking for C, not zero?
90 Good point. So what we do is XOR LONGWORD with a longword,
91 each of whose bytes is C. This turns each byte that is C
92 into a zero. */
93
94 .p2align 4
95 4:
96 /* Main Loop is unrolled 4 times. */
97 /* First unroll. */
98 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
99 addq $8,%rax /* adjust pointer for next word */
100 movq %r8, %rdx /* magic value */
101 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
102 are now 0 */
103 addq %rcx, %rdx /* add the magic value to the word. We get
104 carry bits reported for each byte which
105 is *not* 0 */
106 jnc 3f /* highest byte is NUL => return pointer */
107 xorq %rcx, %rdx /* (word+magic)^word */
108 orq %r8, %rdx /* set all non-carry bits */
109 incq %rdx /* add 1: if one carry bit was *not* set
110 the addition will not result in 0. */
111 jnz 3f /* found c => return pointer */
112
113 /* The quadword we looked at does not contain the value we're looking
114 for. Let's search now whether we have reached the end of the
115 string. */
116 xorq %r9, %rcx /* restore original dword without reload */
117 movq %r8, %rdx /* magic value */
118 addq %rcx, %rdx /* add the magic value to the word. We get
119 carry bits reported for each byte which
120 is *not* 0 */
121 jnc 7f /* highest byte is NUL => return NULL */
122 xorq %rcx, %rdx /* (word+magic)^word */
123 orq %r8, %rdx /* set all non-carry bits */
124 incq %rdx /* add 1: if one carry bit was *not* set
125 the addition will not result in 0. */
126 jnz 7f /* found NUL => return NULL */
127
128 /* Second unroll. */
129 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
130 addq $8,%rax /* adjust pointer for next word */
131 movq %r8, %rdx /* magic value */
132 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
133 are now 0 */
134 addq %rcx, %rdx /* add the magic value to the word. We get
135 carry bits reported for each byte which
136 is *not* 0 */
137 jnc 3f /* highest byte is NUL => return pointer */
138 xorq %rcx, %rdx /* (word+magic)^word */
139 orq %r8, %rdx /* set all non-carry bits */
140 incq %rdx /* add 1: if one carry bit was *not* set
141 the addition will not result in 0. */
142 jnz 3f /* found c => return pointer */
143
144 /* The quadword we looked at does not contain the value we're looking
145 for. Let's search now whether we have reached the end of the
146 string. */
147 xorq %r9, %rcx /* restore original dword without reload */
148 movq %r8, %rdx /* magic value */
149 addq %rcx, %rdx /* add the magic value to the word. We get
150 carry bits reported for each byte which
151 is *not* 0 */
152 jnc 7f /* highest byte is NUL => return NULL */
153 xorq %rcx, %rdx /* (word+magic)^word */
154 orq %r8, %rdx /* set all non-carry bits */
155 incq %rdx /* add 1: if one carry bit was *not* set
156 the addition will not result in 0. */
157 jnz 7f /* found NUL => return NULL */
158 /* Third unroll. */
159 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
160 addq $8,%rax /* adjust pointer for next word */
161 movq %r8, %rdx /* magic value */
162 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
163 are now 0 */
164 addq %rcx, %rdx /* add the magic value to the word. We get
165 carry bits reported for each byte which
166 is *not* 0 */
167 jnc 3f /* highest byte is NUL => return pointer */
168 xorq %rcx, %rdx /* (word+magic)^word */
169 orq %r8, %rdx /* set all non-carry bits */
170 incq %rdx /* add 1: if one carry bit was *not* set
171 the addition will not result in 0. */
172 jnz 3f /* found c => return pointer */
173
174 /* The quadword we looked at does not contain the value we're looking
175 for. Let's search now whether we have reached the end of the
176 string. */
177 xorq %r9, %rcx /* restore original dword without reload */
178 movq %r8, %rdx /* magic value */
179 addq %rcx, %rdx /* add the magic value to the word. We get
180 carry bits reported for each byte which
181 is *not* 0 */
182 jnc 7f /* highest byte is NUL => return NULL */
183 xorq %rcx, %rdx /* (word+magic)^word */
184 orq %r8, %rdx /* set all non-carry bits */
185 incq %rdx /* add 1: if one carry bit was *not* set
186 the addition will not result in 0. */
187 jnz 7f /* found NUL => return NULL */
188 /* Fourth unroll. */
189 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
190 addq $8,%rax /* adjust pointer for next word */
191 movq %r8, %rdx /* magic value */
192 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
193 are now 0 */
194 addq %rcx, %rdx /* add the magic value to the word. We get
195 carry bits reported for each byte which
196 is *not* 0 */
197 jnc 3f /* highest byte is NUL => return pointer */
198 xorq %rcx, %rdx /* (word+magic)^word */
199 orq %r8, %rdx /* set all non-carry bits */
200 incq %rdx /* add 1: if one carry bit was *not* set
201 the addition will not result in 0. */
202 jnz 3f /* found c => return pointer */
203
204 /* The quadword we looked at does not contain the value we're looking
205 for. Let's search now whether we have reached the end of the
206 string. */
207 xorq %r9, %rcx /* restore original dword without reload */
208 movq %r8, %rdx /* magic value */
209 addq %rcx, %rdx /* add the magic value to the word. We get
210 carry bits reported for each byte which
211 is *not* 0 */
212 jnc 7f /* highest byte is NUL => return NULL */
213 xorq %rcx, %rdx /* (word+magic)^word */
214 orq %r8, %rdx /* set all non-carry bits */
215 incq %rdx /* add 1: if one carry bit was *not* set
216 the addition will not result in 0. */
217 jz 4b /* no NUL found => restart loop */
218
219
220 7: /* Return NULL. */
221 xorl %eax, %eax
222 retq
223
224
225 /* We now scan for the byte in which the character was matched.
226 But we have to take care of the case that a NUL char is
227 found before this in the dword. Note that we XORed %rcx
228 with the byte we're looking for, therefore the tests below look
229 reversed. */
230
231
232 .p2align 4 /* Align, it's a jump target. */
233 3: movq %r9,%rdx /* move to %rdx so that we can access bytes */
234 subq $8,%rax /* correct pointer increment. */
235 testb %cl, %cl /* is first byte C? */
236 jz 6f /* yes => return pointer */
237 cmpb %dl, %cl /* is first byte NUL? */
238 je 7b /* yes => return NULL */
239 incq %rax /* increment pointer */
240
241 testb %ch, %ch /* is second byte C? */
242 jz 6f /* yes => return pointer */
243 cmpb %dl, %ch /* is second byte NUL? */
244 je 7b /* yes => return NULL? */
245 incq %rax /* increment pointer */
246
247 shrq $16, %rcx /* make upper bytes accessible */
248 testb %cl, %cl /* is third byte C? */
249 jz 6f /* yes => return pointer */
250 cmpb %dl, %cl /* is third byte NUL? */
251 je 7b /* yes => return NULL */
252 incq %rax /* increment pointer */
253
254 testb %ch, %ch /* is fourth byte C? */
255 jz 6f /* yes => return pointer */
256 cmpb %dl, %ch /* is fourth byte NUL? */
257 je 7b /* yes => return NULL? */
258 incq %rax /* increment pointer */
259
260 shrq $16, %rcx /* make upper bytes accessible */
261 testb %cl, %cl /* is fifth byte C? */
262 jz 6f /* yes => return pointer */
263 cmpb %dl, %cl /* is fifth byte NUL? */
264 je 7b /* yes => return NULL */
265 incq %rax /* increment pointer */
266
267 testb %ch, %ch /* is sixth byte C? */
268 jz 6f /* yes => return pointer */
269 cmpb %dl, %ch /* is sixth byte NUL? */
270 je 7b /* yes => return NULL? */
271 incq %rax /* increment pointer */
272
273 shrq $16, %rcx /* make upper bytes accessible */
274 testb %cl, %cl /* is seventh byte C? */
275 jz 6f /* yes => return pointer */
276 cmpb %dl, %cl /* is seventh byte NUL? */
277 je 7b /* yes => return NULL */
278
279 /* It must be in the eigth byte and it cannot be NUL. */
280 incq %rax
281
282 6:
283 nop
284 retq
285 END (strchr)
286
287 weak_alias (strchr, index)
288 libc_hidden_builtin_def (strchr)