]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/rtld-strchr.S
Update copyright notices with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / x86_64 / rtld-strchr.S
1 /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
2 For AMD x86-64.
3 Copyright (C) 2002-2013 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21 #include "asm-syntax.h"
22 #include "bp-sym.h"
23 #include "bp-asm.h"
24
25
26 .text
27 ENTRY (BP_SYM (strchr))
28
29 /* Before we start with the main loop we process single bytes
30 until the source pointer is aligned. This has two reasons:
31 1. aligned 64-bit memory access is faster
32 and (more important)
33 2. we process in the main loop 64 bit in one step although
34 we don't know the end of the string. But accessing at
35 8-byte alignment guarantees that we never access illegal
36 memory if this would not also be done by the trivial
37 implementation (this is because all processor inherent
38 boundaries are multiples of 8). */
39
40 movq %rdi, %rdx
41 andl $7, %edx /* Mask alignment bits */
42 movq %rdi, %rax /* duplicate destination. */
43 jz 1f /* aligned => start loop */
44 neg %edx
45 addl $8, %edx /* Align to 8 bytes. */
46
47 /* Search the first bytes directly. */
48 0: movb (%rax), %cl /* load byte */
49 cmpb %cl,%sil /* compare byte. */
50 je 6f /* target found */
51 testb %cl,%cl /* is byte NUL? */
52 je 7f /* yes => return NULL */
53 incq %rax /* increment pointer */
54 decl %edx
55 jnz 0b
56
57
58 1:
59 /* At the moment %rsi contains C. What we need for the
60 algorithm is C in all bytes of the register. Avoid
61 operations on 16 bit words because these require an
62 prefix byte (and one more cycle). */
63 /* Populate 8 bit data to full 64-bit. */
64 movabs $0x0101010101010101,%r9
65 movzbl %sil,%edx
66 imul %rdx,%r9
67
68 movq $0xfefefefefefefeff, %r8 /* Save magic. */
69
70 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
71 change any of the hole bits of LONGWORD.
72
73 1) Is this safe? Will it catch all the zero bytes?
74 Suppose there is a byte with all zeros. Any carry bits
75 propagating from its left will fall into the hole at its
76 least significant bit and stop. Since there will be no
77 carry from its most significant bit, the LSB of the
78 byte to the left will be unchanged, and the zero will be
79 detected.
80
81 2) Is this worthwhile? Will it ignore everything except
82 zero bytes? Suppose every byte of QUARDWORD has a bit set
83 somewhere. There will be a carry into bit 8. If bit 8
84 is set, this will carry into bit 16. If bit 8 is clear,
85 one of bits 9-15 must be set, so there will be a carry
86 into bit 16. Similarly, there will be a carry into bit
87 24 tec.. If one of bits 54-63 is set, there will be a carry
88 into bit 64 (=carry flag), so all of the hole bits will
89 be changed.
90
91 3) But wait! Aren't we looking for C, not zero?
92 Good point. So what we do is XOR LONGWORD with a longword,
93 each of whose bytes is C. This turns each byte that is C
94 into a zero. */
95
96 .p2align 4
97 4:
98 /* Main Loop is unrolled 4 times. */
99 /* First unroll. */
100 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
101 addq $8,%rax /* adjust pointer for next word */
102 movq %r8, %rdx /* magic value */
103 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
104 are now 0 */
105 addq %rcx, %rdx /* add the magic value to the word. We get
106 carry bits reported for each byte which
107 is *not* 0 */
108 jnc 3f /* highest byte is NUL => return pointer */
109 xorq %rcx, %rdx /* (word+magic)^word */
110 orq %r8, %rdx /* set all non-carry bits */
111 incq %rdx /* add 1: if one carry bit was *not* set
112 the addition will not result in 0. */
113 jnz 3f /* found c => return pointer */
114
115 /* The quadword we looked at does not contain the value we're looking
116 for. Let's search now whether we have reached the end of the
117 string. */
118 xorq %r9, %rcx /* restore original dword without reload */
119 movq %r8, %rdx /* magic value */
120 addq %rcx, %rdx /* add the magic value to the word. We get
121 carry bits reported for each byte which
122 is *not* 0 */
123 jnc 7f /* highest byte is NUL => return NULL */
124 xorq %rcx, %rdx /* (word+magic)^word */
125 orq %r8, %rdx /* set all non-carry bits */
126 incq %rdx /* add 1: if one carry bit was *not* set
127 the addition will not result in 0. */
128 jnz 7f /* found NUL => return NULL */
129
130 /* Second unroll. */
131 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
132 addq $8,%rax /* adjust pointer for next word */
133 movq %r8, %rdx /* magic value */
134 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
135 are now 0 */
136 addq %rcx, %rdx /* add the magic value to the word. We get
137 carry bits reported for each byte which
138 is *not* 0 */
139 jnc 3f /* highest byte is NUL => return pointer */
140 xorq %rcx, %rdx /* (word+magic)^word */
141 orq %r8, %rdx /* set all non-carry bits */
142 incq %rdx /* add 1: if one carry bit was *not* set
143 the addition will not result in 0. */
144 jnz 3f /* found c => return pointer */
145
146 /* The quadword we looked at does not contain the value we're looking
147 for. Let's search now whether we have reached the end of the
148 string. */
149 xorq %r9, %rcx /* restore original dword without reload */
150 movq %r8, %rdx /* magic value */
151 addq %rcx, %rdx /* add the magic value to the word. We get
152 carry bits reported for each byte which
153 is *not* 0 */
154 jnc 7f /* highest byte is NUL => return NULL */
155 xorq %rcx, %rdx /* (word+magic)^word */
156 orq %r8, %rdx /* set all non-carry bits */
157 incq %rdx /* add 1: if one carry bit was *not* set
158 the addition will not result in 0. */
159 jnz 7f /* found NUL => return NULL */
160 /* Third unroll. */
161 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
162 addq $8,%rax /* adjust pointer for next word */
163 movq %r8, %rdx /* magic value */
164 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
165 are now 0 */
166 addq %rcx, %rdx /* add the magic value to the word. We get
167 carry bits reported for each byte which
168 is *not* 0 */
169 jnc 3f /* highest byte is NUL => return pointer */
170 xorq %rcx, %rdx /* (word+magic)^word */
171 orq %r8, %rdx /* set all non-carry bits */
172 incq %rdx /* add 1: if one carry bit was *not* set
173 the addition will not result in 0. */
174 jnz 3f /* found c => return pointer */
175
176 /* The quadword we looked at does not contain the value we're looking
177 for. Let's search now whether we have reached the end of the
178 string. */
179 xorq %r9, %rcx /* restore original dword without reload */
180 movq %r8, %rdx /* magic value */
181 addq %rcx, %rdx /* add the magic value to the word. We get
182 carry bits reported for each byte which
183 is *not* 0 */
184 jnc 7f /* highest byte is NUL => return NULL */
185 xorq %rcx, %rdx /* (word+magic)^word */
186 orq %r8, %rdx /* set all non-carry bits */
187 incq %rdx /* add 1: if one carry bit was *not* set
188 the addition will not result in 0. */
189 jnz 7f /* found NUL => return NULL */
190 /* Fourth unroll. */
191 movq (%rax), %rcx /* get double word (= 8 bytes) in question */
192 addq $8,%rax /* adjust pointer for next word */
193 movq %r8, %rdx /* magic value */
194 xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
195 are now 0 */
196 addq %rcx, %rdx /* add the magic value to the word. We get
197 carry bits reported for each byte which
198 is *not* 0 */
199 jnc 3f /* highest byte is NUL => return pointer */
200 xorq %rcx, %rdx /* (word+magic)^word */
201 orq %r8, %rdx /* set all non-carry bits */
202 incq %rdx /* add 1: if one carry bit was *not* set
203 the addition will not result in 0. */
204 jnz 3f /* found c => return pointer */
205
206 /* The quadword we looked at does not contain the value we're looking
207 for. Let's search now whether we have reached the end of the
208 string. */
209 xorq %r9, %rcx /* restore original dword without reload */
210 movq %r8, %rdx /* magic value */
211 addq %rcx, %rdx /* add the magic value to the word. We get
212 carry bits reported for each byte which
213 is *not* 0 */
214 jnc 7f /* highest byte is NUL => return NULL */
215 xorq %rcx, %rdx /* (word+magic)^word */
216 orq %r8, %rdx /* set all non-carry bits */
217 incq %rdx /* add 1: if one carry bit was *not* set
218 the addition will not result in 0. */
219 jz 4b /* no NUL found => restart loop */
220
221
222 7: /* Return NULL. */
223 xorl %eax, %eax
224 retq
225
226
227 /* We now scan for the byte in which the character was matched.
228 But we have to take care of the case that a NUL char is
229 found before this in the dword. Note that we XORed %rcx
230 with the byte we're looking for, therefore the tests below look
231 reversed. */
232
233
234 .p2align 4 /* Align, it's a jump target. */
235 3: movq %r9,%rdx /* move to %rdx so that we can access bytes */
236 subq $8,%rax /* correct pointer increment. */
237 testb %cl, %cl /* is first byte C? */
238 jz 6f /* yes => return pointer */
239 cmpb %dl, %cl /* is first byte NUL? */
240 je 7b /* yes => return NULL */
241 incq %rax /* increment pointer */
242
243 testb %ch, %ch /* is second byte C? */
244 jz 6f /* yes => return pointer */
245 cmpb %dl, %ch /* is second byte NUL? */
246 je 7b /* yes => return NULL? */
247 incq %rax /* increment pointer */
248
249 shrq $16, %rcx /* make upper bytes accessible */
250 testb %cl, %cl /* is third byte C? */
251 jz 6f /* yes => return pointer */
252 cmpb %dl, %cl /* is third byte NUL? */
253 je 7b /* yes => return NULL */
254 incq %rax /* increment pointer */
255
256 testb %ch, %ch /* is fourth byte C? */
257 jz 6f /* yes => return pointer */
258 cmpb %dl, %ch /* is fourth byte NUL? */
259 je 7b /* yes => return NULL? */
260 incq %rax /* increment pointer */
261
262 shrq $16, %rcx /* make upper bytes accessible */
263 testb %cl, %cl /* is fifth byte C? */
264 jz 6f /* yes => return pointer */
265 cmpb %dl, %cl /* is fifth byte NUL? */
266 je 7b /* yes => return NULL */
267 incq %rax /* increment pointer */
268
269 testb %ch, %ch /* is sixth byte C? */
270 jz 6f /* yes => return pointer */
271 cmpb %dl, %ch /* is sixth byte NUL? */
272 je 7b /* yes => return NULL? */
273 incq %rax /* increment pointer */
274
275 shrq $16, %rcx /* make upper bytes accessible */
276 testb %cl, %cl /* is seventh byte C? */
277 jz 6f /* yes => return pointer */
278 cmpb %dl, %cl /* is seventh byte NUL? */
279 je 7b /* yes => return NULL */
280
281 /* It must be in the eigth byte and it cannot be NUL. */
282 incq %rax
283
284 6:
285 nop
286 retq
287 END (BP_SYM (strchr))
288
289 weak_alias (BP_SYM (strchr), BP_SYM (index))
290 libc_hidden_builtin_def (strchr)