]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i586/strlen.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / i386 / i586 / strlen.S
1 /* strlen -- Compute length of NUL terminated string.
2 Highly optimized version for ix86, x>=5.
3 Copyright (C) 1995-2021 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <https://www.gnu.org/licenses/>. */
20
21 #include <sysdep.h>
22 #include "asm-syntax.h"
23
24 /* This version is especially optimized for the i586 (and following?)
25 processors. This is mainly done by using the two pipelines. The
26 version optimized for i486 is weak in this aspect because to get
27 as much parallelism we have to execute some *more* instructions.
28
29 The code below is structured to reflect the pairing of the instructions
30 as *I think* it is. I have no processor data book to verify this.
31 If you find something you think is incorrect let me know. */
32
33
34 /* The magic value which is used throughout in the whole code.
   Adding it to a 32-bit word is the same as subtracting 0x01010101
   (modulo 2^32).  Its complement, ~magic == 0x01010100, is the mask the
   word loop applies to pick out bits 8, 16 and 24.  */
35 #define magic 0xfefefeff
36
37 #define PARMS 4 /* stack offset used to reach the argument; no space for saved regs */
38 #define STR PARMS /* offset from %esp of the string pointer argument */
39
40 .text
/* strlen -- compute the length of a NUL terminated string.

   In:       STR(%esp) holds the pointer to the string.
   Out:      %eax = address of the terminating NUL minus the start address.
   Modifies: %eax, %ecx, %edx and the flags.  Nothing is pushed or popped.

   Outline:
     1. Test up to three leading bytes one at a time until the pointer
        is a multiple of 4.
     2. L(1): test one 32-bit word per step with the magic-value trick;
        the test is written out four times per loop iteration.
     3. L(3): a word containing a NUL was found; locate the exact byte.
     4. L(2): subtract the start address and return.  */
41 ENTRY (strlen)
42
43 movl STR(%esp), %eax /* %eax = running pointer, starts at the string */
44 movl $3, %edx /* load mask (= 3) */
45
46 andl %eax, %edx /* %edx = address & 3; sets ZF and PF for the two jumps below */
47
48 jz L(1) /* aligned => start loop */
49 jp L(0) /* even parity and non-zero => %edx == 3: one byte to alignment */
50
/* Here %edx is 1 or 2, so at least two bytes precede the next aligned
   address.  %dh is zero (because %edx <= 3) and serves as the NUL to
   compare against.  */
51 cmpb %dh, (%eax) /* is byte NUL? */
52 je L(2) /* yes => return */
53
54 incl %eax /* increment pointer */
55 cmpb %dh, (%eax) /* is byte NUL? */
56
57 je L(2) /* yes => return */
58
59 incl %eax /* increment pointer */
60 xorl $2, %edx /* 2 -> 0 (pointer now aligned), 1 -> 3 (one more byte) */
61
62 jz L(1) /* aligned, and %edx == 0 as the loop requires */
63
64 L(0): cmpb %dh, (%eax) /* is byte NUL? */
65 je L(2) /* yes => return */
66
67 incl %eax /* increment pointer */
68 xorl %edx, %edx /* We need %edx == 0 for later */
69
70 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
71 change any of the hole bits of LONGWORD.
72
73 1) Is this safe? Will it catch all the zero bytes?
74 Suppose there is a byte with all zeros. Any carry bits
75 propagating from its left will fall into the hole at its
76 least significant bit and stop. Since there will be no
77 carry from its most significant bit, the LSB of the
78 byte to the left will be unchanged, and the zero will be
79 detected.
80
81 2) Is this worthwhile? Will it ignore everything except
82 zero bytes? Suppose every byte of LONGWORD has a bit set
83 somewhere. There will be a carry into bit 8. If bit 8
84 is set, this will carry into bit 16. If bit 8 is clear,
85 one of bits 9-15 must be set, so there will be a carry
86 into bit 16. Similarly, there will be a carry into bit
87 24. If one of bits 24-31 is set, there will be a carry
88 into bit 32 (=carry flag), so all of the hole bits will
89 be changed.
90
91 Note: %edx == 0 in any case here. */
92
/* First of four identical copies of the word test.  Each copy that
   falls through leaves %edx == 0 (the andl result), which the next
   copy's subl relies on.  */
93 L(1):
94 movl (%eax), %ecx /* get word (= 4 bytes) in question */
95 addl $4, %eax /* adjust pointer for *next* word */
96
97 subl %ecx, %edx /* %edx = 0 - word: first step to negate word */
98 addl $magic, %ecx /* add magic word; CF = carry out of the top bit */
99
100 decl %edx /* %edx = ~word; decl leaves CF from the addl intact */
101 jnc L(3) /* addl produced no carry => word contains a NUL */
102
103 xorl %ecx, %edx /* (word+magic) ^ ~word: 1 where addl left a bit unchanged */
104
105 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
106
107 jne L(3) /* some hole bit unchanged => determine byte */
108
109
/* Second copy.  */
110 movl (%eax), %ecx /* get word (= 4 bytes) in question */
111 addl $4, %eax /* adjust pointer for *next* word */
112
113 subl %ecx, %edx /* %edx = 0 - word: first step to negate word */
114 addl $magic, %ecx /* add magic word; CF = carry out of the top bit */
115
116 decl %edx /* %edx = ~word; decl leaves CF from the addl intact */
117 jnc L(3) /* addl produced no carry => word contains a NUL */
118
119 xorl %ecx, %edx /* (word+magic) ^ ~word: 1 where addl left a bit unchanged */
120
121 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
122
123 jne L(3) /* some hole bit unchanged => determine byte */
124
125
/* Third copy.  */
126 movl (%eax), %ecx /* get word (= 4 bytes) in question */
127 addl $4, %eax /* adjust pointer for *next* word */
128
129 subl %ecx, %edx /* %edx = 0 - word: first step to negate word */
130 addl $magic, %ecx /* add magic word; CF = carry out of the top bit */
131
132 decl %edx /* %edx = ~word; decl leaves CF from the addl intact */
133 jnc L(3) /* addl produced no carry => word contains a NUL */
134
135 xorl %ecx, %edx /* (word+magic) ^ ~word: 1 where addl left a bit unchanged */
136
137 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
138
139 jne L(3) /* some hole bit unchanged => determine byte */
140
141
/* Fourth copy; its final branch closes the loop.  */
142 movl (%eax), %ecx /* get word (= 4 bytes) in question */
143 addl $4, %eax /* adjust pointer for *next* word */
144
145 subl %ecx, %edx /* %edx = 0 - word: first step to negate word */
146 addl $magic, %ecx /* add magic word; CF = carry out of the top bit */
147
148 decl %edx /* %edx = ~word; decl leaves CF from the addl intact */
149 jnc L(3) /* addl produced no carry => word contains a NUL */
150
151 xorl %ecx, %edx /* (word+magic) ^ ~word: 1 where addl left a bit unchanged */
152
153 andl $~magic, %edx /* keep only the hole bits 8, 16, 24 */
154
155 je L(1) /* no => start loop again (with %edx == 0) */
156
157
/* A NUL lies somewhere in the word just tested.  %eax already points
   past that word and %ecx still holds word+magic, so undo both and
   then test the bytes from the lowest address upwards.  */
158 L(3): subl $4, %eax /* correct too early pointer increment */
159 subl $magic, %ecx /* undo the addl: %ecx = the word as loaded */
160
161 cmpb $0, %cl /* lowest byte NUL? */
162 jz L(2) /* yes => return */
163
164 inc %eax /* increment pointer */
165 testb %ch, %ch /* second byte NUL? */
166
167 jz L(2) /* yes => return */
168
169 shrl $16, %ecx /* make upper bytes accessible */
170 incl %eax /* increment pointer */
171
172 cmpb $0, %cl /* is third byte NUL? */
173 jz L(2) /* yes => return */
174
175 incl %eax /* not in the first three bytes, so point at the fourth */
176
177 L(2): subl STR(%esp), %eax /* now compute the length as difference
178 between start and terminating NUL
179 character */
180 ret
181 END (strlen)
182 libc_hidden_builtin_def (strlen)