]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/strlen.S
f40c262cbbd53c0f01d3dfe21270eb27126a26d0
[thirdparty/glibc.git] / sysdeps / i386 / strlen.S
1 /* strlen(str) -- determine the length of the string STR.
2 Optimized for Intel 80x86, x>=4.
3 Copyright (C) 1991-2020 Free Software Foundation, Inc.
4 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <https://www.gnu.org/licenses/>. */
20
21 #include <sysdep.h>
22 #include "asm-syntax.h"
23
24 #define PARMS 4 /* offset from %esp of the first argument at entry; no space for saved regs is needed because this routine pushes nothing (the 4 bytes skipped are assumed to be the return address) */
25 #define STR PARMS /* stack offset of the STR argument, read as STR(%esp) */
26
27 .text
28 ENTRY (strlen)
29 /* In:  STR(%esp) = pointer to the string.
   Out: %eax = number of bytes in front of the first NUL byte.
   Scratch: %ecx, %edx and the flags.  Nothing is pushed or popped.

   Outline: check up to three bytes one at a time until the pointer is
   aligned on a four byte boundary, then scan aligned 32-bit words
   (four words per loop iteration), and finally find the NUL inside
   the word that was reported to contain one.  Because whole aligned
   words are loaded, bytes following the NUL in the same word are
   read as well.  */
30 movl STR(%esp), %ecx /* %ecx = string pointer */
31 movl %ecx, %eax /* duplicate it: %eax is the running pointer */
32
33 andl $3, %ecx /* mask alignment bits; this also leaves %ch = 0 */
34 jz L(1) /* aligned => start loop */
35 cmpb %ch, (%eax) /* is byte NUL? (%ch is 0 after the andl above) */
36 je L(2) /* yes => return */
37 incl %eax /* increment pointer */
38
39 xorl $3, %ecx /* was alignment = 3? (maps 3 -> 0, 2 -> 1, 1 -> 2) */
40 jz L(1) /* yes => now it is aligned and start loop */
41 cmpb %ch, (%eax) /* is byte NUL? */
42 je L(2) /* yes => return */
43 addl $1, %eax /* increment pointer (deliberately not incl, see below) */
44
45 subl $1, %ecx /* was alignment = 2? (then %ecx was 1 after the xorl) */
46 jz L(1) /* yes => now it is aligned and start loop */
47 cmpb %ch, (%eax) /* is byte NUL? (only reached when alignment was 1) */
48 je L(2) /* yes => return */
49
50 /* Don't change the above `addl $1,%eax' and `subl $1, %ecx' into `incl %eax'
51 and `decl %ecx' resp. The additional two bytes per instruction make the
52 label 4 to be aligned on a 16 byte boundary with nops.
53
54 The following `subl $15, %eax' is part of this trick, too. Together with
55 the next instruction (`addl $16, %eax') it is in fact an `incl %eax', just
56 as expected from the algorithm. But doing so has the advantage that
57 no jump to label 1 is necessary and so the pipeline is not flushed. */
58
59 subl $15, %eax /* -15 here plus the +16 at L(4) below: effectively +1 */
60
61
62 L(4): addl $16, %eax /* loop back-edge: step over the four words (16 bytes) just checked */
63
64 L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
65 movl $0xfefefeff, %edx /* magic value: 0xff for the lowest byte, 0xfe for the other three */
66 addl %ecx, %edx /* add the magic value to the word. We get
67 carry bits reported for each byte which
68 is *not* 0 */
69 jnc L(3) /* no carry out of the top byte => word < 0x01010101, so some byte of it is NUL => go locate it */
70 xorl %ecx, %edx /* (word+magic)^word: bits 8, 16 and 24 now hold the carries out of the three lower bytes */
71 orl $0xfefefeff, %edx /* set all non-carry bits, i.e. every bit except 8, 16 and 24 */
72 incl %edx /* add 1: if one carry bit was *not* set
73 the addition will not result in 0. */
74 jnz L(3) /* found NUL in this word => go locate it */
75
76 movl 4(%eax), %ecx /* get word (= 4 bytes) in question: second word of this iteration */
77 movl $0xfefefeff, %edx /* magic value */
78 addl %ecx, %edx /* add the magic value to the word. We get
79 carry bits reported for each byte which
80 is *not* 0 */
81 jnc L(5) /* no carry out of the top byte => word contains a NUL; L(5) adds 4 to %eax first */
82 xorl %ecx, %edx /* (word+magic)^word */
83 orl $0xfefefeff, %edx /* set all non-carry bits */
84 incl %edx /* add 1: if one carry bit was *not* set
85 the addition will not result in 0. */
86 jnz L(5) /* found NUL => go locate it, after adding 4 to %eax */
87
88 movl 8(%eax), %ecx /* get word (= 4 bytes) in question: third word of this iteration */
89 movl $0xfefefeff, %edx /* magic value */
90 addl %ecx, %edx /* add the magic value to the word. We get
91 carry bits reported for each byte which
92 is *not* 0 */
93 jnc L(6) /* no carry out of the top byte => word contains a NUL; L(6) adds 8 to %eax first */
94 xorl %ecx, %edx /* (word+magic)^word */
95 orl $0xfefefeff, %edx /* set all non-carry bits */
96 incl %edx /* add 1: if one carry bit was *not* set
97 the addition will not result in 0. */
98 jnz L(6) /* found NUL => go locate it, after adding 8 to %eax */
99
100 movl 12(%eax), %ecx /* get word (= 4 bytes) in question: fourth word of this iteration */
101 movl $0xfefefeff, %edx /* magic value */
102 addl %ecx, %edx /* add the magic value to the word. We get
103 carry bits reported for each byte which
104 is *not* 0 */
105 jnc L(7) /* no carry out of the top byte => word contains a NUL; L(7) adds 12 to %eax first */
106 xorl %ecx, %edx /* (word+magic)^word */
107 orl $0xfefefeff, %edx /* set all non-carry bits */
108 incl %edx /* add 1: if one carry bit was *not* set
109 the addition will not result in 0. */
110 jz L(4) /* no NUL found => continue loop */
111
112 L(7): addl $4, %eax /* adjust pointer: the three labels fall through, so L(7) adds 12 in total */
113 L(6): addl $4, %eax /* entered here: adds 8 in total */
114 L(5): addl $4, %eax /* entered here: adds 4 */
115
116 L(3): testb %cl, %cl /* is first byte NUL? (here %eax = address of the word holding a NUL, %ecx = that word) */
117 jz L(2) /* yes => return */
118 incl %eax /* increment pointer */
119
120 testb %ch, %ch /* is second byte NUL? */
121 jz L(2) /* yes => return */
122 incl %eax /* increment pointer */
123
124 testl $0xff0000, %ecx /* is third byte NUL? */
125 jz L(2) /* yes => return pointer */
126 incl %eax /* increment pointer: the NUL can only be the fourth byte */
127
128 L(2): subl STR(%esp), %eax /* compute difference to string start: %eax = address of the NUL - STR = length */
129
130 ret
131 END (strlen)
132 libc_hidden_builtin_def (strlen)