]>
Commit | Line | Data |
---|---|---|
78df0fcb | 1 | /* strcpy/stpcpy implementation for x86-64. |
d614a753 | 2 | Copyright (C) 2002-2020 Free Software Foundation, Inc. |
78df0fcb AJ |
3 | This file is part of the GNU C Library. |
4 | Contributed by Andreas Jaeger <aj@suse.de>, 2002. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
78df0fcb AJ |
19 | |
20 | #include <sysdep.h> | |
21 | #include "asm-syntax.h" | |
78df0fcb AJ |
22 | |
23 | #ifndef USE_AS_STPCPY | |
24 | # define STRCPY strcpy /* Default entry-point name for the plain strcpy build. NOTE(review): a stpcpy build presumably defines STRCPY itself before this point - confirm. */ | |
25 | #endif | |
26 | ||
27 | .text | |
29691210 | 28 | ENTRY (STRCPY) /* In: %rdi = destination, %rsi = source. Out: %rax (set at label 4). Modifies %rcx, %rdx, %rsi, %r8, %r9 and the flags. */ |
78df0fcb AJ |
29 | movq %rsi, %rcx /* Copy of the source pointer, used to derive its alignment. */ |
30 | andl $7, %ecx /* %ecx = source address mod 8; ZF is set if it is already 8-byte aligned. */ | |
31 | movq %rdi, %rdx /* Working destination pointer; %rdi itself is left untouched. movq does not change the flags set by andl. */ | |
32 | ||
33 | jz 5f /* source already 8-byte aligned => start word loop */ | |
34 | ||
35 | neg %ecx /* Together with the addl below: %ecx = 8 - (source mod 8), the number of bytes to copy until the source is 8-byte aligned. */ | |
36 | addl $8,%ecx /* %ecx is now in the range 1..7. */ | |
37 | /* Copy the leading unaligned bytes one at a time. */ | |
38 | 0: | |
39 | movb (%rsi), %al /* Fetch a byte */ | |
40 | testb %al, %al /* Is it NUL? */ | |
41 | movb %al, (%rdx) /* Store it (the NUL too); movb keeps the flags from testb. */ | |
42 | jz 4f /* If it was NUL, done! %rdx points at the stored NUL. */ | |
43 | incq %rsi /* Advance source. */ | |
44 | incq %rdx /* Advance destination. */ | |
45 | decl %ecx /* One byte fewer until the source is aligned. */ | |
46 | jnz 0b /* Repeat until the source is aligned. */ | |
47 | ||
48 | 5: | |
49 | movq $0xfefefefefefefeff,%r8 /* Magic addend used by the NUL-in-word test in the loop below. */ | |
50 | ||
51 | /* Now the source is aligned. Unfortunately we cannot force | |
52 | to have both source and destination aligned, so ignore the | |
53 | alignment of the destination. */ | |
54 | .p2align 4 | |
55 | 1: | |
56 | /* 1st unroll. */ | |
57 | movq (%rsi), %rax /* Read quadword (8 bytes). */ | |
58 | addq $8, %rsi /* Adjust pointer for next word. */ | |
59 | movq %rax, %r9 /* Save a copy for NUL finding. */ | |
60 | addq %r8, %r9 /* add the magic value to the word. We get | |
61 | carry bits reported for each byte which | |
62 | is *not* 0 */ | |
63 | jnc 3f /* no carry out of the top byte => word holds a NUL; finish bytewise */ | |
64 | xorq %rax, %r9 /* (word+magic)^word */ | |
65 | orq %r8, %r9 /* set all non-carry bits */ | |
66 | incq %r9 /* add 1: if one carry bit was *not* set | |
67 | the addition will not result in 0. */ | |
68 | ||
69 | jnz 3f /* found NUL => finish bytewise */ | |
70 | ||
71 | movq %rax, (%rdx) /* No NUL in this word: write it to the destination. */ | |
72 | addq $8, %rdx /* Adjust pointer. */ | |
73 | ||
74 | /* 2nd unroll. */ | |
75 | movq (%rsi), %rax /* Read quadword (8 bytes). */ | |
76 | addq $8, %rsi /* Adjust pointer for next word. */ | |
77 | movq %rax, %r9 /* Save a copy for NUL finding. */ | |
78 | addq %r8, %r9 /* add the magic value to the word. We get | |
79 | carry bits reported for each byte which | |
80 | is *not* 0 */ | |
81 | jnc 3f /* no carry out of the top byte => word holds a NUL; finish bytewise */ | |
82 | xorq %rax, %r9 /* (word+magic)^word */ | |
83 | orq %r8, %r9 /* set all non-carry bits */ | |
84 | incq %r9 /* add 1: if one carry bit was *not* set | |
85 | the addition will not result in 0. */ | |
86 | ||
87 | jnz 3f /* found NUL => finish bytewise */ | |
88 | ||
89 | movq %rax, (%rdx) /* No NUL in this word: write it to the destination. */ | |
90 | addq $8, %rdx /* Adjust pointer. */ | |
91 | ||
92 | /* 3rd unroll. */ | |
93 | movq (%rsi), %rax /* Read quadword (8 bytes). */ | |
94 | addq $8, %rsi /* Adjust pointer for next word. */ | |
95 | movq %rax, %r9 /* Save a copy for NUL finding. */ | |
96 | addq %r8, %r9 /* add the magic value to the word. We get | |
97 | carry bits reported for each byte which | |
98 | is *not* 0 */ | |
99 | jnc 3f /* no carry out of the top byte => word holds a NUL; finish bytewise */ | |
100 | xorq %rax, %r9 /* (word+magic)^word */ | |
101 | orq %r8, %r9 /* set all non-carry bits */ | |
102 | incq %r9 /* add 1: if one carry bit was *not* set | |
103 | the addition will not result in 0. */ | |
104 | ||
105 | jnz 3f /* found NUL => finish bytewise */ | |
106 | ||
107 | movq %rax, (%rdx) /* No NUL in this word: write it to the destination. */ | |
108 | addq $8, %rdx /* Adjust pointer. */ | |
109 | ||
110 | /* 4th unroll. */ | |
111 | movq (%rsi), %rax /* Read quadword (8 bytes). */ | |
112 | addq $8, %rsi /* Adjust pointer for next word. */ | |
113 | movq %rax, %r9 /* Save a copy for NUL finding. */ | |
114 | addq %r8, %r9 /* add the magic value to the word. We get | |
115 | carry bits reported for each byte which | |
116 | is *not* 0 */ | |
117 | jnc 3f /* no carry out of the top byte => word holds a NUL; finish bytewise */ | |
118 | xorq %rax, %r9 /* (word+magic)^word */ | |
119 | orq %r8, %r9 /* set all non-carry bits */ | |
120 | incq %r9 /* add 1: if one carry bit was *not* set | |
121 | the addition will not result in 0. */ | |
122 | ||
123 | jnz 3f /* found NUL => finish bytewise */ | |
124 | ||
125 | movq %rax, (%rdx) /* No NUL in this word: write it to the destination. */ | |
126 | addq $8, %rdx /* Adjust pointer. */ | |
127 | jmp 1b /* Next iteration. */ | |
128 | ||
129 | /* Do the last few bytes. %rax contains the final source word, which holds a NUL; it is written lowest byte first, up to and including the NUL. | |
130 | The loop is unrolled twice. */ | |
131 | .p2align 4 | |
132 | 3: | |
133 | /* Note that stpcpy needs to return the address of the NUL | |
134 | byte, so %rdx must still point at it when jumping to 4. */ | |
135 | movb %al, (%rdx) /* 1st byte. */ | |
136 | testb %al, %al /* Is it NUL. */ | |
137 | jz 4f /* yes, finish. */ | |
138 | incq %rdx /* Increment destination. */ | |
139 | movb %ah, (%rdx) /* 2nd byte. */ | |
140 | testb %ah, %ah /* Is it NUL?. */ | |
141 | jz 4f /* yes, finish. */ | |
142 | incq %rdx /* Increment destination. */ | |
143 | shrq $16, %rax /* Drop the two bytes just written... */ | |
144 | jmp 3b /* and look at next two bytes in %rax. */ | |
145 | ||
146 | 4: | |
147 | #ifdef USE_AS_STPCPY | |
148 | movq %rdx, %rax /* stpcpy: return the address of the NUL just stored. */ | |
149 | #else | |
150 | movq %rdi, %rax /* strcpy: return the original destination pointer. */ | |
151 | #endif | |
152 | retq | |
29691210 | 153 | END (STRCPY) |
85dd1003 UD |
154 | #ifndef USE_AS_STPCPY |
155 | libc_hidden_builtin_def (strcpy) /* Emitted only for the plain strcpy build. NOTE(review): the macro is defined outside this file; presumably it creates the hidden internal alias for strcpy - confirm. */ | |
156 | #endif |