]>
Commit | Line | Data |
---|---|---|
39037048 | 1 | /* Optimized strcpy implementation for PowerPC64/POWER9. |
581c785b | 2 | Copyright (C) 2020-2022 Free Software Foundation, Inc. |
39037048 ABL |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <https://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <sysdep.h> | |
20 | ||
aa70d056 ABL |
21 | #ifdef USE_AS_STPCPY /* Pick FUNC_NAME, the symbol handed to ENTRY_TOCLESS and END: this branch is the stpcpy build. */ |
22 | # ifndef STPCPY | |
23 | # define FUNC_NAME __stpcpy /* Default stpcpy symbol when STPCPY is not predefined. */ | |
24 | # else | |
25 | # define FUNC_NAME STPCPY /* Use the name supplied through STPCPY. */ | |
26 | # endif | |
27 | #else | |
28 | # ifndef STRCPY | |
29 | # define FUNC_NAME strcpy /* Default strcpy symbol when STRCPY is not predefined. */ | |
30 | # else | |
31 | # define FUNC_NAME STRCPY /* Use the name supplied through STRCPY. */ | |
32 | # endif | |
33 | #endif /* !USE_AS_STPCPY */ | |
39037048 ABL |
34 | |
35 | /* Implements the function | |
36 | ||
37 | char * [r3] strcpy (char *dest [r3], const char *src [r4]) | |
38 | ||
aa70d056 ABL |
39 | or |
40 | ||
41 | char * [r3] stpcpy (char *dest [r3], const char *src [r4]) | |
42 | ||
43 | if USE_AS_STPCPY is defined. | |
44 | ||
39037048 ABL |
45 | The implementation can load bytes past a null terminator, but only |
46 | up to the next 16B boundary, so it never crosses a page. */ | |
47 | ||
813c6ec8 PFC |
48 | /* CHECK16 (vreg, offset, addr, label): load the quadword at addr+offset into vreg and compare each byte with v18, which must hold zeroes. |
49 | The byte mask is left in v6 and the summary in CR6 (both clobbered); control branches to L(label) if any null byte is found. */ | |
50 | #define CHECK16(vreg,offset,addr,label) \ | |
51 | lxv vreg+32,offset(addr); \ | |
52 | vcmpequb. v6,vreg,v18; \ | |
53 | bne cr6,L(label); | |
54 | ||
39037048 | 55 | .machine power9 |
aa70d056 | 56 | ENTRY_TOCLESS (FUNC_NAME, 4) /* In: r3 = dest, r4 = src. Out: r3 = dest (strcpy) or the address of the copied terminator (stpcpy). */ |
39037048 ABL |
57 | CALL_MCOUNT 2 |
58 | ||
39037048 | 59 | vspltisb v18,0 /* v18 = all-zero bytes, the pattern used by every null-byte compare */ |
813c6ec8 | 60 | vspltisb v19,-1 /* v19 = all-0xFF bytes, filler that never compares equal to zero */ |
39037048 | 61 | |
813c6ec8 PFC |
62 | /* r5 = next 16B-aligned address above src (source cursor for L(loop)); r8 = bytes from src up to r5 (1..16); r11 = dest + r8 (destination cursor for L(loop)). */ |
63 | addi r5,r4,16 | |
64 | clrrdi r5,r5,4 | |
65 | subf r8,r4,r5 | |
66 | add r11,r3,r8 | |
39037048 | 67 | |
813c6ec8 | 68 | /* Align the first (possibly partial) quadword of src and fill the bytes not loaded from the string with 0xFF from v19, which cannot match the null compare. */ |
39037048 ABL |
69 | lvx v0,0,r4 |
70 | lvsr v1,0,r4 | |
813c6ec8 | 71 | vperm v0,v19,v0,v1 |
39037048 | 72 | |
813c6ec8 PFC |
73 | vcmpequb. v6,v0,v18 /* v6 byte = 0xff where the source byte is null, 0x00 otherwise; CR6 summarizes the compare */ |
74 | beq cr6,L(no_null) /* No null in the first chunk: go copy it and enter the loop. */ | |
39037048 | 75 | |
813c6ec8 PFC |
76 | /* There's a null byte: the whole string ends inside the first chunk. */ |
77 | vctzlsbb r8,v6 /* r8 = number of string bytes before the null */ | |
78 | addi r9,r8,1 /* Include the null byte in the store length. */ | |
79 | sldi r10,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
80 | stxvl 32+v0,r3,r10 /* Partial store of r8+1 bytes at dest */ | |
39037048 | 81 | |
aa70d056 ABL |
82 | #ifdef USE_AS_STPCPY |
83 | /* stpcpy returns the dest address plus the size not counting the | |
84 | final '\0'. */ | |
813c6ec8 | 85 | add r3,r3,r8 /* r3 = dest + r8, where the null was stored */ |
aa70d056 | 86 | #endif |
39037048 ABL |
87 | blr |
88 | ||
89 | L(no_null): /* First chunk holds no null. */ | |
813c6ec8 PFC |
90 | sldi r10,r8,56 /* r8 is still the byte count up to the 16B boundary; stxvl wants it in the top 8 bits */ |
91 | stxvl 32+v0,r3,r10 /* Partial store of those r8 bytes, then fall through into L(loop) */ | |
39037048 | 92 | |
813c6ec8 | 93 | .p2align 4 |
39037048 | 94 | L(loop): /* Main loop: 96 bytes (six quadwords) per iteration; r5 = 16B-aligned source cursor, r11 = destination cursor. */ |
813c6ec8 PFC |
95 | CHECK16(v0,0,r5,tail1) /* Load and test each quadword in order; the first one containing a null exits to its L(tailN). */ |
96 | CHECK16(v1,16,r5,tail2) | |
97 | CHECK16(v2,32,r5,tail3) | |
98 | CHECK16(v3,48,r5,tail4) | |
99 | CHECK16(v4,64,r5,tail5) | |
100 | CHECK16(v5,80,r5,tail6) | |
39037048 ABL |
101 | |
102 | stxv 32+v0,0(r11) /* No null in any of the six quadwords: store them all. */ | |
103 | stxv 32+v1,16(r11) | |
104 | stxv 32+v2,32(r11) | |
105 | stxv 32+v3,48(r11) | |
813c6ec8 PFC |
106 | stxv 32+v4,64(r11) |
107 | stxv 32+v5,80(r11) | |
39037048 | 108 | |
813c6ec8 PFC |
109 | addi r5,r5,96 /* Advance both cursors past the 96 bytes just copied. */ |
110 | addi r11,r11,96 | |
39037048 ABL |
111 | |
112 | b L(loop) | |
113 | ||
813c6ec8 | 114 | .p2align 4 |
39037048 | 115 | L(tail1): /* Null found in the first quadword (v0) of this L(loop) iteration; nothing from the iteration is stored yet. */ |
813c6ec8 PFC |
116 | vctzlsbb r8,v6 /* r8 = string bytes in v0 before the null (v6 comes from the CHECK16 that branched here) */ |
117 | addi r9,r8,1 /* Include the null terminator in the store length */ | |
aa70d056 | 118 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits */ |
813c6ec8 | 119 | stxvl 32+v0,r11,r9 /* Partial store of r8+1 bytes at r11 */ |
aa70d056 ABL |
120 | #ifdef USE_AS_STPCPY |
121 | /* stpcpy returns the dest address plus the size not counting the | |
122 | final '\0'. */ | |
123 | add r3,r11,r8 | |
124 | #endif | |
39037048 ABL |
125 | blr |
126 | ||
813c6ec8 | 127 | .p2align 4 |
39037048 ABL |
128 | L(tail2): /* Null found in the second quadword (v1) of this L(loop) iteration. */ |
129 | stxv 32+v0,0(r11) /* v0 passed its null check: store it whole. */ | |
813c6ec8 PFC |
130 | vctzlsbb r8,v6 /* r8 = string bytes in v1 before the null */ |
131 | addi r9,r8,1 /* Include the null terminator in the store length. */ | |
132 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
39037048 | 133 | addi r11,r11,16 /* r11 = destination of the v1 quadword */ |
813c6ec8 | 134 | stxvl 32+v1,r11,r9 /* Partial store of r8+1 bytes */ |
aa70d056 | 135 | #ifdef USE_AS_STPCPY |
aa70d056 ABL |
136 | add r3,r11,r8 /* stpcpy: return the address where the null was stored. */ |
137 | #endif | |
39037048 ABL |
138 | blr |
139 | ||
813c6ec8 | 140 | .p2align 4 |
39037048 ABL |
141 | L(tail3): /* Null found in the third quadword (v2) of this L(loop) iteration. */ |
142 | stxv 32+v0,0(r11) /* v0 and v1 passed their null checks: store them whole. */ | |
143 | stxv 32+v1,16(r11) | |
813c6ec8 PFC |
144 | vctzlsbb r8,v6 /* r8 = string bytes in v2 before the null */ |
145 | addi r9,r8,1 /* Include the null terminator in the store length. */ | |
146 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
39037048 | 147 | addi r11,r11,32 /* r11 = destination of the v2 quadword */ |
813c6ec8 | 148 | stxvl 32+v2,r11,r9 /* Partial store of r8+1 bytes */ |
aa70d056 | 149 | #ifdef USE_AS_STPCPY |
aa70d056 ABL |
150 | add r3,r11,r8 /* stpcpy: return the address where the null was stored. */ |
151 | #endif | |
39037048 ABL |
152 | blr |
153 | ||
813c6ec8 | 154 | .p2align 4 |
39037048 ABL |
155 | L(tail4): /* Null found in the fourth quadword (v3) of this L(loop) iteration. */ |
156 | stxv 32+v0,0(r11) /* v0..v2 passed their null checks: store them whole. */ | |
157 | stxv 32+v1,16(r11) | |
158 | stxv 32+v2,32(r11) | |
813c6ec8 PFC |
159 | vctzlsbb r8,v6 /* r8 = string bytes in v3 before the null */ |
160 | addi r9,r8,1 /* Include the null terminator in the store length. */ | |
161 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
39037048 | 162 | addi r11,r11,48 /* r11 = destination of the v3 quadword */ |
813c6ec8 | 163 | stxvl 32+v3,r11,r9 /* Partial store of r8+1 bytes */ |
aa70d056 | 164 | #ifdef USE_AS_STPCPY |
aa70d056 ABL |
165 | add r3,r11,r8 /* stpcpy: return the address where the null was stored. */ |
166 | #endif | |
39037048 | 167 | blr |
813c6ec8 PFC |
168 | |
169 | .p2align 4 | |
170 | L(tail5): /* Null found in the fifth quadword (v4) of this L(loop) iteration. */ | |
171 | stxv 32+v0,0(r11) /* v0..v3 passed their null checks: store them whole. */ | |
172 | stxv 32+v1,16(r11) | |
173 | stxv 32+v2,32(r11) | |
174 | stxv 32+v3,48(r11) | |
175 | vctzlsbb r8,v6 /* r8 = string bytes in v4 before the null */ | |
176 | addi r9,r8,1 /* Include the null terminator in the store length. */ | |
177 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
178 | addi r11,r11,64 /* r11 = destination of the v4 quadword */ | |
179 | stxvl 32+v4,r11,r9 /* Partial store of r8+1 bytes */ | |
180 | #ifdef USE_AS_STPCPY | |
181 | add r3,r11,r8 /* stpcpy: return the address where the null was stored. */ | |
182 | #endif | |
183 | blr | |
184 | ||
185 | .p2align 4 | |
186 | L(tail6): /* Null found in the sixth quadword (v5) of this L(loop) iteration. */ | |
187 | stxv 32+v0,0(r11) /* v0..v4 passed their null checks: store them whole. */ | |
188 | stxv 32+v1,16(r11) | |
189 | stxv 32+v2,32(r11) | |
190 | stxv 32+v3,48(r11) | |
191 | stxv 32+v4,64(r11) | |
192 | vctzlsbb r8,v6 /* r8 = string bytes in v5 before the null */ | |
193 | addi r9,r8,1 /* Include the null terminator in the store length. */ | |
194 | sldi r9,r9,56 /* stxvl takes its byte count from the top 8 bits. */ | |
195 | addi r11,r11,80 /* r11 = destination of the v5 quadword */ | |
196 | stxvl 32+v5,r11,r9 /* Partial store of r8+1 bytes */ | |
197 | #ifdef USE_AS_STPCPY | |
198 | add r3,r11,r8 /* stpcpy: return the address where the null was stored. */ | |
199 | #endif | |
200 | blr | |
201 | ||
aa70d056 ABL |
202 | END (FUNC_NAME) /* Closes the function opened by ENTRY_TOCLESS (FUNC_NAME, 4). */ |
203 | #ifndef USE_AS_STPCPY /* Only the strcpy build expands libc_hidden_builtin_def (strcpy). */ | |
39037048 | 204 | libc_hidden_builtin_def (strcpy) |
aa70d056 | 205 | #endif /* !USE_AS_STPCPY */ |