git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/powerpc64/le/power9/strcpy.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / le / power9 / strcpy.S
CommitLineData
39037048 1/* Optimized strcpy implementation for PowerPC64/POWER9.
581c785b 2 Copyright (C) 2020-2022 Free Software Foundation, Inc.
39037048
ABL
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20
aa70d056
ABL
/* Pick the symbol name this file defines.  With USE_AS_STPCPY it is
   STPCPY when that macro is defined and __stpcpy otherwise; without
   USE_AS_STPCPY it is STRCPY when defined and strcpy otherwise. */
21#ifdef USE_AS_STPCPY
22# ifndef STPCPY
23# define FUNC_NAME __stpcpy
24# else
25# define FUNC_NAME STPCPY
26# endif
27#else
28# ifndef STRCPY
29# define FUNC_NAME strcpy
30# else
31# define FUNC_NAME STRCPY
32# endif
33#endif /* !USE_AS_STPCPY */
39037048
ABL
34
35/* Implements the function
36
37 char * [r3] strcpy (char *dest [r3], const char *src [r4])
38
aa70d056
ABL
39 or
40
41 char * [r3] stpcpy (char *dest [r3], const char *src [r4])
42
43 if USE_AS_STPCPY is defined.
44
39037048
ABL
45 The implementation can load bytes past a null terminator, but only
46 up to the next 16B boundary, so it never crosses a page. */
47
813c6ec8
PFC
48/* Load quadword at addr+offset to vreg, check for null bytes,
49 and branch to label if any are found. */
/* Side effects: overwrites v6 with the byte-compare result and sets
   cr6, which the trailing bne tests.  Relies on v18 holding all-zero
   bytes; the function sets that up with vspltisb before first use.
   NOTE(review): the +32 presumably turns the vector register number
   into the VSX register number that lxv expects - confirm against
   the Power ISA. */
50#define CHECK16(vreg,offset,addr,label) \
51 lxv vreg+32,offset(addr); \
52 vcmpequb. v6,vreg,v18; \
53 bne cr6,L(label);
54
39037048 55.machine power9
aa70d056 56ENTRY_TOCLESS (FUNC_NAME, 4)
39037048
ABL
57 CALL_MCOUNT 2
58
/* Constants shared by every null check below: v18 is what each byte
   is compared against, v19 is the filler for bytes that must never
   compare equal. */
39037048 59 vspltisb v18,0 /* Zeroes in v18 */
813c6ec8 60 vspltisb v19,-1 /* 0xFF bytes in v19 */
39037048 61
813c6ec8
PFC
62 /* Next 16B-aligned address. Prepare address for L(loop). */
/* r5  = first 16B boundary strictly above src (r4).
   r8  = r5 - src, the number of source bytes before that boundary
         (1..16).
   r11 = dest (r3) + r8, the destination address matching r5. */
63 addi r5,r4,16
64 clrrdi r5,r5,4
65 subf r8,r4,r5
66 add r11,r3,r8
39037048 67
813c6ec8 68 /* Align data and fill bytes not loaded with non matching char. */
39037048
ABL
/* The vperm merges the loaded data with v19, so every position not
   taken from the load holds 0xFF and cannot match the zeroes in v18. */
69 lvx v0,0,r4
70 lvsr v1,0,r4
813c6ec8 71 vperm v0,v19,v0,v1
39037048 72
813c6ec8
PFC
73 vcmpequb. v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */
/* Taken when no byte of v0 compared equal to zero. */
74 beq cr6,L(no_null)
39037048 75
813c6ec8
PFC
76 /* There's a null byte. */
/* r8 is reused from here on: it becomes the count of bytes that
   precede the first null, so r8 + 1 is the store length and r8 is
   the stpcpy offset from dest. */
77 vctzlsbb r8,v6 /* Number of trailing zeroes */
78 addi r9,r8,1 /* Add null byte. */
79 sldi r10,r9,56 /* stxvl wants size in top 8 bits. */
80 stxvl 32+v0,r3,r10 /* Partial store */
39037048 81
aa70d056
ABL
82#ifdef USE_AS_STPCPY
83 /* stpcpy returns the dest address plus the size not counting the
84 final '\0'. */
813c6ec8 85 add r3,r3,r8
aa70d056 86#endif
39037048
ABL
87 blr
88
89L(no_null):
/* No null before the first 16B boundary.  r8 still holds the byte
   count up to that boundary (computed at function entry), so exactly
   those bytes of v0 are stored at dest.  Execution then falls through
   into L(loop). */
813c6ec8
PFC
90 sldi r10,r8,56 /* stxvl wants size in top 8 bits */
91 stxvl 32+v0,r3,r10 /* Partial store */
39037048 92
813c6ec8 93 .p2align 4
39037048 94L(loop):
/* Loop state: r5 is the 16B-aligned source cursor and r11 the
   destination cursor for the same position.  Each pass loads six
   quadwords (96 bytes) and checks every one for a null before any of
   them is stored.  A hit in the Nth check exits to L(tailN) with the
   earlier, null-free quadwords still unstored in their registers and
   v6 holding the compare result for the quadword with the null. */
813c6ec8
PFC
95 CHECK16(v0,0,r5,tail1)
96 CHECK16(v1,16,r5,tail2)
97 CHECK16(v2,32,r5,tail3)
98 CHECK16(v3,48,r5,tail4)
99 CHECK16(v4,64,r5,tail5)
100 CHECK16(v5,80,r5,tail6)
39037048
ABL
101
/* All six quadwords are null-free: write them out in full. */
102 stxv 32+v0,0(r11)
103 stxv 32+v1,16(r11)
104 stxv 32+v2,32(r11)
105 stxv 32+v3,48(r11)
813c6ec8
PFC
106 stxv 32+v4,64(r11)
107 stxv 32+v5,80(r11)
39037048 108
813c6ec8
PFC
/* Advance both cursors by the 96 bytes just copied. */
109 addi r5,r5,96
110 addi r11,r11,96
39037048
ABL
111
112 b L(loop)
113
813c6ec8 114 .p2align 4
39037048 115L(tail1):
/* The null is in v0, the first quadword of this pass, so no full
   quadword is pending.  Store the bytes up to and including the null
   at r11; r8 (bytes before the null) is the stpcpy offset from r11. */
813c6ec8
PFC
116 vctzlsbb r8,v6 /* Number of trailing zeroes */
117 addi r9,r8,1 /* Add null terminator */
aa70d056 118 sldi r9,r9,56 /* stxvl wants size in top 8 bits */
813c6ec8 119 stxvl 32+v0,r11,r9 /* Partial store */
aa70d056
ABL
120#ifdef USE_AS_STPCPY
121 /* stpcpy returns the dest address plus the size not counting the
122 final '\0'. */
123 add r3,r11,r8
124#endif
39037048
ABL
125 blr
126
813c6ec8 127 .p2align 4
39037048
ABL
128L(tail2):
/* The null is in v1.  v0 is null-free and is stored whole first;
   r11 is then advanced by 16 so that both the partial store of v1
   and the stpcpy result are relative to v1's destination. */
129 stxv 32+v0,0(r11)
813c6ec8
PFC
/* r8 = bytes before the null in v1; r9 = (r8 + 1) moved into the top
   8 bits, the length form used by the stxvl stores in this file. */
130 vctzlsbb r8,v6
131 addi r9,r8,1
132 sldi r9,r9,56
39037048 133 addi r11,r11,16
813c6ec8 134 stxvl 32+v1,r11,r9
aa70d056 135#ifdef USE_AS_STPCPY
aa70d056
ABL
/* stpcpy result: address of the null byte just written. */
136 add r3,r11,r8
137#endif
39037048
ABL
138 blr
139
813c6ec8 140 .p2align 4
39037048
ABL
141L(tail3):
/* The null is in v2.  v0 and v1 are null-free and are stored whole
   first; r11 is then advanced by 32 to v2's destination. */
142 stxv 32+v0,0(r11)
143 stxv 32+v1,16(r11)
813c6ec8
PFC
/* r8 = bytes before the null in v2; r9 = (r8 + 1) moved into the top
   8 bits, the length form used by the stxvl stores in this file. */
144 vctzlsbb r8,v6
145 addi r9,r8,1
146 sldi r9,r9,56
39037048 147 addi r11,r11,32
813c6ec8 148 stxvl 32+v2,r11,r9
aa70d056 149#ifdef USE_AS_STPCPY
aa70d056
ABL
/* stpcpy result: address of the null byte just written. */
150 add r3,r11,r8
151#endif
39037048
ABL
152 blr
153
813c6ec8 154 .p2align 4
39037048
ABL
155L(tail4):
/* The null is in v3.  v0..v2 are null-free and are stored whole
   first; r11 is then advanced by 48 to v3's destination. */
156 stxv 32+v0,0(r11)
157 stxv 32+v1,16(r11)
158 stxv 32+v2,32(r11)
813c6ec8
PFC
/* r8 = bytes before the null in v3; r9 = (r8 + 1) moved into the top
   8 bits, the length form used by the stxvl stores in this file. */
159 vctzlsbb r8,v6
160 addi r9,r8,1
161 sldi r9,r9,56
39037048 162 addi r11,r11,48
813c6ec8 163 stxvl 32+v3,r11,r9
aa70d056 164#ifdef USE_AS_STPCPY
aa70d056
ABL
/* stpcpy result: address of the null byte just written. */
165 add r3,r11,r8
166#endif
39037048 167 blr
813c6ec8
PFC
168
169 .p2align 4
170L(tail5):
/* The null is in v4.  v0..v3 are null-free and are stored whole
   first; r11 is then advanced by 64 to v4's destination. */
171 stxv 32+v0,0(r11)
172 stxv 32+v1,16(r11)
173 stxv 32+v2,32(r11)
174 stxv 32+v3,48(r11)
/* r8 = bytes before the null in v4; r9 = (r8 + 1) moved into the top
   8 bits, the length form used by the stxvl stores in this file. */
175 vctzlsbb r8,v6
176 addi r9,r8,1
177 sldi r9,r9,56
178 addi r11,r11,64
179 stxvl 32+v4,r11,r9
180#ifdef USE_AS_STPCPY
/* stpcpy result: address of the null byte just written. */
181 add r3,r11,r8
182#endif
183 blr
184
185 .p2align 4
186L(tail6):
/* The null is in v5, the last quadword of the pass.  v0..v4 are
   null-free and are stored whole first; r11 is then advanced by 80
   to v5's destination. */
187 stxv 32+v0,0(r11)
188 stxv 32+v1,16(r11)
189 stxv 32+v2,32(r11)
190 stxv 32+v3,48(r11)
191 stxv 32+v4,64(r11)
/* r8 = bytes before the null in v5; r9 = (r8 + 1) moved into the top
   8 bits, the length form used by the stxvl stores in this file. */
192 vctzlsbb r8,v6
193 addi r9,r8,1
194 sldi r9,r9,56
195 addi r11,r11,80
196 stxvl 32+v5,r11,r9
197#ifdef USE_AS_STPCPY
/* stpcpy result: address of the null byte just written. */
198 add r3,r11,r8
199#endif
200 blr
201
aa70d056
ABL
202END (FUNC_NAME)
/* The hidden builtin definition is emitted for strcpy only; it is
   skipped when this file is built as stpcpy. */
203#ifndef USE_AS_STPCPY
39037048 204libc_hidden_builtin_def (strcpy)
aa70d056 205#endif