]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/le/power9/strcpy.S
ce8f50329177fd06b98b88c47d9ff4af1b757087
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / le / power9 / strcpy.S
1 /* Optimized strcpy implementation for PowerPC64/POWER9.
2 Copyright (C) 2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20
/* Choose the symbol name that the code below defines.

   With USE_AS_STPCPY defined the name is STPCPY if that macro is
   already defined, otherwise __stpcpy.  Without USE_AS_STPCPY the name
   is STRCPY if that macro is already defined, otherwise strcpy.  */
#if defined USE_AS_STPCPY
# ifdef STPCPY
#  define FUNC_NAME STPCPY
# else
#  define FUNC_NAME __stpcpy
# endif
#else
# ifdef STRCPY
#  define FUNC_NAME STRCPY
# else
#  define FUNC_NAME strcpy
# endif
#endif /* USE_AS_STPCPY */
34
/* char * [r3] strcpy (char *dest [r3], const char *src [r4])

   or, when USE_AS_STPCPY is defined,

   char * [r3] stpcpy (char *dest [r3], const char *src [r4])

   Both variants share this body.  The strcpy build never modifies r3,
   so dest is returned; the stpcpy build replaces r3 with the address
   of the terminating NUL it stored in dest.

   Loads may read beyond the terminating NUL, but never beyond the end
   of the 16B-aligned block that contains it, so no load crosses into
   another page.

   Register usage:
     r3      dest on entry, result on exit
     r4      source cursor
     r11     destination cursor
     r0      first source byte
     r5      scratch (negated source address)
     r7, r8  position of the first NUL / byte count including it
     r9      bytes to the next 16B source boundary, later a length
     r10     length operand for stxvl
     v0-v3   data being copied
     v1      also the permute control for the unaligned head
     v6      byte-compare result
     v18     sixteen zero bytes.  */

/* Emit the stpcpy result, r3 = base + off, where off is the index of
   the terminating NUL relative to base.  Expands to nothing in the
   strcpy build.  */
#ifdef USE_AS_STPCPY
# define STPCPY_RESULT(base, off) add r3,base,off
#else
# define STPCPY_RESULT(base, off)
#endif

	.machine power9
ENTRY_TOCLESS (FUNC_NAME, 4)
	CALL_MCOUNT 2

	/* Copy the first byte on its own.  If it is the terminator we
	   are done, and r3 (still dest) is the correct result for both
	   strcpy and stpcpy.  */
	lbz	r0,0(r4)
	stb	r0,0(r3)
	cmpwi	r0,0
	beqlr

	addi	r4,r4,1		/* Source cursor: skip the byte just copied  */
	addi	r11,r3,1	/* Destination cursor; r3 stays intact  */

	vspltisb v18,0		/* v18 = all zero bytes  */

	neg	r5,r4
	rldicl	r9,r5,0,60	/* r9 = (-r4) & 15, the distance from r4 to
				   the next 16B boundary  */

	/* Unaligned head.  lvx reads the aligned 16B block that contains
	   r4; vperm with the lvsr control then moves the byte at r4 to
	   the lowest position and fills the vacated upper bytes with
	   zeroes taken from v18.  */
	lvx	v0,0,r4
	lvsr	v1,0,r4
	vperm	v0,v18,v0,v1

	vcmpequb v6,v0,v18	/* 0xff wherever a byte is zero, else 0x00  */
	vctzlsbb r7,v6		/* r7 = bytes in front of the first zero  */
	addi	r8,r7,1		/* r8 = r7 plus the terminator itself  */

	/* r8 <= r9: the terminator sits before the boundary, so store
		     r8 bytes and return.
	   r8 >  r9: no terminator before the boundary, so store r9
		     bytes and continue with aligned loads.  */
	cmpd	r8,r9
	bgt	L(head_no_nul)

	sldi	r10,r8,56	/* stxvl reads its length from the top byte  */
	stxvl	32+v0,r11,r10	/* Store r8 bytes, terminator included  */

	/* r7 is the offset of the terminator from r11.  */
	STPCPY_RESULT (r11, r7)
	blr

L(head_no_nul):
	sldi	r10,r9,56	/* stxvl reads its length from the top byte  */
	stxvl	32+v0,r11,r10	/* Store the r9 bytes before the boundary  */

	/* Advance both cursors; r4 is 16B aligned from here on.  */
	add	r4,r4,r9
	add	r11,r11,r9

	/* Main loop: examine up to four 16B blocks per iteration.  Each
	   block is tested for a zero byte before the next one is
	   loaded, and nothing is stored until the outcome is known; the
	   matching tail stores the blocks that were already accepted.  */
L(copy_64B):
	lxv	32+v0,0(r4)
	vcmpequb. v6,v0,v18	/* Zero byte in block 0?  */
	bne	cr6,L(nul_in_blk0)

	lxv	32+v1,16(r4)
	vcmpequb. v6,v1,v18	/* Zero byte in block 1?  */
	bne	cr6,L(nul_in_blk1)

	lxv	32+v2,32(r4)
	vcmpequb. v6,v2,v18	/* Zero byte in block 2?  */
	bne	cr6,L(nul_in_blk2)

	lxv	32+v3,48(r4)
	vcmpequb. v6,v3,v18	/* Zero byte in block 3?  */
	bne	cr6,L(nul_in_blk3)

	/* All 64 bytes are free of terminators: store them whole.  */
	stxv	32+v0,0(r11)
	stxv	32+v1,16(r11)
	stxv	32+v2,32(r11)
	stxv	32+v3,48(r11)

	addi	r4,r4,64
	addi	r11,r11,64

	b	L(copy_64B)

	/* Tails.  On entry v6 holds the compare result for the block
	   that contains the terminator.  Each tail stores the earlier
	   blocks in full and then the last block up to and including
	   the terminator.  */
L(nul_in_blk0):
	vctzlsbb r8,v6		/* r8 = offset of the terminator in block 0  */
	addi	r9,r8,1		/* Byte count including the terminator  */
	sldi	r9,r9,56	/* stxvl reads its length from the top byte  */
	stxvl	32+v0,r11,r9
	STPCPY_RESULT (r11, r8)
	blr

L(nul_in_blk1):
	stxv	32+v0,0(r11)
	vctzlsbb r8,v6		/* r8 = offset of the terminator in block 1  */
	addi	r9,r8,1		/* Byte count including the terminator  */
	sldi	r10,r9,56	/* stxvl reads its length from the top byte  */
	addi	r11,r11,16	/* r11 = where block 1 goes  */
	stxvl	32+v1,r11,r10	/* Partial store  */
	STPCPY_RESULT (r11, r8)
	blr

L(nul_in_blk2):
	stxv	32+v0,0(r11)
	stxv	32+v1,16(r11)
	vctzlsbb r8,v6		/* r8 = offset of the terminator in block 2  */
	addi	r9,r8,1		/* Byte count including the terminator  */
	sldi	r10,r9,56	/* stxvl reads its length from the top byte  */
	addi	r11,r11,32	/* r11 = where block 2 goes  */
	stxvl	32+v2,r11,r10	/* Partial store  */
	STPCPY_RESULT (r11, r8)
	blr

L(nul_in_blk3):
	stxv	32+v0,0(r11)
	stxv	32+v1,16(r11)
	stxv	32+v2,32(r11)
	vctzlsbb r8,v6		/* r8 = offset of the terminator in block 3  */
	addi	r9,r8,1		/* Byte count including the terminator  */
	sldi	r10,r9,56	/* stxvl reads its length from the top byte  */
	addi	r11,r11,48	/* r11 = where block 3 goes  */
	stxvl	32+v3,r11,r10	/* Partial store  */
	STPCPY_RESULT (r11, r8)
	blr
END (FUNC_NAME)
#if !defined USE_AS_STPCPY
libc_hidden_builtin_def (strcpy)
#endif