]>
Commit | Line | Data |
---|---|---|
6d7e8eda | 1 | /* Copyright (C) 2014-2023 Free Software Foundation, Inc. |
f360f94a VR |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
15 | License along with the GNU C Library; if not, see | |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
f360f94a VR |
17 | |
18 | #include <sysdep.h> | |
19 | ||
20 | /* Implements the functions | |
21 | ||
22 | char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) | |
23 | ||
24 | AND | |
25 | ||
26 | char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) | |
27 | ||
28 | The algorithm is as follows: | |
29 | > if src and dest are 8 byte aligned, perform double word copy | |
30 | else | |
31 | > when n > 16, align and copy merged doublewords; otherwise copy byte by byte. | |
32 | ||
33 | The aligned comparisons are made using the cmpb instruction. */ | |
34 | ||
35 | /* The performance optimizations focus on the following: | |
36 | 1. data alignment [gain from aligned memory access on read/write] | |
37 | 2. POWER7 gains performance with loop unrolling/unwinding | |
38 | [gain by reduction of branch penalty]. | |
39 | 3. The final pad with null bytes is done by calling an optimized | |
40 | memset. */ | |
41 | ||
42 | #ifdef USE_AS_STPNCPY /* building the stpncpy variant of this file */ | |
dbcc7d08 WSM |
43 | # ifndef STPNCPY |
44 | # define FUNC_NAME __stpncpy /* default entry-point name for stpncpy */ | |
45 | # else | |
46 | # define FUNC_NAME STPNCPY /* use the name supplied through STPNCPY */ | |
47 | # endif | |
f360f94a | 48 | #else |
dbcc7d08 WSM |
49 | # ifndef STRNCPY |
50 | # define FUNC_NAME strncpy /* default entry-point name for strncpy */ | |
51 | # else | |
52 | # define FUNC_NAME STRNCPY /* use the name supplied through STRNCPY */ | |
53 | # endif | |
54 | #endif /* !USE_AS_STPNCPY */ | |
f360f94a | 55 | |
750a0e49 | 56 | #define FRAMESIZE (FRAME_MIN_SIZE+16) /* size of the stack frame pushed around the MEMSET call in L(zeroFill) */ |
f360f94a VR |
57 | |
58 | #ifndef MEMSET /* MEMSET may be predefined by whoever includes this file */ | |
59 | /* For builds with no IFUNC support, local calls should be made to internal | |
60 | GLIBC symbol (created by libc_hidden_builtin_def). */ | |
61 | # ifdef SHARED | |
de7ee73d | 62 | # define MEMSET_is_local /* selects ENTRY_TOCLESS and drops the nop after "bl MEMSET" */ |
f360f94a VR |
63 | # define MEMSET __GI_memset |
64 | # else | |
65 | # define MEMSET memset | |
66 | # endif | |
67 | #endif | |
68 | ||
69 | .machine power7 | |
d5b41185 AM |
70 | #ifdef MEMSET_is_local |
71 | ENTRY_TOCLESS (FUNC_NAME, 4) | |
72 | #else | |
73 | ENTRY (FUNC_NAME, 4) | |
74 | #endif | |
f360f94a VR |
75 | CALL_MCOUNT 3 |
76 | ||
f360f94a VR |
77 | or r10, r3, r4 /* merge the address bits of dst (r3) and src (r4) */ |
78 | rldicl. r8, r10, 0, 61 /* r8 = low 3 bits; cr0.eq (and r8 == 0) when both are doubleword aligned */ | |
79 | ||
80 | std r19, -8(r1) /* save caller's register r19 below the stack pointer */ | |
81 | std r18, -16(r1) /* save caller's register r18 below the stack pointer */ | |
750a0e49 AM |
82 | cfi_offset(r19, -8) |
83 | cfi_offset(r18, -16) | |
f360f94a VR |
84 | |
85 | mr r9, r3 /* r9 = working dst pointer */ | |
86 | mr r18, r3 /* r18 = original dst, the strncpy return value */ | |
98408b95 | 87 | bne 0, L(unaligned) /* cr0 still holds the rldicl. result: some low address bit is set */ |
f360f94a | 88 | |
98408b95 | 89 | L(aligned): /* src and dst are doubleword aligned; r8 is zero on every path that reaches here */ |
f360f94a VR |
90 | srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ |
91 | cmpldi cr7, r11, 3 /* if count > 3 (at least 4 doublewords), use the unrolled loop */ | |
92 | ble 7, L(update1) | |
93 | ||
94 | ld r10, 0(r4) /* load doubleword from src */ | |
95 | cmpb r8, r10, r8 /* r8 is zero, so this flags every zero byte of the doubleword */ | |
96 | cmpdi cr7, r8, 0 /* no zero byte found: continue */ | |
97 | bne cr7, L(update3) /* zero byte in the first doubleword, nothing stored yet */ | |
98 | ||
99 | std r10, 0(r3) /* copy doubleword at offset=0 */ | |
100 | ld r10, 8(r4) /* load next doubleword from offset=8 */ | |
101 | cmpb r8, r10, r8 /* r8 is still zero: flag zero bytes of the doubleword just read */ | |
102 | cmpdi cr7, r8, 0 /* no zero byte found: continue */ | |
103 | bne 7,L(HopBy8) /* zero byte in the second doubleword */ | |
104 | ||
105 | addi r8, r11, -4 /* the next four instructions on r8 compute CTR = (count - 4) / 4 + 1 */ | |
106 | mr r7, r3 /* r7 = dst base used by the unrolled stores */ | |
107 | srdi r8, r8, 2 | |
108 | mr r6, r4 /* r6 = src base used by the unrolled loads */ | |
109 | addi r8, r8, 1 | |
110 | li r12, 0 /* r12 = zero mask for cmpb in L(dwordCopy) */ | |
111 | mtctr r8 | |
112 | b L(dwordCopy) | |
113 | ||
114 | .p2align 4 | |
115 | L(dWordUnroll): /* entered from L(dwordCopy) with r10 == 0 and r8 = doubleword from offset 16 */ | |
116 | std r8, 16(r9) /* copy dword at offset=16 */ | |
117 | ld r8, 24(r4) /* load dword,perform loop unrolling again */ | |
118 | cmpb r10, r8, r10 /* r10 is zero here: flag zero bytes of r8 */ | |
119 | cmpdi cr7, r10, 0 | |
120 | bne cr7, L(HopBy24) | |
121 | ||
122 | std r8, 24(r7) /* copy dword at offset=24 */ | |
123 | addi r9, r9, 32 | |
124 | addi r4, r4, 32 | |
125 | bdz L(leftDwords) /* decrement CTR; leave the unrolled loop when it reaches zero */ | |
126 | ||
127 | ld r3, 32(r6) /* r6/r7 still hold the src/dst bases from before the +32 advance */ | |
128 | cmpb r8, r3, r10 /* r10 is zero here: flag zero bytes of r3 */ | |
129 | cmpdi cr7, r8, 0 | |
130 | bne cr7, L(update2) | |
131 | ||
132 | std r3, 32(r7) | |
133 | ld r10, 40(r6) | |
134 | cmpb r8, r10, r8 /* r8 is zero here: flag zero bytes of r10 */ | |
135 | cmpdi cr7, r8, 0 | |
136 | bne cr7, L(HopBy40) | |
137 | ||
138 | mr r6, r4 /* update values */ | |
139 | mr r7, r9 | |
140 | mr r11, r0 /* r0 = r11 - 4, set in L(dwordCopy) */ | |
141 | mr r5, r19 /* r19 = r5 - 32, set in L(dwordCopy) */ | |
142 | ||
143 | L(dwordCopy): /* r10 holds the already-checked doubleword for offset 8; r12 is zero */ | |
144 | std r10, 8(r9) /* copy dword at offset=8 */ | |
145 | addi r19, r5, -32 /* length that remains after this 32-byte group */ | |
146 | addi r0, r11, -4 /* doubleword count that remains after this group */ | |
147 | ld r8, 16(r4) | |
148 | cmpb r10, r8, r12 /* flag zero bytes of the doubleword at offset 16 */ | |
149 | cmpdi cr7, r10, 0 | |
150 | beq cr7, L(dWordUnroll) /* no zero byte: keep going in the unrolled loop */ | |
151 | ||
152 | addi r9, r9, 16 /* increment dst by 16 */ | |
153 | addi r4, r4, 16 /* increment src by 16 */ | |
154 | addi r5, r5, -16 /* decrement length 'n' by 16 */ | |
155 | addi r0, r11, -2 /* two doublewords were consumed from the count */ | |
156 | ||
157 | L(dWordUnrollOFF): /* non-unrolled doubleword copy; r0 = doublewords left, r4/r9 = src/dst */ | |
158 | ld r10, 0(r4) /* load first dword */ | |
159 | li r8, 0 /* load mask */ | |
160 | cmpb r8, r10, r8 /* flag zero bytes of the doubleword */ | |
161 | cmpdi cr7, r8, 0 | |
162 | bne cr7, L(byte_by_byte) /* zero byte present: finish byte by byte */ | |
163 | mtctr r0 | |
164 | li r7, 0 /* r7 = zero mask for cmpb inside the loop */ | |
165 | b L(CopyDword) | |
166 | ||
167 | .p2align 4 | |
168 | L(loadDWordandCompare): | |
169 | ld r10, 0(r4) | |
170 | cmpb r8, r10, r7 | |
171 | cmpdi cr7, r8, 0 | |
172 | bne cr7, L(byte_by_byte) | |
173 | ||
174 | L(CopyDword): /* store the checked doubleword and advance src, dst and length by 8 */ | |
175 | addi r9, r9, 8 | |
176 | std r10, -8(r9) | |
177 | addi r4, r4, 8 | |
178 | addi r5, r5, -8 | |
179 | bdnz L(loadDWordandCompare) /* falls through to L(byte_by_byte) once CTR reaches zero */ | |
180 | ||
181 | L(byte_by_byte): /* copy single bytes, four per iteration; r4 = src, r9 = dst, r5 = length left */ | |
182 | cmpldi cr7, r5, 3 | |
183 | ble cr7, L(verifyByte) /* three or fewer bytes left: skip the unrolled byte loop */ | |
184 | srdi r10, r5, 2 /* CTR = length / 4 */ | |
185 | mr r19, r9 /* r19 = running dst pointer; r9 keeps the starting dst */ | |
186 | mtctr r10 | |
187 | b L(firstByteUnroll) | |
188 | ||
189 | .p2align 4 | |
190 | L(bytes_unroll): | |
191 | lbz r10, 1(r4) /* load byte from src */ | |
192 | cmpdi cr7, r10, 0 /* compare for NULL */ | |
193 | stb r10, 1(r19) /* store byte to dst */ | |
194 | beq cr7, L(updtDestComputeN2ndByte) | |
195 | ||
196 | addi r4, r4, 4 /* advance src */ | |
197 | ||
198 | lbz r10, -2(r4) /* third byte of the group (src was already advanced by 4) */ | |
199 | cmpdi cr7, r10, 0 | |
200 | stb r10, 2(r19) | |
201 | beq cr7, L(updtDestComputeN3rdByte) | |
202 | ||
203 | lbz r10, -1(r4) /* fourth byte of the group */ | |
204 | addi r19, r19, 4 | |
205 | cmpdi cr7, r10, 0 | |
206 | stb r10, -1(r19) | |
207 | beq cr7, L(ComputeNByte) | |
208 | ||
209 | bdz L(update0) /* all groups of four done without finding a zero byte */ | |
210 | ||
211 | L(firstByteUnroll): | |
212 | lbz r10, 0(r4) /* first byte of the group */ | |
213 | cmpdi cr7, 10, 0 /* compare for NULL; the bare 10 names register r10 */ | |
214 | stb r10, 0(r19) | |
215 | bne cr7, L(bytes_unroll) | |
216 | addi r19, r19, 1 /* zero byte stored: r19 points just past it */ | |
217 | ||
218 | L(ComputeNByte): /* r19 = one past the last byte written, r9 = dst where byte copying started */ | |
219 | subf r9, r19, r9 /* r9 = r9 - r19, the negated number of bytes written */ | |
220 | add r8, r9, r5 /* r8 = length left minus bytes written = bytes to zero-fill */ | |
221 | ||
222 | L(zeroFill): /* pad r8 bytes starting at r19 with zeros, then return */ | |
223 | cmpdi cr7, r8, 0 /* compare if length is zero */ | |
224 | beq cr7, L(update3return) | |
225 | ||
750a0e49 AM |
226 | mflr r0 /* load link register LR to r0 */ |
227 | std r0, 16(r1) /* store the link register */ | |
228 | stdu r1, -FRAMESIZE(r1) /* create the stack frame */ | |
229 | cfi_adjust_cfa_offset(FRAMESIZE) | |
230 | cfi_offset(lr, 16) | |
f360f94a VR |
231 | mr r3, r19 /* memset destination: first byte to pad */ |
232 | li r4, 0 /* fill value is zero */ | |
233 | mr r5, r8 /* how many bytes to fill buffer with */ | |
234 | bl MEMSET /* call optimized memset */ | |
de7ee73d | 235 | #ifndef MEMSET_is_local /* the nop is emitted only when MEMSET is not a local symbol */ |
f360f94a | 236 | nop |
de7ee73d | 237 | #endif |
750a0e49 AM |
238 | ld r0, FRAMESIZE+16(r1) /* read the saved link register */ |
239 | addi r1, r1, FRAMESIZE /* restore stack pointer */ | |
240 | cfi_adjust_cfa_offset(-FRAMESIZE) | |
241 | mtlr r0 | |
242 | cfi_restore(lr) | |
f360f94a VR |
243 | |
244 | L(update3return): | |
245 | #ifdef USE_AS_STPNCPY | |
246 | addi r3, r19, -1 /* stpncpy returns r19 - 1 on this path */ | |
247 | #endif | |
248 | ||
249 | L(hop2return): | |
250 | #ifndef USE_AS_STPNCPY | |
251 | mr r3, r18 /* strncpy returns the original dst saved in r18 */ | |
252 | #endif | |
f360f94a VR |
253 | ld r18, -16(r1) /* restore caller's saved register r18 */ |
254 | ld r19, -8(r1) /* restore caller's saved register r19 */ | |
f360f94a VR |
255 | blr /* return */ |
256 | ||
257 | .p2align 4 | |
258 | L(update0): /* the unrolled byte loop ended without a zero byte; continue from r19 */ | |
259 | mr r9, r19 | |
260 | ||
261 | .p2align 4 | |
262 | L(verifyByte): /* copy the last (length & 3) bytes one at a time */ | |
263 | rldicl. r8, r5, 0, 62 /* r8 = r5 & 3; cr0.eq when no bytes are left */ | |
264 | #ifdef USE_AS_STPNCPY | |
265 | mr r3, r9 /* stpncpy return value if nothing is left to copy */ | |
266 | #endif | |
267 | beq cr0, L(hop2return) | |
268 | mtctr r8 | |
269 | addi r4, r4, -1 /* pre-decrement src for the lbzu below */ | |
270 | mr r19, r9 | |
271 | b L(oneBYone) | |
272 | ||
273 | .p2align 4 | |
274 | L(proceed): | |
275 | bdz L(done) /* tail bytes exhausted without a zero byte */ | |
276 | ||
277 | L(oneBYone): | |
278 | lbzu r10, 1(r4) /* load next byte and advance src */ | |
279 | addi r19, r19, 1 | |
280 | addi r8, r8, -1 /* r8 = bytes left after this one */ | |
281 | cmpdi cr7, r10, 0 | |
282 | stb r10, -1(r19) | |
283 | bne cr7, L(proceed) | |
284 | b L(zeroFill) /* zero byte stored: pad the remaining r8 bytes */ | |
285 | ||
286 | .p2align 4 | |
287 | L(done): /* length exhausted with no padding needed */ | |
f360f94a VR |
288 | #ifdef USE_AS_STPNCPY |
289 | mr r3, r19 /* set the return value */ | |
290 | #else | |
291 | mr r3, r18 /* set the return value */ | |
292 | #endif | |
f360f94a VR |
293 | ld r18, -16(r1) /* restore caller's saved register r18 */ |
294 | ld r19, -8(r1) /* restore caller's saved register r19 */ | |
f360f94a VR |
295 | blr /* return */ |
296 | ||
297 | L(update1): /* count <= 3: skip the unrolled loop */ | |
298 | mr r0, r11 /* r0 = doubleword count */ | |
299 | mr r19, r5 /* r19 = length, copied back into r5 below */ | |
300 | ||
301 | .p2align 4 | |
302 | L(leftDwords): /* r0 = doublewords left, r19 = length left */ | |
303 | cmpdi cr7, r0, 0 | |
304 | mr r5, r19 | |
305 | bne cr7, L(dWordUnrollOFF) /* copy the remaining whole doublewords */ | |
306 | b L(byte_by_byte) /* no whole doubleword left */ | |
307 | ||
308 | .p2align 4 | |
309 | L(updtDestComputeN2ndByte): /* the zero byte was stored at 1(r19) */ | |
310 | addi r19, r19, 2 /* update dst by 2 */ | |
311 | subf r9, r19, r9 /* r9 = r9 - r19, the negated number of bytes written */ | |
312 | add r8, r9, r5 /* r8 = bytes left to zero-fill */ | |
313 | b L(zeroFill) | |
314 | ||
315 | .p2align 4 | |
316 | L(updtDestComputeN3rdByte): /* the zero byte was stored at 2(r19) */ | |
317 | addi r19, r19, 3 /* update dst by 3 */ | |
318 | subf r9, r19, r9 /* r9 = r9 - r19, the negated number of bytes written */ | |
319 | add r8, r9, r5 /* r8 = bytes left to zero-fill */ | |
320 | b L(zeroFill) | |
321 | ||
322 | .p2align 4 | |
323 | L(HopBy24): /* zero byte found in the doubleword at offset 24 */ | |
324 | addi r9, r9, 24 /* increment dst by 24 */ | |
325 | addi r4, r4, 24 /* increment src by 24 */ | |
326 | addi r5, r5, -24 /* decrement length 'n' by 24 */ | |
327 | addi r0, r11, -3 /* three doublewords were consumed from the count */ | |
328 | b L(dWordUnrollOFF) | |
329 | ||
330 | .p2align 4 | |
331 | L(update2): /* zero byte found in the doubleword at 32(r6) */ | |
332 | mr r5, r19 /* r19 = r5 - 32, set in L(dwordCopy) */ | |
333 | b L(dWordUnrollOFF) | |
334 | ||
335 | .p2align 4 | |
336 | L(HopBy40): /* zero byte found in the doubleword at 40(r6) */ | |
337 | addi r9, r7, 40 /* dst = group base + 40 */ | |
338 | addi r4, r6, 40 /* src = group base + 40 */ | |
339 | addi r5, r5, -40 /* decrement length 'n' by 40 */ | |
340 | addi r0, r11, -5 /* five doublewords were consumed from the count */ | |
341 | b L(dWordUnrollOFF) | |
342 | ||
343 | L(update3): /* zero byte found in the first doubleword; nothing was stored */ | |
344 | mr r0, r11 | |
345 | b L(dWordUnrollOFF) | |
346 | ||
347 | L(HopBy8): /* zero byte found in the doubleword at offset 8 */ | |
348 | addi r9, r3, 8 /* increment dst by 8 */ | |
349 | addi r4, r4, 8 /* increment src by 8 */ | |
350 | addi r5, r5, -8 /* decrement length 'n' by 8 */ | |
351 | addi r0, r11, -1 /* one doubleword was consumed from the count */ | |
352 | b L(dWordUnrollOFF) | |
98408b95 RS |
353 | |
354 | L(unaligned): /* at least one of src/dst is not doubleword aligned */ | |
355 | cmpdi r5, 16 /* Proceed byte by byte for 16 bytes or fewer */ | |
356 | ble L(byte_by_byte) | |
357 | rldicl r7, r3, 0, 61 /* r7 = dst & 7 */ | |
358 | rldicl r6, r4, 0, 61 /* r6 = src & 7 */ | |
359 | cmpdi r6, 0 /* Check src alignment */ | |
360 | beq L(srcaligndstunalign) | |
361 | /* src is unaligned */ | |
362 | rlwinm r10, r4, 3,26,28 /* r10 = (src & 7) * 8, the src misalignment in bits */ | |
363 | clrrdi r4, r4, 3 /* Align the addr to dw boundary */ | |
364 | ld r8, 0(r4) /* Load doubleword from memory. */ | |
365 | li r0, 0 | |
366 | /* Discard bits not part of the string */ | |
367 | #ifdef __LITTLE_ENDIAN__ | |
368 | srd r7, r8, r10 | |
369 | #else | |
370 | sld r7, r8, r10 | |
371 | #endif | |
372 | cmpb r0, r7, r0 /* Compare each byte against null */ | |
373 | /* Discard bits not part of the string */ | |
374 | #ifdef __LITTLE_ENDIAN__ | |
375 | sld r0, r0, r10 | |
376 | #else | |
377 | srd r0, r0, r10 | |
378 | #endif | |
379 | cmpdi r0, 0 | |
380 | bne L(bytebybyte) /* if it has null, copy byte by byte */ | |
381 | subfic r6, r6, 8 /* r6 = 8 - (src & 7) */ | |
382 | rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ | |
383 | rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ | |
384 | addi r3, r3, -1 /* pre-decrement dst for the stbu stores that follow */ | |
385 | ||
386 | cmpdi r12, 0 /* check dest alignment */ | |
387 | beq L(srcunaligndstalign) | |
388 | ||
389 | /* both src and dst unaligned */ | |
390 | #ifdef __LITTLE_ENDIAN__ | |
391 | sld r8, r7, r10 | |
392 | mr r11, r10 | |
393 | addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ | |
394 | #else | |
395 | srd r8, r7, r10 | |
396 | subfic r11, r10, 64 | |
397 | #endif | |
398 | /* dst alignment is greater than src alignment? */ | |
399 | cmpd cr7, r12, r10 | |
400 | ble cr7, L(dst_align_small) | |
401 | /* src alignment is less than dst */ | |
402 | ||
403 | /* Calculate the dst alignment difference */ | |
404 | subfic r7, r9, 8 /* r7 = 8 - (dst & 7) */ | |
405 | mtctr r7 | |
406 | ||
407 | /* Write until dst is aligned */ | |
408 | cmpdi r0, r7, 4 /* compares r7 with 4; the leading r0 operand is the CR field (cr0) */ | |
409 | blt L(storebyte1) /* less than 4, store byte by byte */ | |
410 | beq L(equal1) /* if its 4, store word */ | |
411 | addi r0, r7, -4 /* greater than 4, so stb and stw */ | |
412 | mtctr r0 | |
413 | L(storebyte1): | |
414 | #ifdef __LITTLE_ENDIAN__ | |
415 | addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ | |
416 | #else | |
417 | addi r11, r11, -8 | |
418 | #endif | |
419 | srd r7, r8, r11 /* move the next byte of r8 into the low byte of r7 */ | |
420 | stbu r7, 1(r3) | |
421 | addi r5, r5, -1 | |
422 | bdnz L(storebyte1) | |
423 | ||
424 | subfic r7, r9, 8 /* Check the remaining bytes */ | |
425 | cmpdi r0, r7, 4 /* compares r7 with 4 in cr0 */ | |
426 | blt L(proceed1) | |
427 | ||
428 | .align 4 | |
429 | L(equal1): /* store four bytes with a single stw */ | |
430 | #ifdef __LITTLE_ENDIAN__ | |
431 | addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ | |
432 | srd r7, r8, r11 | |
433 | #else | |
434 | subfic r11, r11, 64 | |
435 | sld r7, r8, r11 | |
436 | srdi r7, r7, 32 | |
437 | #endif | |
438 | stw r7, 1(r3) | |
439 | addi r3, r3, 4 | |
440 | addi r5, r5, -4 | |
441 | ||
442 | L(proceed1): | |
443 | mr r7, r8 | |
444 | /* calculate the Left over bytes to be written */ | |
445 | subfic r11, r10, 64 | |
446 | subfic r12, r12, 64 | |
447 | subf r12, r12, r11 /* remaining bytes on second dw */ | |
448 | subfic r10, r12, 64 /* remaining bytes on first dw */ | |
449 | subfic r9, r9, 8 | |
450 | subf r6, r9, r6 /* recalculate padding */ | |
451 | L(srcunaligndstalign): | |
452 | addi r3, r3, 1 /* undo the earlier dst pre-decrement */ | |
453 | subfic r12, r10, 64 /* remaining bytes on second dw */ | |
454 | addi r4, r4, 8 /* advance src to the next doubleword */ | |
455 | li r0,0 /* zero mask for cmpb in L(storedouble) */ | |
456 | b L(storedouble) | |
457 | ||
458 | .align 4 | |
459 | L(dst_align_small): /* taken when r12 <= r10 in the preceding compare */ | |
460 | mtctr r6 /* r6 = 8 - (src & 7): bytes left in the first src doubleword */ | |
461 | /* Write until src is aligned */ | |
462 | L(storebyte2): | |
463 | #ifdef __LITTLE_ENDIAN__ | |
464 | addi r11, r11, 8 /* Adjust byte pointer on dw */ | |
465 | #else | |
466 | addi r11, r11, -8 | |
467 | #endif | |
468 | srd r7, r8, r11 /* move the next byte of r8 into the low byte of r7 */ | |
469 | stbu r7, 1(r3) | |
470 | addi r5, r5, -1 | |
471 | bdnz L(storebyte2) | |
472 | ||
473 | addi r4, r4, 8 /* Increment src pointer */ | |
474 | addi r3, r3, 1 /* Increment dst pointer */ | |
475 | mr r9, r3 /* r9 = working dst expected by L(aligned) */ | |
476 | li r8, 0 /* r8 = zero mask expected by L(aligned) */ | |
477 | cmpd cr7, r12, r10 | |
478 | beq cr7, L(aligned) /* equal misalignment: both pointers are now doubleword aligned */ | |
479 | rldicl r6, r3, 0, 61 /* Recalculate padding */ | |
480 | mr r7, r6 | |
481 | ||
482 | /* src is aligned */ | |
483 | L(srcaligndstunalign): /* r7 = dst & 7 on entry */ | |
484 | mr r9, r3 | |
485 | mr r6, r7 | |
486 | ld r8, 0(r4) | |
487 | subfic r10, r7, 8 /* r10 = 8 - (dst & 7): bytes needed to align dst */ | |
488 | mr r7, r8 | |
489 | li r0, 0 /* Check null */ | |
490 | cmpb r0, r8, r0 | |
491 | cmpdi r0, 0 | |
492 | bne L(byte_by_byte) /* Do byte by byte if there is NULL */ | |
493 | rlwinm r12, r3, 3,26,28 /* Calculate padding */ | |
494 | addi r3, r3, -1 /* pre-decrement dst for the stbu stores that follow */ | |
495 | /* write byte by byte until aligned */ | |
496 | #ifdef __LITTLE_ENDIAN__ | |
497 | li r11, -8 | |
498 | #else | |
499 | li r11, 64 | |
500 | #endif | |
501 | mtctr r10 | |
502 | cmpdi r0, r10, 4 /* compares r10 with 4; the leading r0 operand is the CR field (cr0) */ | |
503 | blt L(storebyte) | |
504 | beq L(equal) | |
505 | addi r0, r10, -4 /* more than 4: store r10 - 4 single bytes, then one word */ | |
506 | mtctr r0 | |
507 | L(storebyte): | |
508 | #ifdef __LITTLE_ENDIAN__ | |
509 | addi r11, r11, 8 /* Adjust byte pointer on dw */ | |
510 | #else | |
511 | addi r11, r11, -8 | |
512 | #endif | |
513 | srd r7, r8, r11 /* move the next byte of r8 into the low byte of r7 */ | |
514 | stbu r7, 1(r3) | |
515 | addi r5, r5, -1 | |
516 | bdnz L(storebyte) | |
517 | ||
518 | cmpdi r0, r10, 4 /* compares r10 with 4 in cr0 */ | |
519 | blt L(align) | |
520 | ||
521 | .align 4 | |
522 | L(equal): /* store four bytes with a single stw */ | |
523 | #ifdef __LITTLE_ENDIAN__ | |
524 | addi r11, r11, 8 | |
525 | srd r7, r8, r11 | |
526 | #else | |
527 | subfic r11, r11, 64 | |
528 | sld r7, r8, r11 | |
529 | srdi r7, r7, 32 | |
530 | #endif | |
531 | stw r7, 1(r3) | |
532 | addi r5, r5, -4 | |
533 | addi r3, r3, 4 | |
534 | L(align): | |
535 | addi r3, r3, 1 /* undo the earlier dst pre-decrement */ | |
536 | addi r4, r4, 8 /* Increment src pointer */ | |
537 | subfic r10, r12, 64 | |
538 | li r0, 0 /* zero mask for cmpb in L(storedouble) */ | |
539 | /* dst addr aligned to 8 */ | |
540 | L(storedouble): /* r8 = previous src doubleword, r10/r12 = shift counts, r0 = 0 */ | |
541 | cmpdi r5, 8 | |
542 | ble L(null1) /* eight or fewer bytes left: finish byte by byte */ | |
543 | ld r7, 0(r4) /* load next dw */ | |
544 | cmpb r0, r7, r0 | |
545 | cmpdi r0, 0 /* check for null on each new dw */ | |
546 | bne L(null) | |
547 | #ifdef __LITTLE_ENDIAN__ | |
548 | srd r9, r8, r10 /* bytes from first dw */ | |
549 | sld r11, r7, r12 /* bytes from second dw */ | |
550 | #else | |
551 | sld r9, r8, r10 | |
552 | srd r11, r7, r12 | |
553 | #endif | |
554 | or r11, r9, r11 /* make as a single dw */ | |
555 | std r11, 0(r3) /* store as std on aligned addr */ | |
556 | mr r8, r7 /* still few bytes left to be written */ | |
557 | addi r3, r3, 8 /* increment dst addr */ | |
558 | addi r4, r4, 8 /* increment src addr */ | |
559 | addi r5, r5, -8 | |
560 | b L(storedouble) /* Loop until NULL */ | |
561 | ||
562 | .align 4 | |
563 | ||
564 | /* We've hit the end of the string. Do the rest byte-by-byte. */ | |
565 | L(null): /* r8 = previous doubleword, r7 = doubleword containing the zero byte */ | |
566 | addi r3, r3, -1 /* pre-decrement dst for the stbu stores that follow */ | |
567 | mr r10, r12 | |
568 | mtctr r6 | |
569 | #ifdef __LITTLE_ENDIAN__ | |
570 | subfic r10, r10, 64 | |
571 | addi r10, r10, -8 | |
572 | #endif | |
573 | cmpdi r0, r5, 4 /* compares r5 with 4; the leading r0 operand is the CR field (cr0) */ | |
574 | blt L(loop) | |
575 | cmpdi r0, r6, 4 /* compares r6 with 4 in cr0 */ | |
576 | blt L(loop) | |
577 | ||
578 | /* we can still use stw if leftover >= 4 */ | |
579 | #ifdef __LITTLE_ENDIAN__ | |
580 | addi r10, r10, 8 | |
581 | srd r11, r8, r10 | |
582 | #else | |
583 | subfic r10, r10, 64 | |
584 | sld r11, r8, r10 | |
585 | srdi r11, r11, 32 | |
586 | #endif | |
587 | stw r11, 1(r3) | |
588 | addi r5, r5, -4 | |
589 | addi r3, r3, 4 | |
590 | cmpdi r0, r5, 0 /* compares r5 with 0 in cr0 */ | |
591 | beq L(g1) /* length exhausted */ | |
592 | cmpdi r0, r6, 4 /* compares r6 with 4 in cr0 */ | |
593 | beq L(bytebybyte1) | |
594 | addi r10, r10, 32 | |
595 | #ifdef __LITTLE_ENDIAN__ | |
596 | addi r10, r10, -8 | |
597 | #else | |
598 | subfic r10, r10, 64 | |
599 | #endif | |
600 | addi r0, r6, -4 | |
601 | mtctr r0 | |
602 | /* remaining byte by byte part of first dw */ | |
603 | L(loop): | |
604 | #ifdef __LITTLE_ENDIAN__ | |
605 | addi r10, r10, 8 | |
606 | #else | |
607 | addi r10, r10, -8 | |
608 | #endif | |
609 | srd r0, r8, r10 /* move the next byte of r8 into the low byte of r0 */ | |
610 | stbu r0, 1(r3) | |
611 | addi r5, r5, -1 | |
612 | cmpdi r0, r5, 0 /* compares r5 with 0 in cr0 */ | |
613 | beq L(g1) /* length exhausted */ | |
614 | bdnz L(loop) | |
615 | L(bytebybyte1): | |
616 | addi r3, r3, 1 /* undo the dst pre-decrement before entering L(bytebybyte) */ | |
617 | /* remaining byte by byte part of second dw */ | |
618 | L(bytebybyte): /* store the bytes of r7 one at a time until a zero byte is stored or r5 reaches 0 */ | |
619 | addi r3, r3, -8 /* bias dst so the first stbu at 8(r3) hits the current dst byte */ | |
620 | addi r4, r4, -1 | |
621 | ||
622 | #ifdef __LITTLE_ENDIAN__ | |
623 | extrdi. r0, r7, 8, 56 /* test the least significant byte of r7; sets cr0 */ | |
624 | stbu r7, 8(r3) /* stb writes only the low byte of r7 */ | |
625 | addi r5, r5, -1 | |
626 | beq L(g2) /* the byte just stored was zero */ | |
627 | cmpdi r5, 0 | |
628 | beq L(g1) /* length exhausted */ | |
629 | extrdi. r0, r7, 8, 48 | |
630 | stbu r0, 1(r3) | |
631 | addi r5, r5, -1 | |
632 | beq L(g2) | |
633 | cmpdi r5, 0 | |
634 | beq L(g1) | |
635 | extrdi. r0, r7, 8, 40 | |
636 | stbu r0, 1(r3) | |
637 | addi r5, r5, -1 | |
638 | beq L(g2) | |
639 | cmpdi r5, 0 | |
640 | beq L(g1) | |
641 | extrdi. r0, r7, 8, 32 | |
642 | stbu r0, 1(r3) | |
643 | addi r5, r5, -1 | |
644 | beq L(g2) | |
645 | cmpdi r5, 0 | |
646 | beq L(g1) | |
647 | extrdi. r0, r7, 8, 24 | |
648 | stbu r0, 1(r3) | |
649 | addi r5, r5, -1 | |
650 | beq L(g2) | |
651 | cmpdi r5, 0 | |
652 | beq L(g1) | |
653 | extrdi. r0, r7, 8, 16 | |
654 | stbu r0, 1(r3) | |
655 | addi r5, r5, -1 | |
656 | beq L(g2) | |
657 | cmpdi r5, 0 | |
658 | beq L(g1) | |
659 | extrdi. r0, r7, 8, 8 | |
660 | stbu r0, 1(r3) | |
661 | addi r5, r5, -1 | |
662 | beq L(g2) | |
663 | cmpdi r5, 0 | |
664 | beq L(g1) | |
665 | extrdi r0, r7, 8, 0 /* last byte: stored without testing it */ | |
666 | stbu r0, 1(r3) | |
667 | addi r5, r5, -1 | |
668 | b L(g2) | |
669 | #else | |
670 | extrdi. r0, r7, 8, 0 /* test the most significant byte of r7; sets cr0 */ | |
671 | stbu r0, 8(r3) | |
672 | addi r5, r5, -1 | |
673 | beq L(g2) /* the byte just stored was zero */ | |
674 | cmpdi r5, 0 | |
675 | beq L(g1) /* length exhausted */ | |
676 | extrdi. r0, r7, 8, 8 | |
677 | stbu r0, 1(r3) | |
678 | addi r5, r5, -1 | |
679 | beq L(g2) | |
680 | cmpdi r5, 0 | |
681 | beq L(g1) | |
682 | extrdi. r0, r7, 8, 16 | |
683 | stbu r0, 1(r3) | |
684 | addi r5, r5, -1 | |
685 | beq L(g2) | |
686 | cmpdi r5, 0 | |
687 | beq L(g1) | |
688 | extrdi. r0, r7, 8, 24 | |
689 | stbu r0, 1(r3) | |
690 | addi r5, r5, -1 | |
691 | beq L(g2) | |
692 | cmpdi r5, 0 | |
693 | beq L(g1) | |
694 | extrdi. r0, r7, 8, 32 | |
695 | stbu r0, 1(r3) | |
696 | addi r5, r5, -1 | |
697 | beq L(g2) | |
698 | cmpdi r5, 0 | |
699 | beq L(g1) | |
700 | extrdi. r0, r7, 8, 40 | |
701 | stbu r0, 1(r3) | |
702 | addi r5, r5, -1 | |
703 | beq L(g2) | |
704 | cmpdi r5, 0 | |
705 | beq L(g1) | |
706 | extrdi. r0, r7, 8, 48 | |
707 | stbu r0, 1(r3) | |
708 | addi r5, r5, -1 | |
709 | beq L(g2) | |
710 | cmpdi r5, 0 | |
711 | beq L(g1) | |
712 | stbu r7, 1(r3) /* last byte: the low byte of r7, stored without testing it */ | |
713 | addi r5, r5, -1 | |
714 | b L(g2) | |
715 | #endif | |
716 | L(g1): /* reached when the remaining length r5 hit zero */ | |
717 | #ifdef USE_AS_STPNCPY | |
718 | addi r3, r3, 1 /* extra +1 for stpncpy; L(update3return) later subtracts 1 from r19 */ | |
719 | #endif | |
720 | L(g2): /* hand over to L(zeroFill): r19 = next dst byte, r8 = bytes left */ | |
721 | addi r3, r3, 1 /* step past the last byte stored with stbu */ | |
722 | mr r19, r3 | |
723 | mr r8, r5 | |
724 | b L(zeroFill) | |
725 | L(null1): /* eight or fewer bytes left in L(storedouble) */ | |
726 | mr r9, r3 /* r9 = working dst expected by L(byte_by_byte) */ | |
727 | subf r4, r6, r4 /* r4 = r4 - r6: step src back by r6 bytes */ | |
728 | b L(byte_by_byte) | |
f360f94a VR |
729 | END(FUNC_NAME) | |
730 | #ifndef USE_AS_STPNCPY /* only the strncpy build emits the hidden definition */ | |
731 | libc_hidden_builtin_def (strncpy) | |
732 | #endif |