]>
Commit | Line | Data |
---|---|---|
a72cc2b2 | 1 | /* Optimized memcpy implementation for PowerPC476. |
f7a9f785 | 2 | Copyright (C) 2010-2016 Free Software Foundation, Inc. |
a72cc2b2 LM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff PE |
16 | License along with the GNU C Library. If not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
a72cc2b2 LM |
18 | |
19 | #include <sysdep.h> | |
a72cc2b2 LM |
20 | |
21 | /* memcpy | |
22 | ||
23 | r0:return value (saved copy of the destination address) | |
24 | r3:destination address | |
25 | r4:source address | |
26 | r5:byte count | |
27 | ||
28 | Save the destination address (the return value) in r0. | |
29 | If destination and source are both unaligned and copy count is greater than 256 | |
30 | then copy 1-3 bytes to make the source word aligned, | |
31 | copy 32 bytes at a time, and finally copy the remaining 0-31 bytes. | |
32 | Otherwise copy the trailing 0-15 bytes, then 16 byte chunks from the end backwards. */ | |
33 | ||
b5510883 | 34 | EALIGN (memcpy, 5, 0) |
a72cc2b2 LM |
35 | /* Use the forward 32 byte loop only if more than 256 bytes are to be copied and |
36 | neither destination nor source is word aligned; else go to L(string_count_loop) */ | |
37 | cmpwi r5,0x0100 /* NOTE(review): signed compare; a count with the top bit set also takes the ble below */ | |
38 | addi r0,r3,0 /* r0 = original destination, moved back into r3 before returning */ | |
39 | ble L(string_count_loop) | |
40 | neg r6,r3 | |
41 | clrlwi. r6,r6,30 /* r6 = (-destination) & 3; zero means destination is word aligned */ | |
42 | beq L(string_count_loop) | |
43 | neg r6,r4 | |
44 | clrlwi. r6,r6,30 /* r6 = (-source) & 3; zero means source is word aligned */ | |
45 | beq L(string_count_loop) | |
46 | mtctr r6 /* CTR = 1-3 bytes needed to word align the source */ | |
47 | subf r5,r6,r5 /* r5 = bytes left after the byte loop */ | |
48 | ||
49 | L(unaligned_bytecopy_loop): /* Align source by copying 1-3 bytes */ | |
50 | lbz r8,0x0(r4) | |
51 | addi r4,r4,1 | |
52 | stb r8,0x0(r3) | |
53 | addi r3,r3,1 | |
54 | bdnz L(unaligned_bytecopy_loop) | |
55 | srwi. r7,r5,5 /* r7 = number of 32 byte chunks */ | |
56 | beq L(preword2_count_loop) | |
57 | mtctr r7 | |
58 | ||
59 | L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time, moving forward */ | |
60 | lwz r6,0(r4) | |
61 | lwz r7,4(r4) | |
62 | lwz r8,8(r4) | |
63 | lwz r9,12(r4) | |
64 | subi r5,r5,0x20 /* keep r5 equal to the bytes still to copy */ | |
65 | stw r6,0(r3) | |
66 | stw r7,4(r3) | |
67 | stw r8,8(r3) | |
68 | stw r9,12(r3) | |
69 | lwz r6,16(r4) | |
70 | lwz r7,20(r4) | |
71 | lwz r8,24(r4) | |
72 | lwz r9,28(r4) | |
73 | addi r4,r4,0x20 | |
74 | stw r6,16(r3) | |
75 | stw r7,20(r3) | |
76 | stw r8,24(r3) | |
77 | stw r9,28(r3) | |
78 | addi r3,r3,0x20 | |
79 | bdnz L(word8_count_loop_no_dcbt) | |
80 | ||
81 | L(preword2_count_loop): /* Copy remaining 0-31 bytes */ | |
82 | clrlwi. r12,r5,27 /* r12 = r5 & 31 */ | |
83 | beq L(end_memcpy) | |
84 | mtxer r12 /* lswx/stswx take their byte count from XER */ | |
85 | lswx r5,0,r4 /* load the tail into consecutive registers starting at r5 */ | |
86 | stswx r5,0,r3 | |
87 | mr r3,r0 /* return the original destination */ | |
88 | blr | |
89 | ||
90 | L(string_count_loop): /* Copy the trailing 0-15 bytes (count mod 16) */ | |
91 | clrlwi. r12,r5,28 /* r12 = r5 & 15 */ | |
92 | add r3,r3,r5 /* r3 = one past the end of the destination */ | |
93 | add r4,r4,r5 /* r4 = one past the end of the source */ | |
94 | beq L(pre_string_copy) | |
95 | mtxer r12 | |
96 | subf r4,r12,r4 /* step back to the start of the r12 byte tail */ | |
97 | subf r3,r12,r3 | |
98 | lswx r6,0,r4 | |
99 | stswx r6,0,r3 | |
100 | ||
382466e0 | 101 | L(pre_string_copy): /* Check how many 16 byte chunks to copy */ |
a72cc2b2 LM |
102 | srwi. r7,r5,4 |
103 | beq L(end_memcpy) | |
104 | mtctr r7 | |
105 | ||
106 | L(word4_count_loop_no_dcbt): /* Copy 16 bytes at a time, backwards, two chunks per pass */ | |
107 | lwz r6,-4(r4) | |
108 | lwz r7,-8(r4) | |
109 | lwz r8,-12(r4) | |
110 | lwzu r9,-16(r4) /* update form: also moves r4 back by 16 */ | |
111 | stw r6,-4(r3) | |
112 | stw r7,-8(r3) | |
113 | stw r8,-12(r3) | |
114 | stwu r9,-16(r3) /* update form: also moves r3 back by 16 */ | |
115 | bdz L(end_memcpy) /* leave after the first half when no chunks remain */ | |
116 | lwz r6,-4(r4) | |
117 | lwz r7,-8(r4) | |
118 | lwz r8,-12(r4) | |
119 | lwzu r9,-16(r4) | |
120 | stw r6,-4(r3) | |
121 | stw r7,-8(r3) | |
122 | stw r8,-12(r3) | |
123 | stwu r9,-16(r3) | |
124 | bdnz L(word4_count_loop_no_dcbt) | |
125 | ||
126 | L(end_memcpy): | |
127 | mr r3,r0 /* return the original destination */ | |
128 | blr | |
b5510883 | 129 | END (memcpy) |
a72cc2b2 | 130 | libc_hidden_builtin_def (memcpy) |