]>
Commit | Line | Data |
---|---|---|
09dec6c3 | 1 | /* Optimized memset for PowerPC405,440,464 (32-byte cacheline). |
04277e02 | 2 | Copyright (C) 2012-2019 Free Software Foundation, Inc. |
a72cc2b2 LM |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff PE |
16 | License along with the GNU C Library. If not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
a72cc2b2 LM |
18 | |
19 | #include <sysdep.h> | |
a72cc2b2 LM |
20 | |
21 | /* memset | |
22 | ||
23 | r3:destination address and return address | |
24 | r4:source integer to copy | |
25 | r5:byte count | |
26 | r11:source byte replicated into all 32 bits of the register | |
27 | r12:saved copy of the destination address (the return value) | |
28 | ||
29 | Save the destination address (the return value) in r12 | |
30 | If destination is unaligned and count is greater than 255 bytes | |
31 | set 0-3 bytes to make destination aligned | |
32 | If count is greater than 255 bytes and setting zero to memory | |
33 | use dcbz to set memory when we can | |
34 | otherwise do the following | |
35 | If 16 or more bytes to set we use the 4-word (16-byte) store loop. | |
36 | Finally we set 0-15 extra bytes with string store. */ | |
37 | ||
b5510883 | 38 | EALIGN (memset, 5, 0) /* memset entry: r3 = destination, r4 = fill value, r5 = byte count; returns the original destination in r3. Uses r6-r12, CTR, XER and CR0 as scratch. */ |
a72cc2b2 LM |
39 | rlwinm r11,r4,0,24,31 /* r11 = r4 & 0xFF (the fill byte) */ |
40 | rlwimi r11,r4,8,16,23 /* insert the same byte into bits 8-15: low halfword = byte repeated twice */ | |
41 | rlwimi r11,r11,16,0,15 /* copy the low halfword into the high halfword: r11 = fill byte in all four bytes */ | |
42 | addi r12,r3,0 /* r12 = original destination, restored into r3 before returning */ | |
43 | cmpwi r5,0x00FF /* NOTE(review): signed compare, so a count with the top bit set also takes the branch below */ | |
44 | ble L(preword8_count_loop) /* count <= 255: skip alignment and dcbz, go straight to the 16-byte store loop */ | |
45 | cmpwi r4,0x00 /* NOTE(review): tests the whole of r4, not just the fill byte; a non-zero r4 whose low byte is 0 takes the non-dcbz path, which still stores the right pattern from r11 */ | |
46 | beq L(use_dcbz) /* zero fill of more than 255 bytes: use the cache-block-zero path */ | |
47 | neg r6,r3 | |
48 | clrlwi. r6,r6,30 /* r6 = (-dest) & 3 = bytes needed to reach 4-byte alignment; CR0 set from the result */ | |
49 | beq L(preword8_count_loop) /* already word aligned */ | |
50 | addi r8,0,1 /* r8 = 1, the per-byte decrement used by subf. below */ | |
51 | mtctr r6 /* CTR = number of leading bytes (1-3) */ | |
52 | subi r3,r3,1 /* pre-bias r3 because stbu stores at r3+1 and then updates r3 */ | |
53 | ||
54 | L(unaligned_bytecopy_loop): /* store single bytes until the destination is word aligned */ | |
55 | stbu r11,0x1(r3) /* store one byte at r3+1, r3 += 1 */ | |
56 | subf. r5,r8,r5 /* r5 = r5 - 1, CR0 set from the result */ | |
57 | beq L(end_memset) /* leave if the count reached zero */ | |
58 | bdnz L(unaligned_bytecopy_loop) | |
59 | addi r3,r3,1 /* undo the pre-bias: r3 = next byte to write */ | |
60 | ||
61 | L(preword8_count_loop): /* set up the 16-bytes-per-iteration store loop */ | |
62 | srwi. r6,r5,4 /* r6 = count / 16 = number of 16-byte chunks */ | |
63 | beq L(preword2_count_loop) /* fewer than 16 bytes: only the tail remains */ | |
64 | mtctr r6 | |
65 | addi r3,r3,-4 /* pre-bias r3 because stwu stores at r3+4 and then updates r3 */ | |
66 | mr r8,r11 /* r8-r10 = fill pattern (r11 already holds it) */ | |
67 | mr r9,r11 | |
68 | mr r10,r11 | |
69 | ||
70 | L(word8_count_loop_no_dcbt): /* each iteration stores four words (16 bytes) */ | |
71 | stwu r8,4(r3) | |
72 | stwu r9,4(r3) | |
73 | subi r5,r5,0x10 /* count -= 16 */ | |
74 | stwu r10,4(r3) | |
75 | stwu r11,4(r3) | |
76 | bdnz L(word8_count_loop_no_dcbt) | |
77 | addi r3,r3,4 /* undo the pre-bias: r3 = next byte to write */ | |
78 | ||
79 | L(preword2_count_loop): /* tail: store the remaining 0-15 bytes */ | |
80 | clrlwi. r7,r5,28 /* r7 = count & 15 */ | |
81 | beq L(end_memset) /* nothing left */ | |
82 | mr r8,r11 /* stswx stores from consecutive registers starting at r8, so r8-r11 must all hold the pattern */ | |
83 | mr r9,r11 | |
84 | mr r10,r11 | |
85 | mtxer r7 /* stswx takes its byte count from XER */ | |
86 | stswx r8,0,r3 /* store r7 bytes from r8.. at address r3 */ | |
87 | ||
88 | L(end_memset): /* common exit */ | |
89 | addi r3,r12,0 /* return value = original destination */ | |
90 | blr | |
91 | ||
92 | L(use_dcbz): /* zero-fill path, reached only with r4 == 0 (so r11 == 0) and count > 255 */ | |
93 | neg r6,r3 /* r6 = -dest; reused at skip_string_loop for the 32-byte alignment test */ | |
94 | clrlwi. r7,r6,28 /* r7 = (-dest) & 15 = bytes needed to reach 16-byte alignment */ | |
95 | beq L(skip_string_loop) /* already 16-byte aligned */ | |
96 | mr r8,r11 /* r8-r11 = pattern for the string store */ | |
97 | mr r9,r11 | |
98 | mr r10,r11 | |
99 | subf r5,r7,r5 /* count -= r7 */ | |
100 | mtxer r7 /* byte count for stswx */ | |
101 | stswx r8,0,r3 /* store the 1-15 leading bytes */ | |
102 | add r3,r3,r7 /* r3 is now 16-byte aligned */ | |
103 | ||
104 | L(skip_string_loop): /* decide whether one more 16-byte chunk is needed to reach 32-byte alignment */ |
09dec6c3 | 105 | clrlwi r8,r6,27 /* r8 = (-original dest) & 31 */ |
a72cc2b2 LM |
106 | srwi. r8,r8,4 /* r8 = 1 if a further 16 bytes are needed, else 0 */ |
107 | beq L(dcbz_pre_loop) /* already 32-byte aligned */ | |
108 | mtctr r8 /* CTR = 1 */ | |
109 | ||
110 | L(word_loop): /* store 16 bytes with plain word stores to reach 32-byte alignment */ | |
111 | stw r11,0(r3) | |
112 | subi r5,r5,0x10 /* count -= 16 */ | |
113 | stw r11,4(r3) | |
114 | stw r11,8(r3) | |
115 | stw r11,12(r3) | |
116 | addi r3,r3,0x10 /* dest += 16 */ | |
117 | bdnz L(word_loop) | |
118 | ||
119 | L(dcbz_pre_loop): /* r3 is 32-byte aligned here */ | |
09dec6c3 | 120 | srwi r6,r5,5 /* r6 = count / 32 = number of whole 32-byte blocks */ |
a72cc2b2 LM |
121 | mtctr r6 /* no zero check: count was > 255 on entry to this path and at most 31 bytes have been consumed, so r6 >= 7 */ |
122 | addi r7,0,0 /* r7 = 0, the index operand for dcbz */ | |
123 | ||
124 | L(dcbz_loop): /* zero one block per iteration, advancing in 32-byte steps */ | |
125 | dcbz r3,r7 /* zero the cache block at address r3 + r7 (= r3) */ | |
09dec6c3 RA |
126 | addi r3,r3,0x20 /* dest += 32 */ |
127 | subi r5,r5,0x20 /* count -= 32 */ | |
a72cc2b2 LM |
128 | bdnz L(dcbz_loop) |
129 | srwi. r6,r5,4 /* r6 = remaining count / 16 */ | |
130 | beq L(postword2_count_loop) /* fewer than 16 bytes left */ | |
131 | mtctr r6 | |
132 | ||
133 | L(postword8_count_loop): /* store the remaining 16-byte chunks after the dcbz loop */ | |
134 | stw r11,0(r3) | |
135 | subi r5,r5,0x10 /* count -= 16 */ | |
136 | stw r11,4(r3) | |
137 | stw r11,8(r3) | |
138 | stw r11,12(r3) | |
139 | addi r3,r3,0x10 /* dest += 16 */ | |
140 | bdnz L(postword8_count_loop) | |
141 | ||
142 | L(postword2_count_loop): /* tail of the dcbz path: remaining 0-15 bytes */ | |
143 | clrlwi. r7,r5,28 /* r7 = count & 15 */ | |
144 | beq L(end_memset) /* nothing left */ | |
145 | mr r8,r11 /* r8-r11 = pattern for the string store */ | |
146 | mr r9,r11 | |
147 | mr r10,r11 | |
148 | mtxer r7 /* byte count for stswx */ | |
149 | stswx r8,0,r3 /* store r7 bytes at address r3 */ | |
150 | b L(end_memset) | |
b5510883 | 151 | END (memset) |
a72cc2b2 | 152 | libc_hidden_builtin_def (memset) |