]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc32/405/memset.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc32 / 405 / memset.S
1 /* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
2 Copyright (C) 2012-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20
/* memset

   r3:  destination address and return value
   r4:  source integer to copy
   r5:  byte count
   r11: source integer replicated into all 32 bits of the register
   r12: saved copy of the destination address (restored into r3 on return)

   Save the destination address in r12.
   If the destination is unaligned and the count is greater than 255 bytes,
   set 0-3 bytes to make the destination word aligned.
   If the count is greater than 255 bytes and we are setting memory to zero,
   use dcbz to clear memory when we can;
   otherwise do the following:
   If 16 or more bytes remain, use the 16-byte (4-word) store loop.
   Finally, set the 0-15 remaining bytes with a string store.  */
37
EALIGN (memset, 5, 0)
	/* Build the fill word: copy the low byte of r4 into every byte
	   of r11.  */
	clrlwi	r11,r4,24		/* r11 = r4 & 0xff  */
	insrwi	r11,r4,8,16		/* Duplicate it into the next byte.  */
	insrwi	r11,r11,16,0		/* Duplicate the low half into the high half.  */
	mr	r12,r3			/* Remember the destination; it is the return value.  */

	/* Counts of 255 bytes or fewer (signed compare) skip all alignment
	   work and go straight to the 16-byte store loop.  */
	cmpwi	r5,255
	ble	L(chunk16_setup)

	/* Long fill: a zero value in r4 takes the dcbz path.  */
	cmpwi	r4,0
	beq	L(zero_fill)

	/* Long non-zero fill: r6 = bytes needed to reach a word boundary.  */
	neg	r6,r3
	andi.	r6,r6,3
	beq	L(chunk16_setup)
	mtctr	r6
	li	r8,1
	subi	r3,r3,1			/* Bias the pointer for the pre-incrementing stbu.  */

L(align_byte_loop):
	stbu	r11,1(r3)
	sub.	r5,r5,r8		/* One byte fewer to write.  */
	beq	L(done)
	bdnz	L(align_byte_loop)
	addi	r3,r3,1			/* Undo the stbu bias.  */

L(chunk16_setup):
	/* r6 = number of complete 16-byte chunks.  */
	srwi.	r6,r5,4
	beq	L(tail_bytes)
	mtctr	r6
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	subi	r3,r3,4			/* Bias the pointer for the pre-incrementing stwu.  */

L(chunk16_loop):
	subi	r5,r5,16
	stwu	r8,4(r3)
	stwu	r9,4(r3)
	stwu	r10,4(r3)
	stwu	r11,4(r3)
	bdnz	L(chunk16_loop)
	addi	r3,r3,4			/* Undo the stwu bias.  */

L(tail_bytes):
	/* r7 = 0-15 leftover bytes, written by one string store that takes
	   its data from r8-r11 and its length from XER.  */
	andi.	r7,r5,15
	beq	L(done)
	mtxer	r7
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	stswx	r8,0,r3

L(done):
	mr	r3,r12			/* Return the original destination.  */
	blr

L(zero_fill):
	/* Zero fill of more than 255 bytes.  First write the 0-15 bytes
	   needed to reach a 16-byte boundary with a string store.  */
	neg	r6,r3
	andi.	r7,r6,15
	beq	L(zero_align32)
	sub	r5,r5,r7
	mtxer	r7
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	stswx	r8,0,r3
	add	r3,r3,r7

L(zero_align32):
	/* r8 = ((-dest) & 31) >> 4: the number (0 or 1) of 16-byte chunks
	   still needed to reach a 32-byte boundary.  */
	clrlwi	r8,r6,27
	srwi.	r8,r8,4
	beq	L(zero_lines_setup)
	mtctr	r8

L(zero_align32_loop):
	subi	r5,r5,16
	stw	r11,0(r3)
	stw	r11,4(r3)
	stw	r11,8(r3)
	stw	r11,12(r3)
	addi	r3,r3,16
	bdnz	L(zero_align32_loop)

L(zero_lines_setup):
	/* r6 = number of 32-byte blocks to clear with dcbz.  */
	srwi	r6,r5,5
	mtctr	r6
	li	r7,0			/* Zero index register for dcbz.  */

L(zero_lines_loop):
	dcbz	r3,r7
	addi	r3,r3,32
	subi	r5,r5,32
	bdnz	L(zero_lines_loop)

	/* Fewer than 32 bytes remain: store a 16-byte chunk if there is
	   one, then the last 0-15 bytes.  */
	srwi.	r6,r5,4
	beq	L(zero_tail_bytes)
	mtctr	r6

L(zero_chunk16_loop):
	subi	r5,r5,16
	stw	r11,0(r3)
	stw	r11,4(r3)
	stw	r11,8(r3)
	stw	r11,12(r3)
	addi	r3,r3,16
	bdnz	L(zero_chunk16_loop)

L(zero_tail_bytes):
	andi.	r7,r5,15
	beq	L(done)
	mtxer	r7
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	stswx	r8,0,r3
	b	L(done)
END (memset)
libc_hidden_builtin_def (memset)