1 /* memset/bzero with unaligned store and rep stosb
2 Copyright (C) 2016-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* memset is implemented as:
20 1. Use overlapping store to avoid branch.
21 2. If size is less than VEC, use integer register stores.
22 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
23 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
24    5. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
25 4 VEC stores and store 4 * VEC at a time until done. */
29 #ifndef MEMSET_CHK_SYMBOL
30 # define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
33 #ifndef WMEMSET_CHK_SYMBOL
34 # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
47 # define VZEROUPPER vzeroupper
53 #ifndef VZEROUPPER_SHORT_RETURN
55 # define VZEROUPPER_SHORT_RETURN vzeroupper
57 # define VZEROUPPER_SHORT_RETURN rep
70 # error SECTION is not defined!
73 .section SECTION(.text),"ax",@progbits
74 #if VEC_SIZE == 16 && IS_IN (libc)
/* Legacy bzero(dst, n) entry: memset with an implied zero byte.
   NOTE(review): the ENTRY (__bzero) line (original line 75) is elided
   from this view -- confirm against the full file.  The visible body
   only shuffles the bzero(dst, n) arguments into memset's register
   convention before joining the common path.  */
76 	mov	%RDI_LP, %RAX_LP /* Set return value.  */
77 	mov	%RSI_LP, %RDX_LP /* Set n.  */
/* NOTE(review): the instruction broadcasting zero into VEC(0)
   (original line 78) appears elided -- confirm.  */
79 	jmp	L(entry_from_bzero)
/* END (__bzero) (original line 80) is elided here; the weak alias
   exposes the public bzero name.  */
81 weak_alias (__bzero, bzero)
86 ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
/* Fortified wmemset: jump to __chk_fail on buffer overflow.
   NOTE(review): the cmp that sets the flags for this jb (original
   line 87) is elided from this view -- presumably it compares the
   destination object size against the requested length; confirm
   against the full file.  jb is an unsigned below test.  */
88 	jb	HIDDEN_JUMPTARGET (__chk_fail)
89 END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
92 ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
/* wmemset(dst, wc, n): broadcast the 32-bit wide char in %esi into
   VEC(0) and set the return value (dst, from %rdi) in one macro,
   then join the common memset path.
   NOTE(review): the instruction converting the wide-char count to a
   byte count (original line 93, between these two lines) is elided
   from this view -- confirm against the full file.  */
94 	WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
95 	jmp	L(entry_from_bzero)
96 END (WMEMSET_SYMBOL (__wmemset, unaligned))
99 #if defined SHARED && IS_IN (libc)
100 ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
/* Fortified memset: jump to __chk_fail on buffer overflow.
   NOTE(review): the flag-setting cmp for this jb (original line 101)
   is elided from this view -- confirm against the full file.  On
   success execution falls through into the unaligned memset entry
   below.  */
102 	jb	HIDDEN_JUMPTARGET (__chk_fail)
103 END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
106 ENTRY (MEMSET_SYMBOL (__memset, unaligned))
/* memset(dst, c, n) without ERMS: broadcast the fill byte in %esi
   into VEC(0) and set the return value (dst) in one macro.  */
107 	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
/* NOTE(review): the instruction doing the upper-32-bit clear this
   comment refers to (original line 110) is elided from this view --
   confirm against the full file.  */
109 	/* Clear the upper 32 bits.  */
/* Dispatch on size; the taken branch for > 2 * VEC_SIZE (original
   line 116, ja) is elided from this view -- confirm.  */
115 	cmpq	$(VEC_SIZE * 2), %rdx
/* Overlapping-store trick: one store at the end (dst + n - VEC) and
   one at the start cover every size in [VEC, 2*VEC] branch-free --
   the two stores simply overlap for sizes below 2*VEC.  */
117 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
118 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
119 	VMOVU	%VEC(0), (%rdi)
/* When built multiarch inside libc, this variant ends here and the
   ERMS variants below are provided separately.  */
122 #if defined USE_MULTIARCH && IS_IN (libc)
123 END (MEMSET_SYMBOL (__memset, unaligned))
126 ENTRY (__memset_chk_erms)
/* Fortified wrapper for the pure rep-stosb benchmark variant below.
   NOTE(review): the flag-setting cmp for this jb (original line 127)
   is elided from this view -- confirm against the full file.  */
128 	jb	HIDDEN_JUMPTARGET (__chk_fail)
129 END (__memset_chk_erms)
131 /* Only used to measure performance of REP STOSB.  */
132 ENTRY (__memset_erms)
133 	/* Skip zero length.  */
134 	test	%RDX_LP, %RDX_LP
/* NOTE(review): the conditional jump taken when n == 0 (original
   line 135) and the actual mov/rep stosb body of this function
   (original lines ~145-154) are elided from this view -- confirm
   against the full file.  Only the symbol plumbing is visible
   here.  */
139 	/* Provide a hidden symbol to debugger.  */
140 	.hidden	MEMSET_SYMBOL (__memset, erms)
141 ENTRY (MEMSET_SYMBOL (__memset, erms))
/* Per the SysV ABI expectation, the AVX upper state must be cleared
   before executing legacy string ops / returning to SSE code.  */
144 	/* Issue vzeroupper before rep stosb.  */
155 END (MEMSET_SYMBOL (__memset, erms))
158 # if defined SHARED && IS_IN (libc)
159 ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
/* Fortified entry for the unaligned_erms variant; jumps to
   __chk_fail on overflow, otherwise falls through into the variant
   below.  NOTE(review): the flag-setting cmp for this jb (original
   line 160) is elided from this view -- confirm against the full
   file.  */
161 	jb	HIDDEN_JUMPTARGET (__chk_fail)
162 END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
165 ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
/* memset(dst, c, n) with an ERMS (rep stosb) path for large sizes.
   Broadcast the fill byte in %esi into VEC(0) and set the return
   value (dst) in one macro.  */
166 	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
/* NOTE(review): the instruction performing this upper-32-bit clear
   (original line 169) is elided from this view -- confirm.  */
168 	/* Clear the upper 32 bits.  */
/* Size dispatch.  NOTE(review): the branch taken for n < VEC_SIZE
   (original line 172, presumably jb to the small-size tail below) is
   elided from this view -- confirm against the full file.  */
171 	cmp	$VEC_SIZE, %RDX_LP
173 	cmp	$(VEC_SIZE * 2), %RDX_LP
174 	ja	L(stosb_more_2x_vec)
/* Overlapping-store trick: end store + start store cover every size
   in [VEC, 2*VEC] with no further branching.  */
175 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
176 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
177 	VMOVU	%VEC(0), (%rdi)
/* NOTE(review): the return sequence after these stores (original
   lines ~178-180) is elided from this view -- confirm.  */
181 L(stosb_more_2x_vec):
/* Runtime-tuned cutover: above __x86_rep_stosb_threshold, rep stosb
   wins; the branch consuming these flags (original line 183) is
   elided from this view -- confirm.  */
182 	cmp	__x86_rep_stosb_threshold(%rip), %RDX_LP
/* Sizes in (2*VEC, 4*VEC]: four unaligned stores -- two from the
   start, two mirrored from the end -- overlap as needed to cover the
   whole range branch-free.  The ja consuming this cmpq (original
   line 187) is elided from this view -- confirm.  */
186 	cmpq	$(VEC_SIZE * 4), %rdx
188 	VMOVU	%VEC(0), (%rdi)
189 	VMOVU	%VEC(0), VEC_SIZE(%rdi)
190 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
191 	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
/* Sizes > 4*VEC: unconditionally store 4 VECs at the head and 4 at
   the tail with unaligned stores, then run an aligned 4*VEC-per-
   iteration loop over the middle.  %rcx = dst rounded UP to the next
   4*VEC boundary (lea dst+4*VEC, then mask low bits) -- the head
   stores above already covered everything before that boundary.  */
197 	leaq	(VEC_SIZE * 4)(%rdi), %rcx
198 	VMOVU	%VEC(0), (%rdi)
199 	andq	$-(VEC_SIZE * 4), %rcx
200 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
201 	VMOVU	%VEC(0), VEC_SIZE(%rdi)
202 	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
203 	VMOVU	%VEC(0), (VEC_SIZE * 2)(%rdi)
204 	VMOVU	%VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
205 	VMOVU	%VEC(0), (VEC_SIZE * 3)(%rdi)
206 	VMOVU	%VEC(0), -(VEC_SIZE * 4)(%rdi,%rdx)
/* NOTE(review): the computation of the loop end pointer that this
   mask feeds (original lines 207/209) is elided from this view --
   confirm; %rdx presumably becomes the 4*VEC-aligned end.  */
208 	andq	$-(VEC_SIZE * 4), %rdx
/* Main loop body: four aligned VEC stores per iteration (VMOVA is
   safe here because %rcx was aligned to 4*VEC above), then advance.
   The loop-termination cmp/jne (original lines ~217-218) is elided
   from this view -- confirm.  */
212 	VMOVA	%VEC(0), (%rcx)
213 	VMOVA	%VEC(0), VEC_SIZE(%rcx)
214 	VMOVA	%VEC(0), (VEC_SIZE * 2)(%rcx)
215 	VMOVA	%VEC(0), (VEC_SIZE * 3)(%rcx)
216 	addq	$(VEC_SIZE * 4), %rcx
/* Expands to vzeroupper (or rep for the return) depending on the
   variant, per the macro definitions at the top of the file.  */
219 	VZEROUPPER_SHORT_RETURN
/* Small-size tail: each bucket below uses the same overlapping
   end-store trick at its own granularity.  NOTE(review): the labels
   and branch chain selecting among these buckets (and the setup that
   places the fill pattern in %rcx for the scalar stores) are elided
   from this view -- confirm against the full file.  */
222 	/* Less than 1 VEC.  */
223 # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
224 #  error Unsupported VEC_SIZE!
247 	/* From 32 to 63.  No branch when size == 32.  */
249 	VMOVU	%YMM0, -32(%rdi,%rdx)
255 	/* From 16 to 31.  No branch when size == 16.  */
257 	VMOVU	%XMM0, -16(%rdi,%rdx)
262 	/* From 8 to 15.  No branch when size == 8.  */
264 	movq	%rcx, -8(%rdi,%rdx)
269 	/* From 4 to 7.  No branch when size == 4.  */
270 	movl	%ecx, -4(%rdi,%rdx)
275 	/* From 2 to 3.  No branch when size == 2.  */
276 	movw	%cx, -2(%rdi,%rdx)
280 END (MEMSET_SYMBOL (__memset, unaligned_erms))