]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/memset.S
Faster memset on x64
[thirdparty/glibc.git] / sysdeps / x86_64 / memset.S
CommitLineData
78df0fcb
AJ
1/* memset/bzero -- set memory area to CH/0
2 Optimized version for x86-64.
568035b7 3 Copyright (C) 2002-2013 Free Software Foundation, Inc.
78df0fcb 4 This file is part of the GNU C Library.
78df0fcb
AJ
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
78df0fcb
AJ
19
20#include <sysdep.h>
78df0fcb 21
b2b671b6
OB
/* ALIGN (n) pads to a 2^n-byte boundary via .p2align.  A definition
   supplied before this point takes precedence.  */
#if !defined ALIGN
# define ALIGN(n)	.p2align n
#endif
78df0fcb 25
7e4ba49c 26 .text
80f844c9 27#if !defined NOT_IN_libc
/* __bzero: zero a memory area by entering the shared memset body.
   In:	%rdi = start of the area, %rsi = length in bytes.
   The shared body at L(entry_from_bzero) expects the length in %rdx,
   the fill pattern in %xmm8 and the return value in %rax.  */
ENTRY (__bzero)
	pxor	%xmm8, %xmm8		/* Fill pattern: sixteen zero bytes.  */
	movq	%rsi, %rdx		/* Length moves to where memset keeps n.  */
	movq	%rdi, %rax		/* Return value is the start address.  */
	jmp	L(entry_from_bzero)
END (__bzero)
weak_alias (__bzero, bzero)
b2b671b6
OB
35
/* __memset_tail: same as memset, except that the value handed back in
   %rax is supplied by the caller as an extra argument.
   In:	%rdi = destination, %esi = fill byte (low 8 bits), %rdx = length,
	%rcx = value to return.
   NOTE(review): %rax is loaded from %rcx and so need not equal %rdi;
   code reached through L(entry_from_bzero) must address the buffer via
   %rdi, not %rax.  */
ENTRY (__memset_tail)
	/* Replicate the low byte of %esi into all 16 bytes of %xmm8.  */
	movd	%esi, %xmm8
	punpcklbw	%xmm8, %xmm8	/* byte  -> word  */
	punpcklwd	%xmm8, %xmm8	/* word  -> dword */
	pshufd	$0, %xmm8, %xmm8	/* dword -> all four dwords */

	movq	%rcx, %rax		/* Caller-chosen return value.  */

	jmp	L(entry_from_bzero)
END (__memset_tail)
406f28db
UD
47#endif
48
#if defined (PIC) && !defined (NOT_IN_libc)
/* __memset_chk: checked entry point.
   In:	%rdx = length to set, %rcx = limit the length is checked against
	(presumably the destination object size -- confirm with callers).
   If the length exceeds the limit (unsigned), control goes to
   __chk_fail.  The passing path has no return or jump, so execution
   continues into whatever is assembled next (memset in this file).  */
ENTRY_CHK (__memset_chk)
	cmpq	%rcx, %rdx
	ja	HIDDEN_JUMPTARGET (__chk_fail)	/* length > limit */
END_CHK (__memset_chk)
#endif
ff02d528 55
b2b671b6
OB
/* void *memset (void *s, int c, size_t n)

   In:	%rdi = s, %esi = c (only the low byte is used), %rdx = n.
   Out:	%rax = s.
   Uses:	%rcx, %rdx, %xmm8, flags.

   L(entry_from_bzero) is also jumped to by __bzero and __memset_tail.
   At that label %rdi = destination, %rdx = length, %xmm8 = the fill
   byte replicated 16 times, and %rax = the value to return.  %rax is
   not necessarily equal to %rdi there (__memset_tail loads it from
   %rcx), so every store below addresses the buffer through %rdi.  */
ENTRY (memset)
	movd	%esi, %xmm8
	movq	%rdi, %rax		/* Return value.  */
	/* Broadcast the low byte of %esi to all 16 bytes of %xmm8.  */
	punpcklbw	%xmm8, %xmm8
	punpcklwd	%xmm8, %xmm8
	pshufd	$0, %xmm8, %xmm8
L(entry_from_bzero):
	cmpq	$64, %rdx
	ja	L(loop_start)		/* n > 64 */
	cmpq	$16, %rdx
	jbe	L(less_16_bytes)	/* n <= 16 */
	/* 16 < n <= 64.  Store the first and the last 16 bytes; the two
	   stores may overlap.  The flags of the compare against 32 are
	   still intact at the `ja' because SSE moves do not modify them.  */
	cmpq	$32, %rdx
	movdqu	%xmm8, (%rdi)
	movdqu	%xmm8, -16(%rdi,%rdx)
	ja	L(between_32_64_bytes)
L(return):
	/* Two-byte `rep ret'; presumably the usual workaround for branch
	   prediction of a return that is a jump target -- TODO confirm.  */
	rep
	ret
	ALIGN (4)
L(between_32_64_bytes):
	/* 32 < n <= 64: together with the two stores already done this
	   covers [s, s+32) and [s+n-32, s+n).  */
	movdqu	%xmm8, 16(%rdi)
	movdqu	%xmm8, -32(%rdi,%rdx)
	ret
	ALIGN (4)
L(loop_start):
	/* n > 64.  Store the first 64 and the last 64 bytes with unaligned
	   stores, then fill the 64-byte aligned blocks in between.
	   %rcx = (s + 64) rounded down to a multiple of 64, which lies in
	   (s, s + 64] and is therefore covered by the head stores.  */
	leaq	64(%rdi), %rcx
	movdqu	%xmm8, (%rdi)
	andq	$-64, %rcx
	movdqu	%xmm8, -16(%rdi,%rdx)
	movdqu	%xmm8, 16(%rdi)
	movdqu	%xmm8, -32(%rdi,%rdx)
	movdqu	%xmm8, 32(%rdi)
	movdqu	%xmm8, -48(%rdi,%rdx)
	movdqu	%xmm8, 48(%rdi)
	movdqu	%xmm8, -64(%rdi,%rdx)
	/* %rdx = (s + n) rounded down to a multiple of 64; the fewer than
	   64 bytes from there to the end are covered by the tail stores.  */
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rdx, %rcx
	je	L(return)		/* No aligned block in between.  */
	ALIGN (4)
L(loop):
	/* Fill [%rcx, %rdx) 64 bytes per iteration.  %rcx is a multiple of
	   64, so the aligned store form is valid.  */
	movdqa	%xmm8, (%rcx)
	movdqa	%xmm8, 16(%rcx)
	movdqa	%xmm8, 32(%rcx)
	movdqa	%xmm8, 48(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(loop)
	rep
	ret
L(less_16_bytes):
	/* n <= 16.  %rcx = low 8 bytes of the fill pattern.  */
	movq	%xmm8, %rcx
	testb	$24, %dl		/* Bit 3 or bit 4 set: 8 <= n <= 16.  */
	jne	L(between8_16bytes)
	testb	$4, %dl			/* Bit 2 set: 4 <= n <= 7.  */
	jne	L(between4_7bytes)
	/* 0 <= n <= 3.  */
	testb	$1, %dl
	je	L(odd_byte)
	movb	%cl, (%rdi)		/* n is odd: store the first byte.  */
L(odd_byte):
	testb	$2, %dl
	je	L(return)		/* n is 0 or 1: nothing more to store.  */
	/* n is 2 or 3: store the last two bytes.  The base register must
	   be %rdi, not %rax: on the __memset_tail path %rax holds the
	   caller-supplied return value, which need not be the buffer
	   address.  */
	movw	%cx, -2(%rdi,%rdx)
	ret
L(between4_7bytes):
	/* First and last 4 bytes; the stores overlap when n < 8.  */
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi,%rdx)
	ret
L(between8_16bytes):
	/* First and last 8 bytes; the stores overlap when n < 16.  */
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi,%rdx)
	ret

END (memset)
libc_hidden_builtin_def (memset)
553cc5f9 131
#if defined (PIC) && !defined (NOT_IN_libc) && !defined (USE_MULTIARCH)
/* Give __memset_chk a second name and attach a link-time diagnostic to
   it: the text placed in a .gnu.warning.SYMBOL section is reported by
   the GNU linker when SYMBOL is referenced.  */
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
	.section	.gnu.warning.__memset_zero_constant_len_parameter
	.string	"memset used with constant zero length parameter; this could be due to transposed parameters"
#endif