/* glibc: sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
   (captured from a git-blame view; commit 04f496d6,
   "Vector sincos for x86_64 and tests").  */
/* Wrapper implementations of vector math functions.
   Copyright (C) 2014-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* SSE2 ISA version as wrapper to scalar.
   Applies the scalar float function CALLEE to each of the four lanes of
   the v4sf argument in %xmm0; returns the packed results in %xmm0.
   Layout of the 40-byte frame: input at 0(%rsp), results at 16..28(%rsp);
   40 = 16+16+8 keeps %rsp 16-byte aligned at every call.  */
.macro WRAPPER_IMPL_SSE2 callee
	subq	$40, %rsp
	cfi_adjust_cfa_offset(40)
	movaps	%xmm0, (%rsp)		/* Spill all four input lanes.  */
	call	\callee@PLT		/* Lane 0 is already in %xmm0.  */
	movss	%xmm0, 16(%rsp)		/* r0.  */
	movss	4(%rsp), %xmm0		/* Lane 1.  */
	call	\callee@PLT
	movss	%xmm0, 20(%rsp)		/* r1.  */
	movss	8(%rsp), %xmm0		/* Lane 2.  */
	call	\callee@PLT
	movss	%xmm0, 24(%rsp)		/* r2.  */
	movss	12(%rsp), %xmm0		/* Lane 3.  */
	call	\callee@PLT		/* r3 stays in %xmm0.  */
	movss	16(%rsp), %xmm3		/* xmm3 = r0.  */
	movss	20(%rsp), %xmm2		/* xmm2 = r1.  */
	movss	24(%rsp), %xmm1		/* xmm1 = r2.  */
	movss	%xmm0, 28(%rsp)		/* r3 (also still in %xmm0).  */
	unpcklps %xmm1, %xmm3		/* xmm3 = {r0,r2,..}.  */
	unpcklps %xmm0, %xmm2		/* xmm2 = {r1,r3,..}.  */
	unpcklps %xmm2, %xmm3		/* xmm3 = {r0,r1,r2,r3}.  */
	movaps	%xmm3, %xmm0		/* Return packed results.  */
	addq	$40, %rsp
	cfi_adjust_cfa_offset(-40)
	ret
.endm
/* (git blame: commit 8aa92022, AS.)  */
/* 2 argument SSE2 ISA version as wrapper to scalar.
   Applies the scalar two-argument float function CALLEE lane-wise to the
   v4sf pair (%xmm0, %xmm1); returns the packed results in %xmm0.
   Frame: arg0 at 0(%rsp), arg1 at 16(%rsp), results at 32..44(%rsp);
   56 = 16+16+16+8 keeps %rsp 16-byte aligned at every call.  */
.macro WRAPPER_IMPL_SSE2_ff callee
	subq	$56, %rsp
	cfi_adjust_cfa_offset(56)
	movaps	%xmm0, (%rsp)		/* Spill first operand.  */
	movaps	%xmm1, 16(%rsp)		/* Spill second operand.  */
	call	\callee@PLT		/* Lane 0 already in %xmm0/%xmm1.  */
	movss	%xmm0, 32(%rsp)		/* r0.  */
	movss	4(%rsp), %xmm0		/* Lane 1 of both operands.  */
	movss	20(%rsp), %xmm1
	call	\callee@PLT
	movss	%xmm0, 36(%rsp)		/* r1.  */
	movss	8(%rsp), %xmm0		/* Lane 2.  */
	movss	24(%rsp), %xmm1
	call	\callee@PLT
	movss	%xmm0, 40(%rsp)		/* r2.  */
	movss	12(%rsp), %xmm0		/* Lane 3.  */
	movss	28(%rsp), %xmm1
	call	\callee@PLT		/* r3 stays in %xmm0.  */
	movss	32(%rsp), %xmm3		/* xmm3 = r0.  */
	movss	36(%rsp), %xmm2		/* xmm2 = r1.  */
	movss	40(%rsp), %xmm1		/* xmm1 = r2.  */
	movss	%xmm0, 44(%rsp)		/* r3 (also still in %xmm0).  */
	unpcklps %xmm1, %xmm3		/* xmm3 = {r0,r2,..}.  */
	unpcklps %xmm0, %xmm2		/* xmm2 = {r1,r3,..}.  */
	unpcklps %xmm2, %xmm3		/* xmm3 = {r0,r1,r2,r3}.  */
	movaps	%xmm3, %xmm0		/* Return packed results.  */
	addq	$56, %rsp
	cfi_adjust_cfa_offset(-56)
	ret
.endm
/* (git blame: commit 04f496d6, AS.)  */
/* AVX/AVX2 ISA version as wrapper to SSE ISA version.
   Splits the v8sf argument in %ymm0 in two and calls the 4-wide SSE
   implementation on each half; recombines into %ymm0.
   Uses an %rbp frame so %rsp can be realigned to 32 for vmovaps.  */
.macro WRAPPER_IMPL_AVX callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp		/* 32-byte align for aligned spills.  */
	subq	$32, %rsp
	vextractf128 $1, %ymm0, (%rsp)	/* Save high half; low stays in %xmm0.  */
	vzeroupper			/* Avoid AVX->SSE transition penalty.  */
	call	HIDDEN_JUMPTARGET(\callee)
	vmovaps	%xmm0, 16(%rsp)		/* Low-half result.  */
	vmovaps	(%rsp), %xmm0		/* High half of input.  */
	call	HIDDEN_JUMPTARGET(\callee)
	vmovaps	%xmm0, %xmm1		/* High-half result.  */
	vmovaps	16(%rsp), %xmm0		/* Low-half result (upper bits zero).  */
	vinsertf128 $1, %xmm1, %ymm0, %ymm0 /* Recombine into %ymm0.  */
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
/* (git blame: commit 8aa92022, AS.)  */
/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.
   Splits the v8sf pair (%ymm0, %ymm1) in halves and calls the 4-wide
   SSE implementation twice; recombines the result into %ymm0.  */
.macro WRAPPER_IMPL_AVX_ff callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp		/* 32-byte align for aligned spills.  */
	subq	$64, %rsp
	vextractf128 $1, %ymm0, 16(%rsp) /* Save high halves of both inputs;  */
	vextractf128 $1, %ymm1, (%rsp)	/* low halves stay in %xmm0/%xmm1.  */
	vzeroupper			/* Avoid AVX->SSE transition penalty.  */
	call	HIDDEN_JUMPTARGET(\callee)
	vmovaps	%xmm0, 32(%rsp)		/* Low-half result.  */
	vmovaps	16(%rsp), %xmm0		/* High halves of the inputs.  */
	vmovaps	(%rsp), %xmm1
	call	HIDDEN_JUMPTARGET(\callee)
	vmovaps	%xmm0, %xmm1		/* High-half result.  */
	vmovaps	32(%rsp), %xmm0		/* Low-half result (upper bits zero).  */
	vinsertf128 $1, %xmm1, %ymm0, %ymm0 /* Recombine into %ymm0.  */
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
/* (git blame: commit 04f496d6, AS.)  */
/* AVX512 ISA version as wrapper to AVX2 ISA version.
   Splits the v16sf argument in %zmm0 in two and calls the 8-wide AVX2
   implementation on each half, then reassembles the full %zmm0 result.
   AVX512 instructions are emitted as raw bytes so the file still
   assembles with pre-AVX512 binutils; each encoding is spelled out in
   the comment above it.
   Fix vs. previous revision: the first callee call's %ymm0 result was
   discarded and %zmm0 was never reassembled, so only the high half was
   returned (in %ymm0).  Both half-results are now spilled — reusing the
   64-byte input slot, whose halves are consumed before each store — and
   the combined result is reloaded into %zmm0.  */
.macro WRAPPER_IMPL_AVX512 callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp		/* 64-byte align for aligned zmm spills.  */
	subq	$64, %rsp		/* One 64B slot: input, then result.  */
/* Below is encoding for vmovaps %zmm0, (%rsp).  */
	.byte	0x62
	.byte	0xf1
	.byte	0x7c
	.byte	0x48
	.byte	0x29
	.byte	0x04
	.byte	0x24
/* Below is encoding for vmovaps (%rsp), %ymm0 (low half of input).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x04
	.byte	0x24
	call	HIDDEN_JUMPTARGET(\callee)
/* Below is encoding for vmovaps %ymm0, (%rsp): save low-half result
   over the already-consumed low half of the input.  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x29
	.byte	0x04
	.byte	0x24
/* Below is encoding for vmovaps 32(%rsp), %ymm0 (high half of input).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x44
	.byte	0x24
	.byte	0x20
	call	HIDDEN_JUMPTARGET(\callee)
/* Below is encoding for vmovaps %ymm0, 32(%rsp): save high-half result.  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x29
	.byte	0x44
	.byte	0x24
	.byte	0x20
/* Below is encoding for vmovaps (%rsp), %zmm0: reload full result.  */
	.byte	0x62
	.byte	0xf1
	.byte	0x7c
	.byte	0x48
	.byte	0x28
	.byte	0x04
	.byte	0x24
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
/* (git blame: commit 8aa92022, AS.)  */
/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.
   Splits the v16sf pair (%zmm0, %zmm1) in halves and calls the 8-wide
   AVX2 implementation twice, then reassembles the full %zmm0 result.
   AVX512 instructions are emitted as raw bytes so the file still
   assembles with pre-AVX512 binutils.
   Fixes vs. previous revision:
   1. The encoding for "vmovaps %zmm1, 64(%rsp)" was truncated: its
      modrm byte 0x4c selects mod=01, so a disp8 byte must follow the
      SIB byte.  Under EVEX disp8*N compression the displacement is
      scaled by the 64-byte operand size, hence the trailing 0x01
      (64 = 1*64), which was missing — the CPU would have consumed the
      next instruction's first byte as the displacement and misdecoded
      the rest of the stream.
   2. The first callee call's %ymm0 result was discarded and %zmm0 was
      never reassembled.  Half-results are now spilled over the
      already-consumed input halves and the combined result reloaded.  */
.macro WRAPPER_IMPL_AVX512_ff callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp		/* 64-byte align for aligned zmm spills.  */
	subq	$128, %rsp		/* Two 64B input slots; result reuses
					   bytes 0..31 and 32..63.  */
/* Below is encoding for vmovaps %zmm0, (%rsp).  */
	.byte	0x62
	.byte	0xf1
	.byte	0x7c
	.byte	0x48
	.byte	0x29
	.byte	0x04
	.byte	0x24
/* Below is encoding for vmovaps %zmm1, 64(%rsp).
   EVEX disp8 is scaled by the 64-byte operand size: 0x01 * 64 = 64.  */
	.byte	0x62
	.byte	0xf1
	.byte	0x7c
	.byte	0x48
	.byte	0x29
	.byte	0x4c
	.byte	0x24
	.byte	0x01
/* Below is encoding for vmovaps (%rsp), %ymm0 (low half of arg0).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x04
	.byte	0x24
/* Below is encoding for vmovaps 64(%rsp), %ymm1 (low half of arg1).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x4c
	.byte	0x24
	.byte	0x40
	call	HIDDEN_JUMPTARGET(\callee)
/* Below is encoding for vmovaps %ymm0, (%rsp): save low-half result
   over the already-consumed low half of arg0.  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x29
	.byte	0x04
	.byte	0x24
/* Below is encoding for vmovaps 32(%rsp), %ymm0 (high half of arg0).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x44
	.byte	0x24
	.byte	0x20
/* Below is encoding for vmovaps 96(%rsp), %ymm1 (high half of arg1).  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x28
	.byte	0x4c
	.byte	0x24
	.byte	0x60
	call	HIDDEN_JUMPTARGET(\callee)
/* Below is encoding for vmovaps %ymm0, 32(%rsp): save high-half result.  */
	.byte	0xc5
	.byte	0xfc
	.byte	0x29
	.byte	0x44
	.byte	0x24
	.byte	0x20
/* Below is encoding for vmovaps (%rsp), %zmm0: reload full result.  */
	.byte	0x62
	.byte	0xf1
	.byte	0x7c
	.byte	0x48
	.byte	0x28
	.byte	0x04
	.byte	0x24
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm