1 /* Wrapper implementations of vector math functions.
2 Copyright (C) 2014-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* SSE2 ISA version as wrapper to scalar. */
20 .macro WRAPPER_IMPL_SSE2 callee
/* Parameter: callee - symbol that is called twice through JUMPTARGET.
   NOTE(review): the numerals at the start of each statement look like
   line numbers from a source listing, and they skip values (20, 22, 24,
   27, 33), so instructions between the visible lines - and the closing
   .endm - seem to be absent from this copy.  Confirm against the
   complete file before relying on this body.  */
/* CFI: the CFA offset grows by 40 bytes.  Presumably this annotates a
   40-byte stack allocation on a skipped line - TODO confirm.  */
22 cfi_adjust_cfa_offset(40)
/* First call to callee.  */
24 call
JUMPTARGET(\callee
)
/* Second call to callee.  The argument setup between the two calls is
   not visible in this copy.  */
27 call
JUMPTARGET(\callee
)
/* CFI: undo the 40-byte CFA adjustment recorded above.  */
33 cfi_adjust_cfa_offset(-40)
37 /* 2 argument SSE2 ISA version as wrapper to scalar. */
38 .macro WRAPPER_IMPL_SSE2_ff callee
/* Parameter: callee - symbol that is called twice through JUMPTARGET.
   NOTE(review): the leading numerals skip values (38, 40, 42, 43, 47,
   53), so several instructions and the closing .endm seem to be absent
   from this copy - confirm against the complete file.  */
/* CFI: the CFA offset grows by 56 bytes.  Presumably this annotates a
   56-byte stack allocation on a skipped line - TODO confirm.  */
40 cfi_adjust_cfa_offset(56)
/* Spill xmm1 to 16(%rsp).  movaps is the aligned form, so this slot
   must be 16-byte aligned.  */
42 movaps
%xmm1
, 16(%rsp
)
/* First call to callee.  */
43 call
JUMPTARGET(\callee
)
/* Second call to callee.  The reloads that prepare its arguments are
   not visible in this copy.  */
47 call
JUMPTARGET(\callee
)
/* CFI: undo the 56-byte CFA adjustment recorded above.  */
53 cfi_adjust_cfa_offset(-56)
57 /* 3 argument SSE2 ISA version as wrapper to scalar. */
58 .macro WRAPPER_IMPL_SSE2_fFF callee
/* Parameter: callee - symbol that is called twice through JUMPTARGET.
   NOTE(review): the leading numerals skip many values (for example 72
   to 82), so most of the instructions and the closing .endm seem to be
   absent from this copy - confirm against the complete file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  The save instruction itself
   is not in this copy.  */
60 cfi_adjust_cfa_offset (8)
61 cfi_rel_offset (%rbp
, 0)
/* Same CFI pair for rbx.  */
63 cfi_adjust_cfa_offset (8)
64 cfi_rel_offset (%rbx
, 0)
/* CFI: the CFA offset grows by a further 40 bytes; presumably a
   40-byte stack allocation on a skipped line - TODO confirm.  */
68 cfi_adjust_cfa_offset(40)
/* First call to callee.  */
72 call
JUMPTARGET(\callee
)
/* Second call to callee.  How results are stored between and after
   the calls is not visible in this copy.  */
82 call
JUMPTARGET(\callee
)
/* CFI: unwind the three adjustments above in reverse order (40 bytes,
   then 8 and 8).  NOTE(review): no cfi_restore for rbx or rbp is
   visible; they may be on skipped lines - confirm.  */
88 cfi_adjust_cfa_offset(-40)
90 cfi_adjust_cfa_offset (-8)
93 cfi_adjust_cfa_offset (-8)
98 /* AVX/AVX2 ISA version as wrapper to SSE ISA version. */
99 .macro WRAPPER_IMPL_AVX callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET.
   The visible code stores the upper 128 bits of ymm0 on the stack,
   calls callee, swaps the saved half into xmm0, calls callee again and
   then reassembles a 256-bit value in ymm0.
   NOTE(review): the leading numerals skip values (100, 103, 105, 106,
   108, 113, 116, 118, ...), so the frame setup and teardown, at least
   one register move and the closing .endm seem to be absent from this
   copy - confirm against the complete file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
101 cfi_adjust_cfa_offset (8)
102 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
104 cfi_def_cfa_register (%rbp
)
/* Store the upper 128 bits of ymm0 at (%rsp).  */
107 vextractf128 $
1, %ymm0
, (%rsp
)
/* First call.  No visible line has modified xmm0, so it presumably
   still carries the lower half of the input.  */
109 call
HIDDEN_JUMPTARGET(\callee
)
/* Keep the xmm0 result of the first call at 16(%rsp).  Aligned store,
   so rsp must be 16-byte aligned here (alignment code not visible).  */
110 vmovapd
%xmm0
, 16(%rsp
)
/* Load the saved upper half as the xmm0 argument of the second call.  */
111 vmovaps (%rsp
), %xmm0
112 call
HIDDEN_JUMPTARGET(\callee
)
/* Reload the first result into xmm0.  */
114 vmovapd
16(%rsp
), %xmm0
/* Write xmm1 into the upper 128 bits of ymm0.  NOTE(review): xmm1 is
   not written by any visible line; presumably a skipped line copies
   the second result into it - confirm.  */
115 vinsertf128 $
1, %xmm1
, %ymm0
, %ymm0
/* CFI: the CFA is computed from rsp again.  */
117 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
119 cfi_adjust_cfa_offset (-8)
124 /* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. */
125 .macro WRAPPER_IMPL_AVX_ff callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET.
   The visible code stores the upper 128 bits of ymm0 and ymm1 on the
   stack, calls callee, loads the saved halves into xmm0 and xmm1,
   calls callee again and reassembles a 256-bit value in ymm0.
   NOTE(review): the leading numerals skip values (126, 129, 131, 132,
   135, 141, 144, 146, ...), so the frame setup and teardown, at least
   one register move and the closing .endm seem to be absent from this
   copy - confirm against the complete file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
127 cfi_adjust_cfa_offset (8)
128 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
130 cfi_def_cfa_register (%rbp
)
/* Store the upper 128 bits of ymm0 at 16(%rsp).  */
133 vextractf128 $
1, %ymm0
, 16(%rsp
)
/* Store the upper 128 bits of ymm1 at (%rsp).  */
134 vextractf128 $
1, %ymm1
, (%rsp
)
/* First call.  No visible line has modified xmm0 or xmm1, so they
   presumably still carry the lower halves of the two inputs.  */
136 call
HIDDEN_JUMPTARGET(\callee
)
/* Keep the xmm0 result of the first call at 32(%rsp).  */
137 vmovaps
%xmm0
, 32(%rsp
)
/* Load the saved upper halves as the arguments of the second call.  */
138 vmovaps
16(%rsp
), %xmm0
139 vmovaps (%rsp
), %xmm1
140 call
HIDDEN_JUMPTARGET(\callee
)
/* Reload the first result into xmm0.  */
142 vmovaps
32(%rsp
), %xmm0
/* Write xmm1 into the upper 128 bits of ymm0.  NOTE(review): after
   the second call no visible line writes xmm1; presumably a skipped
   line copies the second result into it - confirm.  */
143 vinsertf128 $
1, %xmm1
, %ymm0
, %ymm0
/* CFI: the CFA is computed from rsp again.  */
145 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
147 cfi_adjust_cfa_offset (-8)
152 /* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. */
153 .macro WRAPPER_IMPL_AVX_fFF callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET.
   The visible code stores the upper 128 bits of ymm0 on the stack,
   calls callee, loads the saved half into xmm0, calls callee again and
   copies two 16-byte stack slots to 16(%r13) and 16(%r14).
   NOTE(review): the leading numerals skip values (154, 157, 159, 160,
   163, 166-168, 170, 173, 174, 180, ...), so the frame setup, the
   loading of r13 and r14, the pointer arguments of the second call and
   the closing .endm seem to be absent from this copy - confirm against
   the complete file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
155 cfi_adjust_cfa_offset (8)
156 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
158 cfi_def_cfa_register (%rbp
)
/* CFI pairs for r13 and r14.  NOTE(review): these are issued while the
   CFA is based on rbp.  If the skipped lines are plain pushes, rbp does
   not move, so adjusting the rbp-based CFA offset and recording the
   registers relative to rbp looks questionable - verify the unwind
   info against the complete file.  */
161 cfi_adjust_cfa_offset (8)
162 cfi_rel_offset (%r13
, 0)
164 cfi_adjust_cfa_offset (8)
165 cfi_rel_offset (%r14
, 0)
/* Store the upper 128 bits of ymm0 at 32(%rsp).  */
169 vextractf128 $
1, %ymm0
, 32(%rsp
)
/* First call.  No visible line has modified xmm0, so it presumably
   still carries the lower half of the input.  */
171 call
HIDDEN_JUMPTARGET(\callee
)
/* Load the saved upper half as the xmm0 argument of the second call.  */
172 vmovaps
32(%rsp
), %xmm0
175 call
HIDDEN_JUMPTARGET(\callee
)
/* Copy 16 bytes from (%rsp) to 16(%r13) and 16 bytes from 16(%rsp) to
   16(%r14).  Presumably the second call wrote its two results to these
   stack slots and r13/r14 hold the caller's output pointers - neither
   is visible here, confirm.  */
176 vmovapd (%rsp
), %xmm0
177 vmovapd
16(%rsp
), %xmm1
178 vmovapd
%xmm0
, 16(%r13
)
179 vmovapd
%xmm1
, 16(%r14
)
/* CFI: undo the two 8-byte adjustments recorded for r14 and r13.  */
182 cfi_adjust_cfa_offset (-8)
185 cfi_adjust_cfa_offset (-8)
/* CFI: the CFA is computed from rsp again.  */
188 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
190 cfi_adjust_cfa_offset (-8)
195 /* AVX512 ISA version as wrapper to AVX2 ISA version. */
196 .macro WRAPPER_IMPL_AVX512 callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET,
   once per 256-bit half of zmm0.  Stack layout used by the visible
   code, relative to rsp:
     0..63    copy of the zmm0 input
     64..95   ymm0 result of the first call
     96..127  ymm0 result of the second call
   NOTE(review): the leading numerals skip values (197, 200, 202, 203,
   212, 214, ...), so the frame setup and teardown and the closing .endm
   seem to be absent from this copy - confirm against the complete
   file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
198 cfi_adjust_cfa_offset (8)
199 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
201 cfi_def_cfa_register (%rbp
)
/* Spill the whole 512-bit input to (%rsp).  */
204 vmovups
%zmm0
, (%rsp
)
/* Lower 256 bits of the input become the argument of the first call.  */
205 vmovupd (%rsp
), %ymm0
206 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result at 64(%rsp).  */
207 vmovupd
%ymm0
, 64(%rsp
)
/* Upper 256 bits of the input become the argument of the second call.  */
208 vmovupd
32(%rsp
), %ymm0
209 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the second result directly after the first one.  */
210 vmovupd
%ymm0
, 96(%rsp
)
/* Load both 256-bit results as one 512-bit value into zmm0.  */
211 vmovups
64(%rsp
), %zmm0
/* CFI: the CFA is computed from rsp again.  */
213 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
215 cfi_adjust_cfa_offset (-8)
220 /* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version. */
221 .macro WRAPPER_IMPL_AVX512_ff callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET,
   once per 256-bit half of the zmm0/zmm1 inputs.  Stack layout used by
   the visible code, relative to rsp:
     0..63     copy of the zmm0 input
     64..127   copy of the zmm1 input
     128..159  ymm0 result of the first call
     160..191  ymm0 result of the second call
   NOTE(review): the leading numerals skip values (222, 225, 227, 228,
   240, 242, ...), so the frame setup and teardown and the closing .endm
   seem to be absent from this copy - confirm against the complete
   file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
223 cfi_adjust_cfa_offset (8)
224 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
226 cfi_def_cfa_register (%rbp
)
/* Spill both 512-bit inputs to the stack.  */
229 vmovups
%zmm0
, (%rsp
)
230 vmovups
%zmm1
, 64(%rsp
)
/* Lower 256 bits of each input become the arguments of the first
   call.  */
231 vmovupd (%rsp
), %ymm0
232 vmovupd
64(%rsp
), %ymm1
233 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result at 128(%rsp).  */
234 vmovupd
%ymm0
, 128(%rsp
)
/* Upper 256 bits of each input become the arguments of the second
   call.  */
235 vmovupd
32(%rsp
), %ymm0
236 vmovupd
96(%rsp
), %ymm1
237 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the second result directly after the first one.  */
238 vmovupd
%ymm0
, 160(%rsp
)
/* Load both 256-bit results as one 512-bit value into zmm0.  */
239 vmovups
128(%rsp
), %zmm0
/* CFI: the CFA is computed from rsp again.  */
241 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
243 cfi_adjust_cfa_offset (-8)
248 /* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version. */
249 .macro WRAPPER_IMPL_AVX512_fFF callee
/* Parameter: callee - symbol called twice through HIDDEN_JUMPTARGET.
   The visible code spills zmm0 to the stack, calls callee on the lower
   256 bits, loads the upper 256 bits, calls callee again and copies two
   32-byte stack slots to 32(%r12) and 32(%r13).
   NOTE(review): the leading numerals skip values (250, 253, 255, 256,
   259, 262, 263, 265, 269, 270, 276-278, ...), so the frame setup, the
   loading of r12 and r13, the pointer arguments of the second call and
   the closing .endm seem to be absent from this copy - confirm against
   the complete file.  */
/* CFI for saving rbp: the CFA offset grows by 8 and rbp is recorded at
   offset 0 from the current CFA register.  */
251 cfi_adjust_cfa_offset (8)
252 cfi_rel_offset (%rbp
, 0)
/* CFI: from here on the CFA is computed from rbp.  */
254 cfi_def_cfa_register (%rbp
)
/* CFI pairs for r12 and r13.  NOTE(review): these are issued while the
   CFA is based on rbp.  If the skipped lines are plain pushes, rbp does
   not move, so adjusting the rbp-based CFA offset and recording the
   registers relative to rbp looks questionable - verify the unwind
   info against the complete file.  */
257 cfi_adjust_cfa_offset (8)
258 cfi_rel_offset (%r12
, 0)
260 cfi_adjust_cfa_offset (8)
261 cfi_rel_offset (%r13
, 0)
/* Spill the whole 512-bit input to (%rsp).  */
264 vmovups
%zmm0
, (%rsp
)
/* Lower 256 bits of the input become the argument of the first call.  */
266 vmovupd (%rsp
), %ymm0
267 call
HIDDEN_JUMPTARGET(\callee
)
/* Upper 256 bits of the input become the argument of the second
   call.  */
268 vmovupd
32(%rsp
), %ymm0
271 call
HIDDEN_JUMPTARGET(\callee
)
/* Copy 32 bytes from 64(%rsp) to 32(%r12) and 32 bytes from 96(%rsp)
   to 32(%r13).  Presumably the second call wrote its two results to
   these stack slots and r12/r13 hold the caller's output pointers -
   neither is visible here, confirm.  */
272 vmovupd
64(%rsp
), %ymm0
273 vmovupd
96(%rsp
), %ymm1
274 vmovupd
%ymm0
, 32(%r12
)
275 vmovupd
%ymm1
, 32(%r13
)
/* CFI: undo the two 8-byte adjustments recorded for r13 and r12.  */
279 cfi_adjust_cfa_offset (-8)
282 cfi_adjust_cfa_offset (-8)
/* CFI: the CFA is computed from rsp again.  */
285 cfi_def_cfa_register (%rsp
)
/* CFI: undo the 8-byte CFA adjustment recorded for the rbp save.  */
287 cfi_adjust_cfa_offset (-8)