/* Wrapper implementations of vector math functions.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19 /* SSE2 ISA version as wrapper to scalar.

   Parameter: callee - the symbol that is invoked through JUMPTARGET.

   As visible here, the macro records a 40-byte CFA adjustment, calls
   the callee four times, and records the matching -40 adjustment.

   NOTE(review): the numbers fused onto each line skip (22 -> 24,
   24 -> 27, 33 -> 43), so the instructions between the visible ones
   are not shown in this listing - presumably the stack allocation and
   the per-element loads/stores around each call.  Confirm against the
   complete file before relying on this text.  */
20 .macro WRAPPER_IMPL_SSE2 callee
22 cfi_adjust_cfa_offset(40)
/* First of four calls to the callee.  */
24 call
JUMPTARGET(\callee
)
27 call
JUMPTARGET(\callee
)
30 call
JUMPTARGET(\callee
)
/* Fourth and last call.  */
33 call
JUMPTARGET(\callee
)
/* Undo the 40-byte CFA adjustment recorded at the top.  */
43 cfi_adjust_cfa_offset(-40)
47 /* 2 argument SSE2 ISA version as wrapper to scalar.

   Parameter: callee - the symbol that is invoked through JUMPTARGET.

   As visible here, the macro records a 56-byte CFA adjustment, stores
   %xmm1 at 16(%rsp), calls the callee four times, and records the
   matching -56 adjustment.

   NOTE(review): the numbers fused onto each line skip (50 -> 52,
   53 -> 57, 65 -> 75), so several instructions are missing from this
   listing - presumably the stack allocation, the store of %xmm0 and
   the per-element argument/result moves.  Confirm against the
   complete file.  */
48 .macro WRAPPER_IMPL_SSE2_ff callee
50 cfi_adjust_cfa_offset(56)
/* Spill %xmm1 to the stack at offset 16.  movaps is an aligned move,
   so 16(%rsp) must be 16-byte aligned at this point.  */
52 movaps
%xmm1
, 16(%rsp
)
/* First of four calls to the callee.  */
53 call
JUMPTARGET(\callee
)
57 call
JUMPTARGET(\callee
)
61 call
JUMPTARGET(\callee
)
/* Fourth and last call.  */
65 call
JUMPTARGET(\callee
)
/* Undo the 56-byte CFA adjustment recorded at the top.  */
75 cfi_adjust_cfa_offset(-56)
79 /* 3 argument SSE2 ISA version as wrapper to scalar.

   Parameter: callee - the symbol that is invoked through JUMPTARGET.

   As visible here, the macro records CFI for saved %rbp and %rbx
   (8 bytes of CFA adjustment each) plus a further 40-byte adjustment,
   calls the callee four times, and after calls reads single floats
   from 28(%rsp) and 24(%rsp).  After the last call those two floats
   are written to byte offset 12 of the buffers addressed by %rbp and
   %rbx respectively.

   NOTE(review): the numbers fused onto each line skip in many places
   (80 -> 82, 86 -> 90, 94 -> 100, 105 -> 110, ...), so the pushes, the
   register copies into %rbp/%rbx, the argument setup for each call and
   the stores for elements 0-2 are not shown.  Presumably %rbp and %rbx
   hold the two output-pointer arguments - confirm against the
   complete file.  */
80 .macro WRAPPER_IMPL_SSE2_fFF callee
/* CFI for a saved %rbp: CFA grows by 8, %rbp saved at offset 0.  */
82 cfi_adjust_cfa_offset (8)
83 cfi_rel_offset (%rbp
, 0)
/* CFI for a saved %rbx: CFA grows by 8, %rbx saved at offset 0.  */
85 cfi_adjust_cfa_offset (8)
86 cfi_rel_offset (%rbx
, 0)
90 cfi_adjust_cfa_offset(40)
/* First call.  */
94 call
JUMPTARGET(\callee
)
100 movss
24(%rsp
), %xmm0
/* Selector 85 (0x55) picks element 1 for every destination lane.  */
103 shufps $
85, %xmm1
, %xmm0
/* Second call.  */
104 call
JUMPTARGET(\callee
)
105 movss
28(%rsp
), %xmm0
110 movss
24(%rsp
), %xmm0
/* Interleave the upper two elements of %xmm0 and %xmm1 into %xmm0.  */
113 unpckhps
%xmm1
, %xmm0
/* Third call.  */
114 call
JUMPTARGET(\callee
)
118 movss
28(%rsp
), %xmm0
/* Selector 255 (0xff) picks element 3 for every lane of %xmm1.  */
119 shufps $
255, %xmm1
, %xmm1
121 movss
24(%rsp
), %xmm0
/* Fourth and last call.  */
124 call
JUMPTARGET(\callee
)
/* Copy the float at 28(%rsp) to byte offset 12 of the %rbp buffer.  */
125 movss
28(%rsp
), %xmm0
126 movss
%xmm0
, 12(%rbp
)
/* Copy the float at 24(%rsp) to byte offset 12 of the %rbx buffer.  */
127 movss
24(%rsp
), %xmm0
128 movss
%xmm0
, 12(%rbx
)
/* Undo the CFA adjustments in reverse order: 40, then 8 and 8.  */
130 cfi_adjust_cfa_offset(-40)
132 cfi_adjust_cfa_offset (-8)
135 cfi_adjust_cfa_offset (-8)
140 /* AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro splits the 256-bit argument in %ymm0 into
   two 128-bit halves, calls the callee once per half, and joins the
   results with vinsertf128 into %ymm0.  The CFI records a saved %rbp
   and uses %rbp as the CFA register for the duration of the body.

   NOTE(review): the numbers fused onto each line skip (141 -> 143,
   146 -> 149, 154 -> 156, ...), so the frame setup, stack alignment
   and at least one instruction between the second call and the reload
   of the first result are not shown - presumably the one that moves
   the second result into %xmm1.  Confirm against the complete
   file.  */
141 .macro WRAPPER_IMPL_AVX callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
143 cfi_adjust_cfa_offset (8)
144 cfi_rel_offset (%rbp
, 0)
146 cfi_def_cfa_register (%rbp
)
/* Store the upper 128 bits of %ymm0 at (%rsp) for the second call.  */
149 vextractf128 $
1, %ymm0
, (%rsp
)
/* First call, with %xmm0 as the argument register.  */
151 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result at 16(%rsp), then load the upper half.  */
152 vmovaps
%xmm0
, 16(%rsp
)
153 vmovaps (%rsp
), %xmm0
/* Second call, on the upper half.  */
154 call
HIDDEN_JUMPTARGET(\callee
)
/* Reload the first result and place %xmm1 in the upper 128 bits of
   the %ymm0 return value.  */
156 vmovaps
16(%rsp
), %xmm0
157 vinsertf128 $
1, %xmm1
, %ymm0
, %ymm0
/* Restore %rsp as the CFA register and undo the 8-byte adjustment.  */
159 cfi_def_cfa_register (%rsp
)
161 cfi_adjust_cfa_offset (-8)
166 /* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro stores the upper 128-bit halves of %ymm0
   and %ymm1 on the stack, calls the callee once, saves that result,
   reloads the stored upper halves into %xmm0/%xmm1, calls the callee
   again, and joins the two results with vinsertf128 into %ymm0.  The
   CFI records a saved %rbp and uses %rbp as the CFA register for the
   duration of the body.

   NOTE(review): the numbers fused onto each line skip (167 -> 169,
   172 -> 175, 182 -> 184, ...), so the frame setup, stack alignment
   and at least one instruction after the second call are not shown -
   presumably the one that moves the second result into %xmm1.
   Confirm against the complete file.  */
167 .macro WRAPPER_IMPL_AVX_ff callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
169 cfi_adjust_cfa_offset (8)
170 cfi_rel_offset (%rbp
, 0)
172 cfi_def_cfa_register (%rbp
)
/* Upper half of the first argument goes to 16(%rsp), upper half of
   the second argument to (%rsp).  */
175 vextractf128 $
1, %ymm0
, 16(%rsp
)
176 vextractf128 $
1, %ymm1
, (%rsp
)
/* First call.  */
178 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result at 32(%rsp), then load both upper halves.  */
179 vmovaps
%xmm0
, 32(%rsp
)
180 vmovaps
16(%rsp
), %xmm0
181 vmovaps (%rsp
), %xmm1
/* Second call, on the upper halves.  */
182 call
HIDDEN_JUMPTARGET(\callee
)
/* Reload the first result and place %xmm1 in the upper 128 bits of
   the %ymm0 return value.  */
184 vmovaps
32(%rsp
), %xmm0
185 vinsertf128 $
1, %xmm1
, %ymm0
, %ymm0
/* Restore %rsp as the CFA register and undo the 8-byte adjustment.  */
187 cfi_def_cfa_register (%rsp
)
189 cfi_adjust_cfa_offset (-8)
194 /* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro spills %ymm0 to (%rsp), copies its upper
   128 bits to 32(%rsp), calls the callee with the lower half and then
   with the copy of the upper half, and finally copies the 16-byte
   values at (%rsp) and 16(%rsp) to byte offset 16 of the buffers
   addressed by %r13 and %r14.  The CFI records saved %rbp, %r13 and
   %r14.

   NOTE(review): the numbers fused onto each line skip (195 -> 197,
   207 -> 210, 217 -> 220, 224 -> 227, ...), so the pushes, stack
   alignment, the copies into %r13/%r14, the pointer-argument setup for
   each call and the stores for the lower half are not shown.
   Presumably %r13/%r14 hold the two output-pointer arguments - confirm
   against the complete file.

   NOTE(review): the %r13/%r14 annotations come after
   cfi_def_cfa_register (%rbp), so cfi_adjust_cfa_offset and
   cfi_rel_offset are then interpreted relative to %rbp rather than
   %rsp.  Verify that they describe the real save slots.  */
195 .macro WRAPPER_IMPL_AVX_fFF callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
197 cfi_adjust_cfa_offset (8)
198 cfi_rel_offset (%rbp
, 0)
200 cfi_def_cfa_register (%rbp
)
/* CFI for saved %r13 and %r14.  */
203 cfi_adjust_cfa_offset (8)
204 cfi_rel_offset (%r13
, 0)
206 cfi_adjust_cfa_offset (8)
207 cfi_rel_offset (%r14
, 0)
/* Spill the 256-bit argument; vmovaps with a %ymm operand needs a
   32-byte aligned address.  */
210 vmovaps
%ymm0
, (%rsp
)
/* Copy the upper 128 bits of the spilled argument to 32(%rsp).  */
212 vmovaps
16(%rsp
), %xmm1
213 vmovaps
%xmm1
, 32(%rsp
)
/* First call, on the lower 128 bits.  */
215 vmovaps (%rsp
), %xmm0
216 call
HIDDEN_JUMPTARGET(\callee
)
/* Second call, on the saved upper 128 bits.  */
217 vmovaps
32(%rsp
), %xmm0
220 call
HIDDEN_JUMPTARGET(\callee
)
/* Copy the 16-byte values at (%rsp) and 16(%rsp) to byte offset 16 of
   the buffers addressed by %r13 and %r14.  */
221 vmovaps (%rsp
), %xmm0
222 vmovaps
16(%rsp
), %xmm1
223 vmovaps
%xmm0
, 16(%r13
)
224 vmovaps
%xmm1
, 16(%r14
)
/* Undo the CFI adjustments for %r14, %r13 and %rbp.  */
227 cfi_adjust_cfa_offset (-8)
230 cfi_adjust_cfa_offset (-8)
233 cfi_def_cfa_register (%rsp
)
235 cfi_adjust_cfa_offset (-8)
240 /* AVX512 ISA version as wrapper to AVX2 ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro spills the 512-bit argument in %zmm0 to
   (%rsp), calls the callee on the lower 256 bits and then on the upper
   256 bits, writes the two results to 64(%rsp) and 96(%rsp), and
   reloads those 64 contiguous bytes into %zmm0 as the return value.
   The CFI records a saved %rbp and uses %rbp as the CFA register for
   the duration of the body.

   NOTE(review): the numbers fused onto each line skip (241 -> 243,
   246 -> 249, 256 -> 258, ...), so the frame setup, stack
   alignment/allocation and the teardown are not shown.  Confirm
   against the complete file.  */
241 .macro WRAPPER_IMPL_AVX512 callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
243 cfi_adjust_cfa_offset (8)
244 cfi_rel_offset (%rbp
, 0)
246 cfi_def_cfa_register (%rbp
)
/* Spill the argument, then load its lower 256 bits.  */
249 vmovups
%zmm0
, (%rsp
)
250 vmovupd (%rsp
), %ymm0
/* First call, on the lower half.  */
251 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result at 64(%rsp), then load the upper 256 bits.  */
252 vmovupd
%ymm0
, 64(%rsp
)
253 vmovupd
32(%rsp
), %ymm0
/* Second call, on the upper half.  */
254 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the second result right after the first, then load both as
   one 512-bit value.  */
255 vmovupd
%ymm0
, 96(%rsp
)
256 vmovups
64(%rsp
), %zmm0
/* Restore %rsp as the CFA register and undo the 8-byte adjustment.  */
258 cfi_def_cfa_register (%rsp
)
260 cfi_adjust_cfa_offset (-8)
265 /* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro spills %zmm0 to (%rsp) and %zmm1 to
   64(%rsp), calls the callee on the lower 256-bit halves of both and
   then on the upper halves, writes the two results to 128(%rsp) and
   160(%rsp), and reloads those 64 contiguous bytes into %zmm0 as the
   return value.  The CFI records a saved %rbp and uses %rbp as the CFA
   register for the duration of the body.

   NOTE(review): the numbers fused onto each line skip (266 -> 268,
   271 -> 274, 284 -> 286, ...), so the frame setup, stack
   alignment/allocation and the teardown are not shown.  Confirm
   against the complete file.  */
266 .macro WRAPPER_IMPL_AVX512_ff callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
268 cfi_adjust_cfa_offset (8)
269 cfi_rel_offset (%rbp
, 0)
271 cfi_def_cfa_register (%rbp
)
/* Spill both 512-bit arguments.  */
274 vmovups
%zmm0
, (%rsp
)
275 vmovups
%zmm1
, 64(%rsp
)
/* Load the lower 256 bits of each argument.  */
276 vmovups (%rsp
), %ymm0
277 vmovups
64(%rsp
), %ymm1
/* First call, on the lower halves.  */
278 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the first result, then load the upper 256 bits of each
   argument.  */
279 vmovups
%ymm0
, 128(%rsp
)
280 vmovups
32(%rsp
), %ymm0
281 vmovups
96(%rsp
), %ymm1
/* Second call, on the upper halves.  */
282 call
HIDDEN_JUMPTARGET(\callee
)
/* Save the second result right after the first, then load both as
   one 512-bit value.  */
283 vmovups
%ymm0
, 160(%rsp
)
284 vmovups
128(%rsp
), %zmm0
/* Restore %rsp as the CFA register and undo the 8-byte adjustment.  */
286 cfi_def_cfa_register (%rsp
)
288 cfi_adjust_cfa_offset (-8)
293 /* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Parameter: callee - the symbol that is invoked through
   HIDDEN_JUMPTARGET.

   As visible here, the macro spills %zmm0 to (%rsp), calls the callee
   on the lower 256 bits and then on the upper 256 bits, and finally
   copies the 32-byte values at 64(%rsp) and 96(%rsp) to byte offset 32
   of the buffers addressed by %r12 and %r13.  The CFI records a saved
   %rbp and uses %rbp as the CFA register for the duration of the body.

   NOTE(review): the numbers fused onto each line skip (294 -> 296,
   299 -> 305, 309 -> 312, 316 -> 321, ...), so the frame setup, the
   copies into %r12/%r13, the pointer-argument setup for each call and
   the stores for the lower half are not shown, and the listing may
   continue past the last visible line.  Presumably %r12/%r13 hold the
   two output-pointer arguments - confirm against the complete file.

   NOTE(review): %r12 and %r13 are callee-saved in the System V AMD64
   ABI, yet no CFI for them is visible here.  Confirm that they are
   saved and restored, and whether unwind information for them is
   wanted.  */
294 .macro WRAPPER_IMPL_AVX512_fFF callee
/* CFI for a saved %rbp, then switch the CFA register to %rbp.  */
296 cfi_adjust_cfa_offset (8)
297 cfi_rel_offset (%rbp
, 0)
299 cfi_def_cfa_register (%rbp
)
/* Spill the 512-bit argument; vmovaps with a %zmm operand needs a
   64-byte aligned address.  */
305 vmovaps
%zmm0
, (%rsp
)
/* First call, on the lower 256 bits.  */
307 vmovaps (%rsp
), %ymm0
308 call
HIDDEN_JUMPTARGET(\callee
)
/* Second call, on the upper 256 bits.  */
309 vmovaps
32(%rsp
), %ymm0
312 call
HIDDEN_JUMPTARGET(\callee
)
/* Copy the 32-byte values at 64(%rsp) and 96(%rsp) to byte offset 32
   of the buffers addressed by %r12 and %r13.  */
313 vmovaps
64(%rsp
), %ymm0
314 vmovaps
96(%rsp
), %ymm1
315 vmovaps
%ymm0
, 32(%r12
)
316 vmovaps
%ymm1
, 32(%r13
)
/* Restore %rsp as the CFA register and undo the 8-byte adjustment.  */
321 cfi_def_cfa_register (%rsp
)
323 cfi_adjust_cfa_offset (-8)