/* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
20 #include "svml_s_wrapper_impl.h"
/* 16-lane single-precision sincosf, pointer-argument (vl4l4) variant.
   The AVX-512 entry point is a thin wrapper: WRAPPER_IMPL_AVX512_fFF
   (from svml_s_wrapper_impl.h) invokes the AVX2 8-lane implementation
   on each 256-bit half of the 512-bit input.
   Fix: stripped the extraction artifact line numbers ("23 ", "24 ",
   "25 ") that were fused onto each line and made it unassemblable.  */
ENTRY (_ZGVeN16vl4l4_sincosf)
WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vl4l4_sincosf)
/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).

   NOTE(review): this chunk is a lossy extraction of the original
   file.  Each surviving line still carries its original source line
   number fused at the start, and the gaps in that numbering show
   that many instructions are missing from view: the push/mov %rbp
   prologue, the stack alignment/allocation, the lea that sets up
   %rdi, the spills of the remaining pointer vectors, most of the
   per-lane reloads and ALL of the stores through the scattered
   pointers, the epilogue ret, and the closing .endm.  Instruction
   lines below are left byte-identical; compare against the complete
   upstream glibc file before assembling.  */
29 .macro WRAPPER_IMPL_AVX512_fFF_vvv callee
/* Frame-setup CFI: records %rbp saved at CFA offset 0 and the CFA
   rebased onto %rbp.  The matching push/mov instructions fall in the
   extraction gaps (original lines 30-34).  */
32 cfi_adjust_cfa_offset (8)
33 cfi_rel_offset (%rbp, 0)
35 cfi_def_cfa_register (%rbp)
/* Spill the 16-lane float input (%zmm0, %rsp-relative) and the four
   512-bit vectors of per-lane result pointers (%zmm1-%zmm4,
   %rdi-relative) into the local frame.  */
38 vmovups %zmm0, 384(%rsp)
40 vmovups %zmm1, 128(%rdi)
41 vmovups %zmm2, 192(%rdi)
42 vmovups %zmm3, 256(%rdi)
43 vmovups %zmm4, 320(%rdi)
/* First call handles one 256-bit (8-lane) half; reload the high
   half of the spilled input into %ymm0 and call the callee again.  */
45 call HIDDEN_JUMPTARGET(\callee)
46 vmovdqu 416(%rsp), %ymm0
49 call HIDDEN_JUMPTARGET(\callee)
/* Reload per-lane destination pointers into scratch registers; the
   rest of this run (and the stores using them) is missing from the
   extraction (original lines 50-146).  */
128 movl 104(%rsp), %r10d
141 movl 124(%rsp), %r11d
/* Epilogue CFI: CFA back on %rsp, frame popped.  */
147 cfi_def_cfa_register (%rsp)
149 cfi_adjust_cfa_offset (-8)
/* NOTE(review): from here down, addressing switches to 32-bit
   (%ebp)-relative with leal of the argument pointers.  32-bit base
   addressing would truncate pointers under LP64, so this is
   presumably the x32 (__ILP32__) branch of an #ifndef __ILP32__ /
   #else split whose preprocessor lines were lost in extraction —
   confirm against the full file.  The .cfi_escape sequences are
   DWARF expressions describing saved registers relative to %rbp.  */
158 .cfi_escape 0x10,0x6,0x2,0x76,0
161 leal -112(%rbp), %esi
163 .cfi_escape 0xf,0x3,0x76,0x70,0x6
164 .cfi_escape 0x10,0xc,0x2,0x76,0x78
165 leal -176(%rbp), %edi
168 .cfi_escape 0x10,0x3,0x2,0x76,0x68
/* Spill the two 32-bit pointer vectors and the 16-lane input, then
   run the 8-lane callee on the low half, reload the high half of the
   input, and run it again.  */
171 vmovdqa64 %zmm1, -240(%ebp)
172 vmovdqa64 %zmm2, -304(%ebp)
173 vmovaps %zmm0, -368(%ebp)
174 call HIDDEN_JUMPTARGET(\callee)
176 vmovups -336(%ebp), %ymm0
178 call HIDDEN_JUMPTARGET(\callee)
/* Per-lane scatter, unrolled 16x: each movl loads one 32-bit
   destination pointer from the -240..-180 spill of %zmm1, each
   vmovss loads the matching computed value from -176..-116
   (presumably the sin results — confirm frame layout against the
   full file).  NOTE(review): the stores through the loaded pointers
   that must follow each pair are absent from this extraction.  */
179 movl -240(%ebp), %eax
180 vmovss -176(%ebp), %xmm0
182 movl -236(%ebp), %eax
183 vmovss -172(%ebp), %xmm0
185 movl -232(%ebp), %eax
186 vmovss -168(%ebp), %xmm0
188 movl -228(%ebp), %eax
189 vmovss -164(%ebp), %xmm0
191 movl -224(%ebp), %eax
192 vmovss -160(%ebp), %xmm0
194 movl -220(%ebp), %eax
195 vmovss -156(%ebp), %xmm0
197 movl -216(%ebp), %eax
198 vmovss -152(%ebp), %xmm0
200 movl -212(%ebp), %eax
201 vmovss -148(%ebp), %xmm0
203 movl -208(%ebp), %eax
204 vmovss -144(%ebp), %xmm0
206 movl -204(%ebp), %eax
207 vmovss -140(%ebp), %xmm0
209 movl -200(%ebp), %eax
210 vmovss -136(%ebp), %xmm0
212 movl -196(%ebp), %eax
213 vmovss -132(%ebp), %xmm0
215 movl -192(%ebp), %eax
216 vmovss -128(%ebp), %xmm0
218 movl -188(%ebp), %eax
219 vmovss -124(%ebp), %xmm0
221 movl -184(%ebp), %eax
222 vmovss -120(%ebp), %xmm0
224 movl -180(%ebp), %eax
225 vmovss -116(%ebp), %xmm0
/* Second 16-lane scatter: pointers come from the -304..-244 spill of
   %zmm2, values from -112..-52 (presumably the cos results — confirm
   frame layout against the full file).  Same missing-store caveat as
   above; the tail of this run, the ret and the .endm all fall past
   the last extracted line.  */
227 movl -304(%ebp), %eax
228 vmovss -112(%ebp), %xmm0
230 movl -300(%ebp), %eax
231 vmovss -108(%ebp), %xmm0
233 movl -296(%ebp), %eax
234 vmovss -104(%ebp), %xmm0
236 movl -292(%ebp), %eax
237 vmovss -100(%ebp), %xmm0
239 movl -288(%ebp), %eax
240 vmovss -96(%ebp), %xmm0
242 movl -284(%ebp), %eax
243 vmovss -92(%ebp), %xmm0
245 movl -280(%ebp), %eax
246 vmovss -88(%ebp), %xmm0
248 movl -276(%ebp), %eax
249 vmovss -84(%ebp), %xmm0
251 movl -272(%ebp), %eax
252 vmovss -80(%ebp), %xmm0
254 movl -268(%ebp), %eax
255 vmovss -76(%ebp), %xmm0
257 movl -264(%ebp), %eax
258 vmovss -72(%ebp), %xmm0
260 movl -260(%ebp), %eax
261 vmovss -68(%ebp), %xmm0
263 movl -256(%ebp), %eax
264 vmovss -64(%ebp), %xmm0
266 movl -252(%ebp), %eax
267 vmovss -60(%ebp), %xmm0
269 movl -248(%ebp), %eax
270 vmovss -56(%ebp), %xmm0
272 movl -244(%ebp), %eax
273 vmovss -52(%ebp), %xmm0
/* 16-lane single-precision sincosf, vector-of-pointers (vvv) variant
   for #pragma omp declare simd notinbranch: each lane carries its own
   sin/cos destination pointer.  Implemented via the
   WRAPPER_IMPL_AVX512_fFF_vvv macro above, which calls the AVX2
   8-lane implementation twice and scatters results through the
   per-lane pointers.
   Fix: stripped the extraction artifact line numbers ("287 ", "288 ",
   "289 ") that were fused onto each line and made it unassemblable.  */
ENTRY (_ZGVeN16vvv_sincosf)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vvv_sincosf)