1 /* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
2 Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #include "svml_d_wrapper_impl.h"
/* Everything that follows is emitted into section .text.evex512, flagged
   allocatable and executable ("ax") with type @progbits.  */
22 .section .text.evex512, "ax", @progbits
/* _ZGVeN8vl8l8_sincos: entry point whose entire body is the expansion of
   WRAPPER_IMPL_AVX512_fFF applied to _ZGVdN4vl8l8_sincos (the AVX2
   version named in the file header).
   NOTE(review): WRAPPER_IMPL_AVX512_fFF is not defined in this file;
   presumably it is provided by svml_d_wrapper_impl.h -- confirm.  */
23 ENTRY (_ZGVeN8vl8l8_sincos)
24 WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
25 END (_ZGVeN8vl8l8_sincos)
27 /* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
28 function declared with #pragma omp declare simd notinbranch). */
/* WRAPPER_IMPL_AVX512_fFF_vvv callee

   Body of an AVX-512 entry point that spills %zmm0 and the two extra
   vector arguments to the stack and invokes the callee through
   HIDDEN_JUMPTARGET.  Two instruction sequences are visible below: the
   first addresses the frame through %rsp/%rdi, the second through %ebp
   with 32-bit loads.  Each sequence calls the callee twice, the second
   time with the upper 32 bytes of the spilled %zmm0 reloaded into %ymm0.

   NOTE(review): the two sequences look like alternative variants
   (presumably LP64 and x32) selected by a preprocessor conditional that
   is not visible in this excerpt -- confirm.
   NOTE(review): the leading decimal on every line, and the gaps between
   those numbers, suggest this text is a lossy extraction.  The frame
   setup, the pointer-argument setup before each call, the stores that
   consume %eax/%xmm0, the epilogue and the closing .endm are not visible
   here -- verify against the pristine file before relying on it.  */
29 .macro WRAPPER_IMPL_AVX512_fFF_vvv callee
/* Unwind info: the CFA offset grows by 8 and %rbp is recorded as saved
   at offset 0.  NOTE(review): the matching push is not visible.  */
32 cfi_adjust_cfa_offset (8)
33 cfi_rel_offset (%rbp, 0)
/* From here on the CFA is computed from %rbp.  */
35 cfi_def_cfa_register (%rbp)
/* Spill the 64-byte input vector %zmm0 at 256(%rsp).  */
38 vmovups %zmm0, 256(%rsp)
/* Spill %zmm1 and %zmm2 at 128(%rdi) and 192(%rdi).
   NOTE(review): the instruction that sets %rdi is not visible;
   presumably %rdi equals %rsp at this point -- confirm.  */
40 vmovups %zmm1, 128(%rdi)
41 vmovups %zmm2, 192(%rdi)
/* First call of the callee.  */
43 call HIDDEN_JUMPTARGET(\callee)
/* Reload bytes 32..63 of the %zmm0 spill slot (256 + 32 = 288) into
   %ymm0, then call the callee a second time.  */
44 vmovdqu 288(%rsp), %ymm0
47 call HIDDEN_JUMPTARGET(\callee)
/* Unwind info: the CFA is tracked through %rsp again and its offset
   shrinks by 8.  */
97 cfi_def_cfa_register (%rsp)
99 cfi_adjust_cfa_offset (-8)
/* DWARF: DW_CFA_expression for register 6 (%rbp), 2-byte expression
   DW_OP_breg6 +0, i.e. %rbp is saved at the address held in %rbp.  */
108 .cfi_escape 0x10,0x6,0x2,0x76,0
/* %esi = %rbp - 112: start of the 64-byte area read back below as the
   second group of eight doubles.  */
111 leal -112(%rbp), %esi
/* DWARF: DW_CFA_def_cfa_expression, 3 bytes: DW_OP_breg6 -16 followed by
   DW_OP_deref, i.e. the CFA is the value stored at %rbp - 16.  */
113 .cfi_escape 0xf,0x3,0x76,0x70,0x6
/* DWARF: DW_CFA_expression for register 12 (%r12): saved at %rbp - 8.  */
114 .cfi_escape 0x10,0xc,0x2,0x76,0x78
/* %edi = %rbp - 176: start of the 64-byte area read back below as the
   first group of eight doubles.  */
115 leal -176(%rbp), %edi
/* DWARF: DW_CFA_expression for register 3 (%rbx): saved at %rbp - 24.  */
118 .cfi_escape 0x10,0x3,0x2,0x76,0x68
/* Spill the incoming vectors: %ymm1 at -208(%ebp), %ymm2 at -240(%ebp)
   and the 64-byte %zmm0 at -304(%ebp).  */
121 vmovdqa %ymm1, -208(%ebp)
122 vmovdqa %ymm2, -240(%ebp)
123 vmovapd %zmm0, -304(%ebp)
/* First call of the callee.  */
124 call HIDDEN_JUMPTARGET(\callee)
/* Reload bytes 32..63 of the %zmm0 spill slot (-304 + 32 = -272) into
   %ymm0, then call the callee a second time.  */
126 vmovupd -272(%ebp), %ymm0
128 call HIDDEN_JUMPTARGET(\callee)
/* Eight load pairs, k = 0..7: %eax = 32-bit element k of the %ymm1 spill
   (-208 + 4k), %xmm0 = double k of the area starting at -176 (+ 8k).
   NOTE(review): each pair is presumably followed by a store of %xmm0
   through the pointer in %eax; that store is not visible here.  */
129 movl -208(%ebp), %eax
130 vmovsd -176(%ebp), %xmm0
132 movl -204(%ebp), %eax
133 vmovsd -168(%ebp), %xmm0
135 movl -200(%ebp), %eax
136 vmovsd -160(%ebp), %xmm0
138 movl -196(%ebp), %eax
139 vmovsd -152(%ebp), %xmm0
141 movl -192(%ebp), %eax
142 vmovsd -144(%ebp), %xmm0
144 movl -188(%ebp), %eax
145 vmovsd -136(%ebp), %xmm0
147 movl -184(%ebp), %eax
148 vmovsd -128(%ebp), %xmm0
150 movl -180(%ebp), %eax
151 vmovsd -120(%ebp), %xmm0
/* Eight more load pairs, k = 0..7: %eax = 32-bit element k of the %ymm2
   spill (-240 + 4k), %xmm0 = double k of the area starting at -112
   (+ 8k).  NOTE(review): the consuming stores are likewise not visible.  */
153 movl -240(%ebp), %eax
154 vmovsd -112(%ebp), %xmm0
156 movl -236(%ebp), %eax
157 vmovsd -104(%ebp), %xmm0
159 movl -232(%ebp), %eax
160 vmovsd -96(%ebp), %xmm0
162 movl -228(%ebp), %eax
163 vmovsd -88(%ebp), %xmm0
165 movl -224(%ebp), %eax
166 vmovsd -80(%ebp), %xmm0
168 movl -220(%ebp), %eax
169 vmovsd -72(%ebp), %xmm0
171 movl -216(%ebp), %eax
172 vmovsd -64(%ebp), %xmm0
174 movl -212(%ebp), %eax
175 vmovsd -56(%ebp), %xmm0
/* _ZGVeN8vvv_sincos: entry point whose entire body is the expansion of
   WRAPPER_IMPL_AVX512_fFF_vvv (the macro defined earlier in this file)
   applied to _ZGVdN4vl8l8_sincos, the same callee that
   _ZGVeN8vl8l8_sincos wraps.  */
189 ENTRY (_ZGVeN8vvv_sincos)
190 WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
191 END (_ZGVeN8vvv_sincos)