git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/fpu/multiarch/svml_s_asinhf16_core_avx512.S
Translations: Regenerate libc.pot
[thirdparty/glibc.git] / sysdeps / x86_64 / fpu / multiarch / svml_s_asinhf16_core_avx512.S
CommitLineData
e682d015 1/* Function asinhf vectorized with AVX-512.
dff8da6b 2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
e682d015
SP
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 https://www.gnu.org/licenses/. */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 * Compute asinh(x) as log(x + sqrt(x*x + 1))
23 * using RSQRT instructions for starting the
24 * square root approximation, and small table lookups for log
25 * that map to AVX-512 permute instructions
26 *
27 * Special cases:
28 *
29 * asinh(NaN) = quiet NaN, and raise invalid exception
30 * asinh(INF) = that INF
31 * asinh(0) = that 0
32 *
33 */
34
35/* Offsets for data table __svml_sasinh_data_internal_avx512
36 */
41d4013a
SP
37#define Log_tbl_H 0
38#define Log_tbl_L 128
39#define One 256
40#define AbsMask 320
41#define SmallThreshold 384
42#define Threshold 448
43#define LargeThreshold 512
44#define ca1 576
45#define c2s 640
46#define c1s 704
47#define AddB5 768
48#define RcpBitMask 832
49#define OneEighth 896
50#define Four 960
51#define poly_coeff3 1024
52#define poly_coeff2 1088
53#define poly_coeff1 1152
54#define L2H 1216
55#define L2L 1280
e682d015
SP
56
57#include <sysdep.h>
58
95177b78 59 .section .text.evex512, "ax", @progbits
e682d015 60ENTRY(_ZGVeN16v_asinhf_skx)
41d4013a
SP
61 pushq %rbp
62 cfi_def_cfa_offset(16)
63 movq %rsp, %rbp
64 cfi_def_cfa(6, 16)
65 cfi_offset(6, -16)
66 andq $-64, %rsp
67 subq $192, %rsp
68 vmovaps %zmm0, %zmm10
e682d015 69
41d4013a
SP
70 /* x^2 */
71 vmulps {rn-sae}, %zmm10, %zmm10, %zmm0
72 vmovups One+__svml_sasinh_data_internal_avx512(%rip), %zmm2
e682d015 73
41d4013a
SP
74 /* polynomial computation for small inputs */
75 vmovups ca1+__svml_sasinh_data_internal_avx512(%rip), %zmm1
e682d015 76
41d4013a
SP
77 /* not a very small input ? */
78 vmovups SmallThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm11
e682d015 79
41d4013a
SP
80 /* 1+x^2 */
81 vaddps {rn-sae}, %zmm2, %zmm0, %zmm7
e682d015 82
41d4013a
SP
83 /* |input| */
84 vandps AbsMask+__svml_sasinh_data_internal_avx512(%rip), %zmm10, %zmm12
e682d015 85
41d4013a
SP
86 /* A=max(x^2, 1); */
87 vmaxps {sae}, %zmm0, %zmm2, %zmm14
88 vrsqrt14ps %zmm7, %zmm8
e682d015 89
41d4013a
SP
90 /* B=min(x^2, 1); */
91 vminps {sae}, %zmm0, %zmm2, %zmm15
92 vcmpps $21, {sae}, %zmm11, %zmm12, %k2
e682d015 93
41d4013a
SP
94 /* B_high */
95 vsubps {rn-sae}, %zmm14, %zmm7, %zmm9
e682d015 96
41d4013a
SP
97 /* sign bit */
98 vxorps %zmm10, %zmm12, %zmm13
e682d015 99
41d4013a
SP
100 /* Sh ~sqrt(1+x^2) */
101 vmulps {rn-sae}, %zmm8, %zmm7, %zmm6
102 vmovups LargeThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm14
e682d015 103
41d4013a
SP
104 /* B_low */
105 vsubps {rn-sae}, %zmm9, %zmm15, %zmm3
e682d015 106
41d4013a
SP
107 /* Sh+x */
108 vaddps {rn-sae}, %zmm12, %zmm6, %zmm15
e682d015 109
41d4013a
SP
110 /* (Yh*R0)_low */
111 vfmsub213ps {rn-sae}, %zmm6, %zmm8, %zmm7
112 vmulps {rn-sae}, %zmm1, %zmm0, %zmm9
113 vcmpps $22, {sae}, %zmm14, %zmm12, %k0
114 vmovups c1s+__svml_sasinh_data_internal_avx512(%rip), %zmm1
e682d015 115
41d4013a
SP
116 /* polynomial computation for small inputs */
117 vfmadd213ps {rn-sae}, %zmm12, %zmm12, %zmm9
118 kmovw %k0, %edx
e682d015 119
41d4013a
SP
120 /* (x^2)_low */
121 vmovaps %zmm10, %zmm4
122 vfmsub213ps {rn-sae}, %zmm0, %zmm10, %zmm4
e682d015 123
41d4013a
SP
124 /* Yl = (x^2)_low + B_low */
125 vaddps {rn-sae}, %zmm4, %zmm3, %zmm5
e682d015 126
41d4013a
SP
127 /* rel. error term: Eh=1-Sh*R0 */
128 vmovaps %zmm2, %zmm0
129 vfnmadd231ps {rn-sae}, %zmm6, %zmm8, %zmm0
e682d015 130
41d4013a
SP
131 /* Sl = (Yh*R0)_low+(R0*Yl) */
132 vfmadd213ps {rn-sae}, %zmm7, %zmm8, %zmm5
e682d015 133
41d4013a
SP
134 /* very large inputs ? */
135 vmovups Threshold+__svml_sasinh_data_internal_avx512(%rip), %zmm7
e682d015 136
41d4013a
SP
137 /* rel. error term: Eh=(1-Sh*R0)-Sl*R0 */
138 vfnmadd231ps {rn-sae}, %zmm5, %zmm8, %zmm0
e682d015 139
41d4013a
SP
140 /* sqrt(1+x^2) ~ Sh + Sl + Sh*Eh*poly_s */
141 vmovups c2s+__svml_sasinh_data_internal_avx512(%rip), %zmm8
142 vcmpps $21, {sae}, %zmm7, %zmm12, %k1
e682d015 143
41d4013a
SP
144 /* Sh*Eh */
145 vmulps {rn-sae}, %zmm0, %zmm6, %zmm4
146 vfmadd231ps {rn-sae}, %zmm0, %zmm8, %zmm1
e682d015 147
41d4013a
SP
148 /* Sl + Sh*Eh*poly_s */
149 vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4
e682d015 150
41d4013a
SP
151 /* Xh */
152 vsubps {rn-sae}, %zmm6, %zmm15, %zmm5
e682d015 153
41d4013a
SP
154 /* fixup for very large inputs */
155 vmovups OneEighth+__svml_sasinh_data_internal_avx512(%rip), %zmm6
e682d015 156
41d4013a
SP
157 /* Xin0+Sl+Sh*Eh*poly_s ~ x+sqrt(1+x^2) */
158 vaddps {rn-sae}, %zmm4, %zmm15, %zmm3
e682d015 159
41d4013a
SP
160 /* Xl */
161 vsubps {rn-sae}, %zmm5, %zmm12, %zmm5
e682d015 162
41d4013a
SP
163 /* Sl_high */
164 vsubps {rn-sae}, %zmm15, %zmm3, %zmm0
165 vmulps {rn-sae}, %zmm6, %zmm12, %zmm3{%k1}
e682d015 166
41d4013a
SP
167 /* -K*L2H + Th */
168 vmovups L2H+__svml_sasinh_data_internal_avx512(%rip), %zmm15
e682d015 169
41d4013a
SP
170 /* Sl_l */
171 vsubps {rn-sae}, %zmm0, %zmm4, %zmm1
172 vrcp14ps %zmm3, %zmm6
e682d015 173
41d4013a
SP
174 /* Table lookups */
175 vmovups __svml_sasinh_data_internal_avx512(%rip), %zmm0
e682d015 176
41d4013a
SP
177 /* Xin_low */
178 vaddps {rn-sae}, %zmm5, %zmm1, %zmm7
e682d015 179
41d4013a
SP
180 /* round reciprocal to 1+4b mantissas */
181 vpaddd AddB5+__svml_sasinh_data_internal_avx512(%rip), %zmm6, %zmm4
182 vmovups poly_coeff1+__svml_sasinh_data_internal_avx512(%rip), %zmm5
183 vandps RcpBitMask+__svml_sasinh_data_internal_avx512(%rip), %zmm4, %zmm8
e682d015 184
41d4013a
SP
185 /* fixup for very large inputs */
186 vxorps %zmm7, %zmm7, %zmm7{%k1}
e682d015 187
41d4013a
SP
188 /* polynomial */
189 vmovups poly_coeff3+__svml_sasinh_data_internal_avx512(%rip), %zmm4
e682d015 190
41d4013a
SP
191 /* reduced argument for log(): (Rcp*Xin-1)+Rcp*Xin_low */
192 vfmsub231ps {rn-sae}, %zmm8, %zmm3, %zmm2
193 vmovups Four+__svml_sasinh_data_internal_avx512(%rip), %zmm3
e682d015 194
41d4013a
SP
195 /* exponents */
196 vgetexpps {sae}, %zmm8, %zmm1
e682d015 197
41d4013a
SP
198 /* Prepare table index */
199 vpsrld $18, %zmm8, %zmm14
200 vfmadd231ps {rn-sae}, %zmm8, %zmm7, %zmm2
201 vmovups poly_coeff2+__svml_sasinh_data_internal_avx512(%rip), %zmm7
202 vsubps {rn-sae}, %zmm3, %zmm1, %zmm1{%k1}
203 vpermt2ps Log_tbl_H+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm0
204 vmovups Log_tbl_L+__svml_sasinh_data_internal_avx512(%rip), %zmm3
205 vfmadd231ps {rn-sae}, %zmm2, %zmm4, %zmm7
206 vfnmadd231ps {rn-sae}, %zmm1, %zmm15, %zmm0
e682d015 207
41d4013a
SP
208 /* R^2 */
209 vmulps {rn-sae}, %zmm2, %zmm2, %zmm6
210 vfmadd213ps {rn-sae}, %zmm5, %zmm2, %zmm7
211 vpermt2ps Log_tbl_L+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm3
e682d015 212
41d4013a
SP
213 /* -K*L2L + Tl */
214 vmovups L2L+__svml_sasinh_data_internal_avx512(%rip), %zmm14
215 vfnmadd213ps {rn-sae}, %zmm3, %zmm14, %zmm1
e682d015 216
41d4013a
SP
217 /* Tl + R^2*Poly */
218 vfmadd213ps {rn-sae}, %zmm1, %zmm6, %zmm7
e682d015 219
41d4013a
SP
220 /* R+Tl + R^2*Poly */
221 vaddps {rn-sae}, %zmm2, %zmm7, %zmm2
222 vaddps {rn-sae}, %zmm2, %zmm0, %zmm9{%k2}
223 vxorps %zmm13, %zmm9, %zmm0
224 testl %edx, %edx
e682d015 225
41d4013a
SP
226 /* Go to special inputs processing branch */
227 jne L(SPECIAL_VALUES_BRANCH)
228 # LOE rbx r12 r13 r14 r15 edx zmm0 zmm10
e682d015 229
41d4013a
SP
230 /* Restore registers
231 * and exit the function
232 */
e682d015
SP
233
234L(EXIT):
41d4013a
SP
235 movq %rbp, %rsp
236 popq %rbp
237 cfi_def_cfa(7, 8)
238 cfi_restore(6)
239 ret
240 cfi_def_cfa(6, 16)
241 cfi_offset(6, -16)
242
243 /* Branch to process
244 * special inputs
245 */
e682d015
SP
246
247L(SPECIAL_VALUES_BRANCH):
41d4013a
SP
248 vmovups %zmm10, 64(%rsp)
249 vmovups %zmm0, 128(%rsp)
250 # LOE rbx r12 r13 r14 r15 edx zmm0
251
252 xorl %eax, %eax
253 # LOE rbx r12 r13 r14 r15 eax edx
254
255 vzeroupper
256 movq %r12, 16(%rsp)
257 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
258 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
259 movl %eax, %r12d
260 movq %r13, 8(%rsp)
261 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
262 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
263 movl %edx, %r13d
264 movq %r14, (%rsp)
265 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
266 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
267 # LOE rbx r15 r12d r13d
268
269 /* Range mask
270 * bits check
271 */
e682d015
SP
272
273L(RANGEMASK_CHECK):
41d4013a 274 btl %r12d, %r13d
e682d015 275
41d4013a
SP
276 /* Call scalar math function */
277 jc L(SCALAR_MATH_CALL)
278 # LOE rbx r15 r12d r13d
e682d015 279
41d4013a
SP
280 /* Special inputs
281 * processing loop
282 */
e682d015
SP
283
284L(SPECIAL_VALUES_LOOP):
41d4013a
SP
285 incl %r12d
286 cmpl $16, %r12d
287
288 /* Check bits in range mask */
289 jl L(RANGEMASK_CHECK)
290 # LOE rbx r15 r12d r13d
291
292 movq 16(%rsp), %r12
293 cfi_restore(12)
294 movq 8(%rsp), %r13
295 cfi_restore(13)
296 movq (%rsp), %r14
297 cfi_restore(14)
298 vmovups 128(%rsp), %zmm0
299
300 /* Go to exit */
301 jmp L(EXIT)
302 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
303 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
304 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
305 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
306 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
307 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
308 # LOE rbx r12 r13 r14 r15 zmm0
309
1d2971b5 310 /* Scalar math function call
41d4013a
SP
311 * to process special input
312 */
e682d015
SP
313
314L(SCALAR_MATH_CALL):
41d4013a 315 movl %r12d, %r14d
3079f652 316 vmovss 64(%rsp, %r14, 4), %xmm0
41d4013a
SP
317 call asinhf@PLT
318 # LOE rbx r14 r15 r12d r13d xmm0
e682d015 319
3079f652 320 vmovss %xmm0, 128(%rsp, %r14, 4)
e682d015 321
41d4013a
SP
322 /* Process special inputs in loop */
323 jmp L(SPECIAL_VALUES_LOOP)
324 # LOE rbx r15 r12d r13d
e682d015
SP
325END(_ZGVeN16v_asinhf_skx)
326
41d4013a
SP
327 .section .rodata, "a"
328 .align 64
e682d015
SP
329
330#ifdef __svml_sasinh_data_internal_avx512_typedef
331typedef unsigned int VUINT32;
332typedef struct {
41d4013a
SP
333 __declspec(align(64)) VUINT32 Log_tbl_H[32][1];
334 __declspec(align(64)) VUINT32 Log_tbl_L[32][1];
335 __declspec(align(64)) VUINT32 One[16][1];
336 __declspec(align(64)) VUINT32 AbsMask[16][1];
337 __declspec(align(64)) VUINT32 SmallThreshold[16][1];
338 __declspec(align(64)) VUINT32 Threshold[16][1];
339 __declspec(align(64)) VUINT32 LargeThreshold[16][1];
340 __declspec(align(64)) VUINT32 ca1[16][1];
341 __declspec(align(64)) VUINT32 c2s[16][1];
342 __declspec(align(64)) VUINT32 c1s[16][1];
343 __declspec(align(64)) VUINT32 AddB5[16][1];
344 __declspec(align(64)) VUINT32 RcpBitMask[16][1];
345 __declspec(align(64)) VUINT32 OneEighth[16][1];
346 __declspec(align(64)) VUINT32 Four[16][1];
347 __declspec(align(64)) VUINT32 poly_coeff3[16][1];
348 __declspec(align(64)) VUINT32 poly_coeff2[16][1];
349 __declspec(align(64)) VUINT32 poly_coeff1[16][1];
350 __declspec(align(64)) VUINT32 L2H[16][1];
351 __declspec(align(64)) VUINT32 L2L[16][1];
352} __svml_sasinh_data_internal_avx512;
e682d015
SP
353#endif
354__svml_sasinh_data_internal_avx512:
41d4013a
SP
355 /* Log_tbl_H */
356 .long 0x00000000
357 .long 0xbcfc0000
358 .long 0xbd788000
359 .long 0xbdb78000
360 .long 0xbdf14000
361 .long 0xbe14a000
362 .long 0xbe300000
363 .long 0xbe4aa000
364 .long 0xbe648000
365 .long 0xbe7dc000
366 .long 0xbe8b4000
367 .long 0xbe974000
368 .long 0xbea31000
369 .long 0xbeae9000
370 .long 0xbeb9d000
371 .long 0xbec4d000
372 .long 0xbecfa000
373 .long 0xbeda2000
374 .long 0xbee48000
375 .long 0xbeeea000
376 .long 0xbef89000
377 .long 0xbf012800
378 .long 0xbf05f000
379 .long 0xbf0aa800
380 .long 0xbf0f4000
381 .long 0xbf13c800
382 .long 0xbf184000
383 .long 0xbf1ca000
384 .long 0xbf20f000
385 .long 0xbf252800
386 .long 0xbf295000
387 .long 0xbf2d6800
388 /* Log_tbl_L */
389 .align 64
390 .long 0x80000000
391 .long 0xb726c39e
392 .long 0x3839e7fe
393 .long 0xb7528ae5
394 .long 0x377891d5
395 .long 0xb8297c10
396 .long 0x37cf8f58
397 .long 0x3852b186
398 .long 0x35838656
399 .long 0xb80c36af
400 .long 0x38235454
401 .long 0xb862bae1
402 .long 0x37e87bc7
403 .long 0x37848150
404 .long 0x37202511
405 .long 0xb74e1b05
406 .long 0x385c1340
407 .long 0xb8777bcd
408 .long 0x36038656
409 .long 0xb7d40984
410 .long 0xb80f5faf
411 .long 0xb8254b4c
412 .long 0xb865c84a
413 .long 0x37f0b42d
414 .long 0xb83ebce1
415 .long 0xb83c2513
416 .long 0x37a332c4
417 .long 0x3779654f
418 .long 0x38602f73
419 .long 0x367449f8
420 .long 0xb7b4996f
421 .long 0xb800986b
422 /* One */
423 .align 64
424 .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
425 /* AbsMask */
426 .align 64
427 .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
428 /* SmallThreshold */
429 .align 64
430 .long 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000
431 /* Threshold */
432 .align 64
433 .long 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000
434 /* LargeThreshold */
435 .align 64
436 .long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
437 /* ca1 */
438 .align 64
439 .long 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE
440 /* c2s */
441 .align 64
442 .long 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000
443 /* c1s */
444 .align 64
445 .long 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000
446 /* AddB5 */
447 .align 64
448 .long 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000
449 /* RcpBitMask */
450 .align 64
451 .long 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000
452 /* OneEighth */
453 .align 64
454 .long 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000
455 /* Four */
456 .align 64
457 .long 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000
458 /* poly_coeff3 */
459 .align 64
460 .long 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810
461 /* poly_coeff2 */
462 .align 64
463 .long 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e
464 /* poly_coeff1 */
465 .align 64
466 .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000
467 /* L2H = log(2)_high */
468 .align 64
469 .long 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000
470 /* L2L = log(2)_low */
471 .align 64
472 .long 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4
473 .align 64
474 .type __svml_sasinh_data_internal_avx512, @object
475 .size __svml_sasinh_data_internal_avx512, .-__svml_sasinh_data_internal_avx512