]>
Commit | Line | Data |
---|---|---|
c0f36fc3 | 1 | /* Function tanh vectorized with AVX-512. |
6d7e8eda | 2 | Copyright (C) 2021-2023 Free Software Foundation, Inc. |
c0f36fc3 SP |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | https://www.gnu.org/licenses/. */ | |
18 | ||
19 | /* | |
20 | * ALGORITHM DESCRIPTION: | |
21 | * | |
22 | * NOTE: Since the hyperbolic tangent function is odd | |
23 | * (tanh(x) = -tanh(-x)), below algorithm deals with the absolute | |
24 | * value of the argument |x|: tanh(x) = sign(x) * tanh(|x|) | |
25 | * | |
26 | * We use a table lookup method to compute tanh(|x|). | |
27 | * The basic idea is to split the input range into a number of subintervals | |
28 | * and to approximate tanh(.) with a polynomial on each of them. | |
29 | * | |
30 | * IEEE SPECIAL CONDITIONS: | |
6de743a4 | 31 | * x = [+, -]0, r = [+, -]0 |
c0f36fc3 SP |
32 | * x = +Inf, r = +1 |
33 | * x = -Inf, r = -1 | |
34 | * x = QNaN, r = QNaN | |
35 | * x = SNaN, r = QNaN | |
36 | * | |
37 | * | |
38 | * ALGORITHM DETAILS | |
39 | * We handle special values in a callout function, aside from main path | |
40 | * computations. "Special" for this algorithm are: | |
41 | * INF, NAN, |x| > HUGE_THRESHOLD | |
42 | * | |
43 | * | |
44 | * Main path computations are organized as follows: | |
45 | * We split the interval [0, SATURATION_THRESHOLD) | |
46 | * into a number of subintervals. On each subinterval we approximate tanh(.) | |
47 | * with a minimax polynomial of pre-defined degree. Polynomial coefficients | |
48 | * are computed beforehand and stored in table. We also use | |
49 | * | |
50 | * y := |x| + B, | |
51 | * | |
52 | * where B depends on the subinterval and is used to bring the argument | |
53 | * closer to zero (the code loads C = -B from _dC and computes y = abs(x) - C). | |
54 | * We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD], | |
55 | * where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to | |
56 | * preserve main path computation logic but return 1.0 for all arguments. | |
57 | * | |
58 | * Hence reconstruction looks as follows: | |
59 | * we extract proper polynomial and range reduction coefficients | |
60 | * (Pj and B), corresponding to subinterval, to which |x| belongs, | |
61 | * and return | |
62 | * | |
63 | * r := sign(x) * (P0 + P1 * y + ... + Pn * y^n) | |
64 | * | |
65 | * NOTE: we use multiprecision technique to multiply and sum the first | |
66 | * K terms of the polynomial. So Pj, j = 0..K are stored in | |
67 | * table each as a pair of target precision numbers (Pj and PLj) to | |
68 | * achieve wider than target precision. | |
69 | * | |
70 | * | |
71 | */ | |
72 | ||
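73 | /* Byte offsets into the data table __svml_dtanh_data_internal: each coefficient block (_dC, _dP0 .. _dP17) occupies 128 bytes and each mask block occupies 64 bytes. | |
74 | */ | |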
6de743a4 SP |
75 | #define _dC 0 |
76 | #define _dP0 128 | |
77 | #define _dP1 256 | |
78 | #define _dP2 384 | |
79 | #define _dP3 512 | |
80 | #define _dP4 640 | |
81 | #define _dP5 768 | |
82 | #define _dP6 896 | |
83 | #define _dP7 1024 | |
84 | #define _dP8 1152 | |
85 | #define _dP9 1280 | |
86 | #define _dP10 1408 | |
87 | #define _dP11 1536 | |
88 | #define _dP12 1664 | |
89 | #define _dP13 1792 | |
90 | #define _dP14 1920 | |
91 | #define _dP15 2048 | |
92 | #define _dP16 2176 | |
93 | #define _dP17 2304 | |
94 | #define _iExpMantMask_UISA 2432 | |
95 | #define _iMinIdxOfsMask_UISA 2496 | |
96 | #define _iMaxIdxMask_UISA 2560 | |
97 | #define _dbSignMask 2624 | |
98 | #define _dbAbsMask 2688 | |
99 | #define _iExpMantMask 2752 | |
100 | #define _iExpMask 2816 | |
101 | #define _iMinIdxOfsMask 2880 | |
102 | #define _iMaxIdxMask 2944 | |
c0f36fc3 SP |
103 | |
104 | #include <sysdep.h> | |
105 | ||
6de743a4 | 106 | .section .text.evex512, "ax", @progbits |
c0f36fc3 | 107 | ENTRY(_ZGVeN8v_tanh_skx) |
6de743a4 SP |
108 | pushq %rbp |
109 | cfi_def_cfa_offset(16) | |
110 | movq %rsp, %rbp | |
111 | cfi_def_cfa(6, 16) | |
112 | cfi_offset(6, -16) | |
113 | andq $-64, %rsp | |
114 | subq $320, %rsp | |
115 | vpsrlq $32, %zmm0, %zmm4 | |
116 | vmovups %zmm0, (%rsp) | |
117 | vmovups __svml_dtanh_data_internal(%rip), %zmm14 | |
118 | vmovups _dP0+__svml_dtanh_data_internal(%rip), %zmm15 | |
119 | vpmovqd %zmm4, %ymm5 | |
c0f36fc3 | 120 | |
6de743a4 SP |
121 | /* Split the input: zmm13 = abs(x) (x AND _dbAbsMask), zmm3 = sign bit of x (x AND _dbSignMask). The sign is spilled to 64(%rsp) and OR-ed back into the result at the end of the main path. */ | |
122 | vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %zmm0, %zmm13 | |
123 | vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %zmm0, %zmm3 | |
c0f36fc3 | 124 | |
6de743a4 SP |
125 | /* ymm7 = high dword of each input lane AND _iExpMantMask_UISA. It feeds the table-index computation below and the later compare against _iExpMask that routes huge arguments, INF and NaNs to the callout. */ | |
126 | vpand _iExpMantMask_UISA+__svml_dtanh_data_internal(%rip), %ymm5, %ymm7 | |
127 | vmovups _dP2+__svml_dtanh_data_internal(%rip), %zmm0 | |
128 | vmovups _dP16+__svml_dtanh_data_internal(%rip), %zmm4 | |
129 | vmovups _dP15+__svml_dtanh_data_internal(%rip), %zmm5 | |
130 | vmovups %zmm3, 64(%rsp) | |
131 | vmovups _dP3+__svml_dtanh_data_internal(%rip), %zmm3 | |
132 | vpsubd _iMinIdxOfsMask_UISA+__svml_dtanh_data_internal(%rip), %ymm7, %ymm8 | |
c0f36fc3 | 133 | |
6de743a4 SP |
134 | /* Clamp (ymm7 - _iMinIdxOfsMask_UISA) to [0, _iMaxIdxMask_UISA] with signed max/min, then shift right by 19 to form the per-lane table index; the index is widened to qwords in zmm2 and used by every vpermt2pd below to pick one of the 16 entries of each coefficient block. */ | |
135 | vxorps %ymm9, %ymm9, %ymm9 | |
136 | vpmaxsd %ymm9, %ymm8, %ymm10 | |
137 | vpminsd _iMaxIdxMask_UISA+__svml_dtanh_data_internal(%rip), %ymm10, %ymm11 | |
138 | vpsrld $19, %ymm11, %ymm12 | |
139 | vmovups _dP12+__svml_dtanh_data_internal(%rip), %zmm8 | |
140 | vmovups _dP11+__svml_dtanh_data_internal(%rip), %zmm9 | |
141 | vmovups _dP10+__svml_dtanh_data_internal(%rip), %zmm10 | |
142 | vmovups _dP9+__svml_dtanh_data_internal(%rip), %zmm11 | |
143 | vpmovzxdq %ymm12, %zmm2 | |
144 | vmovups _dP8+__svml_dtanh_data_internal(%rip), %zmm12 | |
145 | vpermt2pd _dP2+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm0 | |
146 | vpermt2pd _dC+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm14 | |
147 | vpermt2pd _dP16+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm4 | |
148 | vpermt2pd _dP15+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm5 | |
149 | vsubpd {rn-sae}, %zmm14, %zmm13, %zmm1 | |
150 | vpermt2pd _dP12+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm8 | |
151 | vpermt2pd _dP11+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm9 | |
152 | vpermt2pd _dP10+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm10 | |
153 | vpermt2pd _dP9+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm11 | |
154 | vpermt2pd _dP8+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm12 | |
155 | vpermt2pd _dP3+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm3 | |
156 | vpermt2pd _dP0+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm15 | |
157 | vmovups %zmm0, 192(%rsp) | |
158 | vmovups _dP17+__svml_dtanh_data_internal(%rip), %zmm0 | |
159 | vmovups _dP7+__svml_dtanh_data_internal(%rip), %zmm13 | |
160 | vmovups _dP6+__svml_dtanh_data_internal(%rip), %zmm14 | |
161 | vmovups %zmm3, 256(%rsp) | |
162 | vmovups _dP5+__svml_dtanh_data_internal(%rip), %zmm3 | |
163 | vmovups %zmm15, 128(%rsp) | |
164 | vmovups _dP4+__svml_dtanh_data_internal(%rip), %zmm15 | |
165 | vpermt2pd _dP17+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm0 | |
166 | vpermt2pd _dP7+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm13 | |
167 | vpermt2pd _dP6+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm14 | |
168 | vpermt2pd _dP5+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm3 | |
169 | vpermt2pd _dP4+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm15 | |
170 | vfmadd213pd {rn-sae}, %zmm4, %zmm1, %zmm0 | |
171 | vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %ymm7, %ymm6 | |
172 | vmovmskps %ymm6, %edx | |
173 | vmovups _dP14+__svml_dtanh_data_internal(%rip), %zmm6 | |
174 | vfmadd213pd {rn-sae}, %zmm5, %zmm1, %zmm0 | |
175 | vmovups _dP13+__svml_dtanh_data_internal(%rip), %zmm7 | |
176 | vpermt2pd _dP14+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm6 | |
177 | vpermt2pd _dP13+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm7 | |
178 | vfmadd213pd {rn-sae}, %zmm6, %zmm1, %zmm0 | |
179 | vmovups 256(%rsp), %zmm2 | |
180 | vfmadd213pd {rn-sae}, %zmm7, %zmm1, %zmm0 | |
181 | vfmadd213pd {rn-sae}, %zmm8, %zmm1, %zmm0 | |
182 | vfmadd213pd {rn-sae}, %zmm9, %zmm1, %zmm0 | |
183 | vfmadd213pd {rn-sae}, %zmm10, %zmm1, %zmm0 | |
184 | vfmadd213pd {rn-sae}, %zmm11, %zmm1, %zmm0 | |
185 | vfmadd213pd {rn-sae}, %zmm12, %zmm1, %zmm0 | |
186 | vfmadd213pd {rn-sae}, %zmm13, %zmm1, %zmm0 | |
187 | vfmadd213pd {rn-sae}, %zmm14, %zmm1, %zmm0 | |
188 | vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0 | |
189 | vmovups 128(%rsp), %zmm3 | |
190 | vfmadd213pd {rn-sae}, %zmm15, %zmm1, %zmm0 | |
191 | vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 | |
192 | vmovups 192(%rsp), %zmm2 | |
193 | vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 | |
194 | vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0 | |
195 | vorpd 64(%rsp), %zmm0, %zmm0 | |
196 | testl %edx, %edx | |
c0f36fc3 | 197 | |
6de743a4 SP |
198 | /* edx is the per-lane callout mask (vmovmskps of the compare against _iExpMask); a non-zero mask means at least one lane goes to the special inputs processing branch. */ | |
199 | jne L(SPECIAL_VALUES_BRANCH) | |
200 | # LOE rbx r12 r13 r14 r15 edx zmm0 | |
c0f36fc3 | 201 | |
6de743a4 SP |
202 | /* Restore rsp and rbp |
203 | * and exit the function; zmm0 holds the result | |
204 | */ | |
c0f36fc3 SP |
205 | |
206 | L(EXIT): | |
6de743a4 SP |
207 | movq %rbp, %rsp |
208 | popq %rbp | |
209 | cfi_def_cfa(7, 8) | |
210 | cfi_restore(6) | |
211 | ret | |
212 | cfi_def_cfa(6, 16) | |
213 | cfi_offset(6, -16) | |
c0f36fc3 | 214 | |
6de743a4 SP |
215 | /* Special inputs branch: keep the vector result at 128(%rsp) |
216 | * and a copy of the original input (saved at (%rsp) on entry) at 64(%rsp), so single lanes can be recomputed | |
217 | */ | |
c0f36fc3 SP |
218 | |
219 | L(SPECIAL_VALUES_BRANCH): | |
6de743a4 SP |
220 | vmovups (%rsp), %zmm1 |
221 | vmovups %zmm0, 128(%rsp) | |
222 | vmovups %zmm1, 64(%rsp) | |
223 | # LOE rbx r12 r13 r14 r15 edx zmm0 | |
c0f36fc3 | 224 | |
6de743a4 SP |
225 | xorl %eax, %eax |
226 | # LOE rbx r12 r13 r14 r15 eax edx | |
c0f36fc3 | 227 | |
6de743a4 SP |
228 | vzeroupper |
229 | movq %r12, 16(%rsp) | |
230 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus) */ | |
231 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22 | |
232 | movl %eax, %r12d | |
233 | movq %r13, 8(%rsp) | |
234 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus) */ | |
235 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22 | |
236 | movl %edx, %r13d | |
237 | movq %r14, (%rsp) | |
238 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus) */ | |
239 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22 | |
240 | # LOE rbx r15 r12d r13d | |
c0f36fc3 | 241 | |
6de743a4 SP |
242 | /* Range mask bits check: |
243 | * test bit r12d (current lane index, starts at 0) of the callout mask kept in r13d | |
244 | */ | |
c0f36fc3 SP |
245 | |
246 | L(RANGEMASK_CHECK): | |
6de743a4 | 247 | btl %r12d, %r13d |
c0f36fc3 | 248 | |
6de743a4 SP |
249 | /* Bit set (carry): call the scalar math function for this lane */ |
250 | jc L(SCALAR_MATH_CALL) | |
251 | # LOE rbx r15 r12d r13d | |
c0f36fc3 | 252 | |
6de743a4 SP |
253 | /* Special inputs processing loop: |
254 | * advance the lane index and continue while it is below 8 | |
255 | */ | |
c0f36fc3 SP |
256 | |
257 | L(SPECIAL_VALUES_LOOP): | |
6de743a4 SP |
258 | incl %r12d |
259 | cmpl $8, %r12d | |
c0f36fc3 | 260 | |
6de743a4 SP |
261 | /* Check bits in range mask */ |
262 | jl L(RANGEMASK_CHECK) | |
263 | # LOE rbx r15 r12d r13d | |
c0f36fc3 | 264 | |
6de743a4 SP |
265 | movq 16(%rsp), %r12 |
266 | cfi_restore(12) | |
267 | movq 8(%rsp), %r13 | |
268 | cfi_restore(13) | |
269 | movq (%rsp), %r14 | |
270 | cfi_restore(14) | |
271 | vmovups 128(%rsp), %zmm0 | |
c0f36fc3 | 272 | |
6de743a4 SP |
273 | /* All lanes handled: saved r12-r14 are restored and zmm0 is reloaded from 128(%rsp); go to exit */ |
274 | jmp L(EXIT) | |
275 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus) */ | |
276 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22 | |
277 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus) */ | |
278 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22 | |
279 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus) */ | |
280 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22 | |
281 | # LOE rbx r12 r13 r14 r15 zmm0 | |
c0f36fc3 | 282 | |
6de743a4 SP |
283 | /* Scalar math function call |
284 | * to process one special input: r14 = lane index; the input lane is read from 64(%rsp) and the scalar tanh result is written to the same lane at 128(%rsp) | |
285 | */ | |
c0f36fc3 SP |
286 | |
287 | L(SCALAR_MATH_CALL): | |
6de743a4 | 288 | movl %r12d, %r14d |
3079f652 | 289 | vmovsd 64(%rsp, %r14, 8), %xmm0 |
6de743a4 SP |
290 | call tanh@PLT |
291 | # LOE rbx r14 r15 r12d r13d xmm0 | |
c0f36fc3 | 292 | |
3079f652 | 293 | vmovsd %xmm0, 128(%rsp, %r14, 8) |
c0f36fc3 | 294 | |
6de743a4 SP |
295 | /* Process special inputs in loop */ |
296 | jmp L(SPECIAL_VALUES_LOOP) | |
297 | # LOE rbx r15 r12d r13d | |
c0f36fc3 SP |
298 | END(_ZGVeN8v_tanh_skx) |
299 | ||
6de743a4 SP |
300 | .section .rodata, "a" |
301 | .align 64 | |
c0f36fc3 SP |
302 | |
303 | #ifdef __svml_dtanh_data_internal_typedef | |
304 | typedef unsigned int VUINT32; | |
6de743a4 SP |
305 | typedef struct { |
306 | __declspec(align(64)) VUINT32 _dC[16][2]; | |
307 | __declspec(align(64)) VUINT32 _dP0[16][2]; | |
308 | __declspec(align(64)) VUINT32 _dP1[16][2]; | |
309 | __declspec(align(64)) VUINT32 _dP2[16][2]; | |
310 | __declspec(align(64)) VUINT32 _dP3[16][2]; | |
311 | __declspec(align(64)) VUINT32 _dP4[16][2]; | |
312 | __declspec(align(64)) VUINT32 _dP5[16][2]; | |
313 | __declspec(align(64)) VUINT32 _dP6[16][2]; | |
314 | __declspec(align(64)) VUINT32 _dP7[16][2]; | |
315 | __declspec(align(64)) VUINT32 _dP8[16][2]; | |
316 | __declspec(align(64)) VUINT32 _dP9[16][2]; | |
317 | __declspec(align(64)) VUINT32 _dP10[16][2]; | |
318 | __declspec(align(64)) VUINT32 _dP11[16][2]; | |
319 | __declspec(align(64)) VUINT32 _dP12[16][2]; | |
320 | __declspec(align(64)) VUINT32 _dP13[16][2]; | |
321 | __declspec(align(64)) VUINT32 _dP14[16][2]; | |
322 | __declspec(align(64)) VUINT32 _dP15[16][2]; | |
323 | __declspec(align(64)) VUINT32 _dP16[16][2]; | |
324 | __declspec(align(64)) VUINT32 _dP17[16][2]; | |
325 | __declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1]; | |
326 | __declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1]; | |
327 | __declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1]; | |
328 | __declspec(align(64)) VUINT32 _dbSignMask[8][2]; | |
329 | __declspec(align(64)) VUINT32 _dbAbsMask[8][2]; | |
330 | __declspec(align(64)) VUINT32 _iExpMantMask[16][1]; | |
331 | __declspec(align(64)) VUINT32 _iExpMask[16][1]; | |
332 | __declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1]; | |
333 | __declspec(align(64)) VUINT32 _iMaxIdxMask[16][1]; | |
c0f36fc3 SP |
334 | } __svml_dtanh_data_internal; |
335 | #endif | |
336 | __svml_dtanh_data_internal: | |
6de743a4 SP |
337 | /* _dC */ |
338 | .quad 0x0000000000000000, 0x3fcc000000000000, 0x3fd4000000000000, 0x3fdc000000000000 | |
339 | .quad 0x3fe4000000000000, 0x3fec000000000000, 0x3ff4000000000000, 0x3ffc000000000000 | |
340 | .quad 0x4004000000000000, 0x400c000000000000, 0x4014000000000000, 0x401c000000000000 | |
341 | .quad 0x4024000000000000, 0x402c000000000000, 0x4034000000000000, 0x0000000000000000 | |
342 | /* p0 */ | |
343 | .align 64 | |
344 | .quad 0x0000000000000000, 0x3fcb8fd0416a7c92, 0x3fd35f98a0ea650e, 0x3fda5729ee488037 | |
345 | .quad 0x3fe1bf47eabb8f95, 0x3fe686650b8c2015, 0x3feb2523bb6b2dee, 0x3fee1fbf97e33527 | |
346 | .quad 0x3fef9258260a71c2, 0x3feff112c63a9077, 0x3fefff419668df11, 0x3feffffc832750f2 | |
347 | .quad 0x3feffffffdc96f35, 0x3fefffffffffcf58, 0x3ff0000000000000, 0x3ff0000000000000 | |
348 | /* p1 */ | |
349 | .align 64 | |
350 | .quad 0x0000000000000000, 0x3c65e23ebcd3bcbe, 0xbc4c600bac3adf00, 0x3c6c44091785d040 | |
351 | .quad 0x3c8221d7a6e3674b, 0x3c69f89d2cf6b85c, 0x3c73b3e9ec0b8f1c, 0xbc7f8d4b0428aada | |
352 | .quad 0xbc7c52d880cf43c0, 0x3c7dd36e37096480, 0x3c7b4f6380c442ca, 0xbc729755de470096 | |
353 | .quad 0x3c84cf852845efbd, 0x3c6fc4fb440a5378, 0xbc63981083b55870, 0x0000000000000000 | |
354 | /* p2 */ | |
355 | .align 64 | |
356 | .quad 0x3ff0000000000000, 0x3fee842ca3f08532, 0x3fed11574af58f1b, 0x3fea945b9c24e4f9 | |
357 | .quad 0x3fe6284c3374f815, 0x3fe02500a09f8d6e, 0x3fd1f25131e3a8c0, 0x3fbd22ca1c24a139 | |
358 | .quad 0x3f9b3afe1fba5c76, 0x3f6dd37d19b22b21, 0x3f27ccec13a9ef96, 0x3ecbe6c3f33250ae | |
359 | .quad 0x3e41b4865394f75f, 0x3d8853f01bda5f28, 0x3c73953c0197ef58, 0x0000000000000000 | |
360 | /* p3 */ | |
361 | .align 64 | |
362 | .quad 0xbbf0b3ea3fdfaa19, 0xbfca48aaeb53bc21, 0xbfd19921f4329916, 0xbfd5e0f09bef8011 | |
363 | .quad 0xbfd893b59c35c882, 0xbfd6ba7cb7576538, 0xbfce7291743d7555, 0xbfbb6d85a01efb80 | |
364 | .quad 0xbf9addae58c7141a, 0xbf6dc59376c7aa19, 0xbf27cc5e74677410, 0xbecbe6c0e8b4cc87 | |
365 | .quad 0xbe41b486526b0565, 0xbd8853f01bef63a4, 0xbc73955be519be31, 0x0000000000000000 | |
366 | /* p4 */ | |
367 | .align 64 | |
368 | .quad 0xbfd5555555555555, 0xbfd183afc292ba11, 0xbfcc1a4b039c9bfa, 0xbfc16e1e6d8d0be6 | |
369 | .quad 0xbf92426c751e48a2, 0x3fb4f152b2bad124, 0x3fbbba40cbef72be, 0x3fb01ba038be6a3d | |
370 | .quad 0x3f916df44871efc8, 0x3f63c6869dfc8870, 0x3f1fb9aef915d828, 0x3ec299d1e27c6e11 | |
371 | .quad 0x3e379b5ddcca334c, 0x3d8037f57bc62c9a, 0x3c6a2d4b50a2cff7, 0x0000000000000000 | |
372 | /* p5 */ | |
373 | .align 64 | |
374 | .quad 0xbce6863ee44ed636, 0x3fc04dcd0476c75e, 0x3fc43d3449a80f08, 0x3fc5c26f3699b7e7 | |
375 | .quad 0x3fc1a686f6ab2533, 0x3faf203c316ce730, 0xbf89c7a02788557c, 0xbf98157e26e0d541 | |
376 | .quad 0xbf807b55c1c7d278, 0xbf53a18d5843190f, 0xbf0fb6bbc89b1a5b, 0xbeb299c9c684a963 | |
377 | .quad 0xbe279b5dd4fb3d01, 0xbd7037f57ae72aa6, 0xbc5a2ca2bba78e86, 0x0000000000000000 | |
378 | /* p6 */ | |
379 | .align 64 | |
380 | .quad 0x3fc1111111112ab5, 0x3fb5c19efdfc08ad, 0x3fa74c98dc34fbac, 0xbf790d6a8eff0a77 | |
381 | .quad 0xbfac3c021789a786, 0xbfae2196b7326859, 0xbf93a7a011ff8c2a, 0x3f6e4709c7e8430e | |
382 | .quad 0x3f67682afa611151, 0x3f3ef2ee77717cbf, 0x3ef95a4482f180b7, 0x3e9dc2c27da3b603 | |
383 | .quad 0x3e12e2afd9f7433e, 0x3d59f320348679ba, 0x3c44b61d9bbcc940, 0x0000000000000000 | |
384 | /* p7 */ | |
385 | .align 64 | |
386 | .quad 0xbda1ea19ddddb3b4, 0xbfb0b8df995ce4df, 0xbfb2955cf41e8164, 0xbfaf9d05c309f7c6 | |
387 | .quad 0xbf987d27ccff4291, 0x3f8b2ca62572b098, 0x3f8f1cf6c7f5b00a, 0x3f60379811e43dd5 | |
388 | .quad 0xbf4793826f78537e, 0xbf2405695e36240f, 0xbee0e08de39ce756, 0xbe83d709ba5f714e | |
389 | .quad 0xbdf92e3fc5ee63e0, 0xbd414cc030f2110e, 0xbc2ba022e8d82a87, 0x0000000000000000 | |
390 | /* p8 */ | |
391 | .align 64 | |
392 | .quad 0xbfaba1ba1990520b, 0xbf96e37bba52f6fc, 0x3ecff7df18455399, 0x3f97362834d33a4e | |
393 | .quad 0x3f9e7f8380184b45, 0x3f869543e7c420d4, 0xbf7326bd4914222a, 0xbf5fc15b0a9d98fa | |
394 | .quad 0x3f14cffcfa69fbb6, 0x3f057e48e5b79d10, 0x3ec33b66d7d77264, 0x3e66ac4e578b9b10 | |
395 | .quad 0x3ddcc74b8d3d5c42, 0x3d23c589137f92b4, 0x3c107f8e2c8707a1, 0x0000000000000000 | |
396 | /* p9 */ | |
397 | .align 64 | |
398 | .quad 0xbe351ca7f096011f, 0x3f9eaaf3320c3851, 0x3f9cf823fe761fc1, 0x3f9022271754ff1f | |
399 | .quad 0xbf731fe77c9c60af, 0xbf84a6046865ec7d, 0xbf4ca3f1f2b9192b, 0x3f4c77dee0afd227 | |
400 | .quad 0x3f04055bce68597a, 0xbee2bf0cb4a71647, 0xbea31eaafe73efd5, 0xbe46abb02c4368ed | |
401 | .quad 0xbdbcc749ca8079dd, 0xbd03c5883836b9d2, 0xbbf07a5416264aec, 0x0000000000000000 | |
402 | /* p10 */ | |
403 | .align 64 | |
404 | .quad 0x3f9664f94e6ac14e, 0xbf94d3343bae39dd, 0xbf7bc748e60df843, 0xbf8c89372b43ba85 | |
405 | .quad 0xbf8129a092de747a, 0x3f60c85b4d538746, 0x3f5be9392199ec18, 0xbf2a0c68a4489f10 | |
406 | .quad 0xbf00462601dc2faa, 0x3eb7b6a219dea9f4, 0x3e80cbcc8d4c5c8a, 0x3e2425bb231a5e29 | |
407 | .quad 0x3d9992a4beac8662, 0x3ce191ba5ed3fb67, 0x3bc892450bad44c4, 0x0000000000000000 | |
408 | /* p11 */ | |
409 | .align 64 | |
410 | .quad 0xbea8c4c1fd7852fe, 0xbfccce16b1046f13, 0xbf81a16f224bb7b6, 0xbf62cbf00406bc09 | |
411 | .quad 0x3f75b29bb02cf69b, 0x3f607df0f9f90c17, 0xbf4b852a6e0758d5, 0xbf0078c63d1b8445 | |
412 | .quad 0x3eec12eadd55be7a, 0xbe6fa600f593181b, 0xbe5a3c935dce3f7d, 0xbe001c6d95e3ae96 | |
413 | .quad 0xbd74755a00ea1fd3, 0xbcbc1c6c063bb7ac, 0xbba3be9a4460fe00, 0x0000000000000000 | |
414 | /* p12 */ | |
415 | .align 64 | |
416 | .quad 0xbf822404577aa9dd, 0x403d8b07f7a82aa3, 0xbf9f44ab92fbab0a, 0x3fb2eac604473d6a | |
417 | .quad 0x3f45f87d903aaac8, 0xbf5e104671036300, 0x3f19bc98ddf0f340, 0x3f0d4304bc9246e8 | |
418 | .quad 0xbed13c415f7b9d41, 0xbe722b8d9720cdb0, 0x3e322666d739bec0, 0x3dd76a553d7e7918 | |
419 | .quad 0x3d4de0fa59416a39, 0x3c948716cf3681b4, 0x3b873f9f2d2fda99, 0x0000000000000000 | |
420 | /* p13 */ | |
421 | .align 64 | |
422 | .quad 0xbefdd99a221ed573, 0x4070593a3735bab4, 0xbfccab654e44835e, 0x3fd13ed80037dbac | |
423 | .quad 0xbf6045b9076cc487, 0x3f2085ee7e8ac170, 0x3f23524622610430, 0xbeff12a6626911b4 | |
424 | .quad 0x3eab9008bca408af, 0x3e634df71865f620, 0xbe05bb1bcf83ca73, 0xbdaf2ac143fb6762 | |
425 | .quad 0xbd23eae52a3dbf57, 0xbc6b5e3e9ca0955e, 0xbb5eca68e2c1ba2e, 0x0000000000000000 | |
426 | /* p14 */ | |
427 | .align 64 | |
428 | .quad 0x3f6e3be689423841, 0xc0d263511f5baac1, 0x40169f73b15ebe5c, 0xc025c1dd41cd6cb5 | |
429 | .quad 0xbf58fd89fe05e0d1, 0x3f73f7af01d5af7a, 0xbf1e40bdead17e6b, 0x3ee224cd6c4513e5 | |
430 | .quad 0xbe24b645e68eeaa3, 0xbe4abfebfb72bc83, 0x3dd51c38f8695ed3, 0x3d8313ac38c6832b | |
431 | .quad 0x3cf7787935626685, 0x3c401ffc49c6bc29, 0xbabf0b21acfa52ab, 0x0000000000000000 | |
432 | /* p15 */ | |
433 | .align 64 | |
434 | .quad 0xbf2a1306713a4f3a, 0xc1045e509116b066, 0x4041fab9250984ce, 0xc0458d090ec3de95 | |
435 | .quad 0xbf74949d60113d63, 0x3f7c9fd6200d0ade, 0x3f02cd40e0ad0a9f, 0xbe858ab8e019f311 | |
436 | .quad 0xbe792fa6323b7cf8, 0x3e2df04d67876402, 0xbd95c72be95e4d2c, 0xbd55a89c30203106 | |
437 | .quad 0xbccad6b3bb9eff65, 0xbc12705ccd3dd884, 0xba8e0a4c47ae75f5, 0x0000000000000000 | |
438 | /* p16 */ | |
439 | .align 64 | |
440 | .quad 0xbf55d7e76dc56871, 0x41528c38809c90c7, 0xc076d57fb5190b02, 0x4085f09f888f8ada | |
441 | .quad 0x3fa246332a2fcba5, 0xbfb29d851a896fcd, 0x3ed9065ae369b212, 0xbeb8e1ba4c98a030 | |
442 | .quad 0x3e6ffd0766ad4016, 0xbe0c63c29f505f5b, 0xbd7fab216b9e0e49, 0x3d2826b62056aa27 | |
443 | .quad 0x3ca313e31762f523, 0x3bea37aa21895319, 0x3ae5c7f1fd871496, 0x0000000000000000 | |
444 | /* p17 */ | |
445 | .align 64 | |
446 | .quad 0x3f35e67ab76a26e7, 0x41848ee0627d8206, 0xc0a216d618b489ec, 0x40a5b89107c8af4f | |
447 | .quad 0x3fb69d8374520eda, 0xbfbded519f981716, 0xbef02d288b5b3371, 0x3eb290981209c1a6 | |
448 | .quad 0xbe567e924bf5ff6e, 0x3de3f7f7de6b0eb6, 0x3d69ed18bae3ebbc, 0xbcf7534c4f3dfa71 | |
449 | .quad 0xbc730b73f1eaff20, 0xbbba2cff8135d462, 0xbab5a71b5f7d9035, 0x0000000000000000 | |
450 | .align 64 | |
451 | .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask_UISA */ | |
452 | .align 64 | |
453 | .long 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 /* _iMinIdxOfsMask_UISA */ | |
454 | .align 64 | |
455 | .long 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000 /* _iMaxIdxMask_UISA */ | |
456 | .align 64 | |
457 | .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 /* _dbSignMask */ | |
458 | .align 64 | |
459 | .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff /* _dbAbsMask */ | |
460 | .align 64 | |
461 | .long 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000 /* _iExpMantMask */ | |
462 | .align 64 | |
463 | .long 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000 /* _iExpMask */ | |
464 | .align 64 | |
465 | .long 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000 /* _iMinIdxOfsMask */ | |
466 | .align 64 | |
467 | .long 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000 /* _iMaxIdxMask */ | |
468 | .align 64 | |
469 | .type __svml_dtanh_data_internal, @object | |
470 | .size __svml_dtanh_data_internal, .-__svml_dtanh_data_internal |