/* Function asinhf vectorized with AVX2.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */

/*
 * ALGORITHM DESCRIPTION:
 *
 *   Compute asinh(x) as log(x + sqrt(x*x + 1))
 *
 *   Special cases:
 *
 *   asinh(NaN) = quiet NaN, and raise invalid exception
 *   asinh(INF) = that INF
 *   asinh(0) = that 0
 *
 */
/* Byte offsets into the data table __svml_sasinh_data_internal below.
   Each constant occupies one 32-byte YMM-width row.  */
#define SgnMask				0
#define sOne				32
#define sPoly				64
#define iBrkValue			320
#define iOffExpoMask			352
#define sBigThreshold			384
#define sC2				416
#define sC3				448
#define sHalf				480
#define sLargestFinite			512
#define sLittleThreshold		544
#define sSign				576
#define sThirtyOne			608
#define sTopMask8			640
#define XScale				672
#define sLn2				704

#include <sysdep.h>
99088223 | 53 | .section .text.avx2, "ax", @progbits |
e682d015 | 54 | ENTRY(_ZGVdN8v_asinhf_avx2) |
99088223 SP |
55 | pushq %rbp |
56 | cfi_def_cfa_offset(16) | |
57 | movq %rsp, %rbp | |
58 | cfi_def_cfa(6, 16) | |
59 | cfi_offset(6, -16) | |
60 | andq $-32, %rsp | |
61 | subq $96, %rsp | |
62 | vmovaps %ymm0, %ymm9 | |
63 | ||
64 | /* Load the constant 1 and a sign mask */ | |
65 | vmovups sOne+__svml_sasinh_data_internal(%rip), %ymm8 | |
66 | ||
67 | /* No need to split X when FMA is available in hardware. */ | |
68 | vmulps %ymm9, %ymm9, %ymm5 | |
69 | vmovups sTopMask8+__svml_sasinh_data_internal(%rip), %ymm1 | |
70 | ||
71 | /* | |
72 | * Finally, express Y + W = X^2 + 1 accurately where Y has <= 8 bits. | |
73 | * If |X| <= 1 then |XHi| <= 1 and so |X2Hi| <= 1, so we can treat 1 | |
74 | * as the dominant component in the compensated summation. Otherwise, | |
75 | * if |X| >= 1, then since X2Hi only has 22 significant bits, the basic | |
76 | * addition will be exact anyway until we get to |X| >= 2^24. But by | |
77 | * that time the log function is well-conditioned enough that the | |
78 | * rounding error doesn't matter. Hence we can treat 1 as dominant even | |
79 | * if it literally isn't. | |
80 | */ | |
81 | vaddps %ymm5, %ymm8, %ymm13 | |
82 | vandps %ymm1, %ymm13, %ymm2 | |
83 | vmovaps %ymm9, %ymm4 | |
84 | vsubps %ymm13, %ymm8, %ymm11 | |
85 | vsubps %ymm2, %ymm13, %ymm15 | |
86 | ||
87 | /* | |
88 | * Compute R = 1/sqrt(Y + W) * (1 + d) | |
89 | * Force R to <= 8 significant bits. | |
90 | * This means that R * Y and R^2 * Y are exactly representable. | |
91 | */ | |
92 | vrsqrtps %ymm2, %ymm0 | |
93 | vfmsub213ps %ymm5, %ymm9, %ymm4 | |
94 | vaddps %ymm11, %ymm5, %ymm12 | |
95 | ||
96 | /* | |
97 | * Get the absolute value of the input, since we will exploit antisymmetry | |
98 | * and mostly assume X >= 0 in the core computation | |
99 | */ | |
100 | vandps SgnMask+__svml_sasinh_data_internal(%rip), %ymm9, %ymm6 | |
101 | ||
102 | /* | |
103 | * Check whether the input is finite, by checking |X| <= MaxFloat | |
104 | * Otherwise set the rangemask so that the callout will get used. | |
105 | * Note that this will also use the callout for NaNs since not(NaN <= MaxFloat) | |
106 | */ | |
107 | vcmpnle_uqps sLargestFinite+__svml_sasinh_data_internal(%rip), %ymm6, %ymm10 | |
108 | vaddps %ymm12, %ymm4, %ymm14 | |
109 | ||
110 | /* | |
111 | * Unfortunately, we can still be in trouble if |X| <= 2^-5, since | |
112 | * the absolute error 2^-(7+24)-ish in sqrt(1 + X^2) gets scaled up | |
113 | * by 1/X and comes close to our threshold. Hence if |X| <= 2^-4, | |
114 | * perform an alternative computation | |
115 | * sqrt(1 + X^2) - 1 = X^2/2 - X^4/8 + X^6/16 | |
116 | * X2 = X^2 | |
117 | */ | |
118 | vaddps %ymm4, %ymm5, %ymm4 | |
119 | ||
120 | /* | |
121 | * The following computation can go wrong for very large X, basically | |
122 | * because X^2 overflows. But for large X we have | |
123 | * asinh(X) / log(2 X) - 1 =~= 1/(4 * X^2), so for X >= 2^30 | |
124 | * we can just later stick X back into the log and tweak up the exponent. | |
125 | * Actually we scale X by 2^-30 and tweak the exponent up by 31, | |
126 | * to stay in the safe range for the later log computation. | |
127 | * Compute a flag now telling us when do do this. | |
128 | */ | |
129 | vcmplt_oqps sBigThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm7 | |
130 | vaddps %ymm15, %ymm14, %ymm3 | |
131 | ||
132 | /* | |
133 | * Now 1 / (1 + d) | |
134 | * = 1 / (1 + (sqrt(1 - e) - 1)) | |
135 | * = 1 / sqrt(1 - e) | |
136 | * = 1 + 1/2 * e + 3/8 * e^2 + 5/16 * e^3 + 35/128 * e^4 + ... | |
137 | * So compute the first three nonconstant terms of that, so that | |
138 | * we have a relative correction (1 + Corr) to apply to S etc. | |
139 | * C1 = 1/2 | |
140 | * C2 = 3/8 | |
141 | * C3 = 5/16 | |
142 | */ | |
143 | vmovups sC3+__svml_sasinh_data_internal(%rip), %ymm12 | |
144 | vmovmskps %ymm10, %edx | |
145 | vandps %ymm1, %ymm0, %ymm10 | |
146 | ||
147 | /* | |
148 | * Compute S = (Y/sqrt(Y + W)) * (1 + d) | |
149 | * and T = (W/sqrt(Y + W)) * (1 + d) | |
150 | * so that S + T = sqrt(Y + W) * (1 + d) | |
151 | * S is exact, and the rounding error in T is OK. | |
152 | */ | |
153 | vmulps %ymm10, %ymm2, %ymm15 | |
154 | vmulps %ymm3, %ymm10, %ymm14 | |
155 | vmovups sHalf+__svml_sasinh_data_internal(%rip), %ymm3 | |
156 | vsubps %ymm8, %ymm15, %ymm0 | |
157 | ||
158 | /* | |
159 | * Obtain sqrt(1 + X^2) - 1 in two pieces | |
160 | * sqrt(1 + X^2) - 1 | |
161 | * = sqrt(Y + W) - 1 | |
162 | * = (S + T) * (1 + Corr) - 1 | |
163 | * = [S - 1] + [T + (S + T) * Corr] | |
164 | * We need a compensated summation for the last part. We treat S - 1 | |
165 | * as the larger part; it certainly is until about X < 2^-4, and in that | |
166 | * case, the error is affordable since X dominates over sqrt(1 + X^2) - 1 | |
167 | * Final sum is dTmp5 (hi) + dTmp7 (lo) | |
168 | */ | |
169 | vaddps %ymm14, %ymm15, %ymm13 | |
170 | ||
171 | /* | |
172 | * Compute e = -(2 * d + d^2) | |
173 | * The first FMR is exact, and the rounding error in the other is acceptable | |
174 | * since d and e are ~ 2^-8 | |
175 | */ | |
176 | vmovaps %ymm8, %ymm11 | |
177 | vfnmadd231ps %ymm15, %ymm10, %ymm11 | |
178 | vfnmadd231ps %ymm14, %ymm10, %ymm11 | |
179 | vfmadd213ps sC2+__svml_sasinh_data_internal(%rip), %ymm11, %ymm12 | |
180 | vfmadd213ps %ymm3, %ymm11, %ymm12 | |
181 | vmulps %ymm12, %ymm11, %ymm1 | |
182 | ||
183 | /* Now multiplex the two possible computations */ | |
184 | vcmple_oqps sLittleThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm11 | |
185 | vfmadd213ps %ymm14, %ymm13, %ymm1 | |
186 | vaddps %ymm0, %ymm1, %ymm2 | |
187 | vsubps %ymm2, %ymm0, %ymm10 | |
188 | ||
189 | /* sX2over2 = X^2/2 */ | |
190 | vmulps %ymm4, %ymm3, %ymm0 | |
191 | vaddps %ymm10, %ymm1, %ymm1 | |
192 | ||
193 | /* sX4over4 = X^4/4 */ | |
194 | vmulps %ymm0, %ymm0, %ymm5 | |
195 | ||
196 | /* sX46 = -X^4/4 + X^6/8 */ | |
197 | vfmsub231ps %ymm0, %ymm5, %ymm5 | |
198 | ||
199 | /* sX46over2 = -X^4/8 + x^6/16 */ | |
200 | vmulps %ymm5, %ymm3, %ymm3 | |
201 | vaddps %ymm3, %ymm0, %ymm5 | |
202 | vblendvps %ymm11, %ymm5, %ymm2, %ymm2 | |
203 | vsubps %ymm5, %ymm0, %ymm4 | |
204 | ||
205 | /* | |
206 | * Now do another compensated sum to add |X| + [sqrt(1 + X^2) - 1]. | |
207 | * It's always safe to assume |X| is larger. | |
208 | * This is the final 2-part argument to the log1p function | |
209 | */ | |
210 | vaddps %ymm2, %ymm6, %ymm14 | |
211 | ||
212 | /* | |
213 | * Now resume the main code. | |
214 | * reduction: compute r, n | |
215 | */ | |
216 | vmovups iBrkValue+__svml_sasinh_data_internal(%rip), %ymm5 | |
217 | vaddps %ymm4, %ymm3, %ymm10 | |
218 | ||
219 | /* | |
220 | * Now we feed into the log1p code, using H in place of _VARG1 and | |
221 | * also adding L into Xl. | |
222 | * compute 1+x as high, low parts | |
223 | */ | |
224 | vmaxps %ymm14, %ymm8, %ymm15 | |
225 | vminps %ymm14, %ymm8, %ymm0 | |
226 | vblendvps %ymm11, %ymm10, %ymm1, %ymm12 | |
227 | vsubps %ymm14, %ymm6, %ymm1 | |
228 | vaddps %ymm0, %ymm15, %ymm3 | |
229 | ||
230 | /* Now multiplex to the case X = 2^-30 * input, Xl = sL = 0 in the "big" case. */ | |
231 | vmulps XScale+__svml_sasinh_data_internal(%rip), %ymm6, %ymm6 | |
232 | vaddps %ymm1, %ymm2, %ymm13 | |
233 | vsubps %ymm3, %ymm15, %ymm15 | |
234 | vaddps %ymm13, %ymm12, %ymm1 | |
235 | vaddps %ymm15, %ymm0, %ymm2 | |
236 | vblendvps %ymm7, %ymm3, %ymm6, %ymm0 | |
237 | vaddps %ymm2, %ymm1, %ymm4 | |
238 | vpsubd %ymm5, %ymm0, %ymm1 | |
239 | vpsrad $23, %ymm1, %ymm6 | |
240 | vpand iOffExpoMask+__svml_sasinh_data_internal(%rip), %ymm1, %ymm2 | |
241 | vmovups sPoly+224+__svml_sasinh_data_internal(%rip), %ymm1 | |
242 | vpslld $23, %ymm6, %ymm10 | |
243 | vpaddd %ymm5, %ymm2, %ymm13 | |
244 | vcvtdq2ps %ymm6, %ymm0 | |
245 | vpsubd %ymm10, %ymm8, %ymm12 | |
246 | ||
247 | /* polynomial evaluation */ | |
248 | vsubps %ymm8, %ymm13, %ymm8 | |
249 | ||
250 | /* Add 31 to the exponent in the "large" case to get log(2 * input) */ | |
251 | vaddps sThirtyOne+__svml_sasinh_data_internal(%rip), %ymm0, %ymm3 | |
252 | vandps %ymm7, %ymm4, %ymm11 | |
253 | vmulps %ymm12, %ymm11, %ymm14 | |
254 | vblendvps %ymm7, %ymm0, %ymm3, %ymm0 | |
255 | vaddps %ymm8, %ymm14, %ymm2 | |
256 | vfmadd213ps sPoly+192+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
257 | vfmadd213ps sPoly+160+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
258 | vfmadd213ps sPoly+128+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
259 | vfmadd213ps sPoly+96+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
260 | vfmadd213ps sPoly+64+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
261 | vfmadd213ps sPoly+32+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
262 | vfmadd213ps sPoly+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 | |
263 | vmulps %ymm1, %ymm2, %ymm4 | |
264 | vfmadd213ps %ymm2, %ymm2, %ymm4 | |
265 | ||
266 | /* final reconstruction */ | |
267 | vfmadd132ps sLn2+__svml_sasinh_data_internal(%rip), %ymm4, %ymm0 | |
268 | ||
269 | /* Finally, reincorporate the original sign. */ | |
270 | vandps sSign+__svml_sasinh_data_internal(%rip), %ymm9, %ymm7 | |
271 | vxorps %ymm0, %ymm7, %ymm0 | |
272 | testl %edx, %edx | |
273 | ||
274 | /* Go to special inputs processing branch */ | |
275 | jne L(SPECIAL_VALUES_BRANCH) | |
276 | # LOE rbx r12 r13 r14 r15 edx ymm0 ymm9 | |
277 | ||
278 | /* Restore registers | |
279 | * and exit the function | |
280 | */ | |
e682d015 SP |
281 | |
282 | L(EXIT): | |
99088223 SP |
283 | movq %rbp, %rsp |
284 | popq %rbp | |
285 | cfi_def_cfa(7, 8) | |
286 | cfi_restore(6) | |
287 | ret | |
288 | cfi_def_cfa(6, 16) | |
289 | cfi_offset(6, -16) | |
290 | ||
291 | /* Branch to process | |
292 | * special inputs | |
293 | */ | |
e682d015 SP |
294 | |
295 | L(SPECIAL_VALUES_BRANCH): | |
99088223 SP |
296 | vmovups %ymm9, 32(%rsp) |
297 | vmovups %ymm0, 64(%rsp) | |
298 | # LOE rbx r12 r13 r14 r15 edx ymm0 | |
299 | ||
300 | xorl %eax, %eax | |
301 | # LOE rbx r12 r13 r14 r15 eax edx | |
302 | ||
303 | vzeroupper | |
304 | movq %r12, 16(%rsp) | |
305 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ | |
306 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 | |
307 | movl %eax, %r12d | |
308 | movq %r13, 8(%rsp) | |
309 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ | |
310 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 | |
311 | movl %edx, %r13d | |
312 | movq %r14, (%rsp) | |
313 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ | |
314 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 | |
315 | # LOE rbx r15 r12d r13d | |
316 | ||
317 | /* Range mask | |
318 | * bits check | |
319 | */ | |
e682d015 SP |
320 | |
321 | L(RANGEMASK_CHECK): | |
99088223 | 322 | btl %r12d, %r13d |
e682d015 | 323 | |
99088223 SP |
324 | /* Call scalar math function */ |
325 | jc L(SCALAR_MATH_CALL) | |
326 | # LOE rbx r15 r12d r13d | |
e682d015 | 327 | |
99088223 SP |
328 | /* Special inputs |
329 | * processing loop | |
330 | */ | |
e682d015 SP |
331 | |
332 | L(SPECIAL_VALUES_LOOP): | |
99088223 SP |
333 | incl %r12d |
334 | cmpl $8, %r12d | |
335 | ||
336 | /* Check bits in range mask */ | |
337 | jl L(RANGEMASK_CHECK) | |
338 | # LOE rbx r15 r12d r13d | |
339 | ||
340 | movq 16(%rsp), %r12 | |
341 | cfi_restore(12) | |
342 | movq 8(%rsp), %r13 | |
343 | cfi_restore(13) | |
344 | movq (%rsp), %r14 | |
345 | cfi_restore(14) | |
346 | vmovups 64(%rsp), %ymm0 | |
347 | ||
348 | /* Go to exit */ | |
349 | jmp L(EXIT) | |
350 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ | |
351 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 | |
352 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ | |
353 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 | |
354 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ | |
355 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 | |
356 | # LOE rbx r12 r13 r14 r15 ymm0 | |
357 | ||
1d2971b5 | 358 | /* Scalar math function call |
99088223 SP |
359 | * to process special input |
360 | */ | |
e682d015 SP |
361 | |
362 | L(SCALAR_MATH_CALL): | |
99088223 | 363 | movl %r12d, %r14d |
3079f652 | 364 | vmovss 32(%rsp, %r14, 4), %xmm0 |
99088223 SP |
365 | call asinhf@PLT |
366 | # LOE rbx r14 r15 r12d r13d xmm0 | |
e682d015 | 367 | |
3079f652 | 368 | vmovss %xmm0, 64(%rsp, %r14, 4) |
e682d015 | 369 | |
99088223 SP |
370 | /* Process special inputs in loop */ |
371 | jmp L(SPECIAL_VALUES_LOOP) | |
372 | # LOE rbx r15 r12d r13d | |
e682d015 SP |
373 | END(_ZGVdN8v_asinhf_avx2) |
374 | ||
99088223 SP |
375 | .section .rodata, "a" |
376 | .align 32 | |
e682d015 SP |
377 | |
378 | #ifdef __svml_sasinh_data_internal_typedef | |
379 | typedef unsigned int VUINT32; | |
380 | typedef struct { | |
99088223 SP |
381 | __declspec(align(32)) VUINT32 SgnMask[8][1]; |
382 | __declspec(align(32)) VUINT32 sOne[8][1]; | |
383 | __declspec(align(32)) VUINT32 sPoly[8][8][1]; | |
384 | __declspec(align(32)) VUINT32 iBrkValue[8][1]; | |
385 | __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; | |
386 | __declspec(align(32)) VUINT32 sBigThreshold[8][1]; | |
387 | __declspec(align(32)) VUINT32 sC2[8][1]; | |
388 | __declspec(align(32)) VUINT32 sC3[8][1]; | |
389 | __declspec(align(32)) VUINT32 sHalf[8][1]; | |
390 | __declspec(align(32)) VUINT32 sLargestFinite[8][1]; | |
391 | __declspec(align(32)) VUINT32 sLittleThreshold[8][1]; | |
392 | __declspec(align(32)) VUINT32 sSign[8][1]; | |
393 | __declspec(align(32)) VUINT32 sThirtyOne[8][1]; | |
394 | __declspec(align(32)) VUINT32 sTopMask8[8][1]; | |
395 | __declspec(align(32)) VUINT32 XScale[8][1]; | |
396 | __declspec(align(32)) VUINT32 sLn2[8][1]; | |
e682d015 SP |
397 | } __svml_sasinh_data_internal; |
398 | #endif | |
399 | __svml_sasinh_data_internal: | |
99088223 SP |
400 | /* SgnMask */ |
401 | .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff | |
402 | /* sOne = SP 1.0 */ | |
403 | .align 32 | |
404 | .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 | |
405 | /* sPoly[] = SP polynomial */ | |
406 | .align 32 | |
407 | .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ | |
408 | .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ | |
409 | .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ | |
410 | .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ | |
411 | .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ | |
412 | .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ | |
413 | .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ | |
414 | .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ | |
415 | /* iBrkValue = SP 2/3 */ | |
416 | .align 32 | |
417 | .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab | |
418 | /* iOffExpoMask = SP significand mask */ | |
419 | .align 32 | |
420 | .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff | |
421 | /* sBigThreshold */ | |
422 | .align 32 | |
423 | .long 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000 | |
424 | /* sC2 */ | |
425 | .align 32 | |
426 | .long 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000 | |
427 | /* sC3 */ | |
428 | .align 32 | |
429 | .long 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000 | |
430 | /* sHalf */ | |
431 | .align 32 | |
432 | .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000 | |
433 | /* sLargestFinite */ | |
434 | .align 32 | |
435 | .long 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF | |
436 | /* sLittleThreshold */ | |
437 | .align 32 | |
438 | .long 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000 | |
439 | /* sSign */ | |
440 | .align 32 | |
441 | .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 | |
442 | /* sThirtyOne */ | |
443 | .align 32 | |
444 | .long 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000 | |
445 | /* sTopMask8 */ | |
446 | .align 32 | |
447 | .long 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000 | |
448 | /* XScale */ | |
449 | .align 32 | |
450 | .long 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000 | |
451 | /* sLn2 = SP ln(2) */ | |
452 | .align 32 | |
453 | .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 | |
454 | .align 32 | |
455 | .type __svml_sasinh_data_internal, @object | |
456 | .size __svml_sasinh_data_internal, .-__svml_sasinh_data_internal |