/* Function asinhf vectorized with AVX2.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */
/*
 * ALGORITHM DESCRIPTION:
 *
 *   Compute asinh(x) as log(x + sqrt(x*x + 1))
 *
 *   Special cases:
 *
 *   asinh(NaN) = quiet NaN, and raise invalid exception
 *   asinh(INF) = that INF
 *
 */
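
/*
 * For reference, a minimal scalar C sketch of the same identity (purely
 * illustrative; it is not the vector code path implemented below):
 *
 *   #include <math.h>
 *
 *   static float
 *   asinhf_ref (float x)
 *   {
 *     float ax = fabsf (x);
 *     // asinh is odd, so compute for |x| and restore the sign at the end.
 *     // NaN propagates through sqrtf/logf; +/-Inf maps to +/-Inf.
 *     float r = logf (ax + sqrtf (ax * ax + 1.0f));
 *     return copysignf (r, x);
 *   }
 */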
/* Offsets for data table __svml_sasinh_data_internal.  */
#define iOffExpoMask 352
#define sBigThreshold 384
#define sLargestFinite 512
#define sLittleThreshold 544
#define sThirtyOne 608

	.section .text.avx2, "ax", @progbits
ENTRY(_ZGVdN8v_asinhf_avx2)
	cfi_def_cfa_offset(16)

	/* Load the constant 1 and a sign mask */
	vmovups	sOne+__svml_sasinh_data_internal(%rip), %ymm8

	/* No need to split X when FMA is available in hardware. */
	vmulps	%ymm9, %ymm9, %ymm5
	vmovups	sTopMask8+__svml_sasinh_data_internal(%rip), %ymm1
	/*
	 * Finally, express Y + W = X^2 + 1 accurately where Y has <= 8 bits.
	 * If |X| <= 1 then |XHi| <= 1 and so |X2Hi| <= 1, so we can treat 1
	 * as the dominant component in the compensated summation.  Otherwise,
	 * if |X| >= 1, then since X2Hi only has 22 significant bits, the basic
	 * addition will be exact anyway until we get to |X| >= 2^24.  But by
	 * that time the log function is well-conditioned enough that the
	 * rounding error doesn't matter.  Hence we can treat 1 as dominant even
	 * if it literally isn't.
	 */
	vaddps	%ymm5, %ymm8, %ymm13
	vandps	%ymm1, %ymm13, %ymm2
	vsubps	%ymm13, %ymm8, %ymm11
	vsubps	%ymm2, %ymm13, %ymm15
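
	/*
	 * A scalar sketch of the split just described (hedged: the variable
	 * names are illustrative, not the exact register assignment; fmaf is
	 * from <math.h>):
	 *
	 *   // x2_hi + x2_lo == x*x exactly when an FMA is available.
	 *   float x2_hi = x * x;
	 *   float x2_lo = fmaf (x, x, -x2_hi);
	 *   // Compensated sum of 1 + x*x: y is the rounded sum, w collects
	 *   // the rounding error plus the low part of the product.
	 *   float y = 1.0f + x2_hi;
	 *   float w = (1.0f - y) + x2_hi + x2_lo;
	 *   // The vector code additionally truncates y to its top 8 significant
	 *   // bits (sTopMask8 below) and folds the discarded low bits into w.
	 */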
	/*
	 * Compute R = 1/sqrt(Y + W) * (1 + d)
	 * Force R to <= 8 significant bits.
	 * This means that R * Y and R^2 * Y are exactly representable.
	 */
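
	/*
	 * Truncating a float to its top 8 significant bits amounts to masking
	 * its encoding with sTopMask8 (0xFFFF0000, defined in the table below).
	 * A scalar sketch, assuming the usual IEEE binary32 layout:
	 *
	 *   #include <stdint.h>
	 *   #include <string.h>
	 *
	 *   static float
	 *   top8 (float v)
	 *   {
	 *     uint32_t bits;
	 *     memcpy (&bits, &v, sizeof bits);  // reinterpret the encoding
	 *     bits &= 0xFFFF0000u;              // keep sign, exponent, 7 mantissa bits
	 *     memcpy (&v, &bits, sizeof v);
	 *     return v;
	 *   }
	 */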
	vfmsub213ps %ymm5, %ymm9, %ymm4
	vaddps	%ymm11, %ymm5, %ymm12

	/*
	 * Get the absolute value of the input, since we will exploit antisymmetry
	 * and mostly assume X >= 0 in the core computation
	 */
	vandps	SgnMask+__svml_sasinh_data_internal(%rip), %ymm9, %ymm6

	/*
	 * Check whether the input is finite, by checking |X| <= MaxFloat
	 * Otherwise set the rangemask so that the callout will get used.
	 * Note that this will also use the callout for NaNs since not(NaN <= MaxFloat)
	 */
	vcmpnle_uqps sLargestFinite+__svml_sasinh_data_internal(%rip), %ymm6, %ymm10
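
	/*
	 * In scalar terms the range test above is simply (a hedged sketch,
	 * using fabsf from <math.h> and FLT_MAX from <float.h>):
	 *
	 *   // True for +/-Inf and for NaN, since NaN compares unordered
	 *   // against everything; those lanes take the scalar callout.
	 *   int need_callout = !(fabsf (x) <= FLT_MAX);
	 */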
	vaddps	%ymm12, %ymm4, %ymm14

	/*
	 * Unfortunately, we can still be in trouble if |X| <= 2^-5, since
	 * the absolute error 2^-(7+24)-ish in sqrt(1 + X^2) gets scaled up
	 * by 1/X and comes close to our threshold.  Hence if |X| <= 2^-4,
	 * perform an alternative computation
	 * sqrt(1 + X^2) - 1 = X^2/2 - X^4/8 + X^6/16
	 */
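
	/*
	 * Scalar sketch of that small-|X| branch (series for sqrt(1+t) - 1
	 * with t = X^2; variable names are illustrative only):
	 *
	 *   float t = x * x;
	 *   float x2over2 = 0.5f * t;
	 *   float x4over4 = x2over2 * x2over2;                // X^4/4
	 *   float x46 = fmaf (x4over4, x2over2, -x4over4);    // -X^4/4 + X^6/8
	 *   float small = x2over2 + 0.5f * x46;               // X^2/2 - X^4/8 + X^6/16
	 */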
	vaddps	%ymm4, %ymm5, %ymm4

	/*
	 * The following computation can go wrong for very large X, basically
	 * because X^2 overflows.  But for large X we have
	 * asinh(X) / log(2 X) - 1 =~= 1/(4 * X^2), so for X >= 2^30
	 * we can just later stick X back into the log and tweak up the exponent.
	 * Actually we scale X by 2^-30 and tweak the exponent up by 31,
	 * to stay in the safe range for the later log computation.
	 * Compute a flag now telling us when to do this.
	 */
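
	/*
	 * Scalar sketch of the large-|X| rescaling (constants as defined in the
	 * table below: sBigThreshold = 2^30, XScale = 2^-30, sThirtyOne = 31;
	 * names are illustrative):
	 *
	 *   // asinh(x) ~= log(2*x) = log(x * 2^-30) + 31*ln(2) for x >= 2^30,
	 *   // so feed the scaled value to the log path and add 31 to the
	 *   // exponent term n before the final n*ln2 + poly(r) reconstruction.
	 *   int big = !(fabsf (x) < 0x1p30f);
	 *   float xlog = big ? fabsf (x) * 0x1p-30f : h;   // h = 1 + |x| + sqrt-term
	 *   float n_extra = big ? 31.0f : 0.0f;            // added to n later
	 */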
	vcmplt_oqps sBigThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm7
	vaddps	%ymm15, %ymm14, %ymm3

	/*
	 * Now compute the correction factor
	 *     1 / (1 + d)
	 *   = 1 / (1 + (sqrt(1 - e) - 1))
	 *   = 1 / sqrt(1 - e)
	 *   = 1 + 1/2 * e + 3/8 * e^2 + 5/16 * e^3 + 35/128 * e^4 + ...
	 * So compute the first three nonconstant terms of that, so that
	 * we have a relative correction (1 + Corr) to apply to S etc.
	 */
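
	/*
	 * Scalar sketch of the correction term (sHalf = 0.5, sC2 = 0.375 and
	 * sC3 = 0.3125 in the table below; Horner order as in the code):
	 *
	 *   float corr = e * (0.5f + e * (0.375f + e * 0.3125f));
	 *   // ... so that sqrt(Y + W) ~= (S + T) * (1.0f + corr).
	 */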
	vmovups	sC3+__svml_sasinh_data_internal(%rip), %ymm12
	vmovmskps %ymm10, %edx
	vandps	%ymm1, %ymm0, %ymm10

	/*
	 * Compute S = (Y/sqrt(Y + W)) * (1 + d)
	 * and T = (W/sqrt(Y + W)) * (1 + d)
	 * so that S + T = sqrt(Y + W) * (1 + d)
	 * S is exact, and the rounding error in T is OK.
	 */
	vmulps	%ymm10, %ymm2, %ymm15
	vmulps	%ymm3, %ymm10, %ymm14
	vmovups	sHalf+__svml_sasinh_data_internal(%rip), %ymm3
	vsubps	%ymm8, %ymm15, %ymm0

	/*
	 * Obtain sqrt(1 + X^2) - 1 in two pieces:
	 *     sqrt(1 + X^2) - 1
	 *   = (S + T) * (1 + Corr) - 1
	 *   = [S - 1] + [T + (S + T) * Corr]
	 * We need a compensated summation for the last part.  We treat S - 1
	 * as the larger part; it certainly is until about X < 2^-4, and in that
	 * case, the error is affordable since X dominates over sqrt(1 + X^2) - 1.
	 * Final sum is dTmp5 (hi) + dTmp7 (lo)
	 */
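
	/*
	 * Scalar sketch of that two-piece reconstruction (illustrative names):
	 *
	 *   float S = R * Y;                      // exact: R and Y have <= 8 sig. bits
	 *   float T = R * W;
	 *   float Shi = S - 1.0f;                 // dominant part of sqrt(1+X^2) - 1
	 *   float Slo = fmaf (S + T, corr, T);    // small correction part
	 *   float hi = Shi + Slo;
	 *   float lo = (Shi - hi) + Slo;          // compensated low word
	 */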
	vaddps	%ymm14, %ymm15, %ymm13

	/*
	 * Compute e = -(2 * d + d^2)
	 * The first FMA is exact, and the rounding error in the other is acceptable
	 * since d and e are ~ 2^-8
	 */
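
	/*
	 * Scalar sketch: with R = (1 + d)/sqrt(Y + W) we have
	 * R*R*(Y + W) = (1 + d)^2, hence (names illustrative):
	 *
	 *   float e = fmaf (-R, S, 1.0f);   // 1 - R*(R*Y), exact
	 *   e = fmaf (-R, T, e);            // e = 1 - R*R*(Y+W) = -(2d + d^2)
	 */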
	vmovaps	%ymm8, %ymm11
	vfnmadd231ps %ymm15, %ymm10, %ymm11
	vfnmadd231ps %ymm14, %ymm10, %ymm11
	vfmadd213ps sC2+__svml_sasinh_data_internal(%rip), %ymm11, %ymm12
	vfmadd213ps %ymm3, %ymm11, %ymm12
	vmulps	%ymm12, %ymm11, %ymm1

	/* Now multiplex the two possible computations */
	vcmple_oqps sLittleThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm11
	vfmadd213ps %ymm14, %ymm13, %ymm1
	vaddps	%ymm0, %ymm1, %ymm2
	vsubps	%ymm2, %ymm0, %ymm10

	/* sX2over2 = X^2/2 */
	vmulps	%ymm4, %ymm3, %ymm0
	vaddps	%ymm10, %ymm1, %ymm1

	/* sX4over4 = X^4/4 */
	vmulps	%ymm0, %ymm0, %ymm5

	/* sX46 = -X^4/4 + X^6/8 */
	vfmsub231ps %ymm0, %ymm5, %ymm5

	/* sX46over2 = -X^4/8 + X^6/16 */
	vmulps	%ymm5, %ymm3, %ymm3
	vaddps	%ymm3, %ymm0, %ymm5
	vblendvps %ymm11, %ymm5, %ymm2, %ymm2
	vsubps	%ymm5, %ymm0, %ymm4

	/*
	 * Now do another compensated sum to add |X| + [sqrt(1 + X^2) - 1].
	 * It's always safe to assume |X| is larger.
	 * This is the final 2-part argument to the log1p function
	 */
	vaddps	%ymm2, %ymm6, %ymm14
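
	/*
	 * Scalar sketch of that final compensated sum (ax = |x|, v = high part
	 * of sqrt(1 + x^2) - 1, vlo = its low part; names illustrative):
	 *
	 *   float H = ax + v;                   // high part of the log1p argument
	 *   float L = ((ax - H) + v) + vlo;     // low part, |x| assumed dominant
	 */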
	/*
	 * Now resume the main code.
	 * reduction: compute r, n
	 */
	vmovups	iBrkValue+__svml_sasinh_data_internal(%rip), %ymm5
	vaddps	%ymm4, %ymm3, %ymm10

	/*
	 * Now we feed into the log1p code, using H in place of _VARG1 and
	 * also adding L into Xl.
	 * compute 1+x as high, low parts
	 */
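
	/*
	 * Scalar sketch of the log reduction performed below: write the reduced
	 * argument as 2^n * m with m in [2/3, 4/3), working directly on the
	 * binary32 encoding (iBrkValue = bits of 2/3, iOffExpoMask = significand
	 * mask; names illustrative):
	 *
	 *   #include <stdint.h>
	 *   #include <string.h>
	 *
	 *   uint32_t u;
	 *   memcpy (&u, &xr, sizeof u);                     // xr = H (or 2^-30 * x)
	 *   int32_t k = (int32_t) (u - 0x3f2aaaab);         // offset from 2/3
	 *   int32_t n = k >> 23;                            // scale exponent
	 *   uint32_t mu = (uint32_t) (k & 0x007fffff) + 0x3f2aaaab;
	 *   float m;
	 *   memcpy (&m, &mu, sizeof m);                     // m in [2/3, 4/3)
	 *   float r = m - 1.0f;                             // |r| <= 1/3
	 *   // then log1p ~= n*ln2 + poly(r), with the low part L scaled by
	 *   // 2^-n and folded into r first.
	 */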
	vmaxps	%ymm14, %ymm8, %ymm15
	vminps	%ymm14, %ymm8, %ymm0
	vblendvps %ymm11, %ymm10, %ymm1, %ymm12
	vsubps	%ymm14, %ymm6, %ymm1
	vaddps	%ymm0, %ymm15, %ymm3

	/* Now multiplex to the case X = 2^-30 * input, Xl = sL = 0 in the "big" case. */
	vmulps	XScale+__svml_sasinh_data_internal(%rip), %ymm6, %ymm6
	vaddps	%ymm1, %ymm2, %ymm13
	vsubps	%ymm3, %ymm15, %ymm15
	vaddps	%ymm13, %ymm12, %ymm1
	vaddps	%ymm15, %ymm0, %ymm2
	vblendvps %ymm7, %ymm3, %ymm6, %ymm0
	vaddps	%ymm2, %ymm1, %ymm4
	vpsubd	%ymm5, %ymm0, %ymm1
	vpsrad	$23, %ymm1, %ymm6
	vpand	iOffExpoMask+__svml_sasinh_data_internal(%rip), %ymm1, %ymm2
	vmovups	sPoly+224+__svml_sasinh_data_internal(%rip), %ymm1
	vpslld	$23, %ymm6, %ymm10
	vpaddd	%ymm5, %ymm2, %ymm13
	vcvtdq2ps %ymm6, %ymm0
	vpsubd	%ymm10, %ymm8, %ymm12

	/* polynomial evaluation */
	vsubps	%ymm8, %ymm13, %ymm8

	/* Add 31 to the exponent in the "large" case to get log(2 * input) */
	vaddps	sThirtyOne+__svml_sasinh_data_internal(%rip), %ymm0, %ymm3
	vandps	%ymm7, %ymm4, %ymm11
	vmulps	%ymm12, %ymm11, %ymm14
	vblendvps %ymm7, %ymm0, %ymm3, %ymm0
	vaddps	%ymm8, %ymm14, %ymm2
	vfmadd213ps sPoly+192+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+160+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+128+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+96+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+64+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+32+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vmulps	%ymm1, %ymm2, %ymm4
	vfmadd213ps %ymm2, %ymm2, %ymm4

	/* final reconstruction */
	vfmadd132ps sLn2+__svml_sasinh_data_internal(%rip), %ymm4, %ymm0
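
	/*
	 * Scalar sketch of the polynomial step and the reconstruction just done
	 * (P0..P7 and ln2 as in the sPoly/sLn2 table entries below):
	 *
	 *   float p = P7;
	 *   p = fmaf (p, r, P6);  p = fmaf (p, r, P5);  p = fmaf (p, r, P4);
	 *   p = fmaf (p, r, P3);  p = fmaf (p, r, P2);  p = fmaf (p, r, P1);
	 *   p = fmaf (p, r, P0);
	 *   float log1p_r = fmaf (r * p, r, r);                    // r + r^2 * poly(r)
	 *   float res = fmaf ((float) n, 0.693147182f, log1p_r);   // + n*ln(2)
	 */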
	/* Finally, reincorporate the original sign. */
	vandps	sSign+__svml_sasinh_data_internal(%rip), %ymm9, %ymm7
	vxorps	%ymm0, %ymm7, %ymm0

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm9
	/* Restore registers
	 * and exit the function
	 */
L(SPECIAL_VALUES_BRANCH):
	vmovups	%ymm9, 32(%rsp)
	vmovups	%ymm0, 64(%rsp)
	# LOE rbx r12 r13 r14 r15 edx ymm0
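
	/*
	 * The special-values path spills the input vector and the fast-path
	 * result to the stack, then repairs, one lane at a time, every lane
	 * flagged in the range mask by calling the scalar routine.  Roughly
	 * (a hedged C sketch, not the literal register protocol used below):
	 *
	 *   float in[8], out[8];        // images of 32(%rsp) and 64(%rsp)
	 *   for (int i = 0; i < 8; i++)
	 *     if (mask & (1 << i))
	 *       out[i] = asinhf (in[i]);
	 */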
	# LOE rbx r12 r13 r14 r15 eax edx

	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22

	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22

	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

L(SPECIAL_VALUES_LOOP):

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	vmovups	64(%rsp), %ymm0

	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22

	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22

	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 ymm0
	/* Scalar math function call
	 * to process special input
	 */
	vmovss	32(%rsp, %r14, 4), %xmm0
	call	asinhf@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovss	%xmm0, 64(%rsp, %r14, 4)
	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	# LOE rbx r15 r12d r13d
END(_ZGVdN8v_asinhf_avx2)

	.section .rodata, "a"
#ifdef __svml_sasinh_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(32)) VUINT32 SgnMask[8][1];
	__declspec(align(32)) VUINT32 sOne[8][1];
	__declspec(align(32)) VUINT32 sPoly[8][8][1];
	__declspec(align(32)) VUINT32 iBrkValue[8][1];
	__declspec(align(32)) VUINT32 iOffExpoMask[8][1];
	__declspec(align(32)) VUINT32 sBigThreshold[8][1];
	__declspec(align(32)) VUINT32 sC2[8][1];
	__declspec(align(32)) VUINT32 sC3[8][1];
	__declspec(align(32)) VUINT32 sHalf[8][1];
	__declspec(align(32)) VUINT32 sLargestFinite[8][1];
	__declspec(align(32)) VUINT32 sLittleThreshold[8][1];
	__declspec(align(32)) VUINT32 sSign[8][1];
	__declspec(align(32)) VUINT32 sThirtyOne[8][1];
	__declspec(align(32)) VUINT32 sTopMask8[8][1];
	__declspec(align(32)) VUINT32 XScale[8][1];
	__declspec(align(32)) VUINT32 sLn2[8][1];
} __svml_sasinh_data_internal;
#endif

__svml_sasinh_data_internal:
	/* SgnMask */
	.long	0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
	/* sOne = SP 1.0 */
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
	/* sPoly[] = SP polynomial */
	.long	0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
	.long	0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
	.long	0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
	.long	0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
	.long	0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
	.long	0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
	.long	0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
	.long	0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */

	/* iBrkValue = SP 2/3 */
	.long	0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab

	/* iOffExpoMask = SP significand mask */
	.long	0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
	/* sBigThreshold = SP 2^30 */
	.long	0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000

	/* sC2 = SP 3/8 */
	.long	0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000

	/* sC3 = SP 5/16 */
	.long	0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000

	/* sHalf = SP 0.5 */
	.long	0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000

	/* sLargestFinite = SP FLT_MAX */
	.long	0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF

	/* sLittleThreshold = SP 2^-4 */
	.long	0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000

	/* sSign = SP sign bit */
	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000

	/* sThirtyOne = SP 31.0 */
	.long	0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000

	/* sTopMask8 = keep top 8 significant bits */
	.long	0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000

	/* XScale = SP 2^-30 */
	.long	0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000
	/* sLn2 = SP ln(2) */
	.long	0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
	.type	__svml_sasinh_data_internal, @object
	.size	__svml_sasinh_data_internal, .-__svml_sasinh_data_internal