sysdeps/ia64/fpu/s_log1pl.S

   1 .file "log1pl.s"
   2
   3
   4 // Copyright (c) 2000 - 2003, Intel Corporation
   5 // All rights reserved.
   6 //
   7 //
   8 // Redistribution and use in source and binary forms, with or without
   9 // modification, are permitted provided that the following conditions are
  10 // met:
  11 //
  12 // * Redistributions of source code must retain the above copyright
  13 // notice, this list of conditions and the following disclaimer.
  14 //
  15 // * Redistributions in binary form must reproduce the above copyright
  16 // notice, this list of conditions and the following disclaimer in the
  17 // documentation and/or other materials provided with the distribution.
  18 //
  19 // * The name of Intel Corporation may not be used to endorse or promote
  20 // products derived from this software without specific prior written
  21 // permission.
  22
  23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  28 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  31 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34 //
  35 // Intel Corporation is the author of this code, and requests that all
  36 // problem reports or change requests be submitted to it directly at
  37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
  38 //
  39 //*********************************************************************
  40 //
  41 // History:
  42 // 02/02/00 Initial version
  43 // 04/04/00 Unwind support added
  44 // 08/15/00 Bundle added after call to __libm_error_support to properly
  45 //          set [the previously overwritten] GR_Parameter_RESULT.
  46 // 05/21/01 Removed logl and log10l, putting them in a separate file
  47 // 06/29/01 Improved speed of all paths
  48 // 05/20/02 Cleaned up namespace and sf0 syntax
  49 // 02/10/03 Reordered header: .section, .global, .proc, .align;
  50 //          used data8 for long double table values
  51 //
  52 //*********************************************************************
  53 //
  54 //*********************************************************************
  55 //
  56 // Function:   log1pl(x) = ln(x+1), for double-extended precision x values
  57 //
  58 //*********************************************************************
  59 //
  60 // Resources Used:
  61 //
  62 //    Floating-Point Registers: f8 (Input and Return Value)
  63 //                              f34-f82
  64 //
  65 //    General Purpose Registers:
  66 //      r32-r56
  67 //      r53-r56 (Used to pass arguments to error handling routine)
  68 //
  69 //    Predicate Registers:      p6-p13
  70 //
  71 //*********************************************************************
  72 //
  73 // IEEE Special Conditions:
  74 //
  75 //    Denormal fault raised on denormal inputs
  76 //    Overflow exceptions cannot occur
  77 //    Underflow exceptions raised when appropriate for log1p
  78 //    Inexact raised when appropriate by algorithm
  79 //
  80 //    log1pl(inf) = inf
  81 //    log1pl(-inf) = QNaN
  82 //    log1pl(+/-0) = +/-0
  83 //    log1pl(-1) =  -inf
  84 //    log1pl(SNaN) = QNaN
  85 //    log1pl(QNaN) = QNaN
  86 //    log1pl(EM_special Values) = QNaN
  87 //
  88 //*********************************************************************
  89 //
  90 // Overview
  91 //
  92 // The method consists of three cases.
  93 //
  94 // If      |X| < 2^(-80)        use case log1p_small;
  95 // else    |X| < 2^(-7)         use case log_near1;
  96 // else                         use case log_regular;
  97 //
  98 // Case log1p_small:
  99 //
 100 //   log1pl( X ) = logl( X+1 ) can be approximated by X
 101 //
 102 // Case log_near1:
 103 //
 104 //   log1pl( X ) = log( X+1 ) can be approximated by a simple polynomial
 105 //   in W = X. This polynomial resembles the truncated Taylor
 106 //   series W - W^/2 + W^3/3 - ...
 107 //
 108 // Case log_regular:
 109 //
 110 //   Here we use a table lookup method. The basic idea is that in
 111 //   order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
 112 //   we construct a value G such that G*Arg is close to 1 and that
 113 //   logl(1/G) is obtainable easily from a table of values calculated
 114 //   beforehand. Thus
 115 //
 116 //      logl(Arg) = logl(1/G) + logl(G*Arg)
 117 //               = logl(1/G) + logl(1 + (G*Arg - 1))
 118 //
 119 //   Because |G*Arg - 1| is small, the second term on the right hand
 120 //   side can be approximated by a short polynomial. We elaborate
 121 //   this method in four steps.
 122 //
 123 //   Step 0: Initialization
 124 //
 125 //   We need to calculate logl( X+1 ). Obtain N, S_hi such that
 126 //
 127 //      X+1 = 2^N * ( S_hi + S_lo )   exactly
 128 //
 129 //   where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
 130 //   that |S_lo| <= ulp(S_hi).
 131 //
 132 //   Step 1: Argument Reduction
 133 //
 134 //   Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
 135 //
 136 //      G := G_1 * G_2 * G_3
 137 //      r := (G * S_hi - 1) + G * S_lo
 138 //
 139 //   These G_j's have the property that the product is exactly
 140 //   representable and that |r| < 2^(-12) as a result.
 141 //
 142 //   Step 2: Approximation
 143 //
 144 //
 145 //   logl(1 + r) is approximated by a short polynomial poly(r).
 146 //
 147 //   Step 3: Reconstruction
 148 //
 149 //
 150 //   Finally, log1pl( X ) = logl( X+1 ) is given by
 151 //
 152 //   logl( X+1 )   =   logl( 2^N * (S_hi + S_lo) )
 153 //                 ~=~  N*logl(2) + logl(1/G) + logl(1 + r)
 154 //                 ~=~  N*logl(2) + logl(1/G) + poly(r).
 155 //
 156 // **** Algorithm ****
 157 //
 158 // Case log1p_small:
 159 //
 160 // Although log1pl(X) is basically X, we would like to preserve the inexactness
 161 // nature as well as consistent behavior under different rounding modes.
 162 // We can do this by computing the result as
 163 //
 164 //     log1pl(X) = X - X*X
 165 //
 166 //
 167 // Case log_near1:
 168 //
 169 // Here we compute a simple polynomial. To exploit parallelism, we split
 170 // the polynomial into two portions.
 171 //
 172 //       W := X
 173 //       Wsq := W * W
 174 //       W4  := Wsq*Wsq
 175 //       W6  := W4*Wsq
 176 //       Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
 177 //       Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
 178 //
 179 // Case log_regular:
 180 //
 181 // We present the algorithm in four steps.
 182 //
 183 //   Step 0. Initialization
 184 //   ----------------------
 185 //
 186 //   Z := X + 1
 187 //   N := unbaised exponent of Z
 188 //   S_hi := 2^(-N) * Z
 189 //   S_lo := 2^(-N) * { (max(X,1)-Z) + min(X,1) }
 190 //
 191 //   Step 1. Argument Reduction
 192 //   --------------------------
 193 //
 194 //   Let
 195 //
 196 //      Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
 197 //
 198 //   We obtain G_1, G_2, G_3 by the following steps.
 199 //
 200 //
 201 //      Define          X_0 := 1.d_1 d_2 ... d_14. This is extracted
 202 //                      from S_hi.
 203 //
 204 //      Define          A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
 205 //                      to lsb = 2^(-4).
 206 //
 207 //      Define          index_1 := [ d_1 d_2 d_3 d_4 ].
 208 //
 209 //      Fetch           Z_1 := (1/A_1) rounded UP in fixed point with
 210 //      fixed point     lsb = 2^(-15).
 211 //                      Z_1 looks like z_0.z_1 z_2 ... z_15
 212 //                      Note that the fetching is done using index_1.
 213 //                      A_1 is actually not needed in the implementation
 214 //                      and is used here only to explain how is the value
 215 //                      Z_1 defined.
 216 //
 217 //      Fetch           G_1 := (1/A_1) truncated to 21 sig. bits.
 218 //      floating pt.    Again, fetching is done using index_1. A_1
 219 //                      explains how G_1 is defined.
 220 //
 221 //      Calculate       X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
 222 //                           = 1.0 0 0 0 d_5 ... d_14
 223 //                      This is accomplished by integer multiplication.
 224 //                      It is proved that X_1 indeed always begin
 225 //                      with 1.0000 in fixed point.
 226 //
 227 //
 228 //      Define          A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
 229 //                      truncated to lsb = 2^(-8). Similar to A_1,
 230 //                      A_2 is not needed in actual implementation. It
 231 //                      helps explain how some of the values are defined.
 232 //
 233 //      Define          index_2 := [ d_5 d_6 d_7 d_8 ].
 234 //
 235 //      Fetch           Z_2 := (1/A_2) rounded UP in fixed point with
 236 //      fixed point     lsb = 2^(-15). Fetch done using index_2.
 237 //                      Z_2 looks like z_0.z_1 z_2 ... z_15
 238 //
 239 //      Fetch           G_2 := (1/A_2) truncated to 21 sig. bits.
 240 //      floating pt.
 241 //
 242 //      Calculate       X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
 243 //                           = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
 244 //                      This is accomplished by integer multiplication.
 245 //                      It is proved that X_2 indeed always begin
 246 //                      with 1.00000000 in fixed point.
 247 //
 248 //
 249 //      Define          A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
 250 //                      This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
 251 //
 252 //      Define          index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
 253 //
 254 //      Fetch           G_3 := (1/A_3) truncated to 21 sig. bits.
 255 //      floating pt.    Fetch is done using index_3.
 256 //
 257 //      Compute         G := G_1 * G_2 * G_3.
 258 //
 259 //      This is done exactly since each of G_j only has 21 sig. bits.
 260 //
 261 //      Compute
 262 //
 263 //              r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
 264 //
 265 //      Thus r approximates G*(S_hi + S_lo) - 1 to within a couple of
 266 //      rounding errors.
 267 //
 268 //
 269 //  Step 2. Approximation
 270 //  ---------------------
 271 //
 272 //   This step computes an approximation to logl( 1 + r ) where r is the
 273 //   reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
 274 //   thus logl(1+r) can be approximated by a short polynomial:
 275 //
 276 //      logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
 277 //
 278 //
 279 //  Step 3. Reconstruction
 280 //  ----------------------
 281 //
 282 //   This step computes the desired result of logl(X+1):
 283 //
 284 //      logl(X+1) =   logl( 2^N * (S_hi + S_lo) )
 285 //                =   N*logl(2) + logl( S_hi + S_lo) )
 286 //                =   N*logl(2) + logl(1/G) +
 287 //                    logl(1 + G * ( S_hi + S_lo ) - 1 )
 288 //
 289 //   logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
 290 //   log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
 291 //   single-precision numbers and the low parts are double precision
 292 //   numbers. These have the property that
 293 //
 294 //      N*log2_hi + SUM ( log1byGj_hi )
 295 //
 296 //   is computable exactly in double-extended precision (64 sig. bits).
 297 //   Finally
 298 //
 299 //      Y_hi := N*log2_hi + SUM ( log1byGj_hi )
 300 //      Y_lo := poly_hi + [ poly_lo +
 301 //              ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
 302 //
 303
 304 RODATA
 305 .align 64
 306
 307 // ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
 308
 309 // P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
 310
 311 LOCAL_OBJECT_START(Constants_P)
 312 //data4  0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
 313 //data4  0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
 314 //data4  0x73282DB0,0x9249248C,0x00003FFC,0x00000000
 315 //data4  0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
 316 //data4  0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
 317 //data4  0x00067ED5,0x80000000,0x0000BFFD,0x00000000
 318 //data4  0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
 319 //data4  0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
 320 data8  0xE3936754EFD62B15,0x00003FFB
 321 data8  0x8003B271A5E56381,0x0000BFFC
 322 data8  0x9249248C73282DB0,0x00003FFC
 323 data8  0xAAAAAA9F47305052,0x0000BFFC
 324 data8  0xCCCCCCCCCCD17FC9,0x00003FFC
 325 data8  0x8000000000067ED5,0x0000BFFD
 326 data8  0xAAAAAAAAAAAAAAAA,0x00003FFD
 327 data8  0xFFFFFFFFFFFFFFFE,0x0000BFFD
 328 LOCAL_OBJECT_END(Constants_P)
 329
 330 // log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
 331
 332 LOCAL_OBJECT_START(Constants_Q)
 333 //data4  0x00000000,0xB1721800,0x00003FFE,0x00000000
 334 //data4  0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
 335 //data4  0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
 336 //data4  0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
 337 //data4  0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
 338 //data4  0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
 339 data8  0xB172180000000000,0x00003FFE
 340 data8  0x82E308654361C4C6,0x0000BFE2
 341 data8  0xCCCCCAF2328833CB,0x00003FFC
 342 data8  0x80000077A9D4BAFB,0x0000BFFD
 343 data8  0xAAAAAAAAAAABE3D2,0x00003FFD
 344 data8  0xFFFFFFFFFFFFDAB7,0x0000BFFD
 345 LOCAL_OBJECT_END(Constants_Q)
 346
 347 // 1/ln10_hi, 1/ln10_lo
 348
 349 LOCAL_OBJECT_START(Constants_1_by_LN10)
 350 //data4  0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
 351 //data4  0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000
 352 data8  0xDE5BD8A937287195,0x00003FFD
 353 data8  0xD56EAABEACCF70C8,0x00003FBB
 354 LOCAL_OBJECT_END(Constants_1_by_LN10)
 355
 356
 357 // Z1 - 16 bit fixed
 358
 359 LOCAL_OBJECT_START(Constants_Z_1)
 360 data4  0x00008000
 361 data4  0x00007879
 362 data4  0x000071C8
 363 data4  0x00006BCB
 364 data4  0x00006667
 365 data4  0x00006187
 366 data4  0x00005D18
 367 data4  0x0000590C
 368 data4  0x00005556
 369 data4  0x000051EC
 370 data4  0x00004EC5
 371 data4  0x00004BDB
 372 data4  0x00004925
 373 data4  0x0000469F
 374 data4  0x00004445
 375 data4  0x00004211
 376 LOCAL_OBJECT_END(Constants_Z_1)
 377
 378 // G1 and H1 - IEEE single and h1 - IEEE double
 379
 380 LOCAL_OBJECT_START(Constants_G_H_h1)
 381 data4  0x3F800000,0x00000000
 382 data8  0x0000000000000000
 383 data4  0x3F70F0F0,0x3D785196
 384 data8  0x3DA163A6617D741C
 385 data4  0x3F638E38,0x3DF13843
 386 data8  0x3E2C55E6CBD3D5BB
 387 data4  0x3F579430,0x3E2FF9A0
 388 data8  0xBE3EB0BFD86EA5E7
 389 data4  0x3F4CCCC8,0x3E647FD6
 390 data8  0x3E2E6A8C86B12760
 391 data4  0x3F430C30,0x3E8B3AE7
 392 data8  0x3E47574C5C0739BA
 393 data4  0x3F3A2E88,0x3EA30C68
 394 data8  0x3E20E30F13E8AF2F
 395 data4  0x3F321640,0x3EB9CEC8
 396 data8  0xBE42885BF2C630BD
 397 data4  0x3F2AAAA8,0x3ECF9927
 398 data8  0x3E497F3497E577C6
 399 data4  0x3F23D708,0x3EE47FC5
 400 data8  0x3E3E6A6EA6B0A5AB
 401 data4  0x3F1D89D8,0x3EF8947D
 402 data8  0xBDF43E3CD328D9BE
 403 data4  0x3F17B420,0x3F05F3A1
 404 data8  0x3E4094C30ADB090A
 405 data4  0x3F124920,0x3F0F4303
 406 data8  0xBE28FBB2FC1FE510
 407 data4  0x3F0D3DC8,0x3F183EBF
 408 data8  0x3E3A789510FDE3FA
 409 data4  0x3F088888,0x3F20EC80
 410 data8  0x3E508CE57CC8C98F
 411 data4  0x3F042108,0x3F29516A
 412 data8  0xBE534874A223106C
 413 LOCAL_OBJECT_END(Constants_G_H_h1)
 414
 415 // Z2 - 16 bit fixed
 416
 417 LOCAL_OBJECT_START(Constants_Z_2)
 418 data4  0x00008000
 419 data4  0x00007F81
 420 data4  0x00007F02
 421 data4  0x00007E85
 422 data4  0x00007E08
 423 data4  0x00007D8D
 424 data4  0x00007D12
 425 data4  0x00007C98
 426 data4  0x00007C20
 427 data4  0x00007BA8
 428 data4  0x00007B31
 429 data4  0x00007ABB
 430 data4  0x00007A45
 431 data4  0x000079D1
 432 data4  0x0000795D
 433 data4  0x000078EB
 434 LOCAL_OBJECT_END(Constants_Z_2)
 435
 436 // G2 and H2 - IEEE single and h2 - IEEE double
 437
 438 LOCAL_OBJECT_START(Constants_G_H_h2)
 439 data4  0x3F800000,0x00000000
 440 data8  0x0000000000000000
 441 data4  0x3F7F00F8,0x3B7F875D
 442 data8  0x3DB5A11622C42273
 443 data4  0x3F7E03F8,0x3BFF015B
 444 data8  0x3DE620CF21F86ED3
 445 data4  0x3F7D08E0,0x3C3EE393
 446 data8  0xBDAFA07E484F34ED
 447 data4  0x3F7C0FC0,0x3C7E0586
 448 data8  0xBDFE07F03860BCF6
 449 data4  0x3F7B1880,0x3C9E75D2
 450 data8  0x3DEA370FA78093D6
 451 data4  0x3F7A2328,0x3CBDC97A
 452 data8  0x3DFF579172A753D0
 453 data4  0x3F792FB0,0x3CDCFE47
 454 data8  0x3DFEBE6CA7EF896B
 455 data4  0x3F783E08,0x3CFC15D0
 456 data8  0x3E0CF156409ECB43
 457 data4  0x3F774E38,0x3D0D874D
 458 data8  0xBE0B6F97FFEF71DF
 459 data4  0x3F766038,0x3D1CF49B
 460 data8  0xBE0804835D59EEE8
 461 data4  0x3F757400,0x3D2C531D
 462 data8  0x3E1F91E9A9192A74
 463 data4  0x3F748988,0x3D3BA322
 464 data8  0xBE139A06BF72A8CD
 465 data4  0x3F73A0D0,0x3D4AE46F
 466 data8  0x3E1D9202F8FBA6CF
 467 data4  0x3F72B9D0,0x3D5A1756
 468 data8  0xBE1DCCC4BA796223
 469 data4  0x3F71D488,0x3D693B9D
 470 data8  0xBE049391B6B7C239
 471 LOCAL_OBJECT_END(Constants_G_H_h2)
 472
 473 // G3 and H3 - IEEE single and h3 - IEEE double
 474
 475 LOCAL_OBJECT_START(Constants_G_H_h3)
 476 data4  0x3F7FFC00,0x38800100
 477 data8  0x3D355595562224CD
 478 data4  0x3F7FF400,0x39400480
 479 data8  0x3D8200A206136FF6
 480 data4  0x3F7FEC00,0x39A00640
 481 data8  0x3DA4D68DE8DE9AF0
 482 data4  0x3F7FE400,0x39E00C41
 483 data8  0xBD8B4291B10238DC
 484 data4  0x3F7FDC00,0x3A100A21
 485 data8  0xBD89CCB83B1952CA
 486 data4  0x3F7FD400,0x3A300F22
 487 data8  0xBDB107071DC46826
 488 data4  0x3F7FCC08,0x3A4FF51C
 489 data8  0x3DB6FCB9F43307DB
 490 data4  0x3F7FC408,0x3A6FFC1D
 491 data8  0xBD9B7C4762DC7872
 492 data4  0x3F7FBC10,0x3A87F20B
 493 data8  0xBDC3725E3F89154A
 494 data4  0x3F7FB410,0x3A97F68B
 495 data8  0xBD93519D62B9D392
 496 data4  0x3F7FAC18,0x3AA7EB86
 497 data8  0x3DC184410F21BD9D
 498 data4  0x3F7FA420,0x3AB7E101
 499 data8  0xBDA64B952245E0A6
 500 data4  0x3F7F9C20,0x3AC7E701
 501 data8  0x3DB4B0ECAABB34B8
 502 data4  0x3F7F9428,0x3AD7DD7B
 503 data8  0x3D9923376DC40A7E
 504 data4  0x3F7F8C30,0x3AE7D474
 505 data8  0x3DC6E17B4F2083D3
 506 data4  0x3F7F8438,0x3AF7CBED
 507 data8  0x3DAE314B811D4394
 508 data4  0x3F7F7C40,0x3B03E1F3
 509 data8  0xBDD46F21B08F2DB1
 510 data4  0x3F7F7448,0x3B0BDE2F
 511 data8  0xBDDC30A46D34522B
 512 data4  0x3F7F6C50,0x3B13DAAA
 513 data8  0x3DCB0070B1F473DB
 514 data4  0x3F7F6458,0x3B1BD766
 515 data8  0xBDD65DDC6AD282FD
 516 data4  0x3F7F5C68,0x3B23CC5C
 517 data8  0xBDCDAB83F153761A
 518 data4  0x3F7F5470,0x3B2BC997
 519 data8  0xBDDADA40341D0F8F
 520 data4  0x3F7F4C78,0x3B33C711
 521 data8  0x3DCD1BD7EBC394E8
 522 data4  0x3F7F4488,0x3B3BBCC6
 523 data8  0xBDC3532B52E3E695
 524 data4  0x3F7F3C90,0x3B43BAC0
 525 data8  0xBDA3961EE846B3DE
 526 data4  0x3F7F34A0,0x3B4BB0F4
 527 data8  0xBDDADF06785778D4
 528 data4  0x3F7F2CA8,0x3B53AF6D
 529 data8  0x3DCC3ED1E55CE212
 530 data4  0x3F7F24B8,0x3B5BA620
 531 data8  0xBDBA31039E382C15
 532 data4  0x3F7F1CC8,0x3B639D12
 533 data8  0x3D635A0B5C5AF197
 534 data4  0x3F7F14D8,0x3B6B9444
 535 data8  0xBDDCCB1971D34EFC
 536 data4  0x3F7F0CE0,0x3B7393BC
 537 data8  0x3DC7450252CD7ADA
 538 data4  0x3F7F04F0,0x3B7B8B6D
 539 data8  0xBDB68F177D7F2A42
 540 LOCAL_OBJECT_END(Constants_G_H_h3)
 541
 542
 543 // Floating Point Registers
 544
 545 FR_Input_X      = f8
 546
 547 FR_Y_hi         = f34
 548 FR_Y_lo         = f35
 549
 550 FR_Scale        = f36
 551 FR_X_Prime      = f37
 552 FR_S_hi         = f38
 553 FR_W            = f39
 554 FR_G            = f40
 555
 556 FR_H            = f41
 557 FR_wsq          = f42
 558 FR_w4           = f43
 559 FR_h            = f44
 560 FR_w6           = f45
 561
 562 FR_G2           = f46
 563 FR_H2           = f47
 564 FR_poly_lo      = f48
 565 FR_P8           = f49
 566 FR_poly_hi      = f50
 567
 568 FR_P7           = f51
 569 FR_h2           = f52
 570 FR_rsq          = f53
 571 FR_P6           = f54
 572 FR_r            = f55
 573
 574 FR_log2_hi      = f56
 575 FR_log2_lo      = f57
 576 FR_p87          = f58
 577 FR_p876         = f58
 578 FR_p8765        = f58
 579 FR_float_N      = f59
 580 FR_Q4           = f60
 581
 582 FR_p43          = f61
 583 FR_p432         = f61
 584 FR_p4321        = f61
 585 FR_P4           = f62
 586 FR_G3           = f63
 587 FR_H3           = f64
 588 FR_h3           = f65
 589
 590 FR_Q3           = f66
 591 FR_P3           = f67
 592 FR_Q2           = f68
 593 FR_P2           = f69
 594 FR_1LN10_hi     = f70
 595
 596 FR_Q1           = f71
 597 FR_P1           = f72
 598 FR_1LN10_lo     = f73
 599 FR_P5           = f74
 600 FR_rcub         = f75
 601
 602 FR_Output_X_tmp = f76
 603 FR_Neg_One      = f77
 604 FR_Z            = f78
 605 FR_AA           = f79
 606 FR_BB           = f80
 607 FR_S_lo         = f81
 608 FR_2_to_minus_N = f82
 609
 610 FR_X                = f8
 611 FR_Y                = f0
 612 FR_RESULT           = f76
 613
 614
 615 // General Purpose Registers
 616
 617 GR_ad_p         = r33
 618 GR_Index1       = r34
 619 GR_Index2       = r35
 620 GR_signif       = r36
 621 GR_X_0          = r37
 622 GR_X_1          = r38
 623 GR_X_2          = r39
 624 GR_minus_N      = r39
 625 GR_Z_1          = r40
 626 GR_Z_2          = r41
 627 GR_N            = r42
 628 GR_Bias         = r43
 629 GR_M            = r44
 630 GR_Index3       = r45
 631 GR_exp_2tom80   = r45
 632 GR_ad_p2        = r46
 633 GR_exp_mask     = r47
 634 GR_exp_2tom7    = r48
 635 GR_ad_ln10      = r49
 636 GR_ad_tbl_1     = r50
 637 GR_ad_tbl_2     = r51
 638 GR_ad_tbl_3     = r52
 639 GR_ad_q         = r53
 640 GR_ad_z_1       = r54
 641 GR_ad_z_2       = r55
 642 GR_ad_z_3       = r56
 643 GR_minus_N      = r39
 644
 645 //
 646 // Added for unwind support
 647 //
 648
 649 GR_SAVE_PFS         = r50
 650 GR_SAVE_B0          = r51
 651 GR_SAVE_GP          = r52
 652 GR_Parameter_X      = r53
 653 GR_Parameter_Y      = r54
 654 GR_Parameter_RESULT = r55
 655 GR_Parameter_TAG    = r56
 656
 657 .section .text
 658 GLOBAL_IEEE754_ENTRY(log1pl)
 659 { .mfi
 660       alloc r32 = ar.pfs,0,21,4,0
 661       fclass.m p6, p0 =  FR_Input_X, 0x1E3  // Test for natval, nan, inf
 662       nop.i 999
 663 }
 664 { .mfi
 665       addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp
 666       fma.s1 FR_Z = FR_Input_X, f1, f1      // x+1
 667       nop.i 999
 668 }
 669 ;;
 670
 671 { .mfi
 672       nop.m 999
 673       fmerge.ns FR_Neg_One = f1, f1         // Form -1.0
 674       nop.i 999
 675 }
 676 { .mfi
 677       nop.m 999
 678       fnorm.s1 FR_X_Prime = FR_Input_X      // Normalize x
 679       nop.i 999
 680 }
 681 ;;
 682
 683 { .mfi
 684       ld8    GR_ad_z_1 = [GR_ad_z_1]          // Get pointer to Constants_Z_1
 685       nop.f 999
 686       mov GR_exp_2tom7 = 0x0fff8              // Exponent of 2^-7
 687 }
 688 ;;
 689
 690 { .mfb
 691       getf.sig GR_signif = FR_Z               // Get significand of x+1
 692       fcmp.eq.s1 p9, p0 =  FR_Input_X, f0     // Test for x=0
 693 (p6)  br.cond.spnt LOG1P_special              // Branch for nan, inf, natval
 694 }
 695 ;;
 696
 697 { .mfi
 698       add   GR_ad_tbl_1 = 0x040, GR_ad_z_1    // Point to Constants_G_H_h1
 699       fcmp.lt.s1 p13, p0 =  FR_X_Prime, FR_Neg_One // Test for x<-1
 700       add   GR_ad_p = -0x100, GR_ad_z_1       // Point to Constants_P
 701 }
 702 { .mfi
 703       add   GR_ad_z_2 = 0x140, GR_ad_z_1      // Point to Constants_Z_2
 704       nop.f 999
 705       add   GR_ad_tbl_2 = 0x180, GR_ad_z_1    // Point to Constants_G_H_h2
 706 }
 707 ;;
 708
 709 { .mfi
 710       add   GR_ad_q = 0x080, GR_ad_p          // Point to Constants_Q
 711       fcmp.eq.s1 p8, p0 =  FR_X_Prime, FR_Neg_One // Test for x=-1
 712       extr.u GR_Index1 = GR_signif, 59, 4     // Get high 4 bits of signif
 713 }
 714 { .mfb
 715       add   GR_ad_tbl_3 = 0x280, GR_ad_z_1    // Point to Constants_G_H_h3
 716       nop.f 999
 717 (p9)  br.ret.spnt  b0                         // Exit if x=0, return input
 718 }
 719 ;;
 720
 721 { .mfi
 722       shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1  // Point to Z_1
 723       fclass.nm p10, p0 =  FR_Input_X, 0x1FF  // Test for unsupported
 724       extr.u GR_X_0 = GR_signif, 49, 15       // Get high 15 bits of significand
 725 }
 726 { .mfi
 727       ldfe FR_P8 = [GR_ad_p],16               // Load P_8 for near1 path
 728       fsub.s1 FR_W = FR_X_Prime, f0           // W = x
 729       add   GR_ad_ln10 = 0x060, GR_ad_q       // Point to Constants_1_by_LN10
 730 }
 731 ;;
 732
 733 { .mfi
 734       ld4 GR_Z_1 = [GR_ad_z_1]                // Load Z_1
 735       fmax.s1  FR_AA = FR_X_Prime, f1         // For S_lo, form AA = max(X,1.0)
 736       mov GR_exp_mask = 0x1FFFF               // Create exponent mask
 737 }
 738 { .mib
 739       shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1  // Point to G_1
 740       mov GR_Bias = 0x0FFFF                   // Create exponent bias
 741 (p13) br.cond.spnt LOG1P_LT_Minus_1           // Branch if x<-1
 742 }
 743 ;;
 744
 745 { .mfb
 746       ldfps  FR_G, FR_H = [GR_ad_tbl_1],8     // Load G_1, H_1
 747       fmerge.se FR_S_hi =  f1,FR_Z            // Form |x+1|
 748 (p8)  br.cond.spnt LOG1P_EQ_Minus_1           // Branch if x=-1
 749 }
 750 ;;
 751
 752 { .mmb
 753       getf.exp GR_N =  FR_Z                   // Get N = exponent of x+1
 754       ldfd  FR_h = [GR_ad_tbl_1]              // Load h_1
 755 (p10) br.cond.spnt LOG1P_unsupported          // Branch for unsupported type
 756 }
 757 ;;
 758
 759 { .mfi
 760       ldfe FR_log2_hi = [GR_ad_q],16          // Load log2_hi
 761       fcmp.eq.s0 p8, p0 =  FR_Input_X, f0     // Dummy op to flag denormals
 762       pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15    // Get bits 30-15 of X_0 * Z_1
 763 }
 764 ;;
 765
 766 //
 767 //    For performance, don't use result of pmpyshr2.u for 4 cycles.
 768 //
 769 { .mmi
 770       ldfe FR_log2_lo = [GR_ad_q],16          // Load log2_lo
 771       sub GR_N = GR_N, GR_Bias
 772       mov GR_exp_2tom80 = 0x0ffaf             // Exponent of 2^-80
 773 }
 774 ;;
 775
 776 { .mfi
 777       ldfe FR_Q4 = [GR_ad_q],16               // Load Q4
 778       fms.s1  FR_S_lo = FR_AA, f1, FR_Z       // Form S_lo = AA - Z
 779       sub GR_minus_N = GR_Bias, GR_N          // Form exponent of 2^(-N)
 780 }
 781 ;;
 782
 783 { .mmf
 784       ldfe FR_Q3 = [GR_ad_q],16               // Load Q3
 785       setf.sig FR_float_N = GR_N   // Put integer N into rightmost significand
 786       fmin.s1  FR_BB = FR_X_Prime, f1         // For S_lo, form BB = min(X,1.0)
 787 }
 788 ;;
 789
 790 { .mmi
 791       getf.exp GR_M = FR_W                    // Get signexp of w = x
 792       ldfe FR_Q2 = [GR_ad_q],16               // Load Q2
 793       extr.u GR_Index2 = GR_X_1, 6, 4         // Extract bits 6-9 of X_1
 794 }
 795 ;;
 796
 797 { .mmi
 798       ldfe FR_Q1 = [GR_ad_q]                  // Load Q1
 799       shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2  // Point to Z_2
 800       add GR_ad_p2  = 0x30,GR_ad_p            // Point to P_4
 801 }
 802 ;;
 803
 804 { .mmi
 805       ld4 GR_Z_2 = [GR_ad_z_2]                // Load Z_2
 806       shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2  // Point to G_2
 807       and GR_M = GR_exp_mask, GR_M            // Get exponent of w = x
 808 }
 809 ;;
 810
 811 { .mmi
 812       ldfps  FR_G2, FR_H2 = [GR_ad_tbl_2],8   // Load G_2, H_2
 813       cmp.lt  p8, p9 =  GR_M, GR_exp_2tom7    // Test |x| < 2^-7
 814       cmp.lt  p7, p0 =  GR_M, GR_exp_2tom80   // Test |x| < 2^-80
 815 }
 816 ;;
 817
 818 // Small path is separate code
 819 //  p7 is for the small path: |x| < 2^-80
 820 // near1 and regular paths are merged.
 821 //  p8 is for the near1 path: |x| < 2^-7
 822 //  p9 is for regular path:   |x| >= 2^-7
 823
 824 { .mfi
 825       ldfd  FR_h2 = [GR_ad_tbl_2]             // Load h_2
 826       nop.f 999
 827       nop.i 999
 828 }
 829 { .mfb
 830 (p9)  setf.exp FR_2_to_minus_N = GR_minus_N   // Form 2^(-N)
 831 (p7)  fnma.s0  f8 = FR_X_Prime, FR_X_Prime, FR_X_Prime // Result x - x*x
 832 (p7)  br.ret.spnt  b0                         // Branch if |x| < 2^-80
 833 }
 834 ;;
 835
 836 { .mmi
 837 (p8)  ldfe FR_P7 = [GR_ad_p],16               // Load P_7 for near1 path
 838 (p8)  ldfe FR_P4 = [GR_ad_p2],16              // Load P_4 for near1 path
 839 (p9)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15    // Get bits 30-15 of X_1 * Z_2
 840 }
 841 ;;
 842
 843 //
 844 //    For performance, don't use result of pmpyshr2.u for 4 cycles.
 845 //
 846 { .mmf
 847 (p8)  ldfe FR_P6 = [GR_ad_p],16               // Load P_6 for near1 path
 848 (p8)  ldfe FR_P3 = [GR_ad_p2],16              // Load P_3 for near1 path
 849 (p9)  fma.s1  FR_S_lo = FR_S_lo, f1, FR_BB    // S_lo = S_lo + BB
 850 }
 851 ;;
 852
 853 { .mmf
 854 (p8)  ldfe FR_P5 = [GR_ad_p],16               // Load P_5 for near1 path
 855 (p8)  ldfe FR_P2 = [GR_ad_p2],16              // Load P_2 for near1 path
 856 (p8)  fmpy.s1 FR_wsq = FR_W, FR_W             // wsq = w * w for near1 path
 857 }
 858 ;;
 859
 860 { .mmi
 861 (p8)  ldfe FR_P1 = [GR_ad_p2],16 ;;           // Load P_1 for near1 path
 862       nop.m 999
 863 (p9)  extr.u GR_Index3 = GR_X_2, 1, 5         // Extract bits 1-5 of X_2
 864 }
 865 ;;
 866
 867 { .mfi
 868 (p9)  shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3  // Point to G_3
 869 (p9)  fcvt.xf FR_float_N = FR_float_N
 870       nop.i 999
 871 }
 872 ;;
 873
 874 { .mfi
 875 (p9)  ldfps  FR_G3, FR_H3 = [GR_ad_tbl_3],8   // Load G_3, H_3
 876       nop.f 999
 877       nop.i 999
 878 }
 879 ;;
 880
 881 { .mfi
 882 (p9)  ldfd  FR_h3 = [GR_ad_tbl_3]             // Load h_3
 883 (p9)  fmpy.s1 FR_G = FR_G, FR_G2              // G = G_1 * G_2
 884       nop.i 999
 885 }
 886 { .mfi
 887       nop.m 999
 888 (p9)  fadd.s1 FR_H = FR_H, FR_H2              // H = H_1 + H_2
 889       nop.i 999
 890 }
 891 ;;
 892
 893 { .mmf
 894       nop.m 999
 895       nop.m 999
 896 (p9)  fadd.s1 FR_h = FR_h, FR_h2              // h = h_1 + h_2
 897 }
 898 ;;
 899
 900 { .mfi
 901       nop.m 999
 902 (p8)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq          // w4 = w^4 for near1 path
 903       nop.i 999
 904 }
 905 { .mfi
 906       nop.m 999
 907 (p8)  fma.s1 FR_p87 = FR_W, FR_P8, FR_P7      // p87 = w * P8 + P7
 908       nop.i 999
 909 }
 910 ;;
 911
 912 { .mfi
 913       nop.m 999
 914 (p9)  fma.s1  FR_S_lo = FR_S_lo, FR_2_to_minus_N, f0 // S_lo = S_lo * 2^(-N)
 915       nop.i 999
 916 }
 917 { .mfi
 918       nop.m 999
 919 (p8)  fma.s1 FR_p43 = FR_W, FR_P4, FR_P3      // p43 = w * P4 + P3
 920       nop.i 999
 921 }
 922 ;;
 923
 924 { .mfi
 925       nop.m 999
 926 (p9)  fmpy.s1 FR_G = FR_G, FR_G3              // G = (G_1 * G_2) * G_3
 927       nop.i 999
 928 }
 929 { .mfi
 930       nop.m 999
 931 (p9)  fadd.s1 FR_H = FR_H, FR_H3              // H = (H_1 + H_2) + H_3
 932       nop.i 999
 933 }
 934 ;;
 935
 936 { .mfi
 937       nop.m 999
 938 (p9)  fadd.s1 FR_h = FR_h, FR_h3              // h = (h_1 + h_2) + h_3
 939       nop.i 999
 940 }
 941 { .mfi
 942       nop.m 999
 943 (p8)  fmpy.s1 FR_w6 = FR_w4, FR_wsq           // w6 = w^6 for near1 path
 944       nop.i 999
 945 }
 946 ;;
 947
 948 { .mfi
 949       nop.m 999
 950 (p8)  fma.s1 FR_p432 = FR_W, FR_p43, FR_P2    // p432 = w * p43 + P2
 951       nop.i 999
 952 }
 953 { .mfi
 954       nop.m 999
 955 (p8)  fma.s1 FR_p876 = FR_W, FR_p87, FR_P6    // p876 = w * p87 + P6
 956       nop.i 999
 957 }
 958 ;;
 959
 960 { .mfi
 961       nop.m 999
 962 (p9)  fms.s1 FR_r = FR_G, FR_S_hi, f1         // r = G * S_hi - 1
 963       nop.i 999
 964 }
 965 { .mfi
 966       nop.m 999
 967 (p9)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N * log2_hi + H
 968       nop.i 999
 969 }
 970 ;;
 971
 972 { .mfi
 973       nop.m 999
 974 (p9)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h  // h = N * log2_lo + h
 975       nop.i 999
 976 }
 977 ;;
 978
 979 { .mfi
 980       nop.m 999
 981 (p9)  fma.s1 FR_r = FR_G, FR_S_lo, FR_r        // r = G * S_lo + (G * S_hi - 1)
 982       nop.i 999
 983 }
 984 ;;
 985
 986 { .mfi
 987       nop.m 999
 988 (p8)  fma.s1 FR_p4321 = FR_W, FR_p432, FR_P1      // p4321 = w * p432 + P1
 989       nop.i 999
 990 }
 991 { .mfi
 992       nop.m 999
 993 (p8)  fma.s1 FR_p8765 = FR_W, FR_p876, FR_P5      // p8765 = w * p876 + P5
 994       nop.i 999
 995 }
 996 ;;
 997
 998 { .mfi
 999       nop.m 999
1000 (p9)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3      // poly_lo = r * Q4 + Q3
1001       nop.i 999
1002 }
1003 { .mfi
1004       nop.m 999
1005 (p9)  fmpy.s1 FR_rsq = FR_r, FR_r                 // rsq = r * r
1006       nop.i 999
1007 }
1008 ;;
1009
1010 { .mfi
1011       nop.m 999
1012 (p8)  fma.s1 FR_Y_lo = FR_wsq, FR_p4321, f0       // Y_lo = wsq * p4321
1013       nop.i 999
1014 }
1015 { .mfi
1016       nop.m 999
1017 (p8)  fma.s1 FR_Y_hi = FR_W, f1, f0               // Y_hi = w for near1 path
1018       nop.i 999
1019 }
1020 ;;
1021
1022 { .mfi
1023       nop.m 999
1024 (p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo * r + Q2
1025       nop.i 999
1026 }
1027 { .mfi
1028       nop.m 999
1029 (p9)  fma.s1 FR_rcub = FR_rsq, FR_r, f0           // rcub = r^3
1030       nop.i 999
1031 }
1032 ;;
1033
1034 { .mfi
1035       nop.m 999
1036 (p8)  fma.s1 FR_Y_lo = FR_w6, FR_p8765,FR_Y_lo // Y_lo = w6 * p8765 + w2 * p4321
1037       nop.i 999
1038 }
1039 ;;
1040
1041 { .mfi
1042       nop.m 999
1043 (p9)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r     // poly_hi = Q1 * rsq + r
1044       nop.i 999
1045 }
1046 ;;
1047
1048 { .mfi
1049       nop.m 999
1050 (p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
1051       nop.i 999
1052 }
1053 ;;
1054
1055 { .mfi
1056       nop.m 999
1057 (p9)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo    // Y_lo = poly_hi + poly_lo
1058       nop.i 999
1059 }
1060 ;;
1061
1062 // Remainder of code is common for near1 and regular paths
1063 { .mfb
1064       nop.m 999
1065       fadd.s0  f8 = FR_Y_lo,FR_Y_hi               // Result=Y_lo+Y_hi
1066       br.ret.sptk   b0                       // Common exit for 2^-80 < x < inf
1067 }
1068 ;;
1069
1070
1071 // Here if x=-1
1072 LOG1P_EQ_Minus_1:
1073 //
1074 //    If x=-1 raise divide by zero and return -inf
1075 //
1076 { .mfi
1077       mov   GR_Parameter_TAG = 138
1078       fsub.s1 FR_Output_X_tmp = f0, f1
1079       nop.i 999
1080 }
1081 ;;
1082
1083 { .mfb
1084       nop.m 999
1085       frcpa.s0 FR_Output_X_tmp, p8 =  FR_Output_X_tmp, f0
1086       br.cond.sptk __libm_error_region
1087 }
1088 ;;
1089
1090 LOG1P_special:
1091 { .mfi
1092       nop.m 999
1093       fclass.m.unc p8, p0 =  FR_Input_X, 0x1E1  // Test for natval, nan, +inf
1094       nop.i 999
1095 }
1096 ;;
1097
1098 //
1099 //    For SNaN raise invalid and return QNaN.
1100 //    For QNaN raise invalid and return QNaN.
1101 //    For +Inf return +Inf.
1102 //
1103 { .mfb
1104       nop.m 999
1105 (p8)  fmpy.s0 f8 =  FR_Input_X, f1
1106 (p8)  br.ret.sptk   b0                          // Return for natval, nan, +inf
1107 }
1108 ;;
1109
1110 //
1111 //    For -Inf raise invalid and return QNaN.
1112 //
1113 { .mfb
1114       mov   GR_Parameter_TAG = 139
1115       fmpy.s0 FR_Output_X_tmp =  FR_Input_X, f0
1116       br.cond.sptk __libm_error_region
1117 }
1118 ;;
1119
1120
1121 LOG1P_unsupported:
1122 //
1123 //    Return generated NaN or other value.
1124 //
1125 { .mfb
1126       nop.m 999
1127       fmpy.s0 f8 = FR_Input_X, f0
1128       br.ret.sptk   b0
1129 }
1130 ;;
1131
1132 // Here if -inf < x < -1
1133 LOG1P_LT_Minus_1:
1134 //
1135 //    Deal with x < -1 in a special way - raise
1136 //    invalid and produce QNaN indefinite.
1137 //
1138 { .mfb
1139       mov   GR_Parameter_TAG = 139
1140       frcpa.s0 FR_Output_X_tmp, p8 =  f0, f0
1141       br.cond.sptk __libm_error_region
1142 }
1143 ;;
1144
1145
1146 GLOBAL_IEEE754_END(log1pl)
1147 libm_alias_ldouble_other (__log1p, log1p)
1148
1149 LOCAL_LIBM_ENTRY(__libm_error_region)
1150 .prologue
1151 { .mfi
1152         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1153         nop.f 0
1154 .save   ar.pfs,GR_SAVE_PFS
1155         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1156 }
1157 { .mfi
1158 .fframe 64
1159         add sp=-64,sp                           // Create new stack
1160         nop.f 0
1161         mov GR_SAVE_GP=gp                       // Save gp
1162 };;
1163 { .mmi
1164         stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
1165         add GR_Parameter_X = 16,sp              // Parameter 1 address
1166 .save   b0, GR_SAVE_B0
1167         mov GR_SAVE_B0=b0                       // Save b0
1168 };;
1169 .body
1170 { .mib
1171         stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
1172         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
1173         nop.b 0                                 // Parameter 3 address
1174 }
1175 { .mib
1176         stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
1177         add   GR_Parameter_Y = -16,GR_Parameter_Y
1178         br.call.sptk b0=__libm_error_support#  // Call error handling function
1179 };;
1180 { .mmi
1181         nop.m 999
1182         nop.m 999
1183         add   GR_Parameter_RESULT = 48,sp
1184 };;
1185 { .mmi
1186         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1187 .restore sp
1188         add   sp = 64,sp                       // Restore stack pointer
1189         mov   b0 = GR_SAVE_B0                  // Restore return address
1190 };;
1191 { .mib
1192         mov   gp = GR_SAVE_GP                  // Restore gp
1193         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1194         br.ret.sptk     b0                     // Return
1195 };;
1196
1197 LOCAL_LIBM_END(__libm_error_region#)
1198
1199 .type   __libm_error_support#,@function
1200 .global __libm_error_support#