4 // Copyright (c) 2000 - 2003, Intel Corporation
5 // All rights reserved.
7 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //*********************************************************************
42 // Function: powl(x,y), where
44 // powl(x,y) = x , for double extended precision x and y values
46 //*********************************************************************
49 // 02/02/00 (Hand Optimized)
50 // 04/04/00 Unwind support added
51 // 08/15/00 Bundle added after call to __libm_error_support to properly
52 // set [the previously overwritten] GR_Parameter_RESULT.
53 // 01/22/01 Corrected results for powl(1,inf), powl(1,nan), and
54 // powl(snan,0) to be 1 per C99, not nan. Fixed many flag settings.
55 // 02/06/01 Call __libm_error support if over/underflow when y=2.
56 // 04/17/01 Support added for y close to 1 and x a non-special value.
57 // Shared software under/overflow detection for all paths
58 // 02/07/02 Corrected sf3 setting to disable traps
59 // 05/13/02 Improved performance of all paths
60 // 02/10/03 Reordered header: .section, .global, .proc, .align;
61 // used data8 for long double table values
62 // 04/17/03 Added missing mutex directive
63 // 10/13/03 Corrected .endp names to match .proc names
65 //*********************************************************************
69 // Floating-Point Registers:
70 // f8 (Input x and Return Value)
74 // General Purpose Registers:
75 // Locals r14-24,r32-r65
76 // Parameters to __libm_error_support r62,r63,r64,r65
78 // Predicate Registers: p6-p15
80 //*********************************************************************
82 // Special Cases and IEEE special conditions:
84 // Denormal fault raised on denormal inputs
85 // Overflow exceptions raised when appropriate for pow
86 // Underflow exceptions raised when appropriate for pow
87 // (Error Handling Routine called for overflow and Underflow)
88 // Inexact raised when appropriate by algorithm
90 // 1. (anything) ** NatVal or (NatVal) ** anything is NatVal
91 // 2. X or Y unsupported or sNaN is qNaN/Invalid
92 // 3. (anything) ** 0 is 1
93 // 4. (anything) ** 1 is itself
94 // 5. (anything except 1) ** qNAN is qNAN
95 // 6. qNAN ** (anything except 0) is qNAN
96 // 7. +-(|x| > 1) ** +INF is +INF
97 // 8. +-(|x| > 1) ** -INF is +0
98 // 9. +-(|x| < 1) ** +INF is +0
99 // 10. +-(|x| < 1) ** -INF is +INF
100 // 11. +-1 ** +-INF is +1
101 // 12. +0 ** (+anything except 0, NAN) is +0
102 // 13. -0 ** (+anything except 0, NAN, odd integer) is +0
103 // 14. +0 ** (-anything except 0, NAN) is +INF/div_0
104 // 15. -0 ** (-anything except 0, NAN, odd integer) is +INF/div_0
105 // 16. -0 ** (odd integer) = -( +0 ** (odd integer) )
106 // 17. +INF ** (+anything except 0,NAN) is +INF
107 // 18. +INF ** (-anything except 0,NAN) is +0
108 // 19. -INF ** (anything except NAN) = -0 ** (-anything)
109 // 20. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
110 // 21. (-anything except 0 and inf) ** (non-integer) is qNAN/Invalid
111 // 22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
112 // generate denorm/unorm fault except if invalid or div_0 raised.
114 //*********************************************************************
121 // If Y = 2, return X*X.
122 // If Y = 0.5, return sqrt(X).
124 // Compute log(X) to extra precision.
126 // ker_log_80( X, logX_hi, logX_lo, Safe );
128 // ...logX_hi + logX_lo approximates log(X) to roughly 80
129 // ...significant bits of accuracy.
131 // Compute Y*log(X) to extra precision.
133 // P_hi := Y * logX_hi
134 // P_lo := Y * logX_hi - P_hi ...using FMA
135 // P_lo := Y * logX_lo + P_lo ...using FMA
137 // Compute exp(P_hi + P_lo)
140 // Expo_Range := 2; (assuming double-extended power function)
141 // ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
142 // Z_hi, Z_lo, scale, Safe )
144 // scale := sgn * scale
146 // If (Safe) then ...result will not over/underflow
147 // return scale*Z_hi + (scale*Z_lo)
150 // take necessary precaution in computing
151 // scale*Z_hi + (scale*Z_lo)
152 // to set possible exceptions correctly.
157 // ...Follow the order of the case checks
159 // If Y is +-0, return +1 without raising any exception.
160 // If Y is +1, return X without raising any exception.
161 // If Y is qNaN, return Y without exception.
162 // If X is qNaN, return X without exception.
164 // At this point, X is real and Y is +-inf.
165 // Thus |X| can only be 1, strictly bigger than 1, or
166 // strictly less than 1.
169 // return ( Y == +inf? +0 : +inf )
170 // elseif |X| > 1, then
171 // return ( Y == +inf? +0 : +inf )
177 // ...Follow the order of the case checks
178 // ...Note that Y is real, finite, non-zero, and not +1.
180 // If X is qNaN, return X without exception.
183 // return ( Y > 0 ? +0 : +inf )
186 // return ( Y > 0 ? +inf : +0 )
190 // return ( Y > 0 ? +inf : +0 )
194 // Return 0 * inf to generate a quiet NaN together
195 // with an invalid exception.
200 // We describe the quick branch since this part is important
201 // in reaching the normal case efficiently.
205 // This stage contains two threads.
209 // fclass.m X_excep, X_ok = X, (NatVal or s/qNaN) or
212 // fclass.nm X_unsupp, X_supp = X, (NatVal or s/qNaN) or
213 // +-(0, unnorm, norm, infinity)
215 // X_norm := fnorm( X ) with traps disabled
217 // If (X_excep) goto Filtering (Step 2)
218 // If (X_unsupp) goto Filtering (Step 2)
223 // fclass.m Y_excep, Y_ok = Y, (NatVal or s/qNaN) or
226 // fclass.nm Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
227 // +-(0, unnorm, norm, infinity)
229 // Y_norm := fnorm( Y ) with traps disabled
231 // If (Y_excep) goto Filtering (Step 2)
232 // If (Y_unsupp) goto Filtering (Step 2)
237 // This stage contains two threads.
242 // Set X_lt_0 if X < 0 (using fcmp)
244 // If (X_lt_0) goto Filtering (Step 2)
249 // Set Y_is_1 if Y = +1 (using fcmp)
250 // If (Y_is_1) goto Filtering (Step 2)
254 // This stage contains two threads.
260 // X := fnorm(X) in prevailing traps
266 // Y := fnorm(Y) in prevailing traps
271 // Go to Case_Normal.
275 // ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
277 // double-extended 1/ln(2)
278 // 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
279 // 3fff b8aa 3b29 5c17 f0bc
280 // For speed the significand will be loaded directly with a movl and setf.sig
281 // and the exponent will be bias+63 instead of bias+0. Thus subsequent
282 // computations need to scale appropriately.
283 // The constant 2^12/ln(2) is needed for the computation of N. This is also
284 // obtained by scaling the computations.
286 // Two shifting constants are loaded directly with movl and setf.d.
287 // 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
288 // This constant is added to x*1/ln2 to shift the integer part of
289 // x*2^12/ln2 into the rightmost bits of the significand.
290 // The result of this fma is N_signif.
291 // 2. RSHF = 1.1000..00 * 2^(63)
292 // This constant is subtracted from N_signif * 2^(-51) to give
293 // the integer part of N, N_fix, as a floating-point number.
294 // The result of this fms is float_N.
299 LOCAL_OBJECT_START(Constants_exp_64_Arg)
300 data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
301 data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
302 LOCAL_OBJECT_END(Constants_exp_64_Arg)
304 LOCAL_OBJECT_START(Constants_exp_64_A)
306 data8 0xAAAAAAABB1B736A0,0x00003FFA
307 data8 0xAAAAAAAB90CD6327,0x00003FFC
308 data8 0xFFFFFFFFFFFFFFFF,0x00003FFD
309 LOCAL_OBJECT_END(Constants_exp_64_A)
311 LOCAL_OBJECT_START(Constants_exp_64_P)
313 data8 0xD00D6C8143914A8A,0x00003FF2
314 data8 0xB60BC4AC30304B30,0x00003FF5
315 data8 0x888888887474C518,0x00003FF8
316 data8 0xAAAAAAAA8DAE729D,0x00003FFA
317 data8 0xAAAAAAAAAAAAAF61,0x00003FFC
318 data8 0x80000000000004C7,0x00003FFE
319 LOCAL_OBJECT_END(Constants_exp_64_P)
321 LOCAL_OBJECT_START(Constants_exp_64_T1)
322 data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
323 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
324 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
325 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
326 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
327 data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
328 data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
329 data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
330 data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
331 data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
332 data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
333 data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
334 data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
335 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
336 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
337 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
338 LOCAL_OBJECT_END(Constants_exp_64_T1)
340 LOCAL_OBJECT_START(Constants_exp_64_T2)
341 data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
342 data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
343 data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
344 data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
345 data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
346 data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
347 data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
348 data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
349 data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
350 data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
351 data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
352 data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
353 data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
354 data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
355 data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
356 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
357 LOCAL_OBJECT_END(Constants_exp_64_T2)
359 LOCAL_OBJECT_START(Constants_exp_64_W1)
360 data8 0x0000000000000000, 0xBE384454171EC4B4
361 data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
362 data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
363 data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
364 data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
365 data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
366 data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
367 data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
368 data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
369 data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
370 data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
371 data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
372 data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
373 data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
374 data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
375 data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
376 data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
377 data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
378 data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
379 data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
380 data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
381 data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
382 data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
383 data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
384 data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
385 data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
386 data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
387 data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
388 data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
389 data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
390 data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
391 data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
392 LOCAL_OBJECT_END(Constants_exp_64_W1)
394 LOCAL_OBJECT_START(Constants_exp_64_W2)
395 data8 0x0000000000000000, 0xBE641F2537A3D7A2
396 data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
397 data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
398 data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
399 data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
400 data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
401 data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
402 data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
403 data8 0xBE56856B49BFF529, 0x3E66DD3300508651
404 data8 0x3E51165FC114BC13, 0x3E53333DC453290F
405 data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
406 data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
407 data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
408 data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
409 data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
410 data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
411 data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
412 data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
413 data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
414 data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
415 data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
416 data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
417 data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
418 data8 0xBE559725ADE45917, 0xBE68C29C042FC476
419 data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
420 data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
421 data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
422 data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
423 data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
424 data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
425 data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
426 data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
427 LOCAL_OBJECT_END(Constants_exp_64_W2)
429 LOCAL_OBJECT_START(Constants_log_80_P)
430 // P_8, P_7, ..., P_1
431 data8 0xCCCE8B883B1042BC, 0x0000BFFB // P_8
432 data8 0xE38997B7CADC2149, 0x00003FFB // P_7
433 data8 0xFFFFFFFEB1ACB090, 0x0000BFFB // P_6
434 data8 0x9249249806481C81, 0x00003FFC // P_5
435 data8 0x0000000000000000, 0x00000000 // Pad for bank conflicts
436 data8 0xAAAAAAAAAAAAB0EF, 0x0000BFFC // P_4
437 data8 0xCCCCCCCCCCC91416, 0x00003FFC // P_3
438 data8 0x8000000000000000, 0x0000BFFD // P_2
439 data8 0xAAAAAAAAAAAAAAAB, 0x00003FFD // P_1
440 LOCAL_OBJECT_END(Constants_log_80_P)
442 LOCAL_OBJECT_START(Constants_log_80_Q)
443 // log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
444 data8 0xB172180000000000,0x00003FFE
445 data8 0x82E308654361C4C6,0x0000BFE2
446 data8 0x92492453A51BE0AF,0x00003FFC
447 data8 0xAAAAAB73A0CFD29F,0x0000BFFC
448 data8 0xCCCCCCCCCCCE3872,0x00003FFC
449 data8 0xFFFFFFFFFFFFB4FB,0x0000BFFC
450 data8 0xAAAAAAAAAAAAAAAB,0x00003FFD
451 data8 0x8000000000000000,0x0000BFFE
452 LOCAL_OBJECT_END(Constants_log_80_Q)
454 LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h1)
455 // Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
456 data4 0x00008000,0x3F800000,0x00000000,0x00000000
457 data4 0x00000000,0x00000000,0x00000000,0x00000000
458 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
459 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
460 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
461 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
462 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
463 data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
464 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
465 data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
466 data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
467 data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
468 data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
469 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
470 data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
471 data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
472 data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
473 data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
474 data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
475 data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
476 data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
477 data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
478 data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
479 data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
480 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
481 data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
482 data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
483 data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
484 data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
485 data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
486 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
487 data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
488 LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h1)
490 LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h2)
491 // Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
492 data4 0x00008000,0x3F800000,0x00000000,0x00000000
493 data4 0x00000000,0x00000000,0x00000000,0x00000000
494 data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
495 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
496 data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
497 data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
498 data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
499 data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
500 data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
501 data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
502 data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
503 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
504 data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
505 data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
506 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
507 data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
508 data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
509 data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
510 data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
511 data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
512 data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
513 data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
514 data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
515 data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
516 data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
517 data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
518 data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
519 data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
520 data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
521 data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
522 data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
523 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
524 LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h2)
526 LOCAL_OBJECT_START(Constants_log_80_h3_G_H)
527 // h3 IEEE double extended, H3 and G3 IEEE single
528 data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
529 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
530 data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
531 data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
532 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
533 data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
534 data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
535 data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
536 data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
537 data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
538 data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
539 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
540 data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
541 data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
542 data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
543 data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
544 data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
545 data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
546 data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
547 data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
548 data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
549 data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
550 data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
551 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
552 data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
553 data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
554 data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
555 data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
556 data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
557 data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
558 data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
559 data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
560 data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
561 data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
562 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
563 data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
564 data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
565 data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
566 data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
567 data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
568 LOCAL_OBJECT_END(Constants_log_80_h3_G_H)
576 GR_exp_square_oflow = r20
577 GR_exp_square_uflow = r21
578 GR_exp_ynear1_oflow = r22
579 GR_exp_ynear1_uflow = r23
619 GR_exp_bias_p_k = r47
635 GR_low_order_bit = r56
646 //** Registers for unwind support
653 GR_Parameter_RESULT = r64
654 GR_Parameter_TAG = r65
771 FR_Result_small = f62
777 FR_save_Input_X = f65
786 FR_INV_LN2_2TO63 = f71
798 GLOBAL_LIBM_ENTRY(powl)
800 // Get significand of x. It is the critical path.
803 getf.sig GR_signif_Z = FR_Input_X // Get significand of x
804 fclass.m p11, p12 = FR_Input_X, 0x0b // Test x unorm
809 fnorm.s1 FR_norm_X = FR_Input_X // Normalize x
810 mov GR_exp_half = 0xffff - 1 // Exponent for 0.5
815 alloc r32 = ar.pfs,0,30,4,0
816 fclass.m p7, p0 = FR_Input_Y, 0x1E7 // Test y natval, nan, inf, zero
817 mov GR_exp_pos_max = 0x13fff // Max exponent for pos oflow test
820 addl GR_table_base = @ltoff(Constants_exp_64_Arg#), gp // Ptr to tables
821 fnorm.s1 FR_norm_Y = FR_Input_Y // Normalize y
822 mov GR_exp_neg_max = 0x33fff // Max exponent for neg oflow test
827 getf.exp GR_signexp_y = FR_Input_Y // Get sign and exp of y
828 (p12) fclass.m p11, p0 = FR_Input_Y, 0x0b // Test y unorm
829 mov GR_sign_mask = 0x20000 // Sign mask
832 ld8 GR_table_base = [GR_table_base] // Get base address for tables
833 fadd.s1 FR_Two = f1, f1 // Form 2.0 for square test
834 mov GR_exp_mask = 0x1FFFF // Exponent mask
839 getf.sig GR_signif_y = FR_Input_Y // Get significand of y
840 fclass.m p6, p0 = FR_Input_X, 0x1E7 // Test x natval, nan, inf, zero
846 getf.exp GR_signexp_x = FR_Input_X // Get signexp of x
847 fmerge.s FR_save_Input_X = FR_Input_X, FR_Input_X
848 extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
851 setf.exp FR_Half = GR_exp_half // Load half
853 (p11) br.cond.spnt POWL_DENORM // Branch if x or y denorm/unorm
857 // Return here from POWL_DENORM
860 setf.exp FR_Big = GR_exp_pos_max // Form big pos value for oflow test
861 fclass.nm p11, p0 = FR_Input_Y, 0x1FF // Test Y unsupported
862 shl GR_Index1 = GR_Index1,5 // Adjust index1 pointer x 32
865 add GR_Table_Ptr = 0x7c0, GR_table_base // Constants_log_80_Z_G_H_h1
866 fma.s1 FR_Sgn = f1,f1,f0 // Assume result positive
867 mov GR_exp_bias = 0xFFFF // Form exponent bias
872 // Identify NatVals, NaNs, Infs, and Zeros.
875 // Remove sign bit from exponent of y.
877 // Branch on Infs, Nans, Zeros, and Natvals
878 // Check to see that exponent < 0
881 setf.exp FR_NBig = GR_exp_neg_max // Form big neg value for oflow test
882 fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test X unsupported
883 and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
886 add GR_Index1 = GR_Index1,GR_Table_Ptr
888 (p6) br.cond.spnt POWL_64_SPECIAL // Branch if x natval, nan, inf, zero
892 // load Z_1 from Index1
894 // There is logic starting here to determine if y is an integer when x < 0.
895 // If 0 < |y| < 1 then clearly y is not an integer.
896 // If |y| > 1, then the significand of y is shifted left by the size of
897 // the exponent of y. This preserves the lsb of the integer part + the
898 // fractional bits. The lsb of the integer can be tested to determine if
899 // the integer is even or odd. The fractional bits can be tested. If zero,
900 // then y is an integer.
903 ld2 GR_Z_1 =[GR_Index1],4 // Load Z_1
904 fmerge.s FR_Z = f0, FR_norm_X // Z = |x|
905 extr.u GR_X_0 = GR_signif_Z, 49, 15 // Extract X_0 from significand
908 cmp.lt p9, p0 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
910 (p7) br.cond.spnt POWL_64_SPECIAL // Branch if y natval, nan, inf, zero
915 ldfs FR_G_1 = [GR_Index1],4 // Load G_1
916 fcmp.eq.s1 p10, p0 = FR_Input_Y, f1 // Test Y = +1.0
917 (p8) br.cond.spnt POWL_64_UNSUPPORT // Branch if x unsupported
922 // X_0 = High order 15 bit of Z
925 ldfs FR_H_1 = [GR_Index1],8 // Load H_1
926 (p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0 // Test x<0, 0 <|y|<1
927 (p11) br.cond.spnt POWL_64_UNSUPPORT // Branch if y unsupported
932 ldfe FR_h_1 = [GR_Index1] // Load h_1
933 fcmp.eq.s1 p7, p0 = FR_Input_Y, FR_Two // Test y = 2.0
934 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // X_1 = X_0 * Z_1 (bits 15-30)
935 // Wait 4 cycles to use result
938 add GR_Table_Ptr = 0x9c0, GR_table_base // Constants_log_80_Z_G_H_h2
940 sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
945 // Branch for (x < 0) and Y not an integer.
949 fcmp.lt.s1 p6, p0 = FR_Input_X, f0 // Test x < 0
950 (p9) br.cond.spnt POWL_64_XNEG // Branch if x < 0, 0 < |y| < 1
956 fcmp.eq.s1 p12, p0 = FR_Input_X, f1 // Test x=+1.0
961 fsub.s1 FR_W = FR_Z, f1 // W = Z - 1
962 (p7) br.cond.spnt POWL_64_SQUARE // Branch if y=2
968 (p10) fmpy.s0 FR_Result = FR_Input_X, f1 // If y=+1.0, result=x
969 (p6) shl GR_fraction_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
970 // Wait 4 cycles to use result
976 (p12) fma.s0 FR_Result = FR_Input_Y, f0, f1 // If x=1.0, result=1, chk denorm
977 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract index2
985 getf.exp GR_N = FR_Z // Get exponent of Z (also x)
986 shl GR_Index2=GR_Index2,5 // Index2 x 32 bytes
987 (p10) br.ret.spnt b0 // Exit if y=+1.0
992 add GR_Index2 = GR_Index2, GR_Table_Ptr // Pointer to table 2
994 (p12) br.ret.spnt b0 // Exit if x=+1.0
999 ld2 GR_Z_2 =[GR_Index2],4 // Load Z_2
1001 ldfs FR_G_2 = [GR_Index2],4 // Load G_2
1007 ldfs FR_H_2 = [GR_Index2],8 // Load H_2
1008 (p6) tbit.nz.unc p9, p0 = GR_fraction_y, 63 // Test x<0 and y odd integer
1009 add GR_Table_Ptr = 0xbcc, GR_table_base // Constants_log_80_h3_G_H, G_3
1014 // For x < 0 and y odd integer,, set sign = -1.
1017 getf.exp GR_M = FR_W // Get signexp of W
1019 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // X_2 = X_1 * Z_2 (bits 15-30)
1022 ldfe FR_h_2 = [GR_Index2] // Load h_2
1023 (p9) fnma.s1 FR_Sgn = f1, f1, f0 // If x<0, y odd int, result negative
1024 sub GR_N = GR_N, GR_exp_bias // Get true exponent of x = N
1029 add GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
1030 fcmp.eq.s0 p11, p0 = FR_Input_Y, FR_Half // Test y=0.5, also set denorm
1031 (p6) shl GR_fraction_y= GR_fraction_y, 1 // Shift left 1 to get fraction
1036 setf.sig FR_float_N = GR_N
1037 (p6) cmp.ne.unc p8, p0 = GR_fraction_y, r0 // Test x<0 and y not integer
1038 (p8) br.cond.spnt POWL_64_XNEG // Branch if x<0 and y not int
1043 // Raise possible denormal operand exception for both X and Y.
1044 // Set pointers in case |x| near 1
1045 // Branch to embedded sqrt(x) if y=0.5
1048 add GR_P_ptr1 = 0x6b0, GR_table_base // Constants_log_80_P, P8, NEAR path
1049 fcmp.eq.s0 p12, p0 = FR_Input_X, FR_Input_Y // Dummy to set denormal
1050 add GR_P_ptr2 = 0x700, GR_table_base // Constants_log_80_P, P4, NEAR path
1053 cmp.eq p15, p14 = r0, r0 // Assume result safe (no over/under)
1054 fsub.s1 FR_Delta = FR_Input_Y,f1 // Delta = y - 1.0
1055 (p11) br.cond.spnt POWL_64_SQRT // Branch if y=0.5
1060 // Computes ln( x ) to extra precision
1062 // Output FR 2: FR_Y_hi
1063 // Output FR 3: FR_Y_lo
1064 // Output PR 1: PR_Safe
1067 and GR_M = GR_exp_mask, GR_M // Mask to get exponent of W
1069 extr.u GR_Index3 = GR_X_2, 1, 5 // Get index3
1074 shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 // Ptr to H_3
1075 shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr // Ptr to G_3
1076 sub GR_M = GR_M, GR_exp_bias // Get true exponent of W
1081 ldfs FR_G_3 = [GR_Index3],-12 // Load G_3
1082 cmp.gt p7, p14 = -8, GR_M // Test if |x-1| < 2^-8
1083 (p7) br.cond.spnt LOGL80_NEAR // Branch if |x-1| < 2^-8
1087 // Here if |x-1| >= 2^-8
1089 ldfs FR_H_3 = [GR_Table_Ptr1] // Load H_3
1096 ldfe FR_h_3 = [GR_Index3] // Load h_3
1097 fmerge.se FR_S = f1,FR_Z // S = merge of 1.0 and signif(Z)
1101 add GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
1102 fmpy.s1 FR_G = FR_G_1, FR_G_2 // G = G_1 * G_2
1108 // Begin Loading Q's - load log2_hi part
1111 ldfe FR_log2_hi = [GR_Table_Ptr],16 // Load log2_hi
1112 fadd.s1 FR_H = FR_H_1, FR_H_2 // H = H_1 + H_2
1120 ldfe FR_log2_lo = [GR_Table_Ptr],16 // Load log2_lo
1121 fadd.s1 FR_h = FR_h_1, FR_h_2 // h = h_1 + h_2
1127 ldfe FR_Q_6 = [GR_Table_Ptr],16 // Load Q_6
1128 fcvt.xf FR_float_N = FR_float_N
1134 ldfe FR_Q_5 = [GR_Table_Ptr],16 // Load Q_5
1141 // G = G_1 * G_2 * G_3
1144 ldfe FR_Q_4 = [GR_Table_Ptr],16 // Load Q_4
1145 fmpy.s1 FR_G = FR_G, FR_G_3
1151 // H = H_1 + H_2 + H_3
1154 ldfe FR_Q_3 = [GR_Table_Ptr],16 // Load Q_3
1155 fadd.s1 FR_H = FR_H, FR_H_3
1161 // Y_lo = poly + Y_lo
1163 // h = h_1 + h_2 + h_3
1166 ldfe FR_Q_2 = [GR_Table_Ptr],16 // Load Q_2
1167 fadd.s1 FR_h = FR_h, FR_h_3
1177 ldfe FR_Q_1 = [GR_Table_Ptr],16 // Load Q_1
1178 fmpy.s1 FR_GS_hi = FR_G, FR_S
1183 fms.s1 FR_r = FR_G, FR_S, f1
1189 // poly_lo = Q_5 + r * Q_6
1192 getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
1193 fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
1201 fsub.s1 FR_r_cor = FR_GS_hi, f1
1207 // GS_lo = G*S - GS_hi
1211 fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
1221 fmpy.s1 FR_rsq = FR_r, FR_r
1225 // G = float_N*log2_hi + H
1229 fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
1235 // Y_lo = float_N*log2_lo + h
1239 fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
1245 // poly_lo = Q_4 + r * poly_lo
1246 // r_cor = r_cor - r
1250 fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
1255 fsub.s1 FR_r_cor = FR_r_cor, FR_r
1261 // poly_hi = r * Q_2 + Q_1
1266 fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
1271 fadd.s1 FR_Y_hi = FR_G, FR_r
1277 // poly_lo = Q_3 + r * poly_lo
1278 // r_cor = r_cor + GS_lo
1282 fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
1287 fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
1297 fsub.s1 FR_Y_lo_2 = FR_G, FR_Y_hi
1303 // r_cor = r_cor + Y_lo
1304 // poly = poly_hi + rsq * poly_lo
1307 add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
1308 fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
1313 fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
1321 // all long before they are needed.
1322 // They are used in LOGL_RETURN PATH
1325 // poly = rsq * poly + r_cor
1328 ldfe FR_L_hi = [GR_Table_Ptr],16 // Load L_hi
1329 fadd.s1 FR_Y_lo = FR_Y_lo_2, FR_r
1334 fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
1340 ldfe FR_L_lo = [GR_Table_Ptr],16 // Load L_lo
1341 fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
1342 br.cond.sptk LOGL_RETURN // Branch to common code
1348 // Here if |x-1| < 2^-8
1350 // Branch LOGL80_NEAR
1354 ldfe FR_P_8 = [GR_P_ptr1],16 // Load P_8
1355 ldfe FR_P_4 = [GR_P_ptr2],16 // Load P_4
1356 fmpy.s1 FR_Wsq = FR_W, FR_W
1361 ldfe FR_P_7 = [GR_P_ptr1],16 // Load P_7
1362 ldfe FR_P_3 = [GR_P_ptr2],16 // Load P_3
1368 ldfe FR_P_6 = [GR_P_ptr1],16 // Load P_6
1369 ldfe FR_P_2 = [GR_P_ptr2],16 // Load P_2
1375 ldfe FR_P_5 = [GR_P_ptr1],16 // Load P_5
1376 ldfe FR_P_1 = [GR_P_ptr2],16 // Load P_1
1382 getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
1383 fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
1387 add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
1388 fmpy.s1 FR_W3 = FR_Wsq, FR_W
1395 fmpy.s1 FR_half_W = FR_Half, FR_W
1401 ldfe FR_L_hi = [GR_Table_Ptr],16
1402 fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
1407 fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
1413 ldfe FR_L_lo = [GR_Table_Ptr],16
1414 fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
1421 fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
1426 fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
1433 fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
1440 fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
1445 fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
1452 fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
1459 fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
1466 fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
1473 // Common code for completion of both logx paths
1476 // L_hi, L_lo already loaded.
1479 // kernel_log_80 computed ln(X)
1480 // and return logX_hi and logX_lo as results.
1481 // PR_pow_Safe set as well.
1484 // Compute Y * (logX_hi + logX_lo)
1487 // (Manipulate names so that inputs are in
1488 // the place kernel_exp expects them)
1490 // This function computes exp( x + x_cor)
1492 // Input FR 2: FR_X_cor
1493 // Output FR 3: FR_Y_hi
1494 // Output FR 4: FR_Y_lo
1495 // Output FR 5: FR_Scale
1496 // Output PR 1: PR_Safe
1500 // Load constants used in computing N using right-shift technique
1502 mov GR_exp_2tom51 = 0xffff-51
1503 movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
1506 add GR_Special_Exp = -50,GR_exp_bias
1507 movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
1512 // Point to Table of W1s
1513 // Point to Table of W2s
1516 add GR_W1_ptr = 0x2b0, GR_table_base // Constants_exp_64_W1
1517 add GR_W2_ptr = 0x4b0, GR_table_base // Constants_exp_64_W2
1518 cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp
1521 // Form two constants we need
1522 // 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
1523 // 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
1526 setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
1528 and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask // Get exponent of y-1
1531 setf.d FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51)
1532 movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
1538 fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo
1539 cmp.eq p15, p0= r0, r0 // Set p15, assume safe
1543 setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
1544 setf.d FR_RSHF = GR_rshf // Form right shift const 1.1000 * 2^63
1545 add GR_Table_Ptr1 = 0x50, GR_table_base // Constants_exp_64_P for
1551 ldfe FR_P_6 = [GR_Table_Ptr1],16 // Load P_6 for EXPL_SMALL path
1553 ldfe FR_P_5 = [GR_Table_Ptr1],16 // Load P_5 for EXPL_SMALL path
1559 ldfe FR_P_4 = [GR_Table_Ptr1],16 // Load P_4 for EXPL_SMALL path
1560 fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo // logx_hi ix Y_hi
1566 ldfe FR_P_3 = [GR_Table_Ptr1],16 // Load P_3 for EXPL_SMALL path
1568 ldfe FR_P_2 = [GR_Table_Ptr1],16 // Load P_2 for EXPL_SMALL path
1573 // N = X * Inv_log2_by_2^12
1574 // By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
1575 // We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
1577 ldfe FR_P_1 = [GR_Table_Ptr1] // Load P_1 for EXPL_SMALL path
1578 fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51
1583 fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi // P_hi is X
1584 (p6) br.cond.spnt POWL_Y_ALMOST_1 // Branch if |y-1| < 2^-50
1589 getf.exp GR_Expo_X = FR_X
1590 add GR_T1_ptr = 0x0b0, GR_table_base // Constants_exp_64_T1
1591 add GR_T2_ptr = 0x1b0, GR_table_base // Constants_exp_64_T2
1595 // float_N = round_int(N)
1596 // The signficand of N contains the rounded integer part of X * 2^12/ln2,
1597 // as a twos complement number in the lower bits (that is, it may be negative).
1598 // That twos complement number (called N) is put into GR_N_fix.
1600 // Since N is scaled by 2^51, it must be multiplied by 2^-51
1601 // before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
1602 // Thus, float_N contains the floating point version of N
1606 add GR_Table_Ptr = 0x20, GR_table_base // Constants_exp_64_A
1607 fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N
1610 // Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo
1612 mov GR_Big_Pos_Exp = 0x3ffe // 16382, largest safe exponent
1613 fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
1614 mov GR_Big_Neg_Exp = -0x3ffd // -16381 smallest safe exponent
1619 fmpy.s1 FR_rsq = FR_X, FR_X // rsq = X*X for EXPL_SMALL path
1620 mov GR_vsm_expo = -70 // Exponent for very small path
1624 fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5 // poly_lo for EXPL_SMALL path
1625 add GR_temp = 0x1,r0 // For tiny signif if small path
1630 // If expo_X < -6 goto exp_small
1633 getf.sig GR_N_fix = FR_N
1634 ldfe FR_A_3 = [GR_Table_Ptr],16 // Load A_3
1635 and GR_Expo_X = GR_Expo_X, GR_exp_mask // Get exponent of X
1640 ldfe FR_A_2 = [GR_Table_Ptr],16 // Load A_2
1642 sub GR_Expo_X = GR_Expo_X, GR_exp_bias // Get true exponent of X
1647 // If -6 > Expo_X, set P9 and branch
1650 cmp.gt p9, p0 = -6, GR_Expo_X
1651 fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N
1652 (p9) br.cond.spnt EXPL_SMALL // Branch if |X| < 2^-6
1657 // If 14 <= Expo_X, set P10
1660 cmp.le p10, p0 = 14, GR_Expo_X
1662 (p10) br.cond.spnt EXPL_HUGE // Branch if |X| >= 2^14
1674 extr.u GR_M1 = GR_N_fix, 6, 6 // Extract index M_1
1679 // k = extr.u(N_fix,0,6)
1682 shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr // Point to W1
1683 shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr // Point to T1
1684 extr.u GR_M2 = GR_N_fix, 0, 6 // Extract index M_2
1688 // N_fix is only correct up to 50 bits because of our right shift technique.
1689 // Actually in the normal path we will have restricted K to about 14 bits.
1690 // Somewhat arbitrarily we extract 32 bits.
1692 ldfd FR_W1 = [GR_W1_ptr]
1693 shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr // Point to W2
1694 extr GR_k = GR_N_fix, 12, 32 // Extract k
1699 ldfs FR_T1 = [GR_T1_ptr]
1700 fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
1701 shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr // Point to T2
1704 add GR_exp_bias_p_k = GR_exp_bias, GR_k
1706 cmp.gt p14,p15 = GR_k,GR_Big_Pos_Exp
1711 // if k < big_neg_exp, set p14 and Safe=False
1714 ldfs FR_T2 = [GR_T2_ptr]
1715 (p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
1721 setf.exp FR_Scale = GR_exp_bias_p_k
1722 ldfd FR_W2 = [GR_W2_ptr]
1728 ldfe FR_A_1 = [GR_Table_Ptr],16
1729 fadd.s1 FR_r = FR_r, FR_X_cor
1736 fadd.s1 FR_W_1_p1 = FR_W1, f1
1743 fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
1748 fmpy.s1 FR_rsq = FR_r, FR_r
1755 fmpy.s1 FR_T = FR_T1, FR_T2
1762 fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
1769 fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0
1776 fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
1783 fma.s1 FR_TMP2 = FR_T, f1, f0 // TMP2 = Y_hi = T
1790 fadd.s1 FR_Wp1 = FR_W, f1
1797 fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
1804 fma.s1 FR_Tscale = FR_T, FR_TMP1, f0 // Scale * Sgn * T
1809 fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
1816 fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale
1817 br.cond.sptk POWL_64_SHARED
1823 // Here if |ylogx| < 2^-6
1825 // Begin creating lsb to perturb final result
1828 setf.sig FR_temp = GR_temp
1829 fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4
1830 cmp.lt p12, p0 = GR_Expo_X, GR_vsm_expo // Test |ylogx| < 2^-70
1834 fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1
1841 fmpy.s1 FR_TMP2 = f1, f1
1846 fmpy.s1 FR_TMP1 = FR_Sgn, f1
1853 fmpy.s1 FR_r4 = FR_rsq, FR_rsq
1854 (p12) cmp.eq p15, p0 = r0, r0 // Set safe if |ylogx| < 2^-70
1858 (p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X
1859 (p12) br.cond.spnt POWL_64_SHARED // Branch if |ylogx| < 2^-70
1865 fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3
1870 fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X
1877 fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
1884 fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1 // Add sign info
1890 // Toggle on last bit of Y_lo
1891 // Set lsb of Y_lo to 1
1895 for FR_temp = FR_Y_lo,FR_temp
1902 fmerge.se FR_TMP3 = FR_TMP3,FR_temp
1903 br.cond.sptk POWL_64_SHARED
1909 // Here if |ylogx| >= 2^14
1911 mov GR_temp = 0x0A1DC // If X < 0, exponent -24100
1912 fcmp.gt.s1 p12, p13 = FR_X, f0 // Test X > 0
1913 cmp.eq p14, p15 = r0, r0 // Set Safe to false
1918 (p12) mov GR_Mask = 0x15DC0 // If X > 0, exponent +24000
1919 (p13) mov GR_Mask = 0x0A240 // If X < 0, exponent -24000
1925 setf.exp FR_TMP2 = GR_Mask // Form Y_hi = TMP2
1926 (p13) setf.exp FR_Y_lo = GR_temp // If X < 0, Y_lo = 2^-24100
1927 (p12) mov FR_Y_lo = f1 // IF X > 0, Y_lo = 1.0
1933 fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn // TMP1 = Y_hi * Sgn
1940 fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1 // TMP3 = Y_lo * (Y_hi * Sgn)
1941 br.cond.sptk POWL_64_SHARED
1946 // Here if delta = |y-1| < 2^-50
1948 // x**(1 + delta) = x * e (ln(x)*delta) = x ( 1 + ln(x) * delta)
1950 // Computation will be safe for 2^-16381 <= x < 2^16383
1953 mov GR_exp_ynear1_oflow = 0xffff + 16383
1954 fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0
1955 and GR_exp_x = GR_exp_mask, GR_signexp_x
1960 cmp.lt p15, p14 = GR_exp_x, GR_exp_ynear1_oflow
1961 fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo
1962 mov GR_exp_ynear1_uflow = 0xffff - 16381
1967 (p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_ynear1_uflow
1968 fma.s1 FR_TMP3 = FR_Input_X,f1,f0
1969 br.cond.sptk POWL_64_SHARED
1974 // Here if x not zero and y=2.
1976 // Setup for multipath code
1979 mov GR_exp_square_oflow = 0xffff + 8192 // Exponent where x*x overflows
1980 fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X
1981 and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
1986 cmp.lt p15, p14 = GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe
1987 fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X
1988 mov GR_exp_square_uflow = 0xffff - 8191 // Exponent where x*x underflows
1993 (p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe
1994 fma.s1 FR_TMP3 = f0,f0,f0
2000 // This is the shared path that will set overflow and underflow.
2005 // Return if no danger of over or underflow.
2009 fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3
2010 (p15) br.ret.sptk b0 // Main path return if certain no over/underflow
2015 // S0 user supplied status
2016 // S2 user supplied status + WRE + TD (Overflows)
2017 // S2 user supplied status + FZ + TD (Underflows)
2020 // If (Safe) is true, then
2021 // Compute result using user supplied status field.
2022 // No overflow or underflow here, but perhaps inexact.
2025 // Determine if overflow or underflow was raised.
2026 // Fetch +/- overflow threshold for IEEE double extended
2030 fsetc.s2 0x7F,0x41 // For underflow test, set S2=User+TD+FTZ
2037 fma.s2 FR_Result_small = FR_TMP1, FR_TMP2, FR_TMP3
2044 fsetc.s2 0x7F,0x42 // For overflow test, set S2=User+TD+WRE
2051 fma.s2 FR_Result_big = FR_TMP1, FR_TMP2,FR_TMP3
2058 fsetc.s2 0x7F,0x40 // Reset S2=User
2065 fclass.m p11, p0 = FR_Result_small, 0x00F // Test small result unorm/zero
2072 fcmp.ge.s1 p8, p0 = FR_Result_big , FR_Big // Test >= + oflow threshold
2078 (p11) mov GR_Parameter_TAG = 19 // Set tag for underflow
2079 fcmp.le.s1 p9, p0 = FR_Result_big, FR_NBig // Test <= - oflow threshold
2080 (p11) br.cond.spnt __libm_error_region // Branch if pow underflowed
2085 (p8) mov GR_Parameter_TAG = 18 // Set tag for overflow
2087 (p8) br.cond.spnt __libm_error_region // Branch if pow +overflow
2092 (p9) mov GR_Parameter_TAG = 18 // Set tag for overflow
2093 (p9) br.cond.spnt __libm_error_region // Branch if pow -overflow
2094 br.ret.sptk b0 // Branch if result really ok
2100 // Here if x or y is NatVal, nan, inf, or zero
2103 fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Test x=+1
2110 fclass.m p8, p0 = FR_Input_X, 0x143 // Test x natval, snan
2117 (p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
2122 (p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
2123 (p15) br.ret.spnt b0 // Exit if x=1
2129 fclass.m p6, p0 = FR_Input_Y, 0x007 // Test y zero
2136 fclass.m p9, p0 = FR_Input_Y, 0x143 // Test y natval, snan
2143 fclass.m p10, p0 = FR_Input_X, 0x083 // Test x qnan
2148 (p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If x=snan, result=qnan
2149 (p6) cmp.ne p8,p0 = r0,r0 // Don't exit if x=snan, y=0 ==> result=+1
2155 (p6) fclass.m.unc p15, p0 = FR_Input_X,0x007 // Test x=0, y=0
2160 (p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If y=snan, result=qnan
2161 (p8) br.ret.spnt b0 // Exit if x=snan, y not 0,
2168 fcmp.eq.s1 p7, p0 = FR_Input_Y, f1 // Test y +1.0
2173 (p10) fmpy.s0 FR_Result = FR_Input_X, f0 // If x=qnan, result=qnan
2174 (p9) br.ret.spnt b0 // Exit if y=snan, result=qnan
2180 (p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3 // Test x=nan, y=0
2187 (p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
2192 (p6) fadd.s0 FR_Result = f1, f0 // If y=0, result=1
2199 fclass.m p11, p0 = FR_Input_Y, 0x083 // Test y qnan
2203 (p15) mov GR_Parameter_TAG = 20 // Error tag for x=0, y=0
2204 (p7) fmpy.s0 FR_Result = FR_Input_X,f1 // If y=1, result=x
2205 (p15) br.cond.spnt __libm_error_region // Branch if x=0, y=0, result=1
2210 (p8) mov GR_Parameter_TAG = 23 // Error tag for x=nan, y=0
2211 fclass.m p14, p0 = FR_Input_Y, 0x023 // Test y inf
2212 (p8) br.cond.spnt __libm_error_region // Branch if x=snan, y=0,
2219 fclass.m p13, p0 = FR_Input_X, 0x023 // Test x inf
2220 (p6) br.ret.spnt b0 // Exit y=0, x not nan or 0,
2227 (p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0 // Test x not 0, y=inf
2228 (p7) br.ret.spnt b0 // Exit y=1, x not snan,
2235 (p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If x=qnan, y not snan,
2237 (p10) br.ret.spnt b0 // Exit x=qnan, y not snan,
2244 (p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If y=qnan, x not nan or 1,
2246 (p11) br.ret.spnt b0 // Exit y=qnan, x not nan or 1,
2253 (p14) br.cond.spnt POWL_64_Y_IS_INF // Branch if y=inf, x not 1 or nan
2254 (p13) br.cond.spnt POWL_64_X_IS_INF // Branch if x=inf, y not 1 or nan
2260 // Here if x=0, y not nan or 1 or inf or 0
2262 // There is logic starting here to determine if y is an integer when x = 0.
2263 // If 0 < |y| < 1 then clearly y is not an integer.
2264 // If |y| > 1, then the significand of y is shifted left by the size of
2265 // the exponent of y. This preserves the lsb of the integer part + the
2266 // fractional bits. The lsb of the integer can be tested to determine if
2267 // the integer is even or odd. The fractional bits can be tested. If zero,
2268 // then y is an integer.
2271 and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
2273 and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
2278 // Maybe y is < 1 already, so
2279 // can never be an integer.
2282 cmp.lt p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
2284 sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
2289 // Shift significand of y looking for nonzero bits
2290 // For y > 1, shift signif_y exp_y bits to the left
2291 // For y < 1, turn on 4 low order bits of significand of y
2292 // so that the fraction will always be non-zero
2295 (p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
2298 (p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
2299 // Wait 4 cycles to use result
2315 shl GR_fraction_y= GR_exp_y,1 // Shift left 1 to get fraction
2320 // Integer part of y shifted off.
2321 // Get y's low even or odd bit - y might not be an int.
2324 cmp.eq p13,p0 = GR_fraction_y, r0 // Test for y integer
2325 cmp.eq p8,p0 = GR_y_sign, r0 // Test for y > 0
2327 (p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test if y an odd integer
2332 (p13) cmp.eq.unc p13,p14 = GR_y_sign, r0 // Test y pos odd integer
2333 (p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
2339 // Return +/-0 when x=+/-0 and y is positive odd integer
2343 (p13) mov FR_Result = FR_Input_X // If x=0, y pos odd int, result=x
2344 (p13) br.ret.spnt b0 // Exit x=0, y pos odd int, result=x
2349 // Return +/-inf when x=+/-0 and y is negative odd int
2352 (p14) mov GR_Parameter_TAG = 21
2353 (p14) frcpa.s0 FR_Result, p0 = f1, FR_Input_X // Result +-inf, set Z flag
2354 (p14) br.cond.spnt __libm_error_region
2359 // Return +0 when x=+/-0 and y positive and not an odd integer
2363 (p8) mov FR_Result = f0 // If x=0, y>0 and not odd integer, result=+0
2364 (p8) br.ret.sptk b0 // Exit x=0, y>0 and not odd integer, result=+0
2369 // Return +inf when x=+/-0 and y is negative and not odd int
2372 mov GR_Parameter_TAG = 21
2373 frcpa.s0 FR_Result, p10 = f1,f0 // Result +inf, raise Z flag
2374 br.cond.sptk __libm_error_region
2381 // Here if x=inf, y not 1 or nan
2384 and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent y
2385 fclass.m p13, p0 = FR_Input_X,0x022 // Test x=-inf
2391 and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
2392 fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Dummy to set flag if y denorm
2398 // Maybe y is < 1 already, so
2402 (p13) cmp.lt.unc p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1 if x=-inf
2403 fclass.m p11, p0 = FR_Input_X,0x021 // Test x=+inf
2404 sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent y
2409 // Shift significand of y looking for nonzero bits
2410 // For y > 1, shift signif_y exp_y bits to the left
2411 // For y < 1, turn on 4 low order bits of significand of y
2412 // so that the fraction will always be non-zero
2415 (p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
2417 (p11) cmp.eq.unc p14,p12 = GR_y_sign, r0 // Test x=+inf, y>0
2418 (p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
2419 // Wait 4 cycles to use result
2424 // Return +inf for x=+inf, y > 0
2425 // Return +0 for x=+inf, y < 0
2429 (p12) mov FR_Result = f0 // If x=+inf, y<0, result=+0
2434 (p14) fma.s0 FR_Result = FR_Input_X,f1,f0 // If x=+inf, y>0, result=+inf
2435 (p11) br.ret.sptk b0 // Exit x=+inf
2440 // Here only if x=-inf. Wait until can use result of shl...
2451 cmp.eq p8,p9 = GR_y_sign, r0 // Test y pos
2453 shl GR_fraction_y = GR_exp_y,1 // Shift left 1 to get fraction
2458 cmp.eq p13,p0 = GR_fraction_y, r0 // Test y integer
2461 (p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test y odd integer
2466 // Is y even or odd?
2469 (p13) cmp.eq.unc p14,p10 = GR_y_sign, r0 // Test x=-inf, y pos odd int
2470 (p13) cmp.ne.and p8,p9 = r0,r0 // If y odd int, turn off p8,p9
2476 // Return -0 for x = -inf and y < 0 and odd int.
2477 // Return -Inf for x = -inf and y > 0 and odd int.
2481 (p10) fmerge.ns FR_Result = f0, f0 // If x=-inf, y neg odd int, result=-0
2486 (p14) fmpy.s0 FR_Result = FR_Input_X,f1 // If x=-inf, y pos odd int, result=-inf
2492 // Return Inf for x = -inf and y > 0 not an odd int.
2493 // Return +0 for x = -inf and y < 0 not an odd int.
2495 .pred.rel "mutex",p8,p9
2498 (p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X // If x=-inf, y>0 not odd int
2504 (p9) fmpy.s0 FR_Result = f0,f0 // If x=-inf, y<0 not odd int
2506 br.ret.sptk b0 // Exit for x=-inf
2512 // Here if y=inf, x not 1 or nan
2514 // For y = +Inf and |x| < 1 returns 0
2515 // For y = +Inf and |x| > 1 returns Inf
2516 // For y = -Inf and |x| < 1 returns Inf
2517 // For y = -Inf and |x| > 1 returns 0
2518 // For y = Inf and |x| = 1 returns 1
2522 fclass.m p8, p0 = FR_Input_Y, 0x021 // Test y=+inf
2529 fclass.m p9, p0 = FR_Input_Y, 0x022 // Test y=-inf
2536 fabs FR_X = FR_Input_X // Form |x|
2543 fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
2550 (p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1 // Test y=+inf, |x|<1
2557 (p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1 // Test y=+inf, |x|>1
2564 (p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1 // Test y=-inf, |x|<1
2569 (p6) fmpy.s0 FR_Result = f0,f0 // If y=+inf, |x|<1, result=+0
2576 (p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1 // Test y=-inf, |x|>1
2581 (p7) fmpy.s0 FR_Result = FR_Input_Y, f1 // If y=+inf, |x|>1, result=+inf
2588 fcmp.eq.s1 p14, p0 = FR_X, f1 // Test y=inf, |x|=1
2593 (p12) fnma.s0 FR_Result = FR_Input_Y, f1, f0 // If y=-inf, |x|<1, result=+inf
2600 (p13) mov FR_Result = f0 // If y=-inf, |x|>1, result=+0
2607 (p14) fmpy.s0 FR_Result = f1,f1 // If y=inf, |x|=1, result=+1
2608 br.ret.sptk b0 // Common return for y=inf
2613 // Here if x or y denorm/unorm
2616 getf.sig GR_signif_Z = FR_norm_X // Get significand of x
2618 getf.exp GR_signexp_y = FR_norm_Y // Get sign and exp of y
2624 getf.sig GR_signif_y = FR_norm_Y // Get significand of y
2631 getf.exp GR_signexp_x = FR_norm_X // Get sign and exp of x
2632 extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
2633 br.cond.sptk POWL_COMMON // Branch back to main path
2640 // Raise exceptions for specific
2641 // values - pseudo NaN and
2643 // Return NaN and raise invalid
2647 fmpy.s0 FR_Result = FR_Input_X,f0
2654 // Raise invalid for x < 0 and
2659 frcpa.s0 FR_Result, p8 = f0, f0
2660 mov GR_Parameter_TAG = 22
2665 br.cond.sptk __libm_error_region
2672 frsqrta.s0 FR_Result,p10 = FR_save_Input_X
2677 (p10) fma.s1 f62=FR_Half,FR_save_Input_X,f0
2682 (p10) fma.s1 f63=FR_Result,FR_Result,f0
2687 (p10) fnma.s1 f32=f63,f62,FR_Half
2692 (p10) fma.s1 f33=f32,FR_Result,FR_Result
2697 (p10) fma.s1 f34=f33,f62,f0
2702 (p10) fnma.s1 f35=f34,f33,FR_Half
2707 (p10) fma.s1 f63=f35,f33,f33
2712 (p10) fma.s1 f32=FR_save_Input_X,f63,f0
2717 (p10) fma.s1 FR_Result=f63,f62,f0
2722 (p10) fma.s1 f33=f11,f63,f0
2727 (p10) fnma.s1 f34=f32,f32,FR_save_Input_X
2732 (p10) fnma.s1 f35=FR_Result,f63,FR_Half
2737 (p10) fma.s1 f62=f33,f34,f32
2742 (p10) fma.s1 f63=f33,f35,f33
2747 (p10) fnma.s1 f32=f62,f62,FR_save_Input_X
2752 (p10) fma.s0 FR_Result=f32,f63,f62
2753 br.ret.sptk b0 // Exit for x > 0, y = 0.5
2757 GLOBAL_LIBM_END(powl)
2760 LOCAL_LIBM_ENTRY(__libm_error_region)
2763 add GR_Parameter_Y=-32,sp // Parameter 2 value
2765 .save ar.pfs,GR_SAVE_PFS
2766 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
2770 add sp=-64,sp // Create new stack
2772 mov GR_SAVE_GP=gp // Save gp
2775 stfe [GR_Parameter_Y] = FR_Input_Y,16 // Save Parameter 2 on stack
2776 add GR_Parameter_X = 16,sp // Parameter 1 address
2777 .save b0, GR_SAVE_B0
2778 mov GR_SAVE_B0=b0 // Save b0
2782 stfe [GR_Parameter_X] = FR_save_Input_X // Store Parameter 1 on stack
2783 add GR_Parameter_RESULT = 0,GR_Parameter_Y
2784 nop.b 0 // Parameter 3 address
2787 stfe [GR_Parameter_Y] = FR_Result // Store Parameter 3 on stack
2788 add GR_Parameter_Y = -16,GR_Parameter_Y
2789 br.call.sptk b0=__libm_error_support# // Call error handling function
2792 add GR_Parameter_RESULT = 48,sp
2797 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
2799 add sp = 64,sp // Restore stack pointer
2800 mov b0 = GR_SAVE_B0 // Restore return address
2803 mov gp = GR_SAVE_GP // Restore gp
2804 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
2805 br.ret.sptk b0 // Return
2808 LOCAL_LIBM_END(__libm_error_region#)
2809 .type __libm_error_support#,@function
2810 .global __libm_error_support#