4 // Copyright (c) 2002 - 2005, Intel Corporation
5 // All rights reserved.
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
15 // * Redistributions in binary form must reproduce the above copyright
16 // notice, this list of conditions and the following disclaimer in the
17 // documentation and/or other materials provided with the distribution.
19 // * The name of Intel Corporation may not be used to endorse or promote
20 // products derived from this software without specific prior written
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
25 // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
28 // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31 // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 // Intel Corporation is the author of this code,and requests that all
36 // problem reports or change requests be submitted to it directly at
37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
39 //*********************************************************************
42 // 03/28/02 Original version
43 // 05/20/02 Cleaned up namespace and sf0 syntax
44 // 08/21/02 Added support of SIGN(GAMMA(x)) calculation
45 // 09/26/02 Algorithm description improved
46 // 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
47 // 02/10/03 Reordered header: .section, .global, .proc, .align
48 // 03/31/05 Reformatted delimiters between data tables
50 //*********************************************************************
52 // Function: __libm_lgammal(long double x, int* signgam, int szsigngam)
53 // computes the principal value of the logarithm of the GAMMA function
54 // of x. Signum of GAMMA(x) is stored to memory starting at the address
55 // specified by the signgam.
57 //*********************************************************************
61 // Floating-Point Registers: f8 (Input and Return Value)
65 // General Purpose Registers:
66 // r2, r3, r8-r11, r14-r31
68 // r66-r69 (Used to pass arguments to error handling routine)
70 // Predicate Registers: p6-p15
72 //*********************************************************************
74 // IEEE Special Conditions:
76 // __libm_lgammal(+inf) = +inf
77 // __libm_lgammal(-inf) = QNaN
78 // __libm_lgammal(+/-0) = +inf
79 // __libm_lgammal(x<0, x - integer) = QNaN
80 // __libm_lgammal(SNaN) = QNaN
81 // __libm_lgammal(QNaN) = QNaN
83 //*********************************************************************
85 // ALGORITHM DESCRIPTION
87 // Below we suppose that there is log(z) function which takes an long
88 // double argument and returns result as a pair of long double numbers
89 // lnHi and lnLo (such that sum lnHi + lnLo provides ~80 correct bits
90 // of significand). Algorithm description for such log(z) function
92 // Also, it this algorithm description we use the following notational
94 // a) pair A = (Ahi, Alo) means number A represented as sum of Ahi and Alo
95 // b) C = A + B = (Ahi, Alo) + (Bhi, Blo) means multi-precision addition.
96 // The result would be C = (Chi, Clo). Notice, that Clo shouldn't be
98 // c) D = A*B = (Ahi, Alo)*(Bhi, Blo) = (Dhi, Dlo) multi-precisiion
101 // So, lgammal has the following computational paths:
103 // P = A1*|x| + A2*|x|^2 + ... + A22*|x|^22
104 // A1, A2, A3 represented as a sum of two double precision
105 // numbers and multi-precision computations are used for 3 higher
106 // terms of the polynomial. We get polynomial as a sum of two
107 // double extended numbers: P = (Phi, Plo)
109 // lgammal(x) = P - log(|x|) = (Phi, Plo) - (lnHi(|x|), lnLo(|x|))
111 // lgammal(x) = -P - log(|x|) - log(sin(Pi*x)/(Pi*x))
112 // P and log(|x|) are computed by the same way as in 1.1;
113 // - log(sin(Pi*x)/(Pi*x)) is approximated by a polynomial Plnsin.
114 // Plnsin:= fLnSin2*|x|^2 + fLnSin4*|x|^4 + ... + fLnSin36*|x|^36
115 // The first coefficient of Plnsin is represented as sum of two
116 // double precision numbers (fLnSin2, fLnSin2L). Multi-precision
117 // computations for higher two terms of Plnsin are used.
118 // So, the final result is reconstructed by the following formula
119 // lgammal(x) = (-(Phi, Plo) - (lnHi(|x|), lnLo(|x|))) -
120 // - (PlnsinHi,PlnsinLo)
122 // 2) 0.5 <= x < 0.75 -> t = x - 0.625
123 // -0.75 < x <= -0.5 -> t = x + 0.625
124 // 2.25 <= x < 4.0 -> t = x/2 - 1.5
125 // 4.0 <= x < 8.0 -> t = x/4 - 1.5
126 // -0.5 < x <= -0.40625 -> t = x + 0.5
127 // -2.6005859375 < x <= -2.5 -> t = x + 2.5
128 // 1.3125 <= x < 1.5625 -> t = x - LOC_MIN, where LOC_MIN is point in
129 // which lgammal has local minimum. Exact
130 // value can be found in the table below,
131 // approximate value is ~1.46
133 // lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
134 // P25(t) = A0 + A1*t + ... + A25*t^25 = (Phi, Plo) + t^4*P21(t),
136 // (Phi, Plo) is sum of four highest terms of the polynomial P25(t):
137 // (Phi, Plo) = ((A0, A0L) + (A1, A1L)*t) + t^2 *((A2, A2L) + (A3, A3L)*t),
138 // (Ai, AiL) - coefficients represented as pairs of DP numbers.
140 // P21(t) = (PolC(t)*t^8 + PolD(t))*t^8 + PolE(t),
142 // PolC(t) = C21*t^5 + C20*t^4 + ... + C16,
143 // C21 = A25, C20 = A24, ..., C16 = A20
145 // PolD(t) = D7*t^7 + D6*t^6 + ... + D0,
146 // D7 = A19, D6 = A18, ..., D0 = A12
148 // PolE(t) = E7*t^7 + E6*t^6 + ... + E0,
149 // E7 = A11, E6 = A10, ..., E0 = A4
151 // Cis and Dis are represented as double precision numbers,
152 // Eis are represented as double extended numbers.
154 // 3) 0.75 <= x < 1.3125 -> t = x - 1.0
155 // 1.5625 <= x < 2.25 -> t = x - 2.0
156 // lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
157 // P25(t) = A1*t + ... + A25*t^25, and computations are carried out
158 // by similar way as in the previous case
160 // 4) 10.0 < x <= Overflow Bound ("positive Sterling" range)
161 // lgammal(x) is approximated using Sterling's formula:
162 // lgammal(x) ~ ((x*(lnHi(x) - 1, lnLo(x))) - 0.5*(lnHi(x), lnLo(x))) +
163 // + ((Chi, Clo) + S(1/x))
165 // C = (Chi, Clo) - pair of double precision numbers representing constant
167 // S(1/x) = 1/x * (B2 + B4*(1/x)^2 + ... + B20*(1/x)^18), B2, ..., B20 are
168 // Bernulli numbers. S is computed in native precision and then added to
170 // lnHi(x) - 1 is computed in native precision and the multiprecision
171 // multiplication (x, 0) *(lnHi(x) - 1, lnLo(x)) is used.
173 // 5) -INF < x <= -2^63, any negative integer < 0
174 // All numbers in this range are integers -> error handler is called
176 // 6) -2^63 < x <= -0.75 ("negative Sterling" range), x is "far" from root,
177 // lgammal(-t) for positive t is approximated using the following formula:
178 // lgammal(-t) = -lgammal(t)-log(t)-log(|dT|)+log(sin(Pi*|dT|)/(Pi*|dT|))
179 // where dT = -t -round_to_nearest_integer(-t)
180 // Last item is approximated by the same polynomial as described in 1.2.
181 // We split the whole range into three subranges due to different ways of
182 // approximation of the first terms.
183 // 6.1) -2^63 < x < -6.0 ("negative Sterling" range)
184 // lgammal(t) is approximated exactly as in #4. The only difference that
185 // for -13.0 < x < -6.0 subrange instead of Bernulli numbers we use their
186 // minimax approximation on this range.
187 // log(t), log(|dT|) are approximated by the log routine mentioned above.
188 // 6.2) -6.0 < x <= -0.75, |x + 1|> 2^(-7)
189 // log(t), log(|dT|) are approximated by the log routine mentioned above,
190 // lgammal(t) is approximated by polynomials of the 25th degree similar
191 // to ones from #2. Arguments z of the polynomials are as follows
192 // a) 0.75 <= t < 1.0 - 2^(-7), z = 2*t - 1.5
193 // b) 1.0 - 2^(-7) < t < 2.0, z = t - 1.5
194 // c) 2.0 < t < 3.0, z = t/2 - 1.5
195 // d) 3.0 < t < 4.0, z = t/2 - 1.5. Notice, that range reduction is
196 // the same as in case c) but the set of coefficients is different
197 // e) 4.0 < t < 6.0, z = t/4 - 1.5
198 // 6.3) |x + 1| <= 2^(-7)
199 // log(1 + (x-1)) is approximated by Taylor series,
200 // log(sin(Pi*|dT|)/(Pi*|dT|)) is still approximated by polynomial but
201 // it has just 4th degree.
202 // log(|dT|) is approximated by the log routine mentioned above.
203 // lgammal(-x) is approximated by polynomial of 8th degree from (-x + 1).
205 // 7) -20.0 < x < -2.0, x falls in root "neighbourhood".
206 // "Neighbourhood" means that |lgammal(x)| < epsilon, where epsilon is
207 // different for every root (and it is stored in the table), but typically
208 // it is ~ 0.15. There are 35 roots significant from "double extended"
209 // point of view. We split all the roots into two subsets: "left" and "right"
210 // roots. Considering [-(N+1), -N] range we call root as "left" one if it
211 // lies closer to -(N+1) and "right" otherwise. There is no "left" root in
212 // the [-20, -19] range (it exists, but is insignificant for double extended
213 // precision). To determine if x falls in root "neighbourhood" we store
214 // significands of all the 35 roots as well as epsilon values (expressed
215 // by the left and right bound).
216 // In these ranges we approximate lgammal(x) by polynomial series of 19th
218 // lgammal(x) = P19(t) = A0 + A1*t + ...+ A19*t^19, where t = x - EDP_Root,
219 // EDP_Root is the exact value of the corresponding root rounded to double
220 // extended precision. So, we have 35 different polynomials which make our
221 // table rather big. We may hope that x falls in root "neighbourhood"
222 // quite rarely -> ther might be no need in frequent use of different
224 // A0, A1, A2, A3 are represented as pairs of double precision numbers,
225 // A4, A5 are long doubles, and to decrease the size of the table we
226 // keep the rest of coefficients in just double precision
228 //*********************************************************************
229 // Algorithm for log(X) = (lnHi(X), lnLo(X))
233 // Here we use a table lookup method. The basic idea is that in
234 // order to compute logl(Arg) for an argument Arg in [1,2), we
235 // construct a value G such that G*Arg is close to 1 and that
236 // logl(1/G) is obtainable easily from a table of values calculated
239 // logl(Arg) = logl(1/G) + logl(G*Arg)
240 // = logl(1/G) + logl(1 + (G*Arg - 1))
242 // Because |G*Arg - 1| is small, the second term on the right hand
243 // side can be approximated by a short polynomial. We elaborate
244 // this method in four steps.
246 // Step 0: Initialization
248 // We need to calculate logl( X ). Obtain N, S_hi such that
250 // X = 2^N * S_hi exactly
252 // where S_hi in [1,2)
254 // Step 1: Argument Reduction
256 // Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
258 // G := G_1 * G_2 * G_3
259 // r := (G * S_hi - 1)
261 // These G_j's have the property that the product is exactly
262 // representable and that |r| < 2^(-12) as a result.
264 // Step 2: Approximation
267 // logl(1 + r) is approximated by a short polynomial poly(r).
269 // Step 3: Reconstruction
272 // Finally, logl( X ) is given by
274 // logl( X ) = logl( 2^N * S_hi )
275 // ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
276 // ~=~ N*logl(2) + logl(1/G) + poly(r).
280 // Step 0. Initialization
281 // ----------------------
284 // N := unbaised exponent of Z
285 // S_hi := 2^(-N) * Z
287 // Step 1. Argument Reduction
288 // --------------------------
292 // Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
294 // We obtain G_1, G_2, G_3 by the following steps.
297 // Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
300 // Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
303 // Define index_1 := [ d_1 d_2 d_3 d_4 ].
305 // Fetch Z_1 := (1/A_1) rounded UP in fixed point with
306 // fixed point lsb = 2^(-15).
307 // Z_1 looks like z_0.z_1 z_2 ... z_15
308 // Note that the fetching is done using index_1.
309 // A_1 is actually not needed in the implementation
310 // and is used here only to explain how is the value
313 // Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
314 // floating pt. Again, fetching is done using index_1. A_1
315 // explains how G_1 is defined.
317 // Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
318 // = 1.0 0 0 0 d_5 ... d_14
319 // This is accomplished by integer multiplication.
320 // It is proved that X_1 indeed always begin
321 // with 1.0000 in fixed point.
324 // Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
325 // truncated to lsb = 2^(-8). Similar to A_1,
326 // A_2 is not needed in actual implementation. It
327 // helps explain how some of the values are defined.
329 // Define index_2 := [ d_5 d_6 d_7 d_8 ].
331 // Fetch Z_2 := (1/A_2) rounded UP in fixed point with
332 // fixed point lsb = 2^(-15). Fetch done using index_2.
333 // Z_2 looks like z_0.z_1 z_2 ... z_15
335 // Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
338 // Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
339 // = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
340 // This is accomplished by integer multiplication.
341 // It is proved that X_2 indeed always begin
342 // with 1.00000000 in fixed point.
345 // Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
346 // This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
348 // Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
350 // Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
351 // floating pt. Fetch is done using index_3.
353 // Compute G := G_1 * G_2 * G_3.
355 // This is done exactly since each of G_j only has 21 sig. bits.
362 // Step 2. Approximation
363 // ---------------------
365 // This step computes an approximation to logl( 1 + r ) where r is the
366 // reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
367 // thus logl(1+r) can be approximated by a short polynomial:
369 // logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
372 // Step 3. Reconstruction
373 // ----------------------
375 // This step computes the desired result of logl(X):
377 // logl(X) = logl( 2^N * S_hi )
378 // = N*logl(2) + logl( S_hi )
379 // = N*logl(2) + logl(1/G) +
380 // logl(1 + G*S_hi - 1 )
382 // logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
383 // log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
384 // single-precision numbers and the low parts are double precision
385 // numbers. These have the property that
387 // N*log2_hi + SUM ( log1byGj_hi )
389 // is computable exactly in double-extended precision (64 sig. bits).
392 // lnHi(X) := N*log2_hi + SUM ( log1byGj_hi )
393 // lnLo(X) := poly_hi + [ poly_lo +
394 // ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
397 //*********************************************************************
398 // General Purpose Registers
439 rNegSingularity = r44
463 // output parameters when calling error handling routine
466 GR_Parameter_RESULT = r68
467 GR_Parameter_TAG = r69
469 //********************************************************************
470 // Floating Point Registers
471 // CAUTION: due to the lack of registers there exist (below in the code)
472 // sometimes "unconventional" use of declared registers
477 // macros for error handling routine
478 FR_X = f10 // first argument
479 FR_Y = f1 // second argument (lgammal has just one)
480 FR_RESULT = f8 // result
482 // First 7 Bernulli numbers
499 // Polynomial coefficients: A0, ..., A25
568 // Coefficients of ln(sin(Pi*x)/Pi*x)
633 // Last three Bernulli numbers
645 //==============================================================
647 // ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
649 LOCAL_OBJECT_START(lgammal_right_roots_data)
650 // List of all right roots themselves
651 data8 0x9D3FE4B007C360AB, 0x0000C000 // Range [-3, -2]
652 data8 0xC9306DE4F2CD7BEE, 0x0000C000 // Range [-4, -3]
653 data8 0x814273C2CCAC0618, 0x0000C001 // Range [-5, -4]
654 data8 0xA04352BF85B6C865, 0x0000C001 // Range [-6, -5]
655 data8 0xC00B592C4BE4676C, 0x0000C001 // Range [-7, -6]
656 data8 0xE0019FEF6FF0F5BF, 0x0000C001 // Range [-8, -7]
657 data8 0x80001A01459FC9F6, 0x0000C002 // Range [-9, -8]
658 data8 0x900002E3BB47D86D, 0x0000C002 // Range [-10, -9]
659 data8 0xA0000049F93BB992, 0x0000C002 // Range [-11, -10]
660 data8 0xB0000006B9915316, 0x0000C002 // Range [-12, -11]
661 data8 0xC00000008F76C773, 0x0000C002 // Range [-13, -12]
662 data8 0xD00000000B09230A, 0x0000C002 // Range [-14, -13]
663 data8 0xE000000000C9CBA5, 0x0000C002 // Range [-15, -14]
664 data8 0xF0000000000D73FA, 0x0000C002 // Range [-16, -15]
665 data8 0x8000000000006BA0, 0x0000C003 // Range [-17, -16]
666 data8 0x8800000000000655, 0x0000C003 // Range [-18, -17]
667 data8 0x900000000000005A, 0x0000C003 // Range [-19, -18]
668 data8 0x9800000000000005, 0x0000C003 // Range [-20, -19]
669 // List of bounds of ranges with special polynomial approximation near root
670 // Only significands of bounds are actually stored
671 data8 0xA000000000000000, 0x9800000000000000 // Bounds for root on [-3, -2]
672 data8 0xCAB88035C5EFBB41, 0xC7E05E31F4B02115 // Bounds for root on [-4, -3]
673 data8 0x817831B899735C72, 0x8114633941B8053A // Bounds for root on [-5, -4]
674 data8 0xA04E8B34C6AA9476, 0xA039B4A42978197B // Bounds for root on [-6, -5]
675 data8 0xC00D3D5E588A78A9, 0xC009BA25F7E858A6 // Bounds for root on [-7, -6]
676 data8 0xE001E54202991EB4, 0xE001648416CE897F // Bounds for root on [-8, -7]
677 data8 0x80001E56D13A6B9F, 0x8000164A3BAD888A // Bounds for root on [-9, -8]
678 data8 0x9000035F0529272A, 0x9000027A0E3D94F0 // Bounds for root on [-10, -9]
679 data8 0xA00000564D705880, 0xA000003F67EA0CC7 // Bounds for root on [-11, -10]
680 data8 0xB0000007D87EE0EF, 0xB0000005C3A122A5 // Bounds for root on [-12, -11]
681 data8 0xC0000000A75FE8B1, 0xC00000007AF818AC // Bounds for root on [-13, -12]
682 data8 0xD00000000CDFFE36, 0xD000000009758BBF // Bounds for root on [-14, -13]
683 data8 0xE000000000EB6D96, 0xE000000000ACF7B2 // Bounds for root on [-15, -14]
684 data8 0xF0000000000FB1F9, 0xF0000000000B87FB // Bounds for root on [-16, -15]
685 data8 0x8000000000007D90, 0x8000000000005C40 // Bounds for root on [-17, -16]
686 data8 0x8800000000000763, 0x880000000000056D // Bounds for root on [-18, -17]
687 data8 0x9000000000000069, 0x900000000000004D // Bounds for root on [-19, -18]
688 data8 0x9800000000000006, 0x9800000000000005 // Bounds for root on [-20, -19]
689 // List of all left roots themselves
690 data8 0xAFDA0850DEC8065E, 0x0000C000 // Range [-3, -2]
691 data8 0xFD238AA3E17F285C, 0x0000C000 // Range [-4, -3]
692 data8 0x9FBABBD37757E6A2, 0x0000C001 // Range [-5, -4]
693 data8 0xBFF497AC8FA06AFC, 0x0000C001 // Range [-6, -5]
694 data8 0xDFFE5FBB5C377FE8, 0x0000C001 // Range [-7, -6]
695 data8 0xFFFFCBFC0ACE7879, 0x0000C001 // Range [-8, -7]
696 data8 0x8FFFFD1C425E8100, 0x0000C002 // Range [-9, -8]
697 data8 0x9FFFFFB606BDFDCD, 0x0000C002 // Range [-10, -9]
698 data8 0xAFFFFFF9466E9F1B, 0x0000C002 // Range [-11, -10]
699 data8 0xBFFFFFFF70893874, 0x0000C002 // Range [-12, -11]
700 data8 0xCFFFFFFFF4F6DCF6, 0x0000C002 // Range [-13, -12]
701 data8 0xDFFFFFFFFF36345B, 0x0000C002 // Range [-14, -13]
702 data8 0xEFFFFFFFFFF28C06, 0x0000C002 // Range [-15, -14]
703 data8 0xFFFFFFFFFFFF28C0, 0x0000C002 // Range [-16, -15]
704 data8 0x87FFFFFFFFFFF9AB, 0x0000C003 // Range [-17, -16]
705 data8 0x8FFFFFFFFFFFFFA6, 0x0000C003 // Range [-18, -17]
706 data8 0x97FFFFFFFFFFFFFB, 0x0000C003 // Range [-19, -18]
707 data8 0x0000000000000000, 0x00000000 // pad to keep logic in the main path
708 // List of bounds of ranges with special polynomial approximation near root
709 // Only significands of bounds are actually stored
710 data8 0xB235880944CC758E, 0xADD2F1A9FBE76C8B // Bounds for root on [-3, -2]
711 data8 0xFD8E7844F307B07C, 0xFCA655C2152BDE4D // Bounds for root on [-4, -3]
712 data8 0x9FC4D876EE546967, 0x9FAEE4AF68BC4292 // Bounds for root on [-5, -4]
713 data8 0xBFF641FFBFCC44F1, 0xBFF2A47919F4BA89 // Bounds for root on [-6, -5]
714 data8 0xDFFE9C803DEFDD59, 0xDFFE18932EB723FE // Bounds for root on [-7, -6]
715 data8 0xFFFFD393FA47AFC3, 0xFFFFC317CF638AE1 // Bounds for root on [-8, -7]
716 data8 0x8FFFFD8840279925, 0x8FFFFC9DCECEEE92 // Bounds for root on [-9, -8]
717 data8 0x9FFFFFC0D34E2AF8, 0x9FFFFFA9619AA3B7 // Bounds for root on [-10, -9]
718 data8 0xAFFFFFFA41C18246, 0xAFFFFFF82025A23C // Bounds for root on [-11, -10]
719 data8 0xBFFFFFFF857ACB4E, 0xBFFFFFFF58032378 // Bounds for root on [-12, -11]
720 data8 0xCFFFFFFFF6934AB8, 0xCFFFFFFFF313EF0A // Bounds for root on [-13, -12]
721 data8 0xDFFFFFFFFF53A9E9, 0xDFFFFFFFFF13B5A5 // Bounds for root on [-14, -13]
722 data8 0xEFFFFFFFFFF482CB, 0xEFFFFFFFFFF03F4F // Bounds for root on [-15, -14]
723 data8 0xFFFFFFFFFFFF482D, 0xFFFFFFFFFFFF03F5 // Bounds for root on [-16, -15]
724 data8 0x87FFFFFFFFFFFA98, 0x87FFFFFFFFFFF896 // Bounds for root on [-17, -16]
725 data8 0x8FFFFFFFFFFFFFB3, 0x8FFFFFFFFFFFFF97 // Bounds for root on [-18, -17]
726 data8 0x97FFFFFFFFFFFFFC, 0x97FFFFFFFFFFFFFB // Bounds for root on [-19, -18]
727 LOCAL_OBJECT_END(lgammal_right_roots_data)
729 LOCAL_OBJECT_START(lgammal_0_Half_data)
730 // Polynomial coefficients for the lgammal(x), 0.0 < |x| < 0.5
731 data8 0xBFD9A4D55BEAB2D6, 0xBC8AA3C097746D1F //A3
732 data8 0x3FEA51A6625307D3, 0x3C7180E7BD2D0DCC //A2
733 data8 0xBFE2788CFC6FB618, 0xBC9E9346C4692BCC //A1
734 data8 0x8A8991563EC1BD13, 0x00003FFD //A4
735 data8 0xD45CE0BD52C27EF2, 0x0000BFFC //A5
736 data8 0xADA06587FA2BBD47, 0x00003FFC //A6
737 data8 0x9381D0ED2194902A, 0x0000BFFC //A7
738 data8 0x80859B3CF92D4192, 0x00003FFC //A8
739 data8 0xE4033517C622A946, 0x0000BFFB //A9
740 data8 0xCD00CE67A51FC82A, 0x00003FFB //A10
741 data8 0xBA44E2A96C3B5700, 0x0000BFFB //A11
742 data8 0xAAAD008FA46DBD99, 0x00003FFB //A12
743 data8 0x9D604AC65A41153D, 0x0000BFFB //A13
744 data8 0x917CECB864B5A861, 0x00003FFB //A14
745 data8 0x85A4810EB730FDE4, 0x0000BFFB //A15
746 data8 0xEF2761C38BD21F77, 0x00003FFA //A16
747 data8 0xC913043A128367DA, 0x0000BFFA //A17
748 data8 0x96A29B71FF7AFFAA, 0x00003FFA //A18
749 data8 0xBB9FFA1A5FE649BB, 0x0000BFF9 //A19
750 data8 0xB17982CD2DAA0EE3, 0x00003FF8 //A20
751 data8 0xDE1DDCBFFB9453F0, 0x0000BFF6 //A21
752 data8 0x87FBF5D7ACD9FA9D, 0x00003FF4 //A22
753 LOCAL_OBJECT_END(lgammal_0_Half_data)
755 LOCAL_OBJECT_START(Constants_Q)
756 // log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
757 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
758 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
759 data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
760 data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
761 data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
762 data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
763 LOCAL_OBJECT_END(Constants_Q)
765 LOCAL_OBJECT_START(Constants_Z_1)
783 LOCAL_OBJECT_END(Constants_Z_1)
785 LOCAL_OBJECT_START(Constants_G_H_h1)
786 // G1 and H1 - IEEE single and h1 - IEEE double
787 data4 0x3F800000,0x00000000,0x00000000,0x00000000
788 data4 0x3F70F0F0,0x3D785196,0x617D741C,0x3DA163A6
789 data4 0x3F638E38,0x3DF13843,0xCBD3D5BB,0x3E2C55E6
790 data4 0x3F579430,0x3E2FF9A0,0xD86EA5E7,0xBE3EB0BF
791 data4 0x3F4CCCC8,0x3E647FD6,0x86B12760,0x3E2E6A8C
792 data4 0x3F430C30,0x3E8B3AE7,0x5C0739BA,0x3E47574C
793 data4 0x3F3A2E88,0x3EA30C68,0x13E8AF2F,0x3E20E30F
794 data4 0x3F321640,0x3EB9CEC8,0xF2C630BD,0xBE42885B
795 data4 0x3F2AAAA8,0x3ECF9927,0x97E577C6,0x3E497F34
796 data4 0x3F23D708,0x3EE47FC5,0xA6B0A5AB,0x3E3E6A6E
797 data4 0x3F1D89D8,0x3EF8947D,0xD328D9BE,0xBDF43E3C
798 data4 0x3F17B420,0x3F05F3A1,0x0ADB090A,0x3E4094C3
799 data4 0x3F124920,0x3F0F4303,0xFC1FE510,0xBE28FBB2
800 data4 0x3F0D3DC8,0x3F183EBF,0x10FDE3FA,0x3E3A7895
801 data4 0x3F088888,0x3F20EC80,0x7CC8C98F,0x3E508CE5
802 data4 0x3F042108,0x3F29516A,0xA223106C,0xBE534874
803 LOCAL_OBJECT_END(Constants_G_H_h1)
805 LOCAL_OBJECT_START(Constants_Z_2)
823 LOCAL_OBJECT_END(Constants_Z_2)
825 LOCAL_OBJECT_START(Constants_G_H_h2)
826 // G2 and H2 - IEEE single and h2 - IEEE double
827 data4 0x3F800000,0x00000000,0x00000000,0x00000000
828 data4 0x3F7F00F8,0x3B7F875D,0x22C42273,0x3DB5A116
829 data4 0x3F7E03F8,0x3BFF015B,0x21F86ED3,0x3DE620CF
830 data4 0x3F7D08E0,0x3C3EE393,0x484F34ED,0xBDAFA07E
831 data4 0x3F7C0FC0,0x3C7E0586,0x3860BCF6,0xBDFE07F0
832 data4 0x3F7B1880,0x3C9E75D2,0xA78093D6,0x3DEA370F
833 data4 0x3F7A2328,0x3CBDC97A,0x72A753D0,0x3DFF5791
834 data4 0x3F792FB0,0x3CDCFE47,0xA7EF896B,0x3DFEBE6C
835 data4 0x3F783E08,0x3CFC15D0,0x409ECB43,0x3E0CF156
836 data4 0x3F774E38,0x3D0D874D,0xFFEF71DF,0xBE0B6F97
837 data4 0x3F766038,0x3D1CF49B,0x5D59EEE8,0xBE080483
838 data4 0x3F757400,0x3D2C531D,0xA9192A74,0x3E1F91E9
839 data4 0x3F748988,0x3D3BA322,0xBF72A8CD,0xBE139A06
840 data4 0x3F73A0D0,0x3D4AE46F,0xF8FBA6CF,0x3E1D9202
841 data4 0x3F72B9D0,0x3D5A1756,0xBA796223,0xBE1DCCC4
842 data4 0x3F71D488,0x3D693B9D,0xB6B7C239,0xBE049391
843 LOCAL_OBJECT_END(Constants_G_H_h2)
845 LOCAL_OBJECT_START(Constants_G_H_h3)
846 // G3 and H3 - IEEE single and h3 - IEEE double
847 data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
848 data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
849 data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
850 data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
851 data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
852 data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
853 data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
854 data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
855 data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
856 data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
857 data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
858 data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
859 data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
860 data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
861 data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
862 data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
863 data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
864 data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
865 data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
866 data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
867 data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
868 data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
869 data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
870 data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
871 data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
872 data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
873 data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
874 data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
875 data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
876 data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
877 data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
878 data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
879 LOCAL_OBJECT_END(Constants_G_H_h3)
881 LOCAL_OBJECT_START(lgammal_data)
882 // Positive overflow value
883 data8 0xB8D54C8BFFFDEBF4, 0x00007FF1
884 LOCAL_OBJECT_END(lgammal_data)
886 LOCAL_OBJECT_START(lgammal_Stirling)
887 // Coefficients needed for Strirling's formula
888 data8 0x3FED67F1C864BEB4 // High part of 0.5*ln(2*Pi)
889 data8 0x3C94D252F2400510 // Low part of 0.5*ln(2*Pi)
891 // Bernulli numbers used in Striling's formula for -2^63 < |x| < -13.0
892 //(B1H, B1L) = 8.3333333333333333333262747254e-02
893 data8 0x3FB5555555555555, 0x3C55555555555555
894 data8 0xB60B60B60B60B60B, 0x0000BFF6 //B2 = -2.7777777777777777777777777778e-03
895 data8 0xD00D00D00D00D00D, 0x00003FF4 //B3 = 7.9365079365079365079365079365e-04
896 data8 0x9C09C09C09C09C0A, 0x0000BFF4 //B4 = -5.9523809523809523809523809524e-04
897 data8 0xDCA8F158C7F91AB8, 0x00003FF4 //B5 = 8.4175084175084175084175084175e-04
898 data8 0xFB5586CCC9E3E410, 0x0000BFF5 //B6 = -1.9175269175269175269175269175e-03
899 data8 0xD20D20D20D20D20D, 0x00003FF7 //B7 = 6.4102564102564102564102564103e-03
900 data8 0xF21436587A9CBEE1, 0x0000BFF9 //B8 = -2.9550653594771241830065359477e-02
901 data8 0xB7F4B1C0F033FFD1, 0x00003FFC //B9 = 1.7964437236883057316493849002e-01
902 data8 0xB23B3808C0F9CF6E, 0x0000BFFF //B10 = -1.3924322169059011164274322169e+00
903 // Polynomial coefficients for Stirling's formula, -13.0 < x < -6.0
904 data8 0x3FB5555555555555, 0x3C4D75060289C58B //A0
905 data8 0xB60B60B60B0F0876, 0x0000BFF6 //A1
906 data8 0xD00D00CE54B1256C, 0x00003FF4 //A2
907 data8 0x9C09BF46B58F75E1, 0x0000BFF4 //A3
908 data8 0xDCA8483BC91ACC6D, 0x00003FF4 //A4
909 data8 0xFB3965C939CC9FEE, 0x0000BFF5 //A5
910 data8 0xD0723ADE3F0BC401, 0x00003FF7 //A6
911 data8 0xE1ED7434E81F0B73, 0x0000BFF9 //A7
912 data8 0x8069C6982F993283, 0x00003FFC //A8
913 data8 0xC271F65BFA5BEE3F, 0x0000BFFD //A9
914 LOCAL_OBJECT_END(lgammal_Stirling)
916 LOCAL_OBJECT_START(lgammal_lnsin_data)
917 // polynomial approximation of -ln(sin(Pi*x)/(Pi*x)), 0 < x <= 0.5
918 data8 0x3FFA51A6625307D3, 0x3C81873332FAF94C //A2
919 data8 0x8A8991563EC241C3, 0x00003FFE //A4
920 data8 0xADA06588061805DF, 0x00003FFD //A6
921 data8 0x80859B57C338D0F7, 0x00003FFD //A8
922 data8 0xCD00F1C2D78754BD, 0x00003FFC //A10
923 data8 0xAAB56B1D3A1F4655, 0x00003FFC //A12
924 data8 0x924B6F2FBBED12B1, 0x00003FFC //A14
925 data8 0x80008E58765F43FC, 0x00003FFC //A16
926 data8 0x3FBC718EC115E429//A18
927 data8 0x3FB99CE544FE183E//A20
928 data8 0x3FB7251C09EAAD89//A22
929 data8 0x3FB64A970733628C//A24
930 data8 0x3FAC92D6802A3498//A26
931 data8 0x3FC47E1165261586//A28
932 data8 0xBFCA1BAA434750D4//A30
933 data8 0x3FE460001C4D5961//A32
934 data8 0xBFE6F06A3E4908AD//A34
935 data8 0x3FE300889EBB203A//A36
936 LOCAL_OBJECT_END(lgammal_lnsin_data)
938 LOCAL_OBJECT_START(lgammal_half_3Q_data)
939 // Polynomial coefficients for the lgammal(x), 0.5 <= x < 0.75
940 data8 0xBFF7A648EE90C62E, 0x3C713F326857E066 // A3, A0L
941 data8 0xBFF73E4B8BA780AE, 0xBCA953BC788877EF // A1, A1L
942 data8 0x403774DCD58D0291, 0xC0415254D5AE6623 // D0, D1
943 data8 0x40B07213855CBFB0, 0xC0B8855E25D2D229 // C20, C21
944 data8 0x3FFB359F85FF5000, 0x3C9BAECE6EF9EF3A // A2, A2L
945 data8 0x3FD717D498A3A8CC, 0xBC9088E101CFEDFA // A0, A3L
946 data8 0xAFEF36CC5AEC3FF0, 0x00004002 // E6
947 data8 0xABE2054E1C34E791, 0x00004001 // E4
948 data8 0xB39343637B2900D1, 0x00004000 // E2
949 data8 0xD74FB710D53F58F6, 0x00003FFF // E0
950 data8 0x4070655963BA4256, 0xC078DA9D263C4EA3 // D6, D7
951 data8 0x405CD2B6A9B90978, 0xC065B3B9F4F4F171 // D4, D5
952 data8 0x4049BC2204CF61FF, 0xC05337227E0BA152 // D2, D3
953 data8 0x4095509A50C07A96, 0xC0A0747949D2FB45 // C18, C19
954 data8 0x4082ECCBAD709414, 0xC08CD02FB088A702 // C16, C17
955 data8 0xFFE4B2A61B508DD5, 0x0000C002 // E7
956 data8 0xF461ADB8AE17E0A5, 0x0000C001 // E5
957 data8 0xF5BE8B0B90325F20, 0x0000C000 // E3
958 data8 0x877B275F3FB78DCA, 0x0000C000 // E1
959 LOCAL_OBJECT_END(lgammal_half_3Q_data)
961 LOCAL_OBJECT_START(lgammal_half_3Q_neg_data)
962 // Polynomial coefficients for the lgammal(x), -0.75 < x <= -0.5
963 data8 0xC014836EFD94899C, 0x3C9835679663B44F // A3, A0L
964 data8 0xBFF276C7B4FB1875, 0xBC92D3D9FA29A1C0 // A1, A1L
965 data8 0x40C5178F24E1A435, 0xC0D9DE84FBC5D76A // D0, D1
966 data8 0x41D4D1B236BF6E93, 0xC1EBB0445CE58550 // C20, C21
967 data8 0x4015718CD67F63D3, 0x3CC5354B6F04B59C // A2, A2L
968 data8 0x3FF554493087E1ED, 0xBCB72715E37B02B9 // A0, A3L
969 data8 0xE4AC7E915FA72229, 0x00004009 // E6
970 data8 0xA28244206395FCC6, 0x00004007 // E4
971 data8 0xFB045F19C07B2544, 0x00004004 // E2
972 data8 0xE5C8A6E6A9BA7D7B, 0x00004002 // E0
973 data8 0x4143943B55BF5118, 0xC158AC05EA675406 // D6, D7
974 data8 0x4118F6833D19717C, 0xC12F51A6F375CC80 // D4, D5
975 data8 0x40F00C209483481C, 0xC103F1DABF750259 // D2, D3
976 data8 0x4191038F2D8F9E40, 0xC1A413066DA8AE4A // C18, C19
977 data8 0x4170B537EDD833DE, 0xC1857E79424C61CE // C16, C17
978 data8 0x8941D8AB4855DB73, 0x0000C00B // E7
979 data8 0xBB822B131BD2E813, 0x0000C008 // E5
980 data8 0x852B4C03B83D2D4F, 0x0000C006 // E3
981 data8 0xC754CA7E2DDC0F1F, 0x0000C003 // E1
982 LOCAL_OBJECT_END(lgammal_half_3Q_neg_data)
984 LOCAL_OBJECT_START(lgammal_2Q_4_data)
985 // Polynomial coefficients for the lgammal(x), 2.25 <= |x| < 4.0
986 data8 0xBFCA4D55BEAB2D6F, 0x3C7ABC9DA14141F5 // A3, A0L
987 data8 0x3FFD8773039049E7, 0x3C66CB7957A95BA4 // A1, A1L
988 data8 0x3F45C3CC79E91E7D, 0xBF3A8E5005937E97 // D0, D1
989 data8 0x3EC951E35E1C9203, 0xBEB030A90026C5DF // C20, C21
990 data8 0x3FE94699894C1F4C, 0x3C91884D21D123F1 // A2, A2L
991 data8 0x3FE62E42FEFA39EF, 0xBC66480CEB70870F // A0, A3L
992 data8 0xF1C2EAFF0B3A7579, 0x00003FF5 // E6
993 data8 0xB36AF863926B55A3, 0x00003FF7 // E4
994 data8 0x9620656185BB44CA, 0x00003FF9 // E2
995 data8 0xA264558FB0906AFF, 0x00003FFB // E0
996 data8 0x3F03D59E9666C961, 0xBEF91115893D84A6 // D6, D7
997 data8 0x3F19333611C46225, 0xBF0F89EB7D029870 // D4, D5
998 data8 0x3F3055A96B347AFE, 0xBF243B5153E178A8 // D2, D3
999 data8 0x3ED9A4AEF30C4BB2, 0xBED388138B1CEFF2 // C18, C19
1000 data8 0x3EEF7945A3C3A254, 0xBEE36F32A938EF11 // C16, C17
1001 data8 0x9028923F47C82118, 0x0000BFF5 // E7
1002 data8 0xCE0DAAFB6DC93B22, 0x0000BFF6 // E5
1003 data8 0xA0D0983B34AC4C8D, 0x0000BFF8 // E3
1004 data8 0x94D6C50FEB8B0CE7, 0x0000BFFA // E1
1005 LOCAL_OBJECT_END(lgammal_2Q_4_data)
1007 LOCAL_OBJECT_START(lgammal_4_8_data)
1008 // Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 8.0
1009 data8 0xBFD6626BC9B31B54, 0x3CAA53C82493A92B // A3, A0L
1010 data8 0x401B4C420A50AD7C, 0x3C8C6E9929F789A3 // A1, A1L
1011 data8 0x3F49410427E928C2, 0xBF3E312678F8C146 // D0, D1
1012 data8 0x3ED51065F7CD5848, 0xBED052782A03312F // C20, C21
1013 data8 0x3FF735973273D5EC, 0x3C831DFC65BF8CCF // A2, A2L
1014 data8 0x401326643C4479C9, 0xBC6FA0498C5548A6 // A0, A3L
1015 data8 0x9382D8B3CD4EB7E3, 0x00003FF6 // E6
1016 data8 0xE9F92CAD8A85CBCD, 0x00003FF7 // E4
1017 data8 0xD58389FE38258CEC, 0x00003FF9 // E2
1018 data8 0x81310136363AE8AA, 0x00003FFC // E0
1019 data8 0x3F04F0AE38E78570, 0xBEF9E2144BB8F03C // D6, D7
1020 data8 0x3F1B5E992A6CBC2A, 0xBF10F3F400113911 // D4, D5
1021 data8 0x3F323EE00AAB7DEE, 0xBF2640FDFA9FB637 // D2, D3
1022 data8 0x3ED2143EBAFF067A, 0xBEBBDEB92D6FF35D // C18, C19
1023 data8 0x3EF173A42B69AAA4, 0xBEE78B9951A2EAA5 // C16, C17
1024 data8 0xAB3CCAC6344E52AA, 0x0000BFF5 // E7
1025 data8 0x81ACCB8915B16508, 0x0000BFF7 // E5
1026 data8 0xDA62C7221102C426, 0x0000BFF8 // E3
1027 data8 0xDF1BD44C4083580A, 0x0000BFFA // E1
1028 LOCAL_OBJECT_END(lgammal_4_8_data)
1030 LOCAL_OBJECT_START(lgammal_loc_min_data)
1031 // Polynomial coefficients for the lgammal(x), 1.3125 <= x < 1.5625
1032 data8 0xBB16C31AB5F1FB71, 0x00003FFF // xMin - point of local minimum
1033 data8 0xBFC2E4278DC6BC23, 0xBC683DA8DDCA9650 // A3, A0L
1034 data8 0x3BD4DB7D0CA61D5F, 0x386E719EDD01D801 // A1, A1L
1035 data8 0x3F4CC72638E1D93F, 0xBF4228EC9953CCB9 // D0, D1
1036 data8 0x3ED222F97A04613E,0xBED3DDD58095CB6C // C20, C21
1037 data8 0x3FDEF72BC8EE38AB, 0x3C863AFF3FC48940 // A2, A2L
1038 data8 0xBFBF19B9BCC38A41, 0xBC7425F1BFFC1442// A0, A3L
1039 data8 0x941890032BEB34C3, 0x00003FF6 // E6
1040 data8 0xC7E701591CE534BC, 0x00003FF7 // E4
1041 data8 0x93373CBD05138DD4, 0x00003FF9 // E2
1042 data8 0x845A14A6A81C05D6, 0x00003FFB // E0
1043 data8 0x3F0F6C4DF6D47A13, 0xBF045DCDB5B49E19 // D6, D7
1044 data8 0x3F22E23345DDE59C, 0xBF1851159AFB1735 // D4, D5
1045 data8 0x3F37101EA4022B78, 0xBF2D721E6323AF13 // D2, D3
1046 data8 0x3EE691EBE82DF09D, 0xBEDD42550961F730 // C18, C19
1047 data8 0x3EFA793EDE99AD85, 0xBEF14000108E70BE // C16, C17
1048 data8 0xB7CBC033ACE0C99C, 0x0000BFF5 // E7
1049 data8 0xF178D1F7B1A45E27, 0x0000BFF6 // E5
1050 data8 0xA8FCFCA8106F471C, 0x0000BFF8 // E3
1051 data8 0x864D46FA898A9AD2, 0x0000BFFA // E1
1052 LOCAL_OBJECT_END(lgammal_loc_min_data)
1054 LOCAL_OBJECT_START(lgammal_03Q_1Q_data)
1055 // Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.3125
1056 data8 0x3FD151322AC7D848, 0x3C7184DE0DB7B4EE // A4, A2L
1057 data8 0x3FD9A4D55BEAB2D6, 0x3C9E934AAB10845F // A3, A1L
1058 data8 0x3FB111289C381259, 0x3FAFFFCFB32AE18D // D2, D3
1059 data8 0x3FB3B1D9E0E3E00D, 0x3FB2496F0D3768DF // D0, D1
1060 data8 0xBA461972C057D439, 0x00003FFB // E6
1061 data8 0x3FEA51A6625307D3, 0x3C76ABC886A72DA2 // A2, A4L
1062 data8 0x3FA8EFE46B32A70E, 0x3F8F31B3559576B6 // C17, C20
1063 data8 0xE403383700387D85, 0x00003FFB // E4
1064 data8 0x9381D0EE74BF7251, 0x00003FFC // E2
1065 data8 0x3FAA2177A6D28177, 0x3FA4895E65FBD995 // C18, C19
1066 data8 0x3FAAED2C77DBEE5D, 0x3FA94CA59385512C // D6, D7
1067 data8 0x3FAE1F522E8A5941, 0x3FAC785EF56DD87E // D4, D5
1068 data8 0x3FB556AD5FA56F0A, 0x3FA81F416E87C783 // E7, C16
1069 data8 0xCD00F1C2DC2C9F1E, 0x00003FFB // E5
1070 data8 0x3FE2788CFC6FB618, 0x3C8E52519B5B17CB // A1, A3L
1071 data8 0x80859B57C3E7F241, 0x00003FFC // E3
1072 data8 0xADA065880615F401, 0x00003FFC // E1
1073 data8 0xD45CE0BD530AB50E, 0x00003FFC // E0
1074 LOCAL_OBJECT_END(lgammal_03Q_1Q_data)
1076 LOCAL_OBJECT_START(lgammal_13Q_2Q_data)
1077 // Polynomial coefficients for the lgammal(x), 1.5625 <= |x| < 2.25
1078 data8 0x3F951322AC7D8483, 0x3C71873D88C6539D // A4, A2L
1079 data8 0xBFB13E001A557606, 0x3C56CB907018A101 // A3, A1L
1080 data8 0xBEC11B2EC1E7F6FC, 0x3EB0064ED9824CC7 // D2, D3
1081 data8 0xBEE3CBC963EC103A, 0x3ED2597A330C107D // D0, D1
1082 data8 0xBC6F2DEBDFE66F38, 0x0000BFF0 // E6
1083 data8 0x3FD4A34CC4A60FA6, 0x3C3AFC9BF775E8A0 // A2, A4L
1084 data8 0x3E48B0C542F85B32, 0xBE347F12EAF787AB // C17, C20
1085 data8 0xE9FEA63B6984FA1E, 0x0000BFF2 // E4
1086 data8 0x9C562E15FC703BBF, 0x0000BFF5 // E2
1087 data8 0xBE3C12A50AB0355E, 0xBE1C941626AE4717 // C18, C19
1088 data8 0xBE7AFA8714342BC4,0x3E69A12D2B7761CB // D6, D7
1089 data8 0xBE9E25EF1D526730, 0x3E8C762291889B99 // D4, D5
1090 data8 0x3EF580DCEE754733, 0xBE57C811D070549C // E7, C16
1091 data8 0xD093D878BE209C98, 0x00003FF1 // E5
1092 data8 0x3FDB0EE6072093CE, 0xBC6024B9E81281C4 // A1, A3L
1093 data8 0x859B57C31CB77D96, 0x00003FF4 // E3
1094 data8 0xBD6EB756DB617E8D, 0x00003FF6 // E1
1095 data8 0xF2027E10C7AF8C38, 0x0000BFF7 // E0
1096 LOCAL_OBJECT_END(lgammal_13Q_2Q_data)
1098 LOCAL_OBJECT_START(lgammal_8_10_data)
1099 // Polynomial coefficients for the lgammal(x), 8.0 <= |x| < 10.0
1100 // Multi Precision terms
1101 data8 0x40312008A3A23E5C, 0x3CE020B4F2E4083A //A1
1102 data8 0x4025358E82FCB70C, 0x3CD4A5A74AF7B99C //A0
1103 // Native precision terms
1104 data8 0xF0AA239FFBC616D2, 0x00004000 //A2
1105 data8 0x96A8EA798FE57D66, 0x0000BFFF //A3
1106 data8 0x8D501B7E3B9B9BDB, 0x00003FFE //A4
1107 data8 0x9EE062401F4B1DC2, 0x0000BFFD //A5
1108 data8 0xC63FD8CD31E93431, 0x00003FFC //A6
1109 data8 0x8461101709C23C30, 0x0000BFFC //A7
1110 data8 0xB96D7EA7EF3648B2, 0x00003FFB //A8
1111 data8 0x86886759D2ACC906, 0x0000BFFB //A9
1112 data8 0xC894B6E28265B183, 0x00003FFA //A10
1113 data8 0x98C4348CAD821662, 0x0000BFFA //A11
1114 data8 0xEC9B092226A94DF2, 0x00003FF9 //A12
1115 data8 0xB9F169FF9B98CDDC, 0x0000BFF9 //A13
1116 data8 0x9A3A32BB040894D3, 0x00003FF9 //A14
1117 data8 0xF9504CCC1003B3C3, 0x0000BFF8 //A15
1118 LOCAL_OBJECT_END(lgammal_8_10_data)
1120 LOCAL_OBJECT_START(lgammal_03Q_6_data)
1121 // Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.0
1122 data8 0xBFBC47DCA479E295, 0xBC607E6C1A379D55 //A3
1123 data8 0x3FCA051C372609ED, 0x3C7B02D73EB7D831 //A0
1124 data8 0xBFE15FAFA86B04DB, 0xBC3F52EE4A8945B5 //A1
1125 data8 0x3FD455C4FF28F0BF, 0x3C75F8C6C99F30BB //A2
1126 data8 0xD2CF04CD934F03E1, 0x00003FFA //A4
1127 data8 0xDB4ED667E29256E1, 0x0000BFF9 //A5
1128 data8 0xF155A33A5B6021BF, 0x00003FF8 //A6
1129 data8 0x895E9B9D386E0338, 0x0000BFF8 //A7
1130 data8 0xA001BE94B937112E, 0x00003FF7 //A8
1131 data8 0xBD82846E490ED048, 0x0000BFF6 //A9
1132 data8 0xE358D24EC30DBB5D, 0x00003FF5 //A10
1133 data8 0x89C4F3652446B78B, 0x0000BFF5 //A11
1134 data8 0xA86043E10280193D, 0x00003FF4 //A12
1135 data8 0xCF3A2FBA61EB7682, 0x0000BFF3 //A13
1136 data8 0x3F300900CC9200EC //A14
1137 data8 0xBF23F42264B94AE8 //A15
1138 data8 0x3F18EEF29895FE73 //A16
1139 data8 0xBF0F3C4563E3EDFB //A17
1140 data8 0x3F0387DBBC385056 //A18
1141 data8 0xBEF81B4004F92900 //A19
1142 data8 0x3EECA6692A9A5B81 //A20
1143 data8 0xBEDF61A0059C15D3 //A21
1144 data8 0x3ECDA9F40DCA0111 //A22
1145 data8 0xBEB60FE788217BAF //A23
1146 data8 0x3E9661D795DFC8C6 //A24
1147 data8 0xBE66C7756A4EDEE5 //A25
1148 // Polynomial coefficients for the lgammal(x), 1.0 <= |x| < 2.0
1149 data8 0xBFC1AE55B180726B, 0xBC7DE1BC478453F5 //A3
1150 data8 0xBFBEEB95B094C191, 0xBC53456FF6F1C9D9 //A0
1151 data8 0x3FA2AED059BD608A, 0x3C0B65CC647D557F //A1
1152 data8 0x3FDDE9E64DF22EF2, 0x3C8993939A8BA8E4 //A2
1153 data8 0xF07C206D6B100CFF, 0x00003FFA //A4
1154 data8 0xED2CEA9BA52FE7FB, 0x0000BFF9 //A5
1155 data8 0xFCE51CED52DF3602, 0x00003FF8 //A6
1156 data8 0x8D45D27872326619, 0x0000BFF8 //A7
1157 data8 0xA2B78D6BCEBE27F7, 0x00003FF7 //A8
1158 data8 0xBF6DC0996A895B6F, 0x0000BFF6 //A9
1159 data8 0xE4B9AD335AF82D79, 0x00003FF5 //A10
1160 data8 0x8A451880195362A1, 0x0000BFF5 //A11
1161 data8 0xA8BE35E63089A7A9, 0x00003FF4 //A12
1162 data8 0xCF7FA175FA11C40C, 0x0000BFF3 //A13
1163 data8 0x3F300C282FAA3B02 //A14
1164 data8 0xBF23F6AEBDA68B80 //A15
1165 data8 0x3F18F6860E2224DD //A16
1166 data8 0xBF0F542B3CE32F28 //A17
1167 data8 0x3F039436218C9BF8 //A18
1168 data8 0xBEF8AE6307677AEC //A19
1169 data8 0x3EF0B55527B3A211 //A20
1170 data8 0xBEE576AC995E7605 //A21
1171 data8 0x3ED102DDC1365D2D //A22
1172 data8 0xBEC442184F97EA54 //A23
1173 data8 0x3ED4D2283DFE5FC6 //A24
1174 data8 0xBECB9219A9B46787 //A25
1175 // Polynomial coefficients for the lgammal(x), 2.0 <= |x| < 3.0
1176 data8 0xBFCA4D55BEAB2D6F, 0xBC66F80E5BFD5AF5 //A3
1177 data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B347E3D //A0
1178 data8 0x3FFD8773039049E7, 0x3C66CB9007C426EA //A1
1179 data8 0x3FE94699894C1F4C, 0x3C918726EB111663 //A2
1180 data8 0xA264558FB0906209, 0x00003FFB //A4
1181 data8 0x94D6C50FEB902ADC, 0x0000BFFA //A5
1182 data8 0x9620656184243D17, 0x00003FF9 //A6
1183 data8 0xA0D0983B8BCA910B, 0x0000BFF8 //A7
1184 data8 0xB36AF8559B222BD3, 0x00003FF7 //A8
1185 data8 0xCE0DACB3260AE6E5, 0x0000BFF6 //A9
1186 data8 0xF1C2C0BF0437C7DB, 0x00003FF5 //A10
1187 data8 0x902A2F2F3AB74A92, 0x0000BFF5 //A11
1188 data8 0xAE05009B1B2C6E4C, 0x00003FF4 //A12
1189 data8 0xD5B71F6456D7D4CB, 0x0000BFF3 //A13
1190 data8 0x3F2F0351D71BC9C6 //A14
1191 data8 0xBF2B53BC56A3B793 //A15
1192 data8 0xBF18B12DC6F6B861 //A16
1193 data8 0xBF43EE6EB5215C2F //A17
1194 data8 0xBF5474787CDD455E //A18
1195 data8 0xBF642B503C9C060A //A19
1196 data8 0xBF6E07D1AA254AA3 //A20
1197 data8 0xBF71C785443AAEE8 //A21
1198 data8 0xBF6F67BF81B71052 //A22
1199 data8 0xBF63E4BCCF4FFABF //A23
1200 data8 0xBF50067F8C671D5A //A24
1201 data8 0xBF29C770D680A5AC //A25
1202 // Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 6.0
1203 data8 0xBFD6626BC9B31B54, 0xBC85AABE08680902 //A3
1204 data8 0x401326643C4479C9, 0x3CAA53C26F31E364 //A0
1205 data8 0x401B4C420A50AD7C, 0x3C8C76D55E57DD8D //A1
1206 data8 0x3FF735973273D5EC, 0x3C83A0B78E09188A //A2
1207 data8 0x81310136363AAB6D, 0x00003FFC //A4
1208 data8 0xDF1BD44C4075C0E6, 0x0000BFFA //A5
1209 data8 0xD58389FE38D8D664, 0x00003FF9 //A6
1210 data8 0xDA62C7221D5B5F87, 0x0000BFF8 //A7
1211 data8 0xE9F92CAD0263E157, 0x00003FF7 //A8
1212 data8 0x81ACCB8606C165FE, 0x0000BFF7 //A9
1213 data8 0x9382D8D263D1C2A3, 0x00003FF6 //A10
1214 data8 0xAB3CCBA4C853B12C, 0x0000BFF5 //A11
1215 data8 0xCA0818BBCCC59296, 0x00003FF4 //A12
1216 data8 0xF18912691CBB5BD0, 0x0000BFF3 //A13
1217 data8 0x3F323EF5D8330339 //A14
1218 data8 0xBF2641132EA571F7 //A15
1219 data8 0x3F1B5D9576175CA9 //A16
1220 data8 0xBF10F56A689C623D //A17
1221 data8 0x3F04CACA9141A18D //A18
1222 data8 0xBEFA307AC9B4E85D //A19
1223 data8 0x3EF4B625939FBE32 //A20
1224 data8 0xBECEE6AC1420F86F //A21
1225 data8 0xBE9A95AE2E485964 //A22
1226 data8 0xBF039EF47F8C09BB //A23
1227 data8 0xBF05345957F7B7A9 //A24
1228 data8 0xBEF85AE6385D4CCC //A25
1229 // Polynomial coefficients for the lgammal(x), 3.0 <= |x| < 4.0
1230 data8 0xBFCA4D55BEAB2D6F, 0xBC667B20FF46C6A8 //A3
1231 data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B398012 //A0
1232 data8 0x3FFD8773039049E7, 0x3C66CB9070238D77 //A1
1233 data8 0x3FE94699894C1F4C, 0x3C91873D8839B1CD //A2
1234 data8 0xA264558FB0906D7E, 0x00003FFB //A4
1235 data8 0x94D6C50FEB8AFD72, 0x0000BFFA //A5
1236 data8 0x9620656185B68F14, 0x00003FF9 //A6
1237 data8 0xA0D0983B34B7088A, 0x0000BFF8 //A7
1238 data8 0xB36AF863964AA440, 0x00003FF7 //A8
1239 data8 0xCE0DAAFB5497AFB8, 0x0000BFF6 //A9
1240 data8 0xF1C2EAFA79CC2864, 0x00003FF5 //A10
1241 data8 0x9028922A839572B8, 0x0000BFF5 //A11
1242 data8 0xAE1E62F870BA0278, 0x00003FF4 //A12
1243 data8 0xD4726F681E2ABA29, 0x0000BFF3 //A13
1244 data8 0x3F30559B9A02FADF //A14
1245 data8 0xBF243ADEB1266CAE //A15
1246 data8 0x3F19303B6F552603 //A16
1247 data8 0xBF0F768C288EC643 //A17
1248 data8 0x3F039D5356C21DE1 //A18
1249 data8 0xBEF81BCA8168E6BE //A19
1250 data8 0x3EEC74A53A06AD54 //A20
1251 data8 0xBEDED52D1A5DACDF //A21
1252 data8 0x3ECCB4C2C7087342 //A22
1253 data8 0xBEB4F1FAFDFF5C2F //A23
1254 data8 0x3E94C80B52D58904 //A24
1255 data8 0xBE64A328CBE92A27 //A25
1256 LOCAL_OBJECT_END(lgammal_03Q_6_data)
1258 LOCAL_OBJECT_START(lgammal_1pEps_data)
1259 // Polynomial coefficients for the lgammal(x), 1 - 2^(-7) <= |x| < 1 + 2^(-7)
1260 data8 0x93C467E37DB0C7A5, 0x00003FFE //A1
1261 data8 0xD28D3312983E9919, 0x00003FFE //A2
1262 data8 0xCD26AADF559A47E3, 0x00003FFD //A3
1263 data8 0x8A8991563EC22E81, 0x00003FFD //A4
1264 data8 0x3FCA8B9C168D52FE //A5
1265 data8 0x3FC5B40CB0696370 //A6
1266 data8 0x3FC270AC2229A65D //A7
1267 data8 0x3FC0110AF10FCBFC //A8
1268 // Polynomial coefficients for the log1p(x), - 2^(-7) <= |x| < 2^(-7)
1269 data8 0x3FBC71C71C71C71C //P8
1270 data8 0xBFC0000000000000 //P7
1271 data8 0x3FC2492492492492 //P6
1272 data8 0xBFC5555555555555 //P5
1273 data8 0x3FC999999999999A //P4
1274 data8 0xBFD0000000000000 //P3
1275 data8 0x3FD5555555555555 //P2
1276 data8 0xBFE0000000000000 //P1
1277 // short version of "lnsin" polynomial
1278 data8 0xD28D3312983E9918, 0x00003FFF //A2
1279 data8 0x8A8991563EC241B6, 0x00003FFE //A4
1280 data8 0xADA06588061830A5, 0x00003FFD //A6
1281 data8 0x80859B57C31CB746, 0x00003FFD //A8
1282 LOCAL_OBJECT_END(lgammal_1pEps_data)
1284 LOCAL_OBJECT_START(lgammal_neg2andHalf_data)
1285 // Polynomial coefficients for the lgammal(x), -2.005859375 <= x < -2.5
1286 data8 0xBF927781D4BB093A, 0xBC511D86D85B7045 // A3, A0L
1287 data8 0x3FF1A68793DEFC15, 0x3C9852AE2DA7DEEF // A1, A1L
1288 data8 0x408555562D45FAFD, 0xBF972CDAFE5FEFAD // D0, D1
1289 data8 0xC18682331EF492A5, 0xC1845E3E0D29606B // C20, C21
1290 data8 0x4013141822E16979, 0x3CCF8718B6E75F6C // A2, A2L
1291 data8 0xBFACCBF9F5ED0F15, 0xBBDD1AEB73297401 // A0, A3L
1292 data8 0xCCCDB17423046445, 0x00004006 // E6
1293 data8 0x800514E230A3A452, 0x00004005 // E4
1294 data8 0xAAE9A48EC162E76F, 0x00004003 // E2
1295 data8 0x81D4F88B3F3EA0FC, 0x00004002 // E0
1296 data8 0x40CF3F3E35238DA0, 0xC0F8B340945F1A7E // D6, D7
1297 data8 0x40BF89EC0BD609C6, 0xC095897242AEFEE2 // D4, D5
1298 data8 0x40A2482FF01DBC5C, 0xC02095E275FDCF62 // D2, D3
1299 data8 0xC1641354F2312A6A, 0xC17B3657F85258E9 // C18, C19
1300 data8 0xC11F964E9ECBE2C9, 0xC146D7A90F70696C // C16, C17
1301 data8 0xE7AECDE6AF8EA816, 0x0000BFEF // E7
1302 data8 0xD711252FEBBE1091, 0x0000BFEB // E5
1303 data8 0xE648BD10F8C43391, 0x0000BFEF // E3
1304 data8 0x948A1E78AA00A98D, 0x0000BFF4 // E1
1305 LOCAL_OBJECT_END(lgammal_neg2andHalf_data)
1307 LOCAL_OBJECT_START(lgammal_near_neg_half_data)
1308 // Polynomial coefficients for the lgammal(x), -0.5 < x < -0.40625
1309 data8 0xBFC1AE55B180726C, 0x3C8053CD734E6A1D // A3, A0L
1310 data8 0x3FA2AED059BD608A, 0x3C0CD3D2CDBA17F4 // A1, A1L
1311 data8 0x40855554DBCD1E1E, 0x3F96C51AC2BEE9E1 // D0, D1
1312 data8 0xC18682331EF4927D, 0x41845E3E0D295DFC // C20, C21
1313 data8 0x4011DE9E64DF22EF, 0x3CA692B70DAD6B7B // A2, A2L
1314 data8 0x3FF43F89A3F0EDD6, 0xBC4955AED0FA087D // A0, A3L
1315 data8 0xCCCD3F1DF4A2C1DD, 0x00004006 // E6
1316 data8 0x80028ADE33C7FCD9, 0x00004005 // E4
1317 data8 0xAACA474E485507EF, 0x00004003 // E2
1318 data8 0x80F07C206D6B0ECD, 0x00004002 // E0
1319 data8 0x40CF3F3E33E83056, 0x40F8B340944633D9 // D6, D7
1320 data8 0x40BF89EC059931F0, 0x409589723307AD20 // D4, D5
1321 data8 0x40A2482FD0054824, 0x402095CE7F19D011 // D2, D3
1322 data8 0xC1641354F2313614, 0x417B3657F8525354 // C18, C19
1323 data8 0xC11F964E9ECFD21C, 0x4146D7A90F701836 // C16, C17
1324 data8 0x86A9C01F0EA11E5A, 0x0000BFF5 // E7
1325 data8 0xBF6D8469142881C0, 0x0000BFF6 // E5
1326 data8 0x8D45D277BA8255F1, 0x0000BFF8 // E3
1327 data8 0xED2CEA9BA528BCC3, 0x0000BFF9 // E1
1328 LOCAL_OBJECT_END(lgammal_near_neg_half_data)
1330 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1331 ////////////// POLYNOMIAL COEFFICIENTS FOR "NEAR ROOTS" RANGES /////////////
1332 ////////////// THIS PART OF TABLE SHOULD BE ADDRESSED REALLY RARE /////////////
1333 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1334 LOCAL_OBJECT_START(lgammal_right_roots_polynomial_data)
1335 // Polynomial coefficients for right root on [-3, -2]
1336 // Lgammal is approximated by polynomial within [-.056244 ; .158208 ] range
1337 data8 0xBBBD5E9DCD11030B, 0xB867411D9FF87DD4 //A0
1338 data8 0x3FF83FE966AF535E, 0x3CAA21235B8A769A //A1
1339 data8 0x40136EEBB002F55C, 0x3CC3959A6029838E //A2
1340 data8 0xB4A5302C53C2BEDD, 0x00003FFF //A3
1341 data8 0x8B8C6BE504F2DA1C, 0x00004002 //A4
1342 data8 0xB99CFF02593B4D98, 0x00004001 //A5
1343 data8 0x4038D32F682AA1CF //A6
1344 data8 0x403809F04EE6C5B5 //A7
1345 data8 0x40548EAA81634CEE //A8
1346 data8 0x4059297ADB6BC03D //A9
1347 data8 0x407286FB8EC5C9DA //A10
1348 data8 0x407A92E05B744CFB //A11
1349 data8 0x4091A9D4144258CD //A12
1350 data8 0x409C4D01D24F367E //A13
1351 data8 0x40B1871B9A426A83 //A14
1352 data8 0x40BE51C48BD9A583 //A15
1353 data8 0x40D2140D0C6153E7 //A16
1354 data8 0x40E0FB2C989CE4A3 //A17
1355 data8 0x40E52739AB005641 //A18
1356 data8 0x41161E3E6DDF503A //A19
1357 // Polynomial coefficients for right root on [-4, -3]
1358 // Lgammal is approximated by polynomial within [-.172797 ; .171573 ] range
1359 data8 0x3C172712B248E42E, 0x38CB8D17801A5D67 //A0
1360 data8 0x401F20A65F2FAC54, 0x3CCB9EA1817A824E //A1
1361 data8 0x4039D4D2977150EF, 0x3CDA42E149B6276A //A2
1362 data8 0xE089B8926AE2D9CB, 0x00004005 //A3
1363 data8 0x933901EBBB586C37, 0x00004008 //A4
1364 data8 0xCCD319BED1CFA1CD, 0x0000400A //A5
1365 data8 0x40D293C3F78D3C37 //A6
1366 data8 0x40FBB97AA0B6DD02 //A7
1367 data8 0x41251EA3345E5EB9 //A8
1368 data8 0x415057F65C92E7B0 //A9
1369 data8 0x41799C865241B505 //A10
1370 data8 0x41A445209EFE896B //A11
1371 data8 0x41D02D21880C953B //A12
1372 data8 0x41F9FFDE8C63E16D //A13
1373 data8 0x422504DC8302D2BE //A14
1374 data8 0x425111BF18C95414 //A15
1375 data8 0x427BCBE74A2B8EF7 //A16
1376 data8 0x42A7256F59B286F7 //A17
1377 data8 0x42D462D1586DE61F //A18
1378 data8 0x42FBB1228D6C5118 //A19
1379 // Polynomial coefficients for right root on [-5, -4]
1380 // Lgammal is approximated by polynomial within [-.163171 ; .161988 ] range
1381 data8 0x3C5840FBAFDEE5BB, 0x38CAC0336E8C490A //A0
1382 data8 0x403ACA5CF4921642, 0x3CCEDCDDA5491E56 //A1
1383 data8 0x40744415CD813F8E, 0x3CFBFEBC17E39146 //A2
1384 data8 0xAACD88D954E3E1BD, 0x0000400B //A3
1385 data8 0xCB68C710D75ED802, 0x0000400F //A4
1386 data8 0x8130F5AB997277AC, 0x00004014 //A5
1387 data8 0x41855E3DBF99EBA7 //A6
1388 data8 0x41CD14FE49C49FC2 //A7
1389 data8 0x421433DCE281F07D //A8
1390 data8 0x425C8399C7A92B6F //A9
1391 data8 0x42A45FBE67840F1A //A10
1392 data8 0x42ED68D75F9E6C98 //A11
1393 data8 0x433567291C27E5BE //A12
1394 data8 0x437F5ED7A9D9FD28 //A13
1395 data8 0x43C720A65C8AB711 //A14
1396 data8 0x441120A6C1D40B9B //A15
1397 data8 0x44596F561F2D1CBE //A16
1398 data8 0x44A3507DA81D5C01 //A17
1399 data8 0x44EF06A31E39EEDF //A18
1400 data8 0x45333774C99F523F //A19
1401 // Polynomial coefficients for right root on [-6, -5]
1402 // Lgammal is approximated by polynomial within [-.156450 ; .156126 ] range
1403 data8 0x3C71B82D6B2B3304, 0x3917186E3C0DC231 //A0
1404 data8 0x405ED72E0829AE02, 0x3C960C25157980EB //A1
1405 data8 0x40BCECC32EC22F9B, 0x3D5D8335A32F019C //A2
1406 data8 0x929EC2B1FB931F17, 0x00004012 //A3
1407 data8 0xD112EF96D37316DE, 0x00004018 //A4
1408 data8 0x9F00BB9BB13416AB, 0x0000401F //A5
1409 data8 0x425F7D8D5BDCB223 //A6
1410 data8 0x42C9A8D00C776CC6 //A7
1411 data8 0x433557FD8C481424 //A8
1412 data8 0x43A209221A953EF0 //A9
1413 data8 0x440EDC98D5618AB7 //A10
1414 data8 0x447AABD25E367378 //A11
1415 data8 0x44E73DE20CC3B288 //A12
1416 data8 0x455465257B4E0BD8 //A13
1417 data8 0x45C2011532085353 //A14
1418 data8 0x462FEE4CC191945B //A15
1419 data8 0x469C63AEEFEF0A7F //A16
1420 data8 0x4709D045390A3810 //A17
1421 data8 0x4778D360873C9F64 //A18
1422 data8 0x47E26965BE9A682A //A19
1423 // Polynomial coefficients for right root on [-7, -6]
1424 // Lgammal is approximated by polynomial within [-.154582 ; .154521 ] range
1425 data8 0x3C75F103A1B00A48, 0x391C041C190C726D //A0
1426 data8 0x40869DE49E3AF2AA, 0x3D1C17E1F813063B //A1
1427 data8 0x410FCE23484CFD10, 0x3DB6F38C2F11DAB9 //A2
1428 data8 0xEF281D1E1BE2055A, 0x00004019 //A3
1429 data8 0xFCE3DA92AC55DFF8, 0x00004022 //A4
1430 data8 0x8E9EA838A20BD58E, 0x0000402C //A5
1431 data8 0x4354F21E2FB9E0C9 //A6
1432 data8 0x43E9500994CD4F09 //A7
1433 data8 0x447F3A2C23C033DF //A8
1434 data8 0x45139152656606D8 //A9
1435 data8 0x45A8D45F8D3BF2E8 //A10
1436 data8 0x463FD32110E5BFE5 //A11
1437 data8 0x46D490B3BDBAE0BE //A12
1438 data8 0x476AC3CAD905DD23 //A13
1439 data8 0x48018558217AD473 //A14
1440 data8 0x48970AF371D30585 //A15
1441 data8 0x492E6273A8BEFFE3 //A16
1442 data8 0x49C47CC9AE3F1073 //A17
1443 data8 0x4A5D38E8C35EFF45 //A18
1444 data8 0x4AF0123E89694CD8 //A19
1445 // Polynomial coefficients for right root on [-8, -7]
1446 // Lgammal is approximated by polynomial within [-.154217 ; .154208 ] range
1447 data8 0xBCD2507D818DDD68, 0xB97F6940EA2871A0 //A0
1448 data8 0x40B3B407AA387BCB, 0x3D6320238F2C43D1 //A1
1449 data8 0x41683E85DAAFBAC7, 0x3E148D085958EA3A //A2
1450 data8 0x9F2A95AF1E10A548, 0x00004022 //A3
1451 data8 0x92F21522F482300E, 0x0000402E //A4
1452 data8 0x90B51AB03A1F244D, 0x0000403A //A5
1453 data8 0x44628E1C70EF534F //A6
1454 data8 0x452393E2BC32D244 //A7
1455 data8 0x45E5164141F4BA0B //A8
1456 data8 0x46A712B3A8AF5808 //A9
1457 data8 0x47698FD36CEDD0F2 //A10
1458 data8 0x482C9AE6BBAA3637 //A11
1459 data8 0x48F023821857C8E9 //A12
1460 data8 0x49B2569053FC106F //A13
1461 data8 0x4A74F646D5C1604B //A14
1462 data8 0x4B3811CF5ABA4934 //A15
1463 data8 0x4BFBB5DD6C84E233 //A16
1464 data8 0x4CC05021086F637B //A17
1465 data8 0x4D8450A345B0FB49 //A18
1466 data8 0x4E43825848865DB2 //A19
1467 // Polynomial coefficients for right root on [-9, -8]
1468 // Lgammal is approximated by polynomial within [-.154160 ; .154158 ] range
1469 data8 0x3CDF4358564F2B46, 0x397969BEE6042F81 //A0
1470 data8 0x40E3B088FED67721, 0x3D82787BA937EE85 //A1
1471 data8 0x41C83A3893550EF4, 0x3E542ED57E244DA8 //A2
1472 data8 0x9F003C6DC56E0B8E, 0x0000402B //A3
1473 data8 0x92BDF64A3213A699, 0x0000403A //A4
1474 data8 0x9074F503AAD417AF, 0x00004049 //A5
1475 data8 0x4582843E1313C8CD //A6
1476 data8 0x467387BD6A7826C1 //A7
1477 data8 0x4765074E788CF440 //A8
1478 data8 0x4857004DD9D1E09D //A9
1479 data8 0x4949792ED7530EAF //A10
1480 data8 0x4A3C7F089A292ED3 //A11
1481 data8 0x4B30125BF0AABB86 //A12
1482 data8 0x4C224175195E307E //A13
1483 data8 0x4D14DC4C8B32C08D //A14
1484 data8 0x4E07F1DB2786197E //A15
1485 data8 0x4EFB8EA1C336DACB //A16
1486 data8 0x4FF03797EACD0F23 //A17
1487 data8 0x50E4304A8E68A730 //A18
1488 data8 0x51D3618FB2EC9F93 //A19
1489 // Polynomial coefficients for right root on [-10, -9]
1490 // Lgammal is approximated by polynomial within [-.154152 ; .154152 ] range
1491 data8 0x3D42F34DA97ECF0C, 0x39FD1256F345B0D0 //A0
1492 data8 0x4116261203919787, 0x3DC12D44055588EB //A1
1493 data8 0x422EA8F32FB7FE99, 0x3ED849CE4E7B2D77 //A2
1494 data8 0xE25BAF73477A57B5, 0x00004034 //A3
1495 data8 0xEB021FD10060504A, 0x00004046 //A4
1496 data8 0x8220A208EE206C5F, 0x00004059 //A5
1497 data8 0x46B2C3903EC9DA14 //A6
1498 data8 0x47D64393744B9C67 //A7
1499 data8 0x48FAF79CCDC604DD //A8
1500 data8 0x4A20975DB8061EBA //A9
1501 data8 0x4B44AB9CBB38DB21 //A10
1502 data8 0x4C6A032F60094FE9 //A11
1503 data8 0x4D908103927634B4 //A12
1504 data8 0x4EB516CA21D30861 //A13
1505 data8 0x4FDB1BF12C58D318 //A14
1506 data8 0x510180AAE094A553 //A15
1507 data8 0x5226A8F2A2D45D57 //A16
1508 data8 0x534E00B6B0C8B809 //A17
1509 data8 0x5475022FE21215B2 //A18
1510 data8 0x5596B02BF6C5E19B //A19
1511 // Polynomial coefficients for right root on [-11, -10]
1512 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1513 data8 0x3D7AA9C2E2B1029C, 0x3A15FB37578544DB //A0
1514 data8 0x414BAF825A0C91D4, 0x3DFB9DA2CE398747 //A1
1515 data8 0x4297F3EC8AE0AF03, 0x3F34208B55FB8781 //A2
1516 data8 0xDD0C97D3197F56DE, 0x0000403E //A3
1517 data8 0x8F6F3AF7A5499674, 0x00004054 //A4
1518 data8 0xC68DA1AF6D878EEB, 0x00004069 //A5
1519 data8 0x47F1E4E1E2197CE0 //A6
1520 data8 0x494A8A28E597C3EB //A7
1521 data8 0x4AA4175D0D35D705 //A8
1522 data8 0x4BFEE6F0AF69E814 //A9
1523 data8 0x4D580FE7B3DBB3C6 //A10
1524 data8 0x4EB2ECE60E4608AF //A11
1525 data8 0x500E04BE3E2B4F24 //A12
1526 data8 0x5167F9450F0FB8FD //A13
1527 data8 0x52C342BDE747603F //A14
1528 data8 0x541F1699D557268C //A15
1529 data8 0x557927C5F079864E //A16
1530 data8 0x56D4D10FEEDB030C //A17
1531 data8 0x5832385DF86AD28A //A18
1532 data8 0x598898914B4D6523 //A19
1533 // Polynomial coefficients for right root on [-12, -11]
1534 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1535 data8 0xBD96F61647C58B03, 0xBA3ABB0C2A6C755B //A0
1536 data8 0x418308A82714B70D, 0x3E1088FC6A104C39 //A1
1537 data8 0x4306A493DD613C39, 0x3FB2341ECBF85741 //A2
1538 data8 0x8FA8FE98339474AB, 0x00004049 //A3
1539 data8 0x802CCDF570BA7942, 0x00004062 //A4
1540 data8 0xF3F748AF11A32890, 0x0000407A //A5
1541 data8 0x493E3B567EF178CF //A6
1542 data8 0x4ACED38F651BA362 //A7
1543 data8 0x4C600B357337F946 //A8
1544 data8 0x4DF0F71A52B54CCF //A9
1545 data8 0x4F8229F3B9FA2C70 //A10
1546 data8 0x5113A4C4979B770E //A11
1547 data8 0x52A56BC367F298D5 //A12
1548 data8 0x543785CF31842DC0 //A13
1549 data8 0x55C9FC37E3E40896 //A14
1550 data8 0x575CD5D1BA556C82 //A15
1551 data8 0x58F00A7AD99A9E08 //A16
1552 data8 0x5A824088688B008D //A17
1553 data8 0x5C15F75EF7E08EBD //A18
1554 data8 0x5DA462EA902F0C90 //A19
1555 // Polynomial coefficients for right root on [-13, -12]
1556 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1557 data8 0x3DC3191752ACFC9D, 0x3A26CB6629532DBF //A0
1558 data8 0x41BC8CFC051191BD, 0x3E68A84DA4E62AF2 //A1
1559 data8 0x43797926294A0148, 0x400F345FF3723CFF //A2
1560 data8 0xF26D2AF700B82625, 0x00004053 //A3
1561 data8 0xA238B24A4B1F7B15, 0x00004070 //A4
1562 data8 0xE793B5C0A41A264F, 0x0000408C //A5
1563 data8 0x4A9585BDDACE863D //A6
1564 data8 0x4C6075953448088A //A7
1565 data8 0x4E29B2F38D1FC670 //A8
1566 data8 0x4FF4619B079C440F //A9
1567 data8 0x51C05DAE118D8AD9 //A10
1568 data8 0x538A8C7F87326AD4 //A11
1569 data8 0x5555B6937588DAB3 //A12
1570 data8 0x5721E1F8B6E6A7DB //A13
1571 data8 0x58EDA1D7A77DD6E5 //A14
1572 data8 0x5AB8A9616B7DC9ED //A15
1573 data8 0x5C84942AA209ED17 //A16
1574 data8 0x5E518FC34C6F54EF //A17
1575 data8 0x601FB3F17BCCD9A0 //A18
1576 data8 0x61E61128D512FE97 //A1
1577 // Polynomial coefficients for right root on [-14, -13]
1578 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1579 data8 0xBE170D646421B3F5, 0xBAAD95F79FCB5097 //A0
1580 data8 0x41F7328CBFCD9AC7, 0x3E743B8B1E8AEDB1 //A1
1581 data8 0x43F0D0FA2DBDA237, 0x40A0422D6A227B55 //A2
1582 data8 0x82082DF2D32686CC, 0x0000405F //A3
1583 data8 0x8D64EE9B42E68B43, 0x0000407F //A4
1584 data8 0xA3FFD82E08C5F1F1, 0x0000409F //A5
1585 data8 0x4BF8C49D99123454 //A6
1586 data8 0x4DFEC79DDF11342F //A7
1587 data8 0x50038615A892F6BD //A8
1588 data8 0x520929453DB32EF1 //A9
1589 data8 0x54106A7808189A7F //A10
1590 data8 0x5615A302D03C207B //A11
1591 data8 0x581CC175AA736F5E //A12
1592 data8 0x5A233E071147C017 //A13
1593 data8 0x5C29E81917243F22 //A14
1594 data8 0x5E3184B0B5AC4707 //A15
1595 data8 0x6037C11DE62D8388 //A16
1596 data8 0x6240787C4B1C9D6C //A17
1597 data8 0x6448289235E80977 //A18
1598 data8 0x664B5352C6C3449E //A19
1599 // Polynomial coefficients for right root on [-15, -14]
1600 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1601 data8 0x3E562C2E34A9207D, 0x3ADC00DA3DFF7A83 //A0
1602 data8 0x42344C3B2F0D90AB, 0x3EB8A2E979F24536 //A1
1603 data8 0x4469BFFF28B50D07, 0x41181E3D05C1C294 //A2
1604 data8 0xAE38F64DCB24D9F8, 0x0000406A //A3
1605 data8 0xA5C3F52C1B350702, 0x0000408E //A4
1606 data8 0xA83BC857BCD67A1B, 0x000040B2 //A5
1607 data8 0x4D663B4727B4D80A //A6
1608 data8 0x4FA82C965B0F7788 //A7
1609 data8 0x51EAD58C02908D95 //A8
1610 data8 0x542E427970E073D8 //A9
1611 data8 0x56714644C558A818 //A10
1612 data8 0x58B3EC2040C77BAE //A11
1613 data8 0x5AF72AE6A83D45B1 //A12
1614 data8 0x5D3B214F611F5D12 //A13
1615 data8 0x5F7FF5E49C54E92A //A14
1616 data8 0x61C2E917AB765FB2 //A15
1617 data8 0x64066FD70907B4C1 //A16
1618 data8 0x664B3998D60D0F9B //A17
1619 data8 0x689178710782FA8B //A18
1620 data8 0x6AD14A66C1C7BEC3 //A19
1621 // Polynomial coefficients for right root on [-16, -15]
1622 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1623 data8 0xBE6D7E7192615BAE, 0xBB0137677D7CC719 //A0
1624 data8 0x4273077763F6628C, 0x3F09250FB8FC8EC9 //A1
1625 data8 0x44E6A1BF095B1AB3, 0x4178D5A74F6CB3B3 //A2
1626 data8 0x8F8E0D5060FCC76E, 0x00004076 //A3
1627 data8 0x800CC1DCFF092A63, 0x0000409E //A4
1628 data8 0xF3AB0BA9D14CDA72, 0x000040C5 //A5
1629 data8 0x4EDE3000A2F6D54F //A6
1630 data8 0x515EC613B9C8E241 //A7
1631 data8 0x53E003309FEEEA96 //A8
1632 data8 0x5660ED908D7C9A90 //A9
1633 data8 0x58E21E9B517B1A50 //A10
1634 data8 0x5B639745E4374EE2 //A11
1635 data8 0x5DE55BB626B2075D //A12
1636 data8 0x606772B7506BA747 //A13
1637 data8 0x62E9E581AB2E057B //A14
1638 data8 0x656CBAD1CF85D396 //A15
1639 data8 0x67EFF4EBD7989872 //A16
1640 data8 0x6A722D2B19B7E2F9 //A17
1641 data8 0x6CF5DEB3073B0743 //A18
1642 data8 0x6F744AC11550B93A //A19
1643 // Polynomial coefficients for right root on [-17, -16]
1644 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1645 data8 0xBEDCC6291188207E, 0xBB872E3FDD48F5B7 //A0
1646 data8 0x42B3076EE7525EF9, 0x3F6687A5038CA81C //A1
1647 data8 0x4566A1AAD96EBCB5, 0x421F0FEDFBF548D2 //A2
1648 data8 0x8F8D4D3DE9850DBA, 0x00004082 //A3
1649 data8 0x800BDD6DA2CE1859, 0x000040AE //A4
1650 data8 0xF3A8EC4C9CDC1CE5, 0x000040D9 //A5
1651 data8 0x505E2FAFDB812628 //A6
1652 data8 0x531EC5B3A7508719 //A7
1653 data8 0x55E002F77E99B628 //A8
1654 data8 0x58A0ED4C9B4DAE54 //A9
1655 data8 0x5B621E4A8240F90C //A10
1656 data8 0x5E2396E5C8849814 //A11
1657 data8 0x60E55B43D8C5CE71 //A12
1658 data8 0x63A7722F5D45D01D //A13
1659 data8 0x6669E4E010DCE45A //A14
1660 data8 0x692CBA120D5E78F6 //A15
1661 data8 0x6BEFF4045350B22E //A16
1662 data8 0x6EB22C9807C21819 //A17
1663 data8 0x7175DE20D04617C4 //A18
1664 data8 0x74344AB87C6D655F //A19
1665 // Polynomial coefficients for right root on [-18, -17]
1666 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1667 data8 0xBF28AEEE7B61D77C, 0xBBDBBB5FC57ABF79 //A0
1668 data8 0x42F436F56B3B8A0C, 0x3FA43EE3C5C576E9 //A1
1669 data8 0x45E98A22535D115D, 0x42984678BE78CC48 //A2
1670 data8 0xAC176F3775E6FCFC, 0x0000408E //A3
1671 data8 0xA3114F53A9FEB922, 0x000040BE //A4
1672 data8 0xA4D168A8334ABF41, 0x000040EE //A5
1673 data8 0x51E5B0E7EC7182BB //A6
1674 data8 0x54E77D67B876EAB6 //A7
1675 data8 0x57E9F7C30C09C4B6 //A8
1676 data8 0x5AED29B0488614CA //A9
1677 data8 0x5DF09486F87E79F9 //A10
1678 data8 0x60F30B199979654E //A11
1679 data8 0x63F60E02C7DCCC5F //A12
1680 data8 0x66F9B8A00EB01684 //A13
1681 data8 0x69FE2D3ED0700044 //A14
1682 data8 0x6D01C8363C7DCC84 //A15
1683 data8 0x700502B29C2F06E3 //A16
1684 data8 0x730962B4500F4A61 //A17
1685 data8 0x76103C6ED099192A //A18
1686 data8 0x79100C7132CFD6E3 //A19
1687 // Polynomial coefficients for right root on [-19, -18]
1688 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1689 data8 0x3F3C19A53328A0C3, 0x3BE04ADC3FBE1458 //A0
1690 data8 0x4336C16C16C16C19, 0x3FE58CE3AC4A7C28 //A1
1691 data8 0x46702E85C0898B70, 0x432C922E412CEC6E //A2
1692 data8 0xF57B99A1C034335D, 0x0000409A //A3
1693 data8 0x82EC9634223DF909, 0x000040CF //A4
1694 data8 0x94F66D7557E2EA60, 0x00004103 //A5
1695 data8 0x5376118B79AE34D0 //A6
1696 data8 0x56BAE7106D52E548 //A7
1697 data8 0x5A00BD48CC8E25AB //A8
1698 data8 0x5D4529722821B493 //A9
1699 data8 0x608B1654AF31BBC1 //A10
1700 data8 0x63D182CC98AEA859 //A11
1701 data8 0x6716D43D5EEB05E8 //A12
1702 data8 0x6A5DF884FC172E1C //A13
1703 data8 0x6DA3CA7EBB97976B //A14
1704 data8 0x70EA416D0BE6D2EF //A15
1705 data8 0x743176C31EBB65F2 //A16
1706 data8 0x7777C401A8715CF9 //A17
1707 data8 0x7AC1110C6D350440 //A18
1708 data8 0x7E02D0971CF84865 //A19
1709 // Polynomial coefficients for right root on [-20, -19]
1710 // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
1711 data8 0xBFAB767F9BE21803, 0xBC5ACEF5BB1BD8B5 //A0
1712 data8 0x4379999999999999, 0x4029241C7F5914C8 //A1
1713 data8 0x46F47AE147AE147A, 0x43AC2979B64B9D7E //A2
1714 data8 0xAEC33E1F67152993, 0x000040A7 //A3
1715 data8 0xD1B71758E219616F, 0x000040DF //A4
1716 data8 0x8637BD05AF6CF468, 0x00004118 //A5
1717 data8 0x55065E9F80F293DE //A6
1718 data8 0x588EADA78C44EE66 //A7
1719 data8 0x5C15798EE22DEF09 //A8
1720 data8 0x5F9E8ABFD644FA63 //A9
1721 data8 0x6325FD7FE29BD7CD //A10
1722 data8 0x66AFFC5C57E1F802 //A11
1723 data8 0x6A3774CD7D5C0181 //A12
1724 data8 0x6DC152724DE2A6FE //A13
1725 data8 0x7149BB138EB3D0C2 //A14
1726 data8 0x74D32FF8A70896C2 //A15
1727 data8 0x785D3749F9C72BD7 //A16
1728 data8 0x7BE5CCF65EBC4E40 //A17
1729 data8 0x7F641A891B5FC652 //A18
1730 data8 0x7FEFFFFFFFFFFFFF //A19
1731 LOCAL_OBJECT_END(lgammal_right_roots_polynomial_data)
1733 LOCAL_OBJECT_START(lgammal_left_roots_polynomial_data)
1734 // Polynomial coefficients for left root on [-3, -2]
1735 // Lgammal is approximated by polynomial within [.084641 ; -.059553 ] range
1736 data8 0xBC0844590979B82E, 0xB8BC7CE8CE2ECC3B //A0
1737 data8 0xBFFEA12DA904B18C, 0xBC91A6B2BAD5EF6E //A1
1738 data8 0x4023267F3C265A51, 0x3CD7055481D03AED //A2
1739 data8 0xA0C2D618645F8E00, 0x0000C003 //A3
1740 data8 0xFA8256664F8CD2BE, 0x00004004 //A4
1741 data8 0xC2C422C103F57158, 0x0000C006 //A5
1742 data8 0x4084373F7CC70AF5 //A6
1743 data8 0xC0A12239BDD6BB95 //A7
1744 data8 0x40BDBA65E2709397 //A8
1745 data8 0xC0DA2D2504DFB085 //A9
1746 data8 0x40F758173CA5BF3C //A10
1747 data8 0xC11506C65C267E72 //A11
1748 data8 0x413318EE3A6B05FC //A12
1749 data8 0xC1517767F247DA98 //A13
1750 data8 0x41701237B4754D73 //A14
1751 data8 0xC18DB8A03BC5C3D8 //A15
1752 data8 0x41AB80953AC14A07 //A16
1753 data8 0xC1C9B7B76638D0A4 //A17
1754 data8 0x41EA727E3033E2D9 //A18
1755 data8 0xC20812C297729142 //A19
1757 // Polynomial coefficients for left root on [-4, -3]
1758 // Lgammal is approximated by polynomial within [.147147 ; -.145158 ] range
1759 data8 0xBC3130AE5C4F54DB, 0xB8ED23294C13398A //A0
1760 data8 0xC034B99D966C5646, 0xBCE2E5FE3BC3DBB9 //A1
1761 data8 0x406F76DEAE0436BD, 0x3D14974DDEC057BD //A2
1762 data8 0xE929ACEA5979BE96, 0x0000C00A //A3
1763 data8 0xF47C14F8A0D52771, 0x0000400E //A4
1764 data8 0x88B7BC036937481C, 0x0000C013 //A5
1765 data8 0x4173E8F3AB9FC266 //A6
1766 data8 0xC1B7DBBE062FB11B //A7
1767 data8 0x41FD2F76DE7A47A7 //A8
1768 data8 0xC242225FE53B124D //A9
1769 data8 0x4286D12AE2FBFA30 //A10
1770 data8 0xC2CCFFC267A3C4C0 //A11
1771 data8 0x431294E10008E014 //A12
1772 data8 0xC357FAC8C9A2DF6A //A13
1773 data8 0x439F2190AB9FAE01 //A14
1774 data8 0xC3E44C1D8E8C67C3 //A15
1775 data8 0x442A8901105D5A38 //A16
1776 data8 0xC471C4421E908C3A //A17
1777 data8 0x44B92CD4D59D6D17 //A18
1778 data8 0xC4FB3A078B5247FA //A19
1779 // Polynomial coefficients for left root on [-5, -4]
1780 // Lgammal is approximated by polynomial within [.155671 ; -.155300 ] range
1781 data8 0xBC57BF3C6E8A94C1, 0xB902FB666934AC9E //A0
1782 data8 0xC05D224A3EF9E41F, 0xBCF6F5713913E440 //A1
1783 data8 0x40BB533C678A3955, 0x3D688E53E3C72538 //A2
1784 data8 0x869FBFF732E99B84, 0x0000C012 //A3
1785 data8 0xBA9537AD61392DEC, 0x00004018 //A4
1786 data8 0x89EAE8B1DEA06B05, 0x0000C01F //A5
1787 data8 0x425A8C5C53458D3C //A6
1788 data8 0xC2C5068B3ED6509B //A7
1789 data8 0x4330FFA575E99B4E //A8
1790 data8 0xC39BEC12DDDF7669 //A9
1791 data8 0x44073825725F74F9 //A10
1792 data8 0xC47380EBCA299047 //A11
1793 data8 0x44E084DD9B666437 //A12
1794 data8 0xC54C2DA6BF787ACF //A13
1795 data8 0x45B82D65C8D6FA42 //A14
1796 data8 0xC624D62113FE950A //A15
1797 data8 0x469200CC19B45016 //A16
1798 data8 0xC6FFDDC6DD938E2E //A17
1799 data8 0x476DD7C07184B9F9 //A18
1800 data8 0xC7D554A30085C052 //A19
1801 // Polynomial coefficients for left root on [-6, -5]
1802 // Lgammal is approximated by polynomial within [.157425 ; -.157360 ] range
1803 data8 0x3C9E20A87C8B79F1, 0x39488BE34B2427DB //A0
1804 data8 0xC08661F6A43A5E12, 0xBD3D912526D759CC //A1
1805 data8 0x410F79DCB794F270, 0x3DB9BEE7CD3C1BF5 //A2
1806 data8 0xEB7404450D0005DB, 0x0000C019 //A3
1807 data8 0xF7AE9846DFE4D4AB, 0x00004022 //A4
1808 data8 0x8AF535855A95B6DA, 0x0000C02C //A5
1809 data8 0x43544D54E9FE240E //A6
1810 data8 0xC3E8684E40CE6CFC //A7
1811 data8 0x447DF44C1D803454 //A8
1812 data8 0xC512AC305439B2BA //A9
1813 data8 0x45A79226AF79211A //A10
1814 data8 0xC63E0DFF7244893A //A11
1815 data8 0x46D35216C3A83AF3 //A12
1816 data8 0xC76903BE0C390E28 //A13
1817 data8 0x48004A4DECFA4FD5 //A14
1818 data8 0xC8954FBD243DB8BE //A15
1819 data8 0x492BF3A31EB18DDA //A16
1820 data8 0xC9C2C6A864521F3A //A17
1821 data8 0x4A5AB127C62E8DA1 //A18
1822 data8 0xCAECF60EF3183C57 //A19
1823 // Polynomial coefficients for left root on [-7, -6]
1824 // Lgammal is approximated by polynomial within [.157749 ; -.157739 ] range
1825 data8 0x3CC9B9E8B8D551D6, 0x3961813C8E1E10DB //A0
1826 data8 0xC0B3ABF7A5CEA91F, 0xBD55638D4BCB4CC4 //A1
1827 data8 0x4168349A25504236, 0x3E0287ECE50CCF76 //A2
1828 data8 0x9EC8ED6E4C219E67, 0x0000C022 //A3
1829 data8 0x9279EB1B799A3FF3, 0x0000402E //A4
1830 data8 0x90213EF8D9A5DBCF, 0x0000C03A //A5
1831 data8 0x4462775E857FB71C //A6
1832 data8 0xC52377E70B45FDBF //A7
1833 data8 0x45E4F3D28EDA8C28 //A8
1834 data8 0xC6A6E85571BD2D0B //A9
1835 data8 0x47695BB17E74DF74 //A10
1836 data8 0xC82C5AC0ED6A662F //A11
1837 data8 0x48EFF8159441C2E3 //A12
1838 data8 0xC9B22602C1B68AE5 //A13
1839 data8 0x4A74BA8CE7B34100 //A14
1840 data8 0xCB37C7E208482E4B //A15
1841 data8 0x4BFB5A1D57352265 //A16
1842 data8 0xCCC01CB3021212FF //A17
1843 data8 0x4D841613AC3431D1 //A18
1844 data8 0xCE431C9E9EE43AD9 //A19
1845 // Polynomial coefficients for left root on [-8, -7]
1846 // Lgammal is approximated by polynomial within [.157799 ; -.157798 ] range
1847 data8 0xBCF9C7A33AD9478C, 0xB995B0470F11E5ED //A0
1848 data8 0xC0E3AF76FE4C2F8B, 0xBD8DBCD503250511 //A1
1849 data8 0x41C838E76CAAF0D5, 0x3E5D79F5E2E069C3 //A2
1850 data8 0x9EF345992B262CE0, 0x0000C02B //A3
1851 data8 0x92AE0292985FD559, 0x0000403A //A4
1852 data8 0x90615420C08F7D8C, 0x0000C049 //A5
1853 data8 0x45828139342CEEB7 //A6
1854 data8 0xC67384066C31E2D3 //A7
1855 data8 0x476502BC4DAC2C35 //A8
1856 data8 0xC856FAADFF22ADC6 //A9
1857 data8 0x49497243255AB3CE //A10
1858 data8 0xCA3C768489520F6B //A11
1859 data8 0x4B300D1EA47AF838 //A12
1860 data8 0xCC223B0508AC620E //A13
1861 data8 0x4D14D46583338CD8 //A14
1862 data8 0xCE07E7A87AA068E4 //A15
1863 data8 0x4EFB811AD2F8BEAB //A16
1864 data8 0xCFF0351B51508523 //A17
1865 data8 0x50E4364CCBF53100 //A18
1866 data8 0xD1D33CFD0BF96FA6 //A19
1867 // Polynomial coefficients for left root on [-9, -8]
1868 // Lgammal is approximated by polynomial within [.157806 ; -.157806 ] range
1869 data8 0x3D333E4438B1B9D4, 0x39E7B956B83964C1 //A0
1870 data8 0xC11625EDFC63DCD8, 0xBDCF39625709EFAC //A1
1871 data8 0x422EA8C150480F16, 0x3EC16ED908AB7EDD //A2
1872 data8 0xE2598725E2E11646, 0x0000C034 //A3
1873 data8 0xEAFF2346DE3EBC98, 0x00004046 //A4
1874 data8 0x821E90DE12A0F05F, 0x0000C059 //A5
1875 data8 0x46B2C334AE5366FE //A6
1876 data8 0xC7D64314B43191B6 //A7
1877 data8 0x48FAF6ED5899E01B //A8
1878 data8 0xCA2096E4472AF37D //A9
1879 data8 0x4B44AAF49FB7E4C8 //A10
1880 data8 0xCC6A02469F2BD920 //A11
1881 data8 0x4D9080626D2EFC07 //A12
1882 data8 0xCEB515EDCF0695F7 //A13
1883 data8 0x4FDB1AC69BF36960 //A14
1884 data8 0xD1017F8274339270 //A15
1885 data8 0x5226A684961BAE2F //A16
1886 data8 0xD34E085C088404A5 //A17
1887 data8 0x547511892FF8960E //A18
1888 data8 0xD5968FA3B1ED67A9 //A19
1889 // Polynomial coefficients for left root on [-10, -9]
1890 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
1891 data8 0xBD355818A2B42BA2, 0xB9B7320B6A0D61EA //A0
1892 data8 0xC14BAF7DA5F3770E, 0xBDE64AF9A868F719 //A1
1893 data8 0x4297F3E8791F9CD3, 0x3F2A553E59B4835E //A2
1894 data8 0xDD0C5F7E551BD13C, 0x0000C03E //A3
1895 data8 0x8F6F0A3B2EB08BBB, 0x00004054 //A4
1896 data8 0xC68D4D5AD230BA08, 0x0000C069 //A5
1897 data8 0x47F1E4D8C35D1A3E //A6
1898 data8 0xC94A8A191DB0A466 //A7
1899 data8 0x4AA4174F65FE6AE8 //A8
1900 data8 0xCBFEE6D90F94E9DD //A9
1901 data8 0x4D580FD3438BE16C //A10
1902 data8 0xCEB2ECD456D50224 //A11
1903 data8 0x500E049F7FE64546 //A12
1904 data8 0xD167F92D9600F378 //A13
1905 data8 0x52C342AE2B43261A //A14
1906 data8 0xD41F15DEEDA4B67E //A15
1907 data8 0x55792638748AFB7D //A16
1908 data8 0xD6D4D760074F6E6B //A17
1909 data8 0x5832469D58ED3FA9 //A18
1910 data8 0xD988769F3DC76642 //A19
1911 // Polynomial coefficients for left root on [-11, -10]
1912 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
1913 data8 0xBDA050601F39778A, 0xBA0D4D1CE53E8241 //A0
1914 data8 0xC18308A7D8EA4039, 0xBE370C379D3EAD41 //A1
1915 data8 0x4306A49380644E6C, 0x3FBBB143C0E7B5C8 //A2
1916 data8 0x8FA8FB233E4AA6D2, 0x0000C049 //A3
1917 data8 0x802CC9D8AEAC207D, 0x00004062 //A4
1918 data8 0xF3F73EE651A37A13, 0x0000C07A //A5
1919 data8 0x493E3B550A7B9568 //A6
1920 data8 0xCACED38DAA060929 //A7
1921 data8 0x4C600B346BAB3BC6 //A8
1922 data8 0xCDF0F719193E3D26 //A9
1923 data8 0x4F8229F24528B151 //A10
1924 data8 0xD113A4C2D32FBBE2 //A11
1925 data8 0x52A56BC13DC4474D //A12
1926 data8 0xD43785CFAF5E3CE3 //A13
1927 data8 0x55C9FC3EA5941202 //A14
1928 data8 0xD75CD545A3341AF5 //A15
1929 data8 0x58F009911F77C282 //A16
1930 data8 0xDA8246294D210BEC //A17
1931 data8 0x5C1608AAC32C3A8E //A18
1932 data8 0xDDA446E570A397D5 //A19
1933 // Polynomial coefficients for left root on [-12, -11]
1934 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
1935 data8 0x3DEACBB3081C502E, 0x3A8AA6F01DEDF745 //A0
1936 data8 0xC1BC8CFBFB0A9912, 0xBE6556B6504A2AE6 //A1
1937 data8 0x43797926206941D7, 0x40289A9644C2A216 //A2
1938 data8 0xF26D2A78446D0839, 0x0000C053 //A3
1939 data8 0xA238B1D937FFED38, 0x00004070 //A4
1940 data8 0xE793B4F6DE470538, 0x0000C08C //A5
1941 data8 0x4A9585BDC44DC45D //A6
1942 data8 0xCC60759520342C47 //A7
1943 data8 0x4E29B2F3694C0404 //A8
1944 data8 0xCFF4619AE7B6BBAB //A9
1945 data8 0x51C05DADF52B89E8 //A10
1946 data8 0xD38A8C7F48819A4A //A11
1947 data8 0x5555B6932D687860 //A12
1948 data8 0xD721E1FACB6C1B5B //A13
1949 data8 0x58EDA1E2677C8F91 //A14
1950 data8 0xDAB8A8EC523C1F71 //A15
1951 data8 0x5C84930133F30411 //A16
1952 data8 0xDE51952FDFD1EC49 //A17
1953 data8 0x601FCCEC1BBD25F1 //A18
1954 data8 0xE1E5F2D76B610920 //A19
1955 // Polynomial coefficients for left root on [-13, -12]
1956 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
1957 data8 0xBE01612F373268ED, 0xBA97B7A18CDF103B //A0
1958 data8 0xC1F7328CBF7A4FAC, 0xBE89A25A6952F481 //A1
1959 data8 0x43F0D0FA2DBDA237, 0x40A0422EC1CE6084 //A2
1960 data8 0x82082DF2D32686C5, 0x0000C05F //A3
1961 data8 0x8D64EE9B42E68B36, 0x0000407F //A4
1962 data8 0xA3FFD82E08C630C9, 0x0000C09F //A5
1963 data8 0x4BF8C49D99123466 //A6
1964 data8 0xCDFEC79DDF1119ED //A7
1965 data8 0x50038615A892D242 //A8
1966 data8 0xD20929453DC8B537 //A9
1967 data8 0x54106A78083BA1EE //A10
1968 data8 0xD615A302C69E27B2 //A11
1969 data8 0x581CC175870FF16F //A12
1970 data8 0xDA233E0979E12B74 //A13
1971 data8 0x5C29E822BC568C80 //A14
1972 data8 0xDE31845DB5340FBC //A15
1973 data8 0x6037BFC6D498D5F9 //A16
1974 data8 0xE2407D92CD613E82 //A17
1975 data8 0x64483B9B62367EB7 //A18
1976 data8 0xE64B2DC830E8A799 //A1
1977 // Polynomial coefficients for left root on [-14, -13]
1978 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
1979 data8 0x3E563D0B930B371F, 0x3AE779957E14F012 //A0
1980 data8 0xC2344C3B2F083767, 0xBEC0B7769AA3DD66 //A1
1981 data8 0x4469BFFF28B50D07, 0x41181E3F13ED2401 //A2
1982 data8 0xAE38F64DCB24D9EE, 0x0000C06A //A3
1983 data8 0xA5C3F52C1B3506F2, 0x0000408E //A4
1984 data8 0xA83BC857BCD6BA92, 0x0000C0B2 //A5
1985 data8 0x4D663B4727B4D81A //A6
1986 data8 0xCFA82C965B0F62E9 //A7
1987 data8 0x51EAD58C02905B71 //A8
1988 data8 0xD42E427970FA56AD //A9
1989 data8 0x56714644C57D8476 //A10
1990 data8 0xD8B3EC2037EC95F2 //A11
1991 data8 0x5AF72AE68BBA5B3D //A12
1992 data8 0xDD3B2152C67AA6B7 //A13
1993 data8 0x5F7FF5F082861B8B //A14
1994 data8 0xE1C2E8BE125A5B7A //A15
1995 data8 0x64066E92FE9EBE7D //A16
1996 data8 0xE64B4201CDF9F138 //A17
1997 data8 0x689186351E58AA88 //A18
1998 data8 0xEAD132A585DFC60A //A19
1999 // Polynomial coefficients for left root on [-15, -14]
2000 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
2001 data8 0xBE6D7DDE12700AC1, 0xBB1E025BF1667FB5 //A0
2002 data8 0xC273077763F60AD5, 0xBF2A1698184C7A9A //A1
2003 data8 0x44E6A1BF095B1AB3, 0x4178D5AE8A4A2874 //A2
2004 data8 0x8F8E0D5060FCC767, 0x0000C076 //A3
2005 data8 0x800CC1DCFF092A57, 0x0000409E //A4
2006 data8 0xF3AB0BA9D14D37D1, 0x0000C0C5 //A5
2007 data8 0x4EDE3000A2F6D565 //A6
2008 data8 0xD15EC613B9C8C800 //A7
2009 data8 0x53E003309FEECCAA //A8
2010 data8 0xD660ED908D8B15C4 //A9
2011 data8 0x58E21E9B51A1C4AE //A10
2012 data8 0xDB639745DB82210D //A11
2013 data8 0x5DE55BB60C68FCF6 //A12
2014 data8 0xE06772BA3FCA23C6 //A13
2015 data8 0x62E9E58B4F702C31 //A14
2016 data8 0xE56CBA49B071ABE2 //A15
2017 data8 0x67EFF31E4F2BA36A //A16
2018 data8 0xEA7232C8804F32C3 //A17
2019 data8 0x6CF5EFEE929A0928 //A18
2020 data8 0xEF742EE03EC3E8FF //A19
2021 // Polynomial coefficients for left root on [-16, -15]
2022 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
2023 data8 0xBEDCC628FEAC7A1B, 0xBB80582C8BEBB198 //A0
2024 data8 0xC2B3076EE752595E, 0xBF5388F55AFAE53E //A1
2025 data8 0x4566A1AAD96EBCB5, 0x421F0FEFE2444293 //A2
2026 data8 0x8F8D4D3DE9850DB2, 0x0000C082 //A3
2027 data8 0x800BDD6DA2CE184C, 0x000040AE //A4
2028 data8 0xF3A8EC4C9CDC7A43, 0x0000C0D9 //A5
2029 data8 0x505E2FAFDB81263F //A6
2030 data8 0xD31EC5B3A7506CD9 //A7
2031 data8 0x55E002F77E999810 //A8
2032 data8 0xD8A0ED4C9B5C2900 //A9
2033 data8 0x5B621E4A8267C401 //A10
2034 data8 0xDE2396E5BFCFDA7A //A11
2035 data8 0x60E55B43BE6F9A79 //A12
2036 data8 0xE3A772324C7405FA //A13
2037 data8 0x6669E4E9B7E57A2D //A14
2038 data8 0xE92CB989F8A8FB37 //A15
2039 data8 0x6BEFF2368849A36E //A16
2040 data8 0xEEB23234FE191D55 //A17
2041 data8 0x7175EF5D1080B105 //A18
2042 data8 0xF4342ED7B1B7BE31 //A19
2043 // Polynomial coefficients for left root on [-17, -16]
2044 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
2045 data8 0xBF28AEEE7B58C790, 0xBBC4448DE371FA0A //A0
2046 data8 0xC2F436F56B3B89B1, 0xBF636755245AC63A //A1
2047 data8 0x45E98A22535D115D, 0x4298467DA93DB784 //A2
2048 data8 0xAC176F3775E6FCF2, 0x0000C08E //A3
2049 data8 0xA3114F53A9FEB908, 0x000040BE //A4
2050 data8 0xA4D168A8334AFE5A, 0x0000C0EE //A5
2051 data8 0x51E5B0E7EC7182CF //A6
2052 data8 0xD4E77D67B876D6B4 //A7
2053 data8 0x57E9F7C30C098C83 //A8
2054 data8 0xDAED29B0489EF7A7 //A9
2055 data8 0x5DF09486F8A524B8 //A10
2056 data8 0xE0F30B19910A2393 //A11
2057 data8 0x63F60E02AB3109F4 //A12
2058 data8 0xE6F9B8A3431854D5 //A13
2059 data8 0x69FE2D4A6D94218E //A14
2060 data8 0xED01C7E272A73560 //A15
2061 data8 0x7005017D82B186B6 //A16
2062 data8 0xF3096A81A69BD8AE //A17
2063 data8 0x76104951BAD67D5C //A18
2064 data8 0xF90FECC99786FD5B //A19
2065 // Polynomial coefficients for left root on [-18, -17]
2066 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
2067 data8 0x3F3C19A53328E26A, 0x3BE238D7BA036B3B //A0
2068 data8 0xC336C16C16C16C13, 0xBFEACE245DEC56F3 //A1
2069 data8 0x46702E85C0898B70, 0x432C922B64FD1DA4 //A2
2070 data8 0xF57B99A1C0343350, 0x0000C09A //A3
2071 data8 0x82EC9634223DF90D, 0x000040CF //A4
2072 data8 0x94F66D7557E3237D, 0x0000C103 //A5
2073 data8 0x5376118B79AE34D6 //A6
2074 data8 0xD6BAE7106D52CE49 //A7
2075 data8 0x5A00BD48CC8E11AB //A8
2076 data8 0xDD4529722833E2DF //A9
2077 data8 0x608B1654AF5F46AF //A10
2078 data8 0xE3D182CC90D8723F //A11
2079 data8 0x6716D43D46706AA0 //A12
2080 data8 0xEA5DF888C5B428D3 //A13
2081 data8 0x6DA3CA85888931A6 //A14
2082 data8 0xF0EA40EF2AC7E070 //A15
2083 data8 0x743175D1A251AFCD //A16
2084 data8 0xF777CB6E2B550D73 //A17
2085 data8 0x7AC11E468A134A51 //A18
2086 data8 0xFE02B6BDD0FC40AA //A19
2087 // Polynomial coefficients for left root on [-19, -18]
2088 // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
2089 data8 0xBFAB767F9BE217FC, 0xBC4A5541CE0D8D0D //A0
2090 data8 0xC379999999999999, 0xC01A84981B490BE8 //A1
2091 data8 0x46F47AE147AE147A, 0x43AC2987BBC466EB //A2
2092 data8 0xAEC33E1F67152987, 0x0000C0A7 //A3
2093 data8 0xD1B71758E2196153, 0x000040DF //A4
2094 data8 0x8637BD05AF6D420E, 0x0000C118 //A5
2095 data8 0x55065E9F80F293B2 //A6
2096 data8 0xD88EADA78C44BFA7 //A7
2097 data8 0x5C15798EE22EC6CD //A8
2098 data8 0xDF9E8ABFD67895CF //A9
2099 data8 0x6325FD7FE13B0DE0 //A10
2100 data8 0xE6AFFC5C3DE70858 //A11
2101 data8 0x6A3774CE81C70D43 //A12
2102 data8 0xEDC1527412D8129F //A13
2103 data8 0x7149BABCDA8B7A72 //A14
2104 data8 0xF4D330AD49071BB5 //A15
2105 data8 0x785D4046F4C5F1FD //A16
2106 data8 0xFBE59BFEDBA73FAF //A17
2107 data8 0x7F64BEF2B2EC8DA1 //A18
2108 data8 0xFFEFFFFFFFFFFFFF //A19
2109 LOCAL_OBJECT_END(lgammal_left_roots_polynomial_data)
2112 //==============================================================
2114 //==============================================================
2117 GLOBAL_LIBM_ENTRY(__libm_lgammal)
2119 getf.exp rSignExpX = f8
2120 // Test x for NaTVal, NaN, +/-0, +/-INF, denormals
2121 fclass.m p6,p0 = f8,0x1EF
2122 addl r17Ones = 0x1FFFF, r0 // exponent mask
2125 addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp
2126 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR)
2127 adds rDelta = 0x3FC, r0
2131 getf.sig rSignifX = f8
2132 fcmp.lt.s1 p15, p14 = f8, f0
2133 shl rDelta = rDelta, 20 // single precision 1.5
2136 ld8 GR_ad_z_1 = [GR_ad_z_1]// get pointer to Constants_Z_1
2137 fma.s1 fTwo = f1, f1, f1 // 2.0
2138 addl rExp8 = 0x10002, r0 // exponent of 8.0
2142 alloc rPFS_SAVED = ar.pfs, 0, 34, 4, 0 // get some registers
2143 fmerge.s fAbsX = f1, f8 // |x|
2144 and rExpX = rSignExpX, r17Ones // mask sign bit
2147 addl rExpHalf = 0xFFFE, r0 // exponent of 0.5
2148 addl rExp2 = 0x10000, r0 // exponent of 2.0
2149 // branch out if x is NaTVal, NaN, +/-0, +/-INF, or denormalized number
2150 (p6) br.cond.spnt lgammal_spec
2153 _deno_back_to_main_path:
2155 // Point to Constants_G_H_h1
2156 add rTbl1Addr = 0x040, GR_ad_z_1
2157 frcpa.s1 fRcpX, p0 = f1, f8 // initial approximation of 1/x
2158 extr.u GR_Index1 = rSignifX, 59, 4
2161 (p14) cmp.ge.unc p8, p0 = rExpX, rExp8 // p8 = 1 if x >= 8.0
2162 adds rZ625 = 0x3F2, r0
2163 (p8) br.cond.spnt lgammal_big_positive // branch out if x >= 8.0
2167 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
2168 fmerge.se fSignifX = f1, f8 // sifnificand of x
2169 // Get high 15 bits of significand
2170 extr.u GR_X_0 = rSignifX, 49, 15
2173 cmp.lt.unc p9, p0 = rExpX, rExpHalf // p9 = 1 if |x| < 0.5
2174 // set p11 if 2 <= x < 4
2175 (p14) cmp.eq.unc p11, p0 = rExpX, rExp2
2176 (p9) br.cond.spnt lgammal_0_half // branch out if |x| < 0.5
2180 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
2181 fms.s1 fA5L = f1, f1, f8 // for 0.75 <= x < 1.3125 path
2182 shl rZ625 = rZ625, 20 // sinfle precision 0.625
2185 setf.s FR_MHalf = rDelta
2186 // set p10 if x >= 4.0
2187 (p14) cmp.gt.unc p10, p0 = rExpX, rExp2
2188 // branch to special path for 4.0 <= x < 8
2189 (p10) br.cond.spnt lgammal_4_8
2193 // for 1.3125 <= x < 1.5625 path
2194 addl rPolDataPtr= @ltoff(lgammal_loc_min_data),gp
2195 // argument of polynomial approximation for 1.5625 <= x < 2.25
2196 fms.s1 fB4 = f8, f1, fTwo
2197 cmp.eq p12, p0 = rExpX, rExpHalf
2200 addl rExpOne = 0xFFFF, r0 // exponent of 1.0
2201 // set p10 if significand of x >= 1.125
2202 (p11) cmp.le p11, p0 = 2, GR_Index1
2203 (p11) br.cond.spnt lgammal_2Q_4
2207 // point to xMin for 1.3125 <= x < 1.5625 path
2208 ld8 rPolDataPtr = [rPolDataPtr]
2209 fcvt.xf fFltIntX = fXint // RTN(x)
2210 (p14) cmp.eq.unc p13, p7 = rExpX, rExpOne // p13 set if 1.0 <= x < 2.0
2213 setf.s FR_FracX = rZ625
2214 // set p12 if |x| < 0.75
2215 (p12) cmp.gt.unc p12, p0 = 8, GR_Index1
2216 // branch out to special path for |x| < 0.75
2217 (p12) br.cond.spnt lgammal_half_3Q
2220 .pred.rel "mutex", p7, p13
2222 getf.sig rXRnd = fXint // integer part of the input value
2223 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration
2224 // Get bits 30-15 of X_0 * Z_1
2225 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
2228 (p7) cmp.eq p6, p0 = rExpX, rExp2 // p6 set if 2.0 <= x < 2.25
2229 (p13) cmp.le p6, p0 = 9, GR_Index1
2230 // branch to special path 1.5625 <= x < 2.25
2231 (p6) br.cond.spnt lgammal_13Q_2Q
2235 // For performance, don't use result of pmpyshr2.u for 4 cycles.
2238 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr // Point to G_1
2239 fma.s1 fSix = fTwo, fTwo, fTwo // 6.0
2240 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
2243 add rTmpPtr3 = -0x50, GR_ad_z_1
2244 (p13) cmp.gt p7, p0 = 5, GR_Index1
2245 // branch to special path 0.75 <= x < 1.3125
2246 (p7) br.cond.spnt lgammal_03Q_1Q
2250 add rTmpPtr = 8, GR_ad_tbl_1
2251 fma.s1 fRoot = f8, f1, f1 // x + 1
2252 // Absolute value of int arg. Will be used as index in table with roots
2253 sub rXRnd = r0, rXRnd
2256 ldfe fA5L = [rPolDataPtr], 16 // xMin
2257 addl rNegSingularity = 0x3003E, r0
2258 (p14) br.cond.spnt lgammal_loc_min
2262 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1
2264 add rZ2Addr = 0x140, GR_ad_z_1 // Point to Constants_Z_2
2267 ldfd FR_h = [rTmpPtr] // Load h_1
2268 // If arg is less or equal to -2^63
2269 cmp.geu.unc p8,p0 = rSignExpX, rNegSingularity
2270 // Singularity for x < -2^63 since all such arguments are integers
2271 // branch to special code which deals with singularity
2272 (p8) br.cond.spnt lgammal_singularity
2276 ldfe FR_log2_hi = [GR_ad_q], 32 // Load log2_hi
2278 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
2281 ldfe FR_log2_lo = [rTmpPtr3], 32 // Load log2_lo
2282 fms.s1 fDx = f8, f1, fFltIntX // x - RTN(x)
2283 // index in table with roots and bounds
2284 adds rXint = -2, rXRnd
2288 ldfe FR_Q4 = [GR_ad_q], 32 // Load Q4
2290 // set p12 if x may be close to negative root: -19.5 < x < -2.0
2291 cmp.gtu p12, p0 = 18, rXint
2294 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
2295 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration
2296 // Point to Constants_G_H_h2
2297 add rTbl2Addr = 0x180, GR_ad_z_1
2301 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
2302 // set p9 if x is integer and negative
2303 fcmp.eq.s1 p9, p0 = f8,fFltIntX
2304 // Point to Constants_G_H_h3
2305 add rTbl3Addr = 0x280, GR_ad_z_1
2308 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
2310 sub GR_N = rExpX, rExpHalf, 1
2314 ldfe FR_Q3 = [rTmpPtr3], 32 // Load Q3
2316 // Point to lnsin polynomial coefficients
2317 adds rLnSinDataPtr = 864, rTbl3Addr
2320 ldfe FR_Q2 = [GR_ad_q],32 // Load Q2
2322 add rTmpPtr = 8, GR_ad_tbl_2
2326 ldfe FR_Q1 = [rTmpPtr3] // Load Q1
2327 fcmp.lt.s1 p0, p15 = fAbsX, fSix // p15 is set when x < -6.0
2328 // point to table with roots and bounds
2329 adds rRootsBndAddr = -1296, GR_ad_z_1
2332 // Put integer N into rightmost significand
2333 setf.sig fFloatN = GR_N
2334 fma.s1 fThirteen = fSix, fTwo, f1 // 13.0
2335 // Singularity if -2^63 < x < 0 and x is integer
2336 // branch to special code which deals with singularity
2337 (p9) br.cond.spnt lgammal_singularity
2341 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2
2342 // y = |x|/2^(exponent(x)) - 1.5
2343 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf
2344 // Get bits 30-15 of X_1 * Z_2
2345 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
2348 ldfd FR_h2 = [rTmpPtr] // Load h_2
2349 fma.s1 fDxSqr = fDx, fDx, f0 // deltaX^2
2350 adds rTmpPtr3 = 128, rLnSinDataPtr
2354 // For performance, don't use result of pmpyshr2.u for 4 cycles.
2357 getf.exp rRoot = fRoot // sign and biased exponent of (x + 1)
2359 // set p6 if -4 < x <= -2
2360 cmp.eq p6, p0 = rExpX, rExp2
2363 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16
2364 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration
2365 sub rIndexPol = rExpX, rExpHalf // index of polynom
2369 ldfe fLnSin4 = [rLnSinDataPtr], 96
2370 // p10 is set if x is potential "right" root
2371 // p11 set for possible "left" root
2372 fcmp.lt.s1 p10, p11 = fDx, f0
2373 shl rIndexPol = rIndexPol, 6 // (i*16)*4
2376 ldfpd fLnSin18, fLnSin20 = [rTmpPtr3], 16
2378 mov rExp2tom7 = 0x0fff8 // Exponent of 2^-7
2382 getf.sig rSignifDx = fDx // Get significand of RTN(x)
2384 // set p6 if -4 < x <= -3.0
2385 (p6) cmp.le.unc p6, p0 = 0x8, GR_Index1
2388 ldfpd fLnSin22, fLnSin24 = [rTmpPtr3], 16
2390 // mask sign bit in the exponent of (x + 1)
2391 and rRoot = rRoot, r17Ones
2395 ldfe fLnSin16 = [rLnSinDataPtr], -80
2397 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
2400 ldfpd fLnSin26, fLnSin28 = [rTmpPtr3], 16
2402 and rXRnd = 1, rXRnd
2406 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
2407 fms.s1 fDxSqrL = fDx, fDx, fDxSqr // low part of deltaX^2
2408 // potential "left" root
2409 (p11) adds rRootsBndAddr = 560, rRootsBndAddr
2412 ldfpd fLnSin30, fLnSin32 = [rTmpPtr3], 16
2413 // set p7 if |x+1| < 2^-7
2414 cmp.lt p7, p0 = rRoot, rExp2tom7
2415 // branch to special path for |x+1| < 2^-7
2416 (p7) br.cond.spnt _closeToNegOne
2420 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
2421 fcmp.lt.s1 p14, p0 = fAbsX, fThirteen // set p14 if x > -13.0
2422 // base address of polynomial on range [-6.0, -0.75]
2423 adds rPolDataPtr = 3440, rTbl3Addr
2426 // (i*16)*4 + (i*16)*8 - offsett of polynomial on range [-6.0, -0.75]
2427 shladd rTmpPtr = rIndexPol, 2, rIndexPol
2428 fma.s1 fXSqr = FR_FracX, FR_FracX, f0 // y^2
2429 // point to left "near root" bound
2430 (p12) shladd rRootsBndAddr = rXint, 4, rRootsBndAddr
2434 ldfpd fLnSin34, fLnSin36 = [rTmpPtr3], 16
2435 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration
2436 // add special offsett if -4 < x <= -3.0
2437 (p6) adds rPolDataPtr = 640, rPolDataPtr
2440 // point to right "near root" bound
2441 adds rTmpPtr2 = 8, rRootsBndAddr
2442 fnma.s1 fMOne = f1, f1, f0 // -1.0
2443 // Point to Bernulli numbers
2444 adds rBernulliPtr = 544, rTbl3Addr
2448 // left bound of "near root" range
2449 (p12) ld8 rLeftBound = [rRootsBndAddr]
2450 fmerge.se fNormDx = f1, fDx // significand of DeltaX
2451 // base address + offsett for polynomial coeff. on range [-6.0, -0.75]
2452 add rPolDataPtr = rPolDataPtr, rTmpPtr
2455 // right bound of "near root" range
2456 (p12) ld8 rRightBound = [rTmpPtr2]
2457 fcvt.xf fFloatN = fFloatN
2458 // special "Bernulli" numbers for Stirling's formula for -13 < x < -6
2459 (p14) adds rBernulliPtr = 160, rBernulliPtr
2463 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
2464 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
2465 adds rTmpPtr3 = -160, rTmpPtr3
2468 adds rTmpPtr = 80, rPolDataPtr
2469 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
2470 // p15 is set if -2^63 < x < 6.0 and x is not an integer
2471 // branch to path with implementation using Stirling's formula for neg. x
2472 (p15) br.cond.spnt _negStirling
2476 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3
2477 fma.s1 fDelX4 = fDxSqr, fDxSqr, f0 // deltaX^4
2478 // Get high 4 bits of signif
2479 extr.u rIndex1Dx = rSignifDx, 59, 4
2482 ldfe fA5 = [rTmpPtr], -16 // A5
2483 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
2484 adds rLnSinTmpPtr = 16, rLnSinDataPtr
2488 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
2489 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin18
2490 // Get high 15 bits of significand
2491 extr.u rX0Dx = rSignifDx, 49, 15
2494 ldfe fA4 = [rTmpPtr], 192 // A4
2495 fms.s1 fXSqrL = FR_FracX, FR_FracX, fXSqr // low part of y^2
2496 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
2500 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
2501 fma.s1 fX4 = fXSqr, fXSqr, f0 // y^4
2502 adds rTmpPtr2 = 32, rTmpPtr
2505 ldfpd fA18, fA19 = [rTmpPtr], 16 // A18, A19
2506 fma.s1 fLnSin24 = fLnSin24, fDxSqr, fLnSin22
2511 ldfe fLnSin6 = [rLnSinDataPtr], 32
2512 fma.s1 fLnSin28 = fLnSin28, fDxSqr, fLnSin26
2516 ldfe fLnSin8 = [rLnSinTmpPtr], 32
2522 ldfpd fA20, fA21 = [rTmpPtr], 16 // A20, A21
2523 fma.s1 fLnSin32 = fLnSin32, fDxSqr, fLnSin30
2527 ldfpd fA22, fA23 = [rTmpPtr2], 16 // A22, A23
2528 fma.s1 fB20 = f1, f1, FR_MHalf // 2.5
2529 (p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound
2533 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
2534 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
2535 // set p6 if x falls in "near root" range
2536 (p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound
2539 adds rTmpPtr3 = -64, rTmpPtr
2540 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
2541 // branch to special path if x falls in "near root" range
2542 (p6) br.cond.spnt _negRoots
2546 ldfpd fA24, fA25 = [rTmpPtr2], 16 // A24, A25
2547 fma.s1 fLnSin36 = fLnSin36, fDxSqr, fLnSin34
2548 (p11) cmp.eq.unc p7, p0 = 1,rXint // p7 set if -3.0 < x < -2.5
2551 adds rTmpPtr = -48, rTmpPtr
2552 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin16
2553 addl rDelta = 0x5338, r0 // significand of -2.605859375
2557 getf.exp GR_N = fDx // Get N = exponent of DeltaX
2558 fma.s1 fX6 = fX4, fXSqr, f0 // y^6
2559 // p7 set if -2.605859375 <= x < -2.5
2560 (p7) cmp.gt.unc p7, p0 = rDelta, GR_X_0
2563 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
2564 fma.s1 fDelX8 = fDelX4, fDelX4, f0 // deltaX^8
2565 // branch to special path for -2.605859375 <= x < -2.5
2566 (p7) br.cond.spnt _neg2andHalf
2570 ldfpd fA14, fA15 = [rTmpPtr3], 16 // A14, A15
2571 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
2572 adds rTmpPtr2 = 128 , rPolDataPtr
2575 ldfpd fA16, fA17 = [rTmpPtr], 16 // A16, A17
2576 fma.s1 fLnSin28 = fLnSin28, fDelX4, fLnSin24
2577 adds rPolDataPtr = 144 , rPolDataPtr
2581 ldfe fLnSin10 = [rLnSinDataPtr], 32
2582 fma.s1 fRes1H = fA3, FR_FracX, f0 // (A3*y)hi
2583 and GR_N = GR_N, r17Ones // mask sign bit
2586 ldfe fLnSin12 = [rLnSinTmpPtr]
2587 fma.s1 fDelX6 = fDxSqr, fDelX4, f0 // DeltaX^6
2588 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
2592 ldfe fA13 = [rPolDataPtr], -32 // A13
2593 fma.s1 fA4 = fA5, FR_FracX, fA4 // A5*y + A4
2594 // Get bits 30-15 of X_0 * Z_1
2595 pmpyshr2.u GR_X_1 = rX0Dx, GR_Z_1, 15
2598 ldfe fA12 = [rTmpPtr2], -32 // A12
2599 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
2600 sub GR_N = GR_N, rExpHalf, 1 // unbisaed exponent of DeltaX
2604 // For performance, don't use result of pmpyshr2.u for 4 cycles.
2606 .pred.rel "mutex",p10,p11
2608 ldfe fA11 = [rPolDataPtr], -32 // A11
2609 // High part of log(|x|) = Y_hi = N * log2_hi + H
2610 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H
2611 (p10) cmp.eq p8, p9 = rXRnd, r0
2614 ldfe fA10 = [rTmpPtr2], -32 // A10
2615 fma.s1 fRes6H = fA1, FR_FracX, f0 // (A1*y)hi
2616 (p11) cmp.eq p9, p8 = rXRnd, r0
2620 ldfe fA9 = [rPolDataPtr], -32 // A9
2621 fma.s1 fB14 = fLnSin6, fDxSqr, f0 // (LnSin6*deltaX^2)hi
2622 cmp.eq p6, p7 = 4, rSgnGamSize
2625 ldfe fA8 = [rTmpPtr2], -32 // A8
2626 fma.s1 fA18 = fA19, FR_FracX, fA18
2631 ldfe fA7 = [rPolDataPtr] // A7
2632 fma.s1 fA23 = fA23, FR_FracX, fA22
2636 ldfe fA6 = [rTmpPtr2] // A6
2637 fma.s1 fA21 = fA21, FR_FracX, fA20
2642 ldfe fLnSin14 = [rLnSinDataPtr]
2643 fms.s1 fRes1L = fA3, FR_FracX, fRes1H // delta((A3*y)hi)
2644 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
2647 setf.sig fFloatNDx = GR_N
2648 fadd.s1 fPol = fRes1H, fA2 // (A3*y + A2)hi
2653 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1
2654 fma.s1 fRes2H = fA4, fXSqr, f0 // ((A5 + A4*y)*y^2)hi
2658 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
2659 fma.s1 fA25 = fA25, FR_FracX, fA24
2660 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
2663 .pred.rel "mutex",p8,p9
2665 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
2666 fms.s1 fRes6L = fA1, FR_FracX, fRes6H // delta((A1*y)hi)
2667 // sign of GAMMA(x) is negative
2668 (p8) adds rSgnGam = -1, r0
2671 adds rTmpPtr = 8, GR_ad_tbl_2
2672 fadd.s1 fRes3H = fRes6H, fA0 // (A1*y + A0)hi
2673 // sign of GAMMA(x) is positive
2674 (p9) adds rSgnGam = 1, r0
2678 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2
2679 // (LnSin6*deltaX^2 + LnSin4)hi
2680 fadd.s1 fLnSinH = fB14, fLnSin4
2684 ldfd FR_h2 = [rTmpPtr] // Load h_2
2685 fms.s1 fB16 = fLnSin6, fDxSqr, fB14 // delta(LnSin6*deltaX^2)
2690 ldfd fhDelX = [GR_ad_tbl_1] // Load h_1
2691 fma.s1 fA21 = fA21, fXSqr, fA18
2696 fma.s1 fLnSin36 = fLnSin36, fDelX4, fLnSin32
2702 fma.s1 fRes1L = fA3L, FR_FracX, fRes1L // (A3*y)lo
2703 // Get bits 30-15 of X_1 * Z_
2704 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
2708 fsub.s1 fPolL = fA2, fPol
2713 // For performance, don't use result of pmpyshr2.u for 4 cycles.
2717 // delta(((A5 + A4*y)*y^2)hi)
2718 fms.s1 fRes2L = fA4, fXSqr, fRes2H
2723 // (((A5 + A4*y)*y^2) + A3*y + A2)hi
2724 fadd.s1 fRes4H = fRes2H, fPol
2729 // store signgam if size of variable is 4 bytes
2730 (p6) st4 [rSgnGamAddr] = rSgnGam
2731 fma.s1 fRes6L = fA1L, FR_FracX, fRes6L // (A1*y)lo
2735 // store signgam if size of variable is 8 bytes
2736 (p7) st8 [rSgnGamAddr] = rSgnGam
2737 fsub.s1 fRes3L = fA0, fRes3H
2743 fsub.s1 fLnSinL = fLnSin4, fLnSinH
2748 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)hi
2749 fma.s1 fB18 = fLnSinH, fDxSqr, f0
2754 adds rTmpPtr = 8, rTbl3Addr
2755 fma.s1 fB16 = fLnSin6, fDxSqrL, fB16 // (LnSin6*deltaX^2)lo
2756 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
2760 fma.s1 fA25 = fA25, fXSqr, fA23
2765 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
2766 fadd.s1 fPolL = fPolL, fRes1H
2770 shladd rTmpPtr = GR_Index3, 4, rTmpPtr // Point to G_3
2771 fadd.s1 fRes1L = fRes1L, fA2L // (A3*y)lo + A2lo
2776 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3] // Load G_3, H_3
2777 fma.s1 fRes2L = fA4, fXSqrL, fRes2L // ((A5 + A4*y)*y^2)lo
2781 ldfd FR_h3 = [rTmpPtr] // Load h_3
2782 fsub.s1 fRes4L = fPol, fRes4H
2788 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)hi
2789 fma.s1 fRes7H = fRes4H, fXSqr, f0
2794 fma.s1 fA15 = fA15, FR_FracX, fA14
2800 fadd.s1 fRes3L = fRes3L, fRes6H
2805 fadd.s1 fRes6L = fRes6L, fA0L // (A1*y)lo + A0lo
2811 fadd.s1 fLnSinL = fLnSinL, fB14
2817 // delta((LnSin6*deltaX^2 + LnSin4)*deltaX^2)
2818 fms.s1 fB20 = fLnSinH, fDxSqr, fB18
2824 fadd.s1 fPolL = fPolL, fRes1L // (A3*y + A2)lo
2830 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)hi
2831 fadd.s1 fLnSin6 = fB18, fLnSin2
2837 fadd.s1 fRes4L = fRes4L, fRes2H
2842 fma.s1 fA17 = fA17, FR_FracX, fA16
2848 // delta(((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)
2849 fms.s1 fRes7L = fRes4H, fXSqr, fRes7H
2854 fadd.s1 fPol = fRes7H, fRes3H
2860 fadd.s1 fRes3L = fRes3L, fRes6L // (A1*y + A0)lo
2865 fma.s1 fA25 = fA25, fX4, fA21
2871 // (LnSin6*deltaX^2 + LnSin4)lo
2872 fadd.s1 fLnSinL = fLnSinL, fB16
2877 fma.s1 fB20 = fLnSinH, fDxSqrL, fB20
2883 fsub.s1 fLnSin4 = fLnSin2, fLnSin6
2888 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)hi
2889 fma.s1 fLnSinH = fLnSin6, fDxSqr, f0
2895 // ((A5 + A4*y)*y^2)lo + (A3*y + A2)lo
2896 fadd.s1 fRes2L = fRes2L, fPolL
2901 fma.s1 fA17 = fA17, fXSqr, fA15
2907 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo
2908 fma.s1 fRes7L = fRes4H, fXSqrL, fRes7L
2913 fsub.s1 fPolL = fRes3H, fPol
2919 fma.s1 fA13 = fA13, FR_FracX, fA12
2924 fma.s1 fA11 = fA11, FR_FracX, fA10
2930 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo
2931 fma.s1 fB20 = fLnSinL, fDxSqr, fB20
2936 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
2942 fadd.s1 fLnSin4 = fLnSin4, fB18
2947 fms.s1 fLnSinL = fLnSin6, fDxSqr, fLnSinH
2953 // (((A5 + A4*y)*y^2) + A3*y + A2)lo
2954 fadd.s1 fRes4L = fRes4L, fRes2L
2959 fadd.s1 fhDelX = fhDelX, FR_h2 // h = h_1 + h_2
2965 fadd.s1 fRes7L = fRes7L, fRes3L
2970 fadd.s1 fPolL = fPolL, fRes7H
2976 fcvt.xf fFloatNDx = fFloatNDx
2981 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
2987 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
2992 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo + (LnSin2)lo
2993 fadd.s1 fLnSin2L = fLnSin2L, fB20
2999 fma.s1 fA25 = fA25, fX4, fA17
3004 fma.s1 fA13 = fA13, fXSqr, fA11
3010 fma.s1 fA9 = fA9, FR_FracX, fA8
3015 fma.s1 fA7 = fA7, FR_FracX, fA6
3021 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin28
3026 fma.s1 fLnSin14 = fLnSin14, fDxSqr, fLnSin12
3032 fma.s1 fLnSin10 = fLnSin10, fDxSqr, fLnSin8
3037 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3043 fms.s1 fRDx = FR_G, fNormDx, f1 // r = G * S_hi - 1
3048 // poly_lo = r * Q4 + Q3
3049 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3055 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3060 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo + (A1*y + A0)lo
3061 fma.s1 fRes7L = fRes4L, fXSqr, fRes7L
3067 fma.s1 fA25 = fA25, fX4, fA13
3072 fma.s1 fA9 = fA9, fXSqr, fA7
3078 // h = N * log2_lo + h
3079 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
3084 fadd.s1 fhDelX = fhDelX, FR_h3 // h = (h_1 + h_2) + h_3
3090 fma.s1 fLnSin36 = fLnSin36, fDelX6, fLnSin20
3095 fma.s1 fLnSin14 = fLnSin14, fDelX4, fLnSin10
3101 // poly_lo = r * Q4 + Q3
3102 fma.s1 fPolyLoDx = fRDx, FR_Q4, FR_Q3
3107 fmpy.s1 fRDxSq = fRDx, fRDx // rsq = r * r
3113 // Y_hi = N * log2_hi + H
3114 fma.s1 fResLnDxH = fFloatNDx, FR_log2_hi, FR_H
3119 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
3125 fma.s1 fA9 = fA25, fX4, fA9
3130 fadd.s1 fPolL = fPolL, fRes7L
3136 fadd.s1 fLnSin4 = fLnSin4, fLnSin2L
3141 // h = N * log2_lo + h
3142 fma.s1 fhDelX = fFloatNDx, FR_log2_lo, fhDelX
3148 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin14
3153 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)lo
3154 fma.s1 fLnSinL = fLnSin6, fDxSqrL, fLnSinL
3160 // poly_lo = poly_lo * r + Q2
3161 fma.s1 fPolyLoDx = fPolyLoDx, fRDx, FR_Q2
3166 fma.s1 fRDxCub = fRDxSq, fRDx, f0 // rcub = r^3
3172 famax.s0 fRes5H = fPol, fResH
3177 // High part of (lgammal(|x|) + log(|x|))
3178 fadd.s1 fRes1H = fPol, fResH
3184 // poly_lo = poly_lo * r + Q2
3185 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
3190 fma.s1 fPolL = fA9, fX6, fPolL // P25lo
3197 famin.s0 fRes5L = fPol, fResH
3202 // High part of -(LnSin + log(|DeltaX|))
3203 fnma.s1 fRes2H = fResLnDxH, f1, fLnSinH
3210 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)lo
3211 fma.s1 fLnSinL = fLnSin4, fDxSqr, fLnSinL
3216 fma.s1 fLnSin36 = fLnSin36, fDelX6, f0
3222 // poly_hi = Q1 * rsq + r
3223 fma.s1 fPolyHiDx = FR_Q1, fRDxSq, fRDx
3228 // poly_lo = poly_lo*r^3 + h
3229 fma.s1 fPolyLoDx = fPolyLoDx, fRDxCub, fhDelX
3235 fsub.s1 fRes1L = fRes5H, fRes1H
3240 // -(lgammal(|x|) + log(|x|))hi
3241 fnma.s1 fRes1H = fRes1H, f1, f0
3248 // poly_hi = Q1 * rsq + r
3249 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
3254 // poly_lo = poly_lo*r^3 + h
3255 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
3261 fms.s1 fRes2L = fResLnDxH, fMOne, fRes2H
3267 fma.s1 fLnSinL = fLnSin36, fDxSqr, fLnSinL
3272 // Y_lo = poly_hi + poly_lo
3273 fadd.s1 fResLnDxL = fPolyHiDx, fPolyLoDx
3279 fadd.s1 fRes1L = fRes1L, fRes5L
3284 // high part of the final result
3285 fadd.s1 fYH = fRes2H, fRes1H
3291 // Y_lo = poly_hi + poly_lo
3292 fadd.s1 fResL = FR_poly_hi, FR_poly_lo
3298 famax.s0 fRes4H = fRes2H, fRes1H
3304 famin.s0 fRes4L = fRes2H, fRes1H
3310 // (LnSin)lo + (log(|DeltaX|))lo
3311 fsub.s1 fLnSinL = fLnSinL, fResLnDxL
3316 fadd.s1 fRes2L = fRes2L, fLnSinH
3322 //(lgammal(|x|))lo + (log(|x|))lo
3323 fadd.s1 fPolL = fResL, fPolL
3329 fsub.s1 fYL = fRes4H, fYH
3335 // Low part of -(LnSin + log(|DeltaX|))
3336 fadd.s1 fRes2L = fRes2L, fLnSinL
3341 // High part of (lgammal(|x|) + log(|x|))
3342 fadd.s1 fRes1L = fRes1L, fPolL
3348 fadd.s1 fYL = fYL, fRes4L
3353 fsub.s1 fRes2L = fRes2L, fRes1L
3359 // low part of the final result
3360 fadd.s1 fYL = fYL, fRes2L
3366 // final result for -6.0 < x <= -0.75, non-integer, "far" from roots
3367 fma.s0 f8 = fYH, f1, fYL
3368 // exit here for -6.0 < x <= -0.75, non-integer, "far" from roots
3373 // here if |x+1| < 2^(-7)
3377 getf.exp GR_N = fDx // Get N = exponent of x
3378 fmerge.se fAbsX = f1, fDx // Form |deltaX|
3379 // Get high 4 bits of significand of deltaX
3380 extr.u rIndex1Dx = rSignifDx, 59, 4
3383 addl rPolDataPtr= @ltoff(lgammal_1pEps_data),gp
3384 fma.s1 fA0L = fDxSqr, fDxSqr, f0 // deltaX^4
3385 // sign of GAMMA is positive if p10 is set to 1
3386 (p10) adds rSgnGam = 1, r0
3390 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
3391 fnma.s1 fResL = fDx, f1, f0 // -(x+1)
3392 // Get high 15 bits of significand
3393 extr.u GR_X_0 = rSignifDx, 49, 15
3396 ld8 rPolDataPtr = [rPolDataPtr]
3398 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
3402 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
3404 and GR_N = GR_N, r17Ones // mask sign bit
3407 adds rTmpPtr = 8, GR_ad_tbl_1
3409 cmp.eq p6, p7 = 4, rSgnGamSize
3413 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
3415 adds rTmpPtr2 = 96, rPolDataPtr
3418 ldfd FR_h = [rTmpPtr] // Load h_1
3420 // unbiased exponent of deltaX
3421 sub GR_N = GR_N, rExpHalf, 1
3425 adds rTmpPtr3 = 192, rPolDataPtr
3427 // sign of GAMMA is negative if p11 is set to 1
3428 (p11) adds rSgnGam = -1, r0
3431 ldfe fA1 = [rPolDataPtr], 16 // A1
3437 ldfe fA2 = [rPolDataPtr], 16 // A2
3439 // Get bits 30-15 of X_0 * Z_1
3440 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
3443 ldfpd fA20, fA19 = [rTmpPtr2], 16 // P8, P7
3449 // For performance, don't use result of pmpyshr2.u for 4 cycles.
3452 ldfe fA3 = [rPolDataPtr], 16 // A3
3457 ldfpd fA18, fA17 = [rTmpPtr2], 16 // P6, P5
3463 ldfe fA4 = [rPolDataPtr], 16 // A4
3468 ldfpd fA16, fA15 = [rTmpPtr2], 16 // P4, p3
3474 ldfpd fA5L, fA6 = [rPolDataPtr], 16 // A5, A6
3479 ldfpd fA14, fA13 = [rTmpPtr2], 16 // P2, P1
3485 ldfpd fA7, fA8 = [rPolDataPtr], 16 // A7, A8
3487 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
3490 ldfe fLnSin2 = [rTmpPtr2], 16
3496 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
3498 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
3501 ldfe fLnSin4 = [rTmpPtr2], 32
3507 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
3509 adds rTmpPtr = 8, GR_ad_tbl_2
3512 // Put integer N into rightmost significand
3513 setf.sig fFloatN = GR_N
3519 ldfe fLnSin6 = [rTmpPtr3]
3524 ldfe fLnSin8 = [rTmpPtr2]
3530 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
3535 ldfd FR_h2 = [rTmpPtr] // Load h_2
3541 // store signgam if size of variable is 4 bytes
3542 (p6) st4 [rSgnGamAddr] = rSgnGam
3543 fma.s1 fResH = fA20, fResL, fA19 //polynomial for log(|x|)
3544 // Get bits 30-15 of X_1 * Z_2
3545 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
3548 // store signgam if size of variable is 8 bytes
3549 (p7) st8 [rSgnGamAddr] = rSgnGam
3550 fma.s1 fA2 = fA2, fDx, fA1 // polynomial for lgammal(|x|)
3555 // For performance, don't use result of pmpyshr2.u for 4 cycles.
3559 fma.s1 fA18 = fA18, fResL, fA17 //polynomial for log(|x|)
3565 fma.s1 fA16 = fA16, fResL, fA15 //polynomial for log(|x|)
3570 fma.s1 fA4 = fA4, fDx, fA3 // polynomial for lgammal(|x|)
3576 fma.s1 fA14 = fA14, fResL, fA13 //polynomial for log(|x|)
3581 fma.s1 fA6 = fA6, fDx, fA5L // polynomial for lgammal(|x|)
3587 fma.s1 fPol = fA8, fDx, fA7 // polynomial for lgammal(|x|)
3588 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
3592 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
3593 // loqw part of lnsin polynomial
3594 fma.s1 fRes3L = fLnSin4, fDxSqr, fLnSin2
3599 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
3600 fcvt.xf fFloatN = fFloatN // N as FP number
3605 fma.s1 fResH = fResH, fDxSqr, fA18 // High part of log(|x|)
3610 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
3611 fma.s1 fA4 = fA4, fDxSqr, fA2 // Low part of lgammal(|x|)
3616 // high part of lnsin polynomial
3617 fma.s1 fRes3H = fLnSin8, fDxSqr, fLnSin6
3623 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
3628 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
3634 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
3639 fma.s1 fA16 = fA16, fDxSqr, fA14 // Low part of log(|x|)
3645 fma.s1 fPol = fPol, fDxSqr, fA6 // High part of lgammal(|x|)
3651 fma.s1 fResH = fResH, fA0L, fA16 // log(|x|)/deltaX^2 - deltaX
3657 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
3662 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3668 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
3674 fma.s1 fResH = fResH, fDxSqr, fResL // log(|x|)
3679 fma.s1 fPol = fPol, fA0L, fA4 // lgammal(|x|)/|x|
3685 fms.s1 FR_r = FR_G, fAbsX, f1 // r = G * S_hi - 1
3690 // high part of log(deltaX)= Y_hi = N * log2_hi + H
3691 fma.s1 fRes4H = fFloatN, FR_log2_hi, FR_H
3697 // h = N * log2_lo + h
3698 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
3704 fma.s1 fResH = fPol, fDx, fResH // lgammal(|x|) + log(|x|)
3710 fma.s1 fRes3H = fRes3H, fA0L, fRes3L
3716 // poly_lo = r * Q4 + Q3
3717 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3722 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3728 // lnSin - log(|x|) - lgammal(|x|)
3729 fms.s1 fResH = fRes3H, fDxSqr, fResH
3736 // poly_lo = poly_lo * r + Q2
3737 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
3742 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
3749 // poly_hi = Q1 * rsq + r
3750 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
3757 // poly_lo = poly_lo*r^3 + h
3758 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
3765 // low part of log(|deltaX|) = Y_lo = poly_hi + poly_lo
3766 fadd.s1 fRes4L = FR_poly_hi, FR_poly_lo
3772 fsub.s1 fResH = fResH, fRes4L
3778 // final result for |x+1|< 2^(-7) path
3779 fsub.s0 f8 = fResH, fRes4H
3780 // exit for |x+1|< 2^(-7) path
3786 // here if -2^63 < x < -6.0 and x is not an integer
3787 // Also we are going to filter out cases when x falls in
3788 // range which is "close enough" to negative root. Rhis case
3789 // may occur only for -19.5 < x since other roots of lgamma are
3790 // insignificant from double extended point of view (they are closer
3791 // to RTN(x) than one ulp(x).
3795 ldfe fLnSin6 = [rLnSinDataPtr], 32
3796 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration
3797 // Get high 4 bits of significand of deltaX
3798 extr.u rIndex1Dx = rSignifDx, 59, 4
3801 ldfe fLnSin8 = [rTmpPtr3], 32
3802 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
3803 (p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound
3807 ldfe fLnSin10 = [rLnSinDataPtr], 32
3808 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
3809 // Get high 15 bits of significand
3810 extr.u GR_X_0 = rSignifDx, 49, 15
3813 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
3814 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3815 // set p6 if x falls in "near root" range
3816 (p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound
3820 getf.exp GR_N = fDx // Get N = exponent of x
3821 fma.s1 fDx4 = fDxSqr, fDxSqr, f0 // deltaX^4
3822 adds rTmpPtr = 96, rBernulliPtr
3825 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
3826 fma.s1 fLnSin34 = fLnSin34, fDxSqr, fLnSin32
3827 // branch to special path if x falls in "near root" range
3828 (p6) br.cond.spnt _negRoots
3831 .pred.rel "mutex",p10,p11
3833 ldfe fLnSin12 = [rTmpPtr3]
3834 fma.s1 fLnSin26 = fLnSin26, fDxSqr, fLnSin24
3835 (p10) cmp.eq p8, p9 = rXRnd, r0
3838 ldfe fLnSin14 = [rLnSinDataPtr]
3839 fma.s1 fLnSin30 = fLnSin30, fDxSqr, fLnSin28
3840 (p11) cmp.eq p9, p8 = rXRnd, r0
3844 ldfpd fB2, fB2L = [rBernulliPtr], 16
3845 fma.s1 fLnSin18 = fLnSin18, fDxSqr, fLnSin16
3846 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
3850 ldfe fB14 = [rTmpPtr], 16
3851 fma.s1 fLnSin22 = fLnSin22, fDxSqr, fLnSin20
3852 and GR_N = GR_N, r17Ones // mask sign bit
3856 ldfe fB4 = [rBernulliPtr], 16
3857 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration
3858 // Get bits 30-15 of X_0 * Z_1
3859 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
3862 ldfe fB16 = [rTmpPtr], 16
3863 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
3864 adds rTmpPtr2 = 8, GR_ad_tbl_1
3868 // For performance, don't use result of pmpyshr2.u for 4 cycles.
3871 ldfe fB6 = [rBernulliPtr], 16
3872 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
3873 adds rTmpPtr3 = -48, rTmpPtr
3876 ldfe fB18 = [rTmpPtr], 16
3877 // High part of the log(|x|) = Y_hi = N * log2_hi + H
3878 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H
3879 sub GR_N = GR_N, rExpHalf, 1 // unbiased exponent of deltaX
3882 .pred.rel "mutex",p8,p9
3884 ldfe fB8 = [rBernulliPtr], 16
3885 fma.s1 fLnSin36 = fLnSin36, fDx4, fLnSin34
3886 // sign of GAMMA(x) is negative
3887 (p8) adds rSgnGam = -1, r0
3890 ldfe fB20 = [rTmpPtr], -160
3891 fma.s1 fRes5H = fLnSin4, fDxSqr, f0
3892 // sign of GAMMA(x) is positive
3893 (p9) adds rSgnGam = 1, r0
3898 ldfe fB10 = [rBernulliPtr], 16
3899 fma.s1 fLnSin30 = fLnSin30, fDx4, fLnSin26
3900 (p14) adds rTmpPtr = -160, rTmpPtr
3903 ldfe fB12 = [rTmpPtr3], 16
3904 fma.s1 fDx8 = fDx4, fDx4, f0 // deltaX^8
3905 cmp.eq p6, p7 = 4, rSgnGamSize
3909 ldfps fGDx, fHDx = [GR_ad_tbl_1], 8 // Load G_1, H_1
3910 fma.s1 fDx6 = fDx4, fDxSqr, f0 // deltaX^6
3911 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
3914 ldfd fhDx = [rTmpPtr2] // Load h_1
3915 fma.s1 fLnSin22 = fLnSin22, fDx4, fLnSin18
3920 // Load two parts of C
3921 ldfpd fRes1H, fRes1L = [rTmpPtr], 16
3922 fma.s1 fRcpX = fInvX, fInvX, f0 // (1/x)^2
3923 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
3926 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
3927 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h// h = N * log2_lo + h
3932 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
3933 fnma.s1 fInvXL = f8, fInvX, f1 // relative error of 1/x
3937 adds rTmpPtr2 = 8, GR_ad_tbl_2
3938 fma.s1 fLnSin8 = fLnSin8, fDxSqr, fLnSin6
3943 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
3944 // poly_lo = r * Q4 + Q3
3945 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3949 ldfd fh2Dx = [rTmpPtr2] // Load h_2
3950 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3956 fma.s1 fA1L = fB2, fInvX, f0 // (B2*(1/x))hi
3960 // Put integer N into rightmost significand
3961 setf.sig fFloatNDx = GR_N
3962 fms.s1 fRes4H = fResH, f1, f1 // ln(|x|)hi - 1
3968 fadd.s1 fRes2H = fRes5H, fLnSin2//(lnSin4*DeltaX^2 + lnSin2)hi
3969 // Get bits 30-15 of X_1 * Z_2
3970 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
3974 fms.s1 fRes5L = fLnSin4, fDxSqr, fRes5H
3979 // For performance, don't use result of pmpyshr2.u for 4 cycles.
3983 fma.s1 fInvX4 = fRcpX, fRcpX, f0 // (1/x)^4
3988 fma.s1 fB6 = fB6, fRcpX, fB4
3993 // store signgam if size of variable is 4 bytes
3994 (p6) st4 [rSgnGamAddr] = rSgnGam
3995 fma.s1 fB18 = fB18, fRcpX, fB16
3999 // store signgam if size of variable is 8 bytes
4000 (p7) st8 [rSgnGamAddr] = rSgnGam
4001 fma.s1 fInvXL = fInvXL, fInvX, f0 // low part of 1/x
4007 // poly_lo = poly_lo * r + Q2
4008 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
4013 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
4019 fma.s1 fRes3H = fRes4H, f8, f0 // (-|x|*(ln(|x|)-1))hi
4020 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
4024 // poly_hi = Q1 * rsq + r
4025 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
4030 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
4031 fms.s1 fA2L = fB2, fInvX, fA1L // delta(B2*(1/x))
4036 fnma.s1 fBrnH = fRes1H, f1, fA1L // (-C - S(1/x))hi
4041 ldfps fG3Dx, fH3Dx = [GR_ad_tbl_3],8 // Load G_3, H_3
4042 fma.s1 fInvX8 = fInvX4, fInvX4, f0 // (1/x)^8
4047 fma.s1 fB10 = fB10, fRcpX, fB8
4053 ldfd fh3Dx = [GR_ad_tbl_3] // Load h_3
4054 fma.s1 fB20 = fB20, fInvX4, fB18
4059 fma.s1 fB14 = fB14, fRcpX, fB12
4065 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin30
4070 fma.s1 fLnSin12 = fLnSin12, fDxSqr, fLnSin10
4076 fsub.s1 fRes2L = fLnSin2, fRes2H
4081 fma.s1 fPol = fRes2H, fDxSqr, f0 // high part of LnSin
4087 fnma.s1 fResH = fResH, FR_MHalf, fResH // -0.5*ln(|x|)hi
4092 fmpy.s1 fGDx = fGDx, FR_G2 // G = G_1 * G_2
4098 // poly_lo = poly_lo*r^3 + h
4099 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
4104 // B2lo*(1/x)hi+ delta(B2*(1/x))
4105 fma.s1 fA2L = fB2L, fInvX, fA2L
4111 fma.s1 fB20 = fB20, fInvX4, fB14
4116 fma.s1 fB10 = fB10, fInvX4, fB6
4122 fcvt.xf fFloatNDx = fFloatNDx
4127 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin12
4133 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin22
4138 fms.s1 fRes3L = fRes4H, f8, fRes3H // delta(-|x|*(ln(|x|)-1))
4144 fmpy.s1 fGDx = fGDx, fG3Dx // G = (G_1 * G_2) * G_3
4149 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))hi
4150 fadd.s1 fRes4H = fRes3H, fResH
4156 fma.s1 fA2L = fInvXL, fB2, fA2L //(B2*(1/x))lo
4161 // low part of log(|x|) = Y_lo = poly_hi + poly_lo
4162 fadd.s1 fResL = FR_poly_hi, FR_poly_lo
4168 fma.s1 fB20 = fB20, fInvX8, fB10
4173 fma.s1 fInvX3 = fInvX, fRcpX, f0 // (1/x)^3
4179 fadd.s1 fHDx = fHDx, FR_H2 // H = H_1 + H_2
4184 fadd.s1 fRes5L = fRes5L, fLnSin2L
4190 fadd.s1 fRes2L = fRes2L, fRes5H
4195 fadd.s1 fhDx = fhDx, fh2Dx // h = h_1 + h_2
4201 fms.s1 fBrnL = fRes1H, fMOne, fBrnH
4206 fms.s1 FR_r = fGDx, fNormDx, f1 // r = G * S_hi - 1
4212 fma.s1 fRes3L = fResL, f8 , fRes3L // (-|x|*(ln(|x|)-1))lo
4217 fsub.s1 fRes4L = fRes3H, fRes4H
4223 // low part of "Bernulli" polynomial
4224 fma.s1 fB20 = fB20, fInvX3, fA2L
4229 fnma.s1 fResL = fResL, FR_MHalf, fResL // -0.5*ln(|x|)lo
4235 fadd.s1 fHDx = fHDx, fH3Dx // H = (H_1 + H_2) + H_3
4240 fms.s1 fPolL = fRes2H, fDxSqr, fPol
4246 fadd.s1 fhDx = fhDx, fh3Dx // h = (h_1 + h_2) + h_3
4251 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))hi
4252 fadd.s1 fB14 = fRes4H, fBrnH
4258 // poly_lo = r * Q4 + Q3
4259 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
4264 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
4270 fadd.s1 fRes4L = fRes4L, fResH
4275 fadd.s1 fBrnL = fBrnL, fA1L
4281 // (-|x|*(ln(|x|)-1))lo + (-0.5ln(|x|))lo
4282 fadd.s1 fRes3L = fRes3L, fResL
4287 fnma.s1 fB20 = fRes1L, f1, fB20 // -Clo - S(1/x)lo
4293 fadd.s1 fRes2L = fRes2L, fRes5L // (lnSin4*DeltaX^2 + lnSin2)lo
4298 fma.s1 fPolL = fDxSqrL, fRes2H, fPolL
4304 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin8
4309 fma.s1 fLnSin36 = fLnSin36, fDx8, f0
4315 // poly_lo = poly_lo * r + Q2
4316 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
4321 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
4327 // poly_hi = Q1 * rsq + r
4328 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
4333 fsub.s1 fB12 = fRes4H, fB14
4339 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo
4340 fadd.s1 fRes4L = fRes4L, fRes3L
4345 fadd.s1 fBrnL = fBrnL, fB20 // (-C - S(1/x))lo
4351 // high part of log(|DeltaX|) = Y_hi = N * log2_hi + H
4352 fma.s1 fLnDeltaH = fFloatNDx, FR_log2_hi, fHDx
4357 // h = N * log2_lo + h
4358 fma.s1 fhDx = fFloatNDx, FR_log2_lo, fhDx
4364 fma.s1 fPolL = fRes2L, fDxSqr, fPolL
4369 fma.s1 fLnSin14 = fLnSin36, fDxSqr, fLnSin14
4375 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo + (- C - S(1/x))lo
4376 fadd.s1 fBrnL = fBrnL, fRes4L
4381 fadd.s1 fB12 = fB12, fBrnH
4387 // poly_lo = poly_lo*r^3 + h
4388 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, fhDx
4393 fnma.s1 fRes1H = fLnDeltaH, f1, fPol//(-ln(|DeltaX|) + LnSin)hi
4399 fma.s1 fPolL = fDxSqrL, fRes2L, fPolL
4404 fma.s1 fLnSin36 = fLnSin14, fDx6, f0
4410 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))lo
4411 fadd.s1 fB12 = fB12, fBrnL
4417 // low part of log(|DeltaX|) = Y_lo = poly_hi + poly_lo
4418 fadd.s1 fLnDeltaL= FR_poly_hi, FR_poly_lo
4423 fms.s1 fRes1L = fLnDeltaH, fMOne, fRes1H
4429 fadd.s1 fPolL = fPolL, fLnSin36
4434 //(-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi + (-ln(|DeltaX|) + LnSin)hi
4435 fadd.s1 f8 = fRes1H, fB14
4441 //max((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi,
4442 // (-ln(|DeltaX|) + LnSin)hi)
4443 famax.s1 fMaxNegStir = fRes1H, fB14
4448 //min((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi,
4449 // (-ln(|DeltaX|) + LnSin)hi)
4450 famin.s1 fMinNegStir = fRes1H, fB14
4456 fadd.s1 fRes1L = fRes1L, fPol
4461 // (-ln(|DeltaX|))lo + (LnSin)lo
4462 fnma.s1 fPolL = fLnDeltaL, f1, fPolL
4468 fsub.s1 f9 = fMaxNegStir, f8 // delta1
4474 fadd.s1 fRes1L = fRes1L, fPolL // (-ln(|DeltaX|) + LnSin)lo
4480 fadd.s1 f9 = f9, fMinNegStir
4486 fadd.s1 fRes1L = fRes1L, fB12
4491 // low part of the result
4492 fadd.s1 f9 = f9, fRes1L
4498 // final result for -2^63 < x < -6.0 path
4499 fma.s0 f8 = f8, f1, f9
4500 // exit here for -2^63 < x < -6.0 path
4505 // here if x falls in neighbourhood of any negative root
4506 // "neighbourhood" typically means that |lgammal(x)| < 0.17
4507 // on the [-3.0,-2.0] range |lgammal(x)| has even less
4509 // rXint contains index of the root
4510 // p10 is set if root belongs to "right" ones
4511 // p11 is set if root belongs to "left" ones
4512 // lgammal(x) is approximated by polynomial of
4513 // 19th degree from (x - root) argument
4517 addl rPolDataPtr= @ltoff(lgammal_right_roots_polynomial_data),gp
4519 shl rTmpPtr2 = rXint, 7 // (i*16)*8
4522 adds rRootsAddr = -288, rRootsBndAddr
4528 ldfe fRoot = [rRootsAddr] // FP representation of root
4530 shl rTmpPtr = rXint, 6 // (i*16)*4
4533 (p11) adds rTmpPtr2 = 3536, rTmpPtr2
4539 ld8 rPolDataPtr = [rPolDataPtr]
4541 shladd rTmpPtr = rXint, 4, rTmpPtr // (i*16) + (i*16)*4
4544 adds rTmpPtr3 = 32, rTmpPtr2
4549 .pred.rel "mutex",p10,p11
4551 add rTmpPtr3 = rTmpPtr, rTmpPtr3
4553 (p10) cmp.eq p8, p9 = rXRnd, r0
4556 // (i*16) + (i*16)*4 + (i*16)*8
4557 add rTmpPtr = rTmpPtr, rTmpPtr2
4559 (p11) cmp.eq p9, p8 = rXRnd, r0
4563 add rTmpPtr2 = rPolDataPtr, rTmpPtr3
4568 add rPolDataPtr = rPolDataPtr, rTmpPtr // begin + offsett
4574 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
4576 adds rTmpPtr = 112, rTmpPtr2
4579 ldfpd fA2, fA2L = [rTmpPtr2], 16 // A2
4581 cmp.eq p12, p13 = 4, rSgnGamSize
4585 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
4590 ldfe fA3 = [rTmpPtr2], 128 // A4
4596 ldfpd fA12, fA13 = [rTmpPtr], 16 // A12, A13
4598 adds rTmpPtr3 = 64, rPolDataPtr
4601 ldfpd fA16, fA17 = [rTmpPtr2], 16 // A16, A17
4603 adds rPolDataPtr = 32, rPolDataPtr
4606 .pred.rel "mutex",p8,p9
4608 ldfpd fA14, fA15 = [rTmpPtr], 16 // A14, A15
4610 // sign of GAMMA(x) is negative
4611 (p8) adds rSgnGam = -1, r0
4614 ldfpd fA18, fA19 = [rTmpPtr2], 16 // A18, A19
4616 // sign of GAMMA(x) is positive
4617 (p9) adds rSgnGam = 1, r0
4621 ldfe fA4 = [rPolDataPtr], 16 // A4
4626 ldfpd fA6, fA7 = [rTmpPtr3], 16 // A6, A7
4632 ldfe fA5 = [rPolDataPtr], 16 // A5
4633 // if x equals to (rounded) root exactly
4634 fcmp.eq.s1 p6, p0 = f8, fRoot
4638 ldfpd fA8, fA9 = [rTmpPtr3], 16 // A8, A9
4639 fms.s1 FR_FracX = f8, f1, fRoot
4644 // store signgam if size of variable is 4 bytes
4645 (p12) st4 [rSgnGamAddr] = rSgnGam
4650 // store signgam if size of variable is 8 bytes
4651 (p13) st8 [rSgnGamAddr] = rSgnGam
4652 // answer if x equals to (rounded) root exactly
4653 (p6) fadd.s0 f8 = fA0, fA0L
4654 // exit if x equals to (rounded) root exactly
4659 ldfpd fA10, fA11 = [rTmpPtr3], 16 // A10, A11
4666 fma.s1 fResH = fA2, FR_FracX, f0 // (A2*x)hi
4671 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2
4677 fma.s1 fA17 = fA17, FR_FracX, fA16
4682 fma.s1 fA13 = fA13, FR_FracX, fA12
4688 fma.s1 fA19 = fA19, FR_FracX, fA18
4693 fma.s1 fA15 = fA15, FR_FracX, fA14
4699 fma.s1 fPol = fA7, FR_FracX, fA6
4705 fma.s1 fA9 = fA9, FR_FracX, fA8
4711 fms.s1 fResL = fA2, FR_FracX, fResH // delta(A2*x)
4716 fadd.s1 fRes1H = fResH, fA1 // (A2*x + A1)hi
4722 fma.s1 fA11 = fA11, FR_FracX, fA10
4727 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
4733 fma.s1 fA19 = fA19, fA4L, fA17
4738 fma.s1 fA15 = fA15, fA4L, fA13
4744 fma.s1 fPol = fPol, FR_FracX, fA5
4749 fma.s1 fA3L = fA4L, FR_FracX, f0 // x^3
4755 // delta(A2*x) + A2L*x = (A2*x)lo
4756 fma.s1 fResL = fA2L, FR_FracX, fResL
4761 fsub.s1 fRes1L = fA1, fRes1H
4767 fma.s1 fA11 = fA11, fA4L, fA9
4772 fma.s1 fA19 = fA19, fA5L, fA15
4778 fma.s1 fPol = fPol, FR_FracX, fA4
4784 fadd.s1 fResL = fResL, fA1L // (A2*x)lo + A1
4789 fadd.s1 fRes1L = fRes1L, fResH
4795 fma.s1 fRes2H = fRes1H, FR_FracX, f0 // ((A2*x + A1)*x)hi
4801 fma.s1 fA19 = fA19, fA5L, fA11
4807 fma.s1 fPol = fPol, FR_FracX, fA3
4813 fadd.s1 fRes1L = fRes1L, fResL // (A2*x + A1)lo
4819 // delta((A2*x + A1)*x)
4820 fms.s1 fRes2L = fRes1H, FR_FracX, fRes2H
4825 fadd.s1 fRes3H = fRes2H, fA0 // ((A2*x + A1)*x + A0)hi
4831 fma.s1 fA19 = fA19, fA5L, f0
4838 fma.s1 fRes2L = fRes1L, FR_FracX, fRes2L // ((A2*x + A1)*x)lo
4843 fsub.s1 fRes3L = fRes2H, fRes3H
4849 fma.s1 fPol = fA19, FR_FracX, fPol
4855 fadd.s1 fRes3L = fRes3L, fA0
4860 fadd.s1 fRes2L = fRes2L, fA0L // ((A2*x + A1)*x)lo + A0L
4866 fadd.s1 fRes3L = fRes3L, fRes2L // (((A2*x + A1)*x) + A0)lo
4872 fma.s1 fRes3L = fPol, fA3L, fRes3L
4878 // final result for arguments which are close to negative roots
4879 fma.s0 f8 = fRes3H, f1, fRes3L
4880 // exit here for arguments which are close to negative roots
4885 // here if |x| < 0.5
4889 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
4890 fma.s1 fA4L = f8, f8, f0 // x^2
4891 addl rPolDataPtr = @ltoff(lgammal_0_Half_data), gp
4894 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1
4896 addl rLnSinDataPtr = @ltoff(lgammal_lnsin_data), gp
4900 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
4902 // Point to Constants_Z_2
4903 add GR_ad_z_2 = 0x140, GR_ad_z_1
4906 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
4908 // Point to Constants_G_H_h2
4909 add GR_ad_tbl_2 = 0x180, GR_ad_z_1
4913 ld8 rPolDataPtr = [rPolDataPtr]
4915 // Point to Constants_G_H_h3
4916 add GR_ad_tbl_3 = 0x280, GR_ad_z_1
4919 ldfd FR_h = [GR_ad_tbl_1] // Load h_1
4921 sub GR_N = rExpX, rExpHalf, 1
4925 ld8 rLnSinDataPtr = [rLnSinDataPtr]
4927 // Get bits 30-15 of X_0 * Z_1
4928 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
4931 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
4937 // For performance, don't use result of pmpyshr2.u for 4 cycles.
4940 ldfe FR_log2_lo = [GR_ad_q], 16 // Load log2_lo
4942 add rTmpPtr2 = 320, rPolDataPtr
4945 add rTmpPtr = 32, rPolDataPtr
4948 adds rExp2 = -1, rExpHalf
4952 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3
4953 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
4957 ldfpd fA1, fA1L = [rTmpPtr], 16 // A1
4958 fms.s1 fB8 = f8, f8, fA4L // x^2 - <x^2>
4959 // set p6 if -0.5 < x <= -0.25
4960 (p15) cmp.eq.unc p6, p0 = rExpX, rExp2
4964 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
4966 // set p6 if -0.5 < x <= -0.40625
4967 (p6) cmp.le.unc p6, p0 = 10, GR_Index1
4970 ldfe fA21 = [rTmpPtr2], -16 // A21
4971 // Put integer N into rightmost significand
4973 adds rTmpPtr = 240, rTmpPtr
4977 setf.sig fFloatN = GR_N
4979 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
4982 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4
4984 adds rPolDataPtr = 304, rPolDataPtr
4988 ldfe fA20 = [rTmpPtr2], -32 // A20
4990 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
4993 ldfe fA19 = [rTmpPtr], -32 // A19
4995 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2
4999 ldfe fA17 = [rTmpPtr], -32 // A17
5001 adds rTmpPtr3 = 8, GR_ad_tbl_2
5004 ldfe fA18 = [rTmpPtr2], -32 // A18
5006 // branch to special path for -0.5 < x <= 0.40625
5007 (p6) br.cond.spnt lgammal_near_neg_half
5011 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
5012 ldfe fA15 = [rTmpPtr], -32 // A15
5013 fma.s1 fB20 = fA5L, fA5L, f0 // x^8
5017 ldfe fA16 = [rTmpPtr2], -32 // A16
5018 ldfe fA13 = [rTmpPtr], -32 // A13
5019 fms.s1 fB16 = fA4L, fA4L, fA5L
5023 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2
5024 ldfd FR_h2 = [rTmpPtr3] // Load h_2
5025 fmerge.s fB10 = f8, fA5L // sign(x) * x^4
5029 ldfe fA14 = [rTmpPtr2], -32 // A14
5030 ldfe fA11 = [rTmpPtr], -32 // A11
5031 // Get bits 30-15 of X_1 * Z_2
5032 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
5036 // For performance, don't use result of pmpyshr2.u for 4 cycles.
5039 ldfe fA12 = [rTmpPtr2], -32 // A12
5040 fma.s1 fRes4H = fA3, fAbsX, f0
5041 adds rTmpPtr3 = 16, GR_ad_q
5044 ldfe fA9 = [rTmpPtr], -32 // A9
5050 ldfe fA10 = [rTmpPtr2], -32 // A10
5051 ldfe fA7 = [rTmpPtr], -32 // A7
5052 fma.s1 fB18 = fB20, fB20, f0 // x^16
5056 ldfe fA8 = [rTmpPtr2], -32 // A8
5057 ldfe fA22 = [rPolDataPtr], 16 // A22
5058 fcvt.xf fFloatN = fFloatN
5062 ldfe fA5 = [rTmpPtr], -32 // A5
5063 fma.s1 fA21 = fA21, fAbsX, fA20 // v16
5064 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
5067 ldfe fA6 = [rTmpPtr2], -32 // A6
5074 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3
5075 ldfe fA4 = [rTmpPtr2], -32 // A4
5076 fma.s1 fA19 = fA19, fAbsX, fA18 // v13
5079 .pred.rel "mutex",p14,p15
5081 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
5082 fms.s1 fRes4L = fA3, fAbsX, fRes4H
5083 (p14) adds rSgnGam = 1, r0
5086 cmp.eq p6, p7 = 4, rSgnGamSize
5087 fadd.s1 fRes2H = fRes4H, fA2
5088 (p15) adds rSgnGam = -1, r0
5093 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
5094 fma.s1 fA17 = fA17, fAbsX, fA16 // v12
5099 ldfe FR_Q3 = [GR_ad_q], 32 // Load Q3
5100 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
5104 ldfe FR_Q2 = [rTmpPtr3], 16 // Load Q2
5105 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
5110 ldfe FR_Q1 = [GR_ad_q] // Load Q1
5111 fma.s1 fA15 = fA15, fAbsX, fA14 // v8
5115 adds rTmpPtr3 = 32, rLnSinDataPtr
5116 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
5121 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16
5122 ldfe fLnSin6 = [rTmpPtr3], 32
5123 fma.s1 fA13 = fA13, fAbsX, fA12 // v7
5128 ldfe fLnSin4 = [rLnSinDataPtr], 32
5129 fma.s1 fRes4L = fA3L, fAbsX, fRes4L
5133 ldfe fLnSin10 = [rTmpPtr3], 32
5134 fsub.s1 fRes2L = fA2, fRes2H
5139 ldfe fLnSin8 = [rLnSinDataPtr], 32
5140 fma.s1 fResH = fRes2H, fAbsX, f0
5144 ldfe fLnSin14 = [rTmpPtr3], 32
5145 fma.s1 fA22 = fA22, fA4L, fA21 // v15
5150 ldfe fLnSin12 = [rLnSinDataPtr], 32
5151 fma.s1 fA9 = fA9, fAbsX, fA8 // v4
5155 ldfd fLnSin18 = [rTmpPtr3], 16
5156 fma.s1 fA11 = fA11, fAbsX, fA10 // v5
5161 ldfe fLnSin16 = [rLnSinDataPtr], 24
5162 fma.s1 fA19 = fA19, fA4L, fA17 // v11
5166 ldfd fLnSin22 = [rTmpPtr3], 16
5167 fma.s1 fPolL = fA7, fAbsX, fA6
5172 ldfd fLnSin20 = [rLnSinDataPtr], 16
5173 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
5177 ldfd fLnSin26 = [rTmpPtr3], 16
5178 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
5183 ldfd fLnSin24 = [rLnSinDataPtr], 16
5184 fadd.s1 fRes2L = fRes2L, fRes4H
5188 ldfd fLnSin30 = [rTmpPtr3], 16
5189 fadd.s1 fA2L = fA2L, fRes4L
5194 ldfd fLnSin28 = [rLnSinDataPtr], 16
5195 fms.s1 fResL = fRes2H, fAbsX, fResH
5199 ldfd fLnSin34 = [rTmpPtr3], 8
5200 fadd.s1 fRes2H = fResH, fA1
5205 ldfd fLnSin32 = [rLnSinDataPtr]
5206 fma.s1 fA11 = fA11, fA4L, fA9 // v3
5210 ldfd fLnSin36 = [rTmpPtr3]
5211 fma.s1 fA15 = fA15, fA4L, fA13 // v6
5217 // store signgam if size of variable is 4 bytes
5218 (p6) st4 [rSgnGamAddr] = rSgnGam
5219 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
5223 // store signgam if size of variable is 8 bytes
5224 (p7) st8 [rSgnGamAddr] = rSgnGam
5225 fma.s1 fA5 = fA5, fAbsX, fA4
5231 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
5236 // High part of the log(|x|): Y_hi = N * log2_hi + H
5237 fms.s1 FR_log2_hi = fFloatN, FR_log2_hi, FR_H
5243 fadd.s1 fA3L = fRes2L, fA2L
5248 fma.s1 fA22 = fA22, fA5L, fA19
5254 fsub.s1 fRes2L = fA1, fRes2H
5259 fma.s1 fRes3H = fRes2H, f8, f0
5265 fma.s1 fA15 = fA15, fA5L, fA11 // v2
5270 fma.s1 fLnSin18 = fLnSin18, fA4L, fLnSin16
5276 // h = N * log2_lo + h
5277 fms.s1 FR_h = fFloatN, FR_log2_lo, FR_h
5282 fma.s1 fPolL = fPolL, fA4L, fA5
5288 // poly_lo = r * Q4 + Q3
5289 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
5294 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
5300 fma.s1 fResL = fA3L, fAbsX, fResL
5305 fma.s1 fLnSin30 = fLnSin30, fA4L, fLnSin28
5311 fadd.s1 fRes2L = fRes2L, fResH
5316 fms.s1 fRes3L = fRes2H, f8, fRes3H
5322 fadd.s1 fRes1H = fRes3H, FR_log2_hi
5327 fma.s1 fPol = fB20, fA22, fA15
5333 fma.s1 fLnSin34 = fLnSin34, fA4L, fLnSin32
5338 fma.s1 fLnSin14 = fLnSin14, fA4L, fLnSin12
5345 // poly_lo = poly_lo * r + Q2
5346 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
5351 fnma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
5357 // poly_hi = Q1 * rsq + r
5358 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
5363 fadd.s1 fA1L = fA1L, fResL
5370 fma.s1 fLnSin22 = fLnSin22, fA4L, fLnSin20
5375 fma.s1 fLnSin26 = fLnSin26, fA4L, fLnSin24
5382 fsub.s1 fRes1L = FR_log2_hi, fRes1H
5387 fma.s1 fPol = fPol, fA5L, fPolL
5393 fma.s1 fLnSin34 = fLnSin36, fA5L, fLnSin34
5398 fma.s1 fLnSin18 = fLnSin18, fA5L, fLnSin14
5404 fma.s1 fLnSin6 = fLnSin6, fA4L, fLnSin4
5409 fma.s1 fLnSin10 = fLnSin10, fA4L, fLnSin8
5415 // poly_hi = Q1 * rsq + r
5416 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
5421 fadd.s1 fRes2L = fRes2L, fA1L
5427 // poly_lo = poly_lo*r^3 + h
5428 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
5433 fma.s1 fB2 = fLnSin2, fA4L, f0
5439 fadd.s1 fRes1L = fRes1L, fRes3H
5444 fma.s1 fPol = fPol, fB10, f0
5450 fma.s1 fLnSin26 = fLnSin26, fA5L, fLnSin22
5455 fma.s1 fLnSin34 = fLnSin34, fA5L, fLnSin30
5461 fma.s1 fLnSin10 = fLnSin10, fA5L, fLnSin6
5466 fma.s1 fLnSin2L = fLnSin2L, fA4L, f0
5473 fma.s1 fRes3L = fRes2L, f8, fRes3L
5479 // Y_lo = poly_hi + poly_lo
5480 fsub.s1 FR_log2_lo = FR_poly_lo, FR_poly_hi
5485 fms.s1 fB4 = fLnSin2, fA4L, fB2
5491 fadd.s1 fRes2H = fRes1H, fPol
5497 fma.s1 fLnSin34 = fLnSin34, fB20, fLnSin26
5503 fma.s1 fLnSin18 = fLnSin18, fB20, fLnSin10
5508 fma.s1 fLnSin2L = fB8, fLnSin2, fLnSin2L
5515 fadd.s1 FR_log2_lo = FR_log2_lo, fRes3L
5521 fsub.s1 fRes2L = fRes1H, fRes2H
5527 fma.s1 fB6 = fLnSin34, fB18, fLnSin18
5532 fadd.s1 fB4 = fLnSin2L, fB4
5539 fadd.s1 fRes1L = fRes1L, FR_log2_lo
5545 fadd.s1 fRes2L = fRes2L, fPol
5551 fma.s1 fB12 = fB6, fA5L, f0
5557 fadd.s1 fRes2L = fRes2L, fRes1L
5564 fms.s1 fB14 = fB6, fA5L, fB12
5569 fadd.s1 fLnSin30 = fB2, fB12
5570 // branch out if x is negative
5571 (p15) br.cond.spnt _O_Half_neg
5576 // sign(x)*Pol(|x|) - log(|x|)
5577 fma.s0 f8 = fRes2H, f1, fRes2L
5578 // it's an answer already for positive x
5579 // exit if 0 < x < 0.5
5584 // here if x is negative and |x| < 0.5
5589 fma.s1 fB14 = fB16, fB6, fB14
5594 fsub.s1 fLnSin16 = fB2, fLnSin30
5600 fadd.s1 fResH = fLnSin30, fRes2H
5606 fadd.s1 fLnSin16 = fLnSin16, fB12
5611 fadd.s1 fB4 = fB14, fB4
5617 fadd.s1 fLnSin16 = fB4, fLnSin16
5622 fsub.s1 fResL = fRes2H, fResH
5628 fadd.s1 fResL = fResL, fLnSin30
5633 fadd.s1 fLnSin16 = fLnSin16, fRes2L
5639 fadd.s1 fResL = fResL, fLnSin16
5645 // final result for -0.5 < x < 0
5646 fma.s0 f8 = fResH, f1, fResL
5647 // exit for -0.5 < x < 0
5653 // there are two computational paths:
5654 // 1) For x >10.0 Stirling's formula is used
5655 // 2) Polynomial approximation for 8.0 <= x <= 10.0
5657 lgammal_big_positive:
5659 addl rPolDataPtr = @ltoff(lgammal_data), gp
5660 fmerge.se fSignifX = f1, f8
5661 // Get high 15 bits of significand
5662 extr.u GR_X_0 = rSignifX, 49, 15
5665 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
5666 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration
5667 adds rSignif1andQ = 0x5, r0
5671 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
5673 shl rSignif1andQ = rSignif1andQ, 61 // significand of 1.25
5676 cmp.eq p8, p0 = rExpX, rExp8 // p8 = 1 if 8.0 <= x < 16
5678 adds rSgnGam = 1, r0 // gamma is positive at this range
5682 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1
5684 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
5687 ld8 rPolDataPtr = [rPolDataPtr]
5688 movl rDelta = 0x3FF2000000000000
5692 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
5694 add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
5697 // Point to Constants_G_H_h2
5698 add GR_ad_tbl_2 = 0x180, GR_ad_z_1
5700 // p8 = 1 if 8.0 <= x <= 10.0
5701 (p8) cmp.leu.unc p8, p0 = rSignifX, rSignif1andQ
5705 ldfd FR_h = [GR_ad_tbl_1] // Load h_1
5707 // Get bits 30-15 of X_0 * Z_1
5708 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
5711 (p8) setf.d FR_MHalf = rDelta
5713 (p8) br.cond.spnt lgammal_8_10 // branch out if 8.0 <= x <= 10.0
5717 // For performance, don't use result of pmpyshr2.u for 4 cycles.
5720 ldfe fA1 = [rPolDataPtr], 16 // Load overflow threshold
5721 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration
5722 // Point to Constants_G_H_h3
5723 add GR_ad_tbl_3 = 0x280, GR_ad_z_1
5727 movl rDelta = 0xBFE0000000000000 // -0.5 in DP
5731 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
5733 sub GR_N = rExpX, rExpHalf, 1 // unbiased exponent of x
5737 ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
5742 setf.d FR_MHalf = rDelta
5748 // Put integer N into rightmost significand
5749 setf.sig fFloatN = GR_N
5751 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
5754 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4
5760 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
5762 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2
5765 ldfe FR_Q3 = [GR_ad_q], 16 // Load Q3
5771 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
5772 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration
5777 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2
5783 ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
5789 ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
5791 // Get bits 30-15 of X_1 * Z_2
5792 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
5796 // For performance, don't use result of pmpyshr2.u for 4 cycles.
5799 ldfe FR_Q1 = [GR_ad_q] // Load Q1
5800 fcmp.gt.s1 p7,p0 = f8, fA1 // check if x > overflow threshold
5805 ldfpd fA0, fA0L = [rPolDataPtr], 16 // Load two parts of C
5806 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration
5811 ldfpd fB2, fA1 = [rPolDataPtr], 16
5813 (p7) br.cond.spnt lgammal_overflow // branch if x > overflow threshold
5817 ldfe fB4 = [rPolDataPtr], 16
5818 fcvt.xf fFloatN = fFloatN
5819 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
5823 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3// Point to G_3
5828 ldfe fB6 = [rPolDataPtr], 16
5834 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
5840 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
5841 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
5846 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
5852 ldfe fB8 = [rPolDataPtr], 16
5853 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
5858 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration
5863 ldfe fB10 = [rPolDataPtr], 16
5865 cmp.eq p6, p7 = 4, rSgnGamSize
5869 ldfe fB12 = [rPolDataPtr], 16
5875 ldfe fB14 = [rPolDataPtr], 16
5882 ldfe fB16 = [rPolDataPtr], 16
5883 // get double extended coefficients from two doubles
5884 // two doubles are needed in Stitling's formula for negative x
5885 fadd.s1 fB2 = fB2, fA1
5890 ldfe fB18 = [rPolDataPtr], 16
5891 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration
5896 ldfe fB20 = [rPolDataPtr], 16
5902 // store signgam if size of variable is 4 bytes
5903 (p6) st4 [rSgnGamAddr] = rSgnGam
5904 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
5908 // store signgam if size of variable is 8 bytes
5909 (p7) st8 [rSgnGamAddr] = rSgnGam
5910 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
5916 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
5922 fma.s1 fRcpX = fInvX, fInvX, f0 // 1/x^2
5927 fma.s1 fA0L = fB2, fInvX, fA0L
5933 fms.s1 FR_r = fSignifX, FR_G, f1 // r = G * S_hi - 1
5938 // High part of the log(x): Y_hi = N * log2_hi + H
5939 fma.s1 fRes2H = fFloatN, FR_log2_hi, FR_H
5946 // h = N * log2_lo + h
5947 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
5952 // High part of the log(x): Y_hi = N * log2_hi + H
5953 fma.s1 fRes1H = fFloatN, FR_log2_hi, FR_H
5959 fma.s1 fPol = fB18, fRcpX, fB16 // v9
5964 fma.s1 fA2L = fRcpX, fRcpX, f0 // v10
5970 fma.s1 fA3 = fB6, fRcpX, fB4 // v3
5975 fma.s1 fA4 = fB10, fRcpX, fB8 // v4
5981 fms.s1 fRes2H =fRes2H, f1, f1 // log_Hi(x) -1
5986 // poly_lo = r * Q4 + Q3
5987 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
5993 fma.s1 fRes1H = fRes1H, FR_MHalf, f0 // -0.5*log_Hi(x)
5998 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
6004 fma.s1 fA7 = fB14, fRcpX, fB12 // v7
6009 fma.s1 fA8 = fA2L, fB20, fPol // v8
6015 fma.s1 fA2 = fA4, fA2L, fA3 // v2
6020 fma.s1 fA4L = fA2L, fA2L, f0 // v5
6026 fma.s1 fResH = fRes2H, f8, f0 // (x*(ln(x)-1))hi
6031 // poly_lo = poly_lo * r + Q2
6032 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
6038 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
6043 // poly_hi = Q1 * rsq + r
6044 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
6050 fma.s1 fA11 = fRcpX, fInvX, f0 // 1/x^3
6055 fma.s1 fA6 = fA8, fA2L, fA7 // v6
6061 fms.s1 fResL = fRes2H, f8, fResH // d(x*(ln(x)-1))
6066 fadd.s1 fRes3H = fResH, fRes1H // (x*(ln(x)-1) -0.5ln(x))hi
6072 // poly_lo = poly_lo*r^3 + h
6073 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
6079 fma.s1 fPol = fA4L, fA6, fA2 // v1
6084 // raise inexact exception
6085 fma.s0 FR_log2_lo = FR_log2_lo, FR_log2_lo, f0
6091 fadd.s1 fRes4H = fRes3H, fA0 // (x*(ln(x)-1) -0.5ln(x))hi + Chi
6096 fsub.s1 fRes3L = fResH, fRes3H
6102 // Y_lo = poly_hi + poly_lo
6103 fadd.s1 fRes2L = FR_poly_hi, FR_poly_lo
6110 fma.s1 fA0L = fPol, fA11, fA0L // S(1/x) + Clo
6116 fadd.s1 fRes3L = fRes3L, fRes1H
6121 fsub.s1 fRes4L = fRes3H, fRes4H
6127 fma.s1 fResL = fRes2L, f8 , fResL // lo part of x*(ln(x)-1)
6133 // Clo + S(1/x) - 0.5*logLo(x)
6134 fma.s1 fA0L = fRes2L, FR_MHalf, fA0L
6140 fadd.s1 fRes4L = fRes4L, fA0
6146 // Clo + S(1/x) - 0.5*logLo(x) + (x*(ln(x)-1))lo
6147 fadd.s1 fA0L = fA0L, fResL
6153 fadd.s1 fRes4L = fRes4L, fRes3L
6159 fadd.s1 fRes4L = fRes4L, fA0L
6165 fma.s0 f8 = fRes4H, f1, fRes4L
6166 // exit for x > 10.0
6170 // here if 8.0 <= x <= 10.0
6171 // Result = P15(y), where y = x/8.0 - 1.5
6175 addl rPolDataPtr = @ltoff(lgammal_8_10_data), gp
6176 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // y = x/8.0 - 1.5
6177 cmp.eq p6, p7 = 4, rSgnGamSize
6181 ld8 rLnSinDataPtr = [rPolDataPtr]
6186 ld8 rPolDataPtr = [rPolDataPtr]
6192 adds rZ1offsett = 32, rLnSinDataPtr
6197 adds rLnSinDataPtr = 48, rLnSinDataPtr
6203 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
6208 ldfe fA2 = [rZ1offsett], 32 // A5
6214 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
6215 fma.s1 FR_rsq = FR_FracX, FR_FracX, f0 // y^2
6219 ldfe fA3 = [rLnSinDataPtr],32 // A5
6225 ldfe fA4 = [rZ1offsett], 32 // A4
6226 ldfe fA5 = [rLnSinDataPtr], 32 // A5
6231 ldfe fA6 = [rZ1offsett], 32 // A6
6232 ldfe fA7 = [rLnSinDataPtr], 32 // A7
6237 ldfe fA8 = [rZ1offsett], 32 // A8
6238 ldfe fA9 = [rLnSinDataPtr], 32 // A9
6243 ldfe fA10 = [rZ1offsett], 32 // A10
6244 ldfe fA11 = [rLnSinDataPtr], 32 // A11
6249 ldfe fA12 = [rZ1offsett], 32 // A12
6250 ldfe fA13 = [rLnSinDataPtr], 32 // A13
6251 fma.s1 FR_Q4 = FR_rsq, FR_rsq, f0 // y^4
6255 ldfe fA14 = [rZ1offsett], 32 // A14
6256 ldfe fA15 = [rLnSinDataPtr], 32 // A15
6262 fma.s1 fRes1H = FR_FracX, fA1, f0
6268 fma.s1 fA3 = fA3, FR_FracX, fA2 // v4
6274 fma.s1 fA5 = fA5, FR_FracX, fA4 // v5
6279 // store sign of GAMMA(x) if size of variable is 4 bytes
6280 (p6) st4 [rSgnGamAddr] = rSgnGam
6281 fma.s1 fA3L = FR_Q4, FR_Q4, f0 // v9 = y^8
6285 // store sign of GAMMA(x) if size of variable is 8 bytes
6286 (p7) st8 [rSgnGamAddr] = rSgnGam
6287 fma.s1 fA7 = fA7, FR_FracX, fA6 // v7
6293 fma.s1 fA9 = fA9, FR_FracX, fA8 // v8
6299 fms.s1 fRes1L = FR_FracX, fA1, fRes1H
6304 fma.s1 fA11 = fA11, FR_FracX, fA10 // v12
6310 fma.s1 fA13 = fA13, FR_FracX, fA12 // v13
6315 fma.s1 fRes2H = fRes1H, f1, fA0
6321 fma.s1 fA15 = fA15, FR_FracX, fA14 // v16
6326 fma.s1 fA5 = fA5, FR_rsq, fA3 // v3
6332 fma.s1 fA9 = fA9, FR_rsq, fA7 // v6
6338 fma.s1 fRes1L = FR_FracX, fA1L, fRes1L
6344 fms.s1 fRes2L = fA0, f1, fRes2H
6349 fma.s1 fA13 = fA13, FR_rsq, fA11 // v11
6355 fma.s1 fA9 = fA9, FR_Q4, fA5 // v2
6361 fma.s1 fRes1L = fRes1L, f1, fA0L
6367 fma.s1 fRes2L = fRes2L, f1, fRes1H
6372 fma.s1 fA15 = fA15, FR_Q4, fA13 // v10
6378 fma.s1 fRes2L = fRes1L, f1, fRes2L
6383 fma.s1 fPol = fA3L, fA15, fA9
6389 fma.s1 f8 = FR_rsq , fPol, fRes2H
6394 fma.s1 fPol = fPol, FR_rsq, f0
6400 fms.s1 fRes1L = fRes2H, f1, f8
6406 fma.s1 fRes1L = fRes1L, f1, fPol
6412 fma.s1 fRes1L = fRes1L, f1, fRes2L
6418 fma.s0 f8 = f8, f1, fRes1L
6419 // exit for 8.0 <= x <= 10.0
6424 // here if 4.0 <=x < 8.0
6428 addl rPolDataPtr= @ltoff(lgammal_4_8_data),gp
6429 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf
6430 adds rSgnGam = 1, r0
6434 ld8 rPolDataPtr = [rPolDataPtr]
6441 adds rTmpPtr = 160, rPolDataPtr
6443 // branch to special path which computes polynomial of 25th degree
6444 br.sptk lgamma_polynom25
6448 // here if 2.25 <=x < 4.0
6452 addl rPolDataPtr= @ltoff(lgammal_2Q_4_data),gp
6453 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf
6454 adds rSgnGam = 1, r0
6458 ld8 rPolDataPtr = [rPolDataPtr]
6465 adds rTmpPtr = 160, rPolDataPtr
6467 // branch to special path which computes polynomial of 25th degree
6468 br.sptk lgamma_polynom25
6472 // here if 0.5 <= |x| < 0.75
6475 .pred.rel "mutex", p14, p15
6477 (p14) addl rPolDataPtr= @ltoff(lgammal_half_3Q_data),gp
6478 // FR_FracX = x - 0.625 for positive x
6479 (p14) fms.s1 FR_FracX = f8, f1, FR_FracX
6480 (p14) adds rSgnGam = 1, r0
6483 (p15) addl rPolDataPtr= @ltoff(lgammal_half_3Q_neg_data),gp
6484 // FR_FracX = x + 0.625 for negative x
6485 (p15) fma.s1 FR_FracX = f8, f1, FR_FracX
6486 (p15) adds rSgnGam = -1, r0
6490 ld8 rPolDataPtr = [rPolDataPtr]
6496 adds rTmpPtr = 160, rPolDataPtr
6498 // branch to special path which computes polynomial of 25th degree
6499 br.sptk lgamma_polynom25
6502 // here if 1.3125 <= x < 1.5625
6506 adds rSgnGam = 1, r0
6511 adds rTmpPtr = 160, rPolDataPtr
6512 fms.s1 FR_FracX = f8, f1, fA5L
6513 br.sptk lgamma_polynom25
6516 // here if -2.605859375 <= x < -2.5
6517 // special polynomial approximation used since neither "near root"
6518 // approximation nor reflection formula give satisfactory accuracy on
6523 addl rPolDataPtr= @ltoff(lgammal_neg2andHalf_data),gp
6524 fma.s1 FR_FracX = fB20, f1, f8 // 2.5 + x
6525 adds rSgnGam = -1, r0
6529 ld8 rPolDataPtr = [rPolDataPtr]
6535 adds rTmpPtr = 160, rPolDataPtr
6537 // branch to special path which computes polynomial of 25th degree
6538 br.sptk lgamma_polynom25
6542 // here if -0.5 < x <= -0.40625
6544 lgammal_near_neg_half:
6546 addl rPolDataPtr= @ltoff(lgammal_near_neg_half_data),gp
6547 setf.exp FR_FracX = rExpHalf
6552 ld8 rPolDataPtr = [rPolDataPtr]
6554 adds rSgnGam = -1, r0
6558 adds rTmpPtr = 160, rPolDataPtr
6559 fma.s1 FR_FracX = FR_FracX, f1, f8
6560 // branch to special path which computes polynomial of 25th degree
6561 br.sptk lgamma_polynom25
6565 // here if there an answer is P25(x)
6566 // rPolDataPtr, rTmpPtr point to coefficients
6567 // x is in FR_FracX register
6571 ldfpd fA3, fA0L = [rPolDataPtr], 16 // A3
6573 cmp.eq p6, p7 = 4, rSgnGamSize
6576 ldfpd fA18, fA19 = [rTmpPtr], 16 // D7, D6
6582 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
6587 ldfpd fA16, fA17 = [rTmpPtr], 16 // D4, D5
6592 ldfpd fA12, fA13 = [rPolDataPtr], 16 // D0, D1
6597 ldfpd fA14, fA15 = [rTmpPtr], 16 // D2, D3
6603 ldfpd fA24, fA25 = [rPolDataPtr], 16 // C21, C20
6608 ldfpd fA22, fA23 = [rTmpPtr], 16 // C19, C18
6614 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
6615 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2
6619 ldfpd fA20, fA21 = [rTmpPtr], 16 // C17, C16
6625 ldfe fA11 = [rTmpPtr], 16 // E7
6630 ldfpd fA0, fA3L = [rPolDataPtr], 16 // A0
6635 ldfe fA10 = [rPolDataPtr], 16 // E6
6640 ldfe fA9 = [rTmpPtr], 16 // E5
6646 ldfe fA8 = [rPolDataPtr], 16 // E4
6647 ldfe fA7 = [rTmpPtr], 16 // E3
6652 ldfe fA6 = [rPolDataPtr], 16 // E2
6653 ldfe fA5 = [rTmpPtr], 16 // E1
6658 ldfe fA4 = [rPolDataPtr], 16 // E0
6659 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
6664 fms.s1 fB2 = FR_FracX, FR_FracX, fA4L // x^2 - <x^2>
6669 // store signgam if size of variable is 4 bytes
6670 (p6) st4 [rSgnGamAddr] = rSgnGam
6671 fma.s1 fRes4H = fA3, FR_FracX, f0 // (A3*x)hi
6675 // store signgam if size of variable is 8 bytes
6676 (p7) st8 [rSgnGamAddr] = rSgnGam
6677 fma.s1 fA19 = fA19, FR_FracX, fA18 // D7*x + D6
6683 fma.s1 fResH = fA1, FR_FracX, f0 // (A1*x)hi
6688 fma.s1 fB6 = fA1L, FR_FracX, fA0L // A1L*x + A0L
6694 fma.s1 fA17 = fA17, FR_FracX, fA16 // D5*x + D4
6699 fma.s1 fA15 = fA15, FR_FracX, fA14 // D3*x + D2
6705 fma.s1 fA25 = fA25, FR_FracX, fA24 // C21*x + C20
6710 fma.s1 fA13 = fA13, FR_FracX, fA12 // D1*x + D0
6716 fma.s1 fA23 = fA23, FR_FracX, fA22 // C19*x + C18
6721 fma.s1 fA21 = fA21, FR_FracX, fA20 // C17*x + C16
6727 fms.s1 fRes4L = fA3, FR_FracX, fRes4H // delta((A3*x)hi)
6732 fadd.s1 fRes2H = fRes4H, fA2 // (A3*x + A2)hi
6738 fms.s1 fResL = fA1, FR_FracX, fResH // d(A1*x)
6743 fadd.s1 fRes1H = fResH, fA0 // (A1*x + A0)hi
6749 fma.s1 fA19 = fA19, fA4L, fA17 // Dhi
6754 fma.s1 fA11 = fA11, FR_FracX, fA10 // E7*x + E6
6760 // Doing this to raise inexact flag
6761 fma.s0 fA10 = fA0, fA0, f0
6767 fma.s1 fA15 = fA15, fA4L, fA13 // Dlo
6772 // (C21*x + C20)*x^2 + C19*x + C18
6773 fma.s1 fA25 = fA25, fA4L, fA23
6779 fma.s1 fA9 = fA9, FR_FracX, fA8 // E5*x + E4
6784 fma.s1 fA7 = fA7, FR_FracX, fA6 // E3*x + E2
6790 fma.s1 fRes4L = fA3L, FR_FracX, fRes4L // (A3*x)lo
6795 fsub.s1 fRes2L = fA2, fRes2H
6801 fadd.s1 fResL = fResL, fB6 // (A1L*x + A0L) + d(A1*x)
6806 fsub.s1 fRes1L = fA0, fRes1H
6812 fma.s1 fA5 = fA5, FR_FracX, fA4 // E1*x + E0
6817 fma.s1 fB8 = fA5L, fA5L, f0 // x^8
6823 // ((C21*x + C20)*x^2 + C19*x + C18)*x^2 + C17*x + C16
6824 fma.s1 fA25 = fA25, fA4L, fA21
6829 fma.s1 fA19 = fA19, fA5L, fA15 // D
6835 fma.s1 fA11 = fA11, fA4L, fA9 // Ehi
6841 fadd.s1 fRes2L = fRes2L, fRes4H
6846 fadd.s1 fRes4L = fRes4L, fA2L // (A3*x)lo + A2L
6852 fma.s1 fRes3H = fRes2H, fA4L, f0 // ((A3*x + A2)*x^2)hi
6857 fadd.s1 fRes1L = fRes1L, fResH
6863 fma.s1 fRes3L = fRes2H, fB2, f0 // (A3*x + A2)hi*d(x^2)
6868 fma.s1 fA7 = fA7, fA4L, fA5 // Elo
6874 fma.s1 fA25 = fA25, fB8, fA19 // C*x^8 + D
6880 fadd.s1 fRes2L = fRes2L, fRes4L // (A3*x + A2)lo
6886 fms.s1 fB4 = fRes2H, fA4L, fRes3H // d((A3*x + A2)*x^2))
6891 fadd.s1 fRes1L = fRes1L, fResL // (A1*x + A0)lo
6897 fadd.s1 fB20 = fRes3H, fRes1H // Phi
6902 fma.s1 fA11 = fA11, fA5L, fA7 // E
6908 // ( (A3*x + A2)lo*<x^2> + (A3*x + A2)hi*d(x^2))
6909 fma.s1 fRes3L = fRes2L, fA4L, fRes3L
6915 // d((A3*x + A2)*x^2)) + (A1*x + A0)lo
6916 fadd.s1 fRes1L = fRes1L, fB4
6922 fsub.s1 fB18 = fRes1H, fB20
6927 fma.s1 fPol = fA25, fB8, fA11
6933 fadd.s1 fRes1L = fRes1L, fRes3L
6939 fadd.s1 fB18 = fB18, fRes3H
6944 fma.s1 fRes4H = fPol, fA5L, fB20
6950 fma.s1 fPolL = fPol, fA5L, f0
6956 fadd.s1 fB18 = fB18, fRes1L // Plo
6961 fsub.s1 fRes4L = fB20, fRes4H
6967 fadd.s1 fB18 = fB18, fPolL
6973 fadd.s1 fRes4L = fRes4L, fB18
6979 fma.s0 f8 = fRes4H, f1, fRes4L
6980 // P25(x) computed, exit here
6986 // here if 0.75 <= x < 1.3125
6990 addl rPolDataPtr= @ltoff(lgammal_03Q_1Q_data),gp
6991 fma.s1 FR_FracX = fA5L, f1, f0 // x
6992 adds rSgnGam = 1, r0
6996 fma.s1 fB4 = fA5L, fA5L, f0 // x^2
7001 ld8 rPolDataPtr = [rPolDataPtr]
7007 adds rTmpPtr = 144, rPolDataPtr
7009 br.sptk lgamma_polynom24x
7013 // here if 1.5625 <= x < 2.25
7017 addl rPolDataPtr= @ltoff(lgammal_13Q_2Q_data),gp
7018 fma.s1 FR_FracX = fB4, f1, f0 // x
7019 adds rSgnGam = 1, r0
7023 fma.s1 fB4 = fB4, fB4, f0 // x^2
7028 ld8 rPolDataPtr = [rPolDataPtr]
7034 adds rTmpPtr = 144, rPolDataPtr
7036 br.sptk lgamma_polynom24x
7040 // here if result is Pol24(x)
7041 // x is in FR_FracX,
7042 // rPolDataPtr, rTmpPtr point to coefficients
7046 ldfpd fA4, fA2L = [rPolDataPtr], 16
7048 cmp.eq p6, p7 = 4, rSgnGamSize
7051 ldfpd fA23, fA24 = [rTmpPtr], 16 // C18, C19
7057 ldfpd fA3, fA1L = [rPolDataPtr], 16
7058 fma.s1 fA5L = fB4, fB4, f0 // x^4
7062 ldfpd fA19, fA20 = [rTmpPtr], 16 // D6, D7
7063 fms.s1 fB2 = FR_FracX, FR_FracX, fB4 // x^2 - <x^2>
7068 ldfpd fA15, fA16 = [rPolDataPtr], 16 // D2, D3
7069 ldfpd fA17, fA18 = [rTmpPtr], 16 // D4, D5
7074 ldfpd fA13, fA14 = [rPolDataPtr], 16 // D0, D1
7075 ldfpd fA12, fA21 = [rTmpPtr], 16 // E7, C16
7080 ldfe fA11 = [rPolDataPtr], 16 // E6
7085 ldfe fA10 = [rTmpPtr], 16 // E5
7091 ldfpd fA2, fA4L = [rPolDataPtr], 16
7096 ldfpd fA1, fA3L = [rTmpPtr], 16
7102 ldfpd fA22, fA25 = [rPolDataPtr], 16 // C17, C20
7103 fma.s1 fA0 = fA5L, fA5L, f0 // x^8
7108 fma.s1 fA0L = fA5L, FR_FracX, f0 // x^5
7113 ldfe fA9 = [rPolDataPtr], 16 // E4
7114 ldfe fA8 = [rTmpPtr], 16 // E3
7119 ldfe fA7 = [rPolDataPtr], 16 // E2
7120 ldfe fA6 = [rTmpPtr], 16 // E1
7125 ldfe fA5 = [rTmpPtr], 16 // E0
7126 fma.s1 fRes4H = fA4, fB4, f0 // A4*<x^2>
7131 fma.s1 fPol = fA24, FR_FracX, fA23 // C19*x + C18
7136 // store signgam if size of variable is 4 bytes
7137 (p6) st4 [rSgnGamAddr] = rSgnGam
7138 fma.s1 fRes1H = fA3, fB4, f0 // A3*<x^2>
7142 // store signgam if size of variable is 8 bytes
7143 (p7) st8 [rSgnGamAddr] = rSgnGam
7144 fma.s1 fA1L = fA3, fB2,fA1L // A3*d(x^2) + A1L
7150 fma.s1 fA20 = fA20, FR_FracX, fA19 // D7*x + D6
7155 fma.s1 fA18 = fA18, FR_FracX, fA17 // D5*x + D4
7161 fma.s1 fA16 = fA16, FR_FracX, fA15 // D3*x + D2
7166 fma.s1 fA14 = fA14, FR_FracX, fA13 // D1*x + D0
7172 fma.s1 fA2L = fA4, fB2,fA2L // A4*d(x^2) + A2L
7177 fma.s1 fA12 = fA12, FR_FracX, fA11 // E7*x + E6
7183 fms.s1 fRes2L = fA4, fB4, fRes4H // delta(A4*<x^2>)
7188 fadd.s1 fRes2H = fRes4H, fA2 // A4*<x^2> + A2
7194 fms.s1 fRes3L = fA3, fB4, fRes1H // delta(A3*<x^2>)
7199 fadd.s1 fRes3H = fRes1H, fA1 // A3*<x^2> + A1
7205 fma.s1 fA20 = fA20, fB4, fA18 // (D7*x + D6)*x^2 + D5*x + D4
7210 fma.s1 fA22 = fA22, FR_FracX, fA21 // C17*x + C16
7216 fma.s1 fA16 = fA16, fB4, fA14 // (D3*x + D2)*x^2 + D1*x + D0
7221 fma.s1 fPol = fA25, fB4, fPol // C20*x^2 + C19*x + C18
7227 fma.s1 fA2L = fA4L, fB4, fA2L // A4L*<x^2> + A4*d(x^2) + A2L
7232 fma.s1 fA1L = fA3L, fB4, fA1L // A3L*<x^2> + A3*d(x^2) + A1L
7238 fsub.s1 fRes4L = fA2, fRes2H // d1
7243 fma.s1 fResH = fRes2H, fB4, f0 // (A4*<x^2> + A2)*x^2
7249 fsub.s1 fRes1L = fA1, fRes3H // d1
7254 fma.s1 fB6 = fRes3H, FR_FracX, f0 // (A3*<x^2> + A1)*x
7260 fma.s1 fA10 = fA10, FR_FracX, fA9 // E5*x + E4
7265 fma.s1 fA8 = fA8, FR_FracX, fA7 // E3*x + E2
7271 // (C20*x^2 + C19*x + C18)*x^2 + C17*x + C16
7272 fma.s1 fPol = fPol, fB4, fA22
7277 fma.s1 fA6 = fA6, FR_FracX, fA5 // E1*x + E0
7283 // A4L*<x^2> + A4*d(x^2) + A2L + delta(A4*<x^2>)
7284 fadd.s1 fRes2L = fA2L, fRes2L
7289 // A3L*<x^2> + A3*d(x^2) + A1L + delta(A3*<x^2>)
7290 fadd.s1 fRes3L = fA1L, fRes3L
7296 fadd.s1 fRes4L = fRes4L, fRes4H // d2
7301 fms.s1 fResL = fRes2H, fB4, fResH // d(A4*<x^2> + A2)*x^2)
7307 fadd.s1 fRes1L = fRes1L, fRes1H // d2
7312 fms.s1 fB8 = fRes3H, FR_FracX, fB6 // d((A3*<x^2> + A1)*x)
7318 fadd.s1 fB10 = fResH, fB6 // (A4*x^4 + .. + A1*x)hi
7323 fma.s1 fA12 = fA12, fB4, fA10 // Ehi
7329 // ((D7*x + D6)*x^2 + D5*x + D4)*x^4 + (D3*x + D2)*x^2 + D1*x + D0
7330 fma.s1 fA20 = fA20, fA5L, fA16
7335 fma.s1 fA8 = fA8, fB4, fA6 // Elo
7341 fadd.s1 fRes2L = fRes2L, fRes4L // (A4*<x^2> + A2)lo
7346 // d(A4*<x^2> + A2)*x^2) + A4*<x^2> + A2)*d(x^2)
7347 fma.s1 fResL = fRes2H, fB2, fResL
7353 fadd.s1 fRes3L = fRes3L, fRes1L // (A4*<x^2> + A2)lo
7359 fsub.s1 fB12 = fB6, fB10
7365 fma.s1 fPol = fPol, fA0, fA20 // PolC*x^8 + PolD
7370 fma.s1 fPolL = fA12, fA5L, fA8 // E
7376 fma.s1 fResL = fB4, fRes2L, fResL // ((A4*<x^2> + A2)*x^2)lo
7382 fma.s1 fRes3L = fRes3L, FR_FracX, fB8 // ((A3*<x^2> + A1)*x)lo
7388 fadd.s1 fB12 = fB12, fResH
7394 fma.s1 fPol = fPol, fA0, fPolL
7400 fadd.s1 fRes3L = fRes3L, fResL
7406 fma.s1 fRes2H = fPol, fA0L, fB10
7412 fadd.s1 fRes3L = fB12, fRes3L
7418 fsub.s1 fRes4L = fB10, fRes2H
7424 fma.s1 fRes4L = fPol, fA0L, fRes4L
7430 fadd.s1 fRes4L = fRes4L, fRes3L
7436 // final result for all paths for which the result is Pol24(x)
7437 fma.s0 f8 = fRes2H, f1, fRes4L
7438 // here is the exit for all paths for which the result is Pol24(x)
7444 // here if x is natval, nan, +/-inf, +/-0, or denormal
7449 fclass.m p9, p0 = f8, 0xB // +/-denormals
7454 fclass.m p6, p0 = f8, 0x1E1 // Test x for natval, nan, +inf
7459 fclass.m p7, p0 = f8, 0x7 // +/-0
7460 (p9) br.cond.sptk lgammal_denormal_input
7465 // branch out if x is natval, nan, +inf
7466 (p6) br.cond.spnt lgammal_nan_pinf
7471 (p7) br.cond.spnt lgammal_singularity
7473 // if we are still here then x = -inf
7475 cmp.eq p6, p7 = 4, rSgnGamSize
7477 adds rSgnGam = 1, r0
7480 // store signgam if size of variable is 4 bytes
7481 (p6) st4 [rSgnGamAddr] = rSgnGam
7486 // store signgam if size of variable is 8 bytes
7487 (p7) st8 [rSgnGamAddr] = rSgnGam
7488 fma.s0 f8 = f8,f8,f0 // return +inf, no call to error support
7492 // here if x is NaN, NatVal or +INF
7496 cmp.eq p6, p7 = 4, rSgnGamSize
7498 adds rSgnGam = 1, r0
7502 // store signgam if size of variable is 4 bytes
7503 (p6) st4 [rSgnGamAddr] = rSgnGam
7504 fma.s0 f8 = f8,f1,f8 // return x+x if x is natval, nan, +inf
7508 // store signgam if size of variable is 8 bytes
7509 (p7) st8 [rSgnGamAddr] = rSgnGam
7515 // here if x denormal or unnormal
7517 lgammal_denormal_input:
7520 fma.s0 fResH = f1, f1, f8 // raise denormal exception
7525 fnorm.s1 f8 = f8 // normalize input value
7530 getf.sig rSignifX = f8
7531 fmerge.se fSignifX = f1, f8
7535 getf.exp rSignExpX = f8
7536 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR)
7541 getf.exp rSignExpX = f8
7542 fcmp.lt.s1 p15, p14 = f8, f0
7547 and rExpX = rSignExpX, r17Ones
7548 fmerge.s fAbsX = f1, f8 // |x|
7549 br.cond.sptk _deno_back_to_main_path
7554 // here if overflow (x > overflow_bound)
7558 addl r8 = 0x1FFFE, r0
7560 cmp.eq p6, p7 = 4, rSgnGamSize
7563 adds rSgnGam = 1, r0
7570 fmerge.s FR_X = f8,f8
7571 mov GR_Parameter_TAG = 102 // overflow
7574 // store signgam if size of variable is 4 bytes
7575 (p6) st4 [rSgnGamAddr] = rSgnGam
7580 // store signgam if size of variable is 8 bytes
7581 (p7) st8 [rSgnGamAddr] = rSgnGam
7582 fma.s0 FR_RESULT = f9,f9,f0 // Set I,O and +INF result
7583 br.cond.sptk __libm_error_region
7586 // here if x is negative integer or +/-0 (SINGULARITY)
7588 lgammal_singularity:
7590 adds rSgnGam = 1, r0
7591 fclass.m p8,p0 = f8,0x6 // is x -0?
7592 mov GR_Parameter_TAG = 103 // negative
7595 cmp.eq p6, p7 = 4, rSgnGamSize
7596 fma.s1 FR_X = f0,f0,f8
7600 (p8) sub rSgnGam = r0, rSgnGam
7610 // store signgam if size of variable is 4 bytes
7611 (p6) st4 [rSgnGamAddr] = rSgnGam
7616 // store signgam if size of variable is 8 bytes
7617 (p7) st8 [rSgnGamAddr] = rSgnGam
7618 frcpa.s0 FR_RESULT, p0 = f1, f0
7619 br.cond.sptk __libm_error_region
7622 GLOBAL_LIBM_END(__libm_lgammal)
7626 LOCAL_LIBM_ENTRY(__libm_error_region)
7629 add GR_Parameter_Y=-32,sp // Parameter 2 value
7631 .save ar.pfs,GR_SAVE_PFS
7632 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
7636 add sp=-64,sp // Create new stack
7638 mov GR_SAVE_GP=gp // Save gp
7641 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
7642 add GR_Parameter_X = 16,sp // Parameter 1 address
7643 .save b0, GR_SAVE_B0
7644 mov GR_SAVE_B0=b0 // Save b0
7648 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
7649 add GR_Parameter_RESULT = 0,GR_Parameter_Y
7650 nop.b 0 // Parameter 3 address
7653 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
7654 add GR_Parameter_Y = -16,GR_Parameter_Y
7655 br.call.sptk b0=__libm_error_support# // Call error handling function
7658 add GR_Parameter_RESULT = 48,sp
7663 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
7665 add sp = 64,sp // Restore stack pointer
7666 mov b0 = GR_SAVE_B0 // Restore return address
7669 mov gp = GR_SAVE_GP // Restore gp
7670 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
7671 br.ret.sptk b0 // Return
7674 LOCAL_LIBM_END(__libm_error_region#)
7676 .type __libm_error_support#,@function
7677 .global __libm_error_support#