]> git.ipfire.org Git - thirdparty/glibc.git/blame - ports/sysdeps/ia64/fpu/libm_lgammal.S
Fix typos.
[thirdparty/glibc.git] / ports / sysdeps / ia64 / fpu / libm_lgammal.S
CommitLineData
d5efd131
MF
1.file "libm_lgammal.s"
2
3
4// Copyright (c) 2002 - 2005, Intel Corporation
5// All rights reserved.
6//
7// Contributed 2002 by the Intel Numerics Group, Intel Corporation
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
23
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
26// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Intel Corporation is the author of this code,and requests that all
37// problem reports or change requests be submitted to it directly at
38// http://www.intel.com/software/products/opensource/libraries/num.htm.
39//
40//*********************************************************************
41//
42// History:
43// 03/28/02 Original version
44// 05/20/02 Cleaned up namespace and sf0 syntax
45// 08/21/02 Added support of SIGN(GAMMA(x)) calculation
46// 09/26/02 Algorithm description improved
47// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
48// 02/10/03 Reordered header: .section, .global, .proc, .align
49// 03/31/05 Reformatted delimiters between data tables
50//
51//*********************************************************************
52//
53// Function: __libm_lgammal(long double x, int* signgam, int szsigngam)
54// computes the principal value of the logarithm of the GAMMA function
55// of x. Signum of GAMMA(x) is stored to memory starting at the address
56// specified by the signgam.
57//
58//*********************************************************************
59//
60// Resources Used:
61//
62// Floating-Point Registers: f8 (Input and Return Value)
63// f9-f15
64// f32-f127
65//
66// General Purpose Registers:
67// r2, r3, r8-r11, r14-r31
68// r32-r65
69// r66-r69 (Used to pass arguments to error handling routine)
70//
71// Predicate Registers: p6-p15
72//
73//*********************************************************************
74//
75// IEEE Special Conditions:
76//
77// __libm_lgammal(+inf) = +inf
78// __libm_lgammal(-inf) = QNaN
79// __libm_lgammal(+/-0) = +inf
80// __libm_lgammal(x<0, x - integer) = QNaN
81// __libm_lgammal(SNaN) = QNaN
82// __libm_lgammal(QNaN) = QNaN
83//
84//*********************************************************************
85//
86// ALGORITHM DESCRIPTION
87//
88// Below we suppose that there is a log(z) function which takes a long
89// double argument and returns result as a pair of long double numbers
90// lnHi and lnLo (such that sum lnHi + lnLo provides ~80 correct bits
91// of significand). Algorithm description for such log(z) function
92// see below.
93// Also, in this algorithm description we use the following notational
94// conventions:
95// a) pair A = (Ahi, Alo) means number A represented as sum of Ahi and Alo
96// b) C = A + B = (Ahi, Alo) + (Bhi, Blo) means multi-precision addition.
97// The result would be C = (Chi, Clo). Notice, that Clo shouldn't be
98// equal to Alo + Blo
99// c) D = A*B = (Ahi, Alo)*(Bhi, Blo) = (Dhi, Dlo) multi-precision
100// multiplication.
101//
102// So, lgammal has the following computational paths:
103// 1) |x| < 0.5
104// P = A1*|x| + A2*|x|^2 + ... + A22*|x|^22
105// A1, A2, A3 represented as a sum of two double precision
106// numbers and multi-precision computations are used for 3 higher
107// terms of the polynomial. We get polynomial as a sum of two
108// double extended numbers: P = (Phi, Plo)
109// 1.1) x > 0
110// lgammal(x) = P - log(|x|) = (Phi, Plo) - (lnHi(|x|), lnLo(|x|))
111// 1.2) x < 0
112// lgammal(x) = -P - log(|x|) - log(sin(Pi*x)/(Pi*x))
113// P and log(|x|) are computed by the same way as in 1.1;
114// - log(sin(Pi*x)/(Pi*x)) is approximated by a polynomial Plnsin.
115// Plnsin:= fLnSin2*|x|^2 + fLnSin4*|x|^4 + ... + fLnSin36*|x|^36
116// The first coefficient of Plnsin is represented as sum of two
117// double precision numbers (fLnSin2, fLnSin2L). Multi-precision
118// computations for higher two terms of Plnsin are used.
119// So, the final result is reconstructed by the following formula
120// lgammal(x) = (-(Phi, Plo) - (lnHi(|x|), lnLo(|x|))) -
121// - (PlnsinHi,PlnsinLo)
122//
123// 2) 0.5 <= x < 0.75 -> t = x - 0.625
124// -0.75 < x <= -0.5 -> t = x + 0.625
125// 2.25 <= x < 4.0 -> t = x/2 - 1.5
126// 4.0 <= x < 8.0 -> t = x/4 - 1.5
127// -0.5 < x <= -0.40625 -> t = x + 0.5
128// -2.6005859375 < x <= -2.5 -> t = x + 2.5
129// 1.3125 <= x < 1.5625 -> t = x - LOC_MIN, where LOC_MIN is point in
130// which lgammal has local minimum. Exact
131// value can be found in the table below,
132// approximate value is ~1.46
133//
134// lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
135// P25(t) = A0 + A1*t + ... + A25*t^25 = (Phi, Plo) + t^4*P21(t),
136// where
137// (Phi, Plo) is sum of four highest terms of the polynomial P25(t):
138// (Phi, Plo) = ((A0, A0L) + (A1, A1L)*t) + t^2 *((A2, A2L) + (A3, A3L)*t),
139// (Ai, AiL) - coefficients represented as pairs of DP numbers.
140//
141// P21(t) = (PolC(t)*t^8 + PolD(t))*t^8 + PolE(t),
142// where
143// PolC(t) = C21*t^5 + C20*t^4 + ... + C16,
144// C21 = A25, C20 = A24, ..., C16 = A20
145//
146// PolD(t) = D7*t^7 + D6*t^6 + ... + D0,
147// D7 = A19, D6 = A18, ..., D0 = A12
148//
149// PolE(t) = E7*t^7 + E6*t^6 + ... + E0,
150// E7 = A11, E6 = A10, ..., E0 = A4
151//
152// Cis and Dis are represented as double precision numbers,
153// Eis are represented as double extended numbers.
154//
155// 3) 0.75 <= x < 1.3125 -> t = x - 1.0
156// 1.5625 <= x < 2.25 -> t = x - 2.0
157// lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
158// P25(t) = A1*t + ... + A25*t^25, and computations are carried out
159// by similar way as in the previous case
160//
161// 4) 10.0 < x <= Overflow Bound ("positive Stirling" range)
162// lgammal(x) is approximated using Stirling's formula:
163// lgammal(x) ~ ((x*(lnHi(x) - 1, lnLo(x))) - 0.5*(lnHi(x), lnLo(x))) +
164// + ((Chi, Clo) + S(1/x))
165// where
166// C = (Chi, Clo) - pair of double precision numbers representing constant
167// 0.5*ln(2*Pi);
168// S(1/x) = 1/x * (B2 + B4*(1/x)^2 + ... + B20*(1/x)^18), B2, ..., B20 are
169// Bernoulli numbers. S is computed in native precision and then added to
170// Clo;
171// lnHi(x) - 1 is computed in native precision and the multiprecision
172// multiplication (x, 0) *(lnHi(x) - 1, lnLo(x)) is used.
173//
174// 5) -INF < x <= -2^63, any negative integer < 0
175// All numbers in this range are integers -> error handler is called
176//
177// 6) -2^63 < x <= -0.75 ("negative Stirling" range), x is "far" from root,
178// lgammal(-t) for positive t is approximated using the following formula:
179// lgammal(-t) = -lgammal(t)-log(t)-log(|dT|)+log(sin(Pi*|dT|)/(Pi*|dT|))
180// where dT = -t -round_to_nearest_integer(-t)
181// Last item is approximated by the same polynomial as described in 1.2.
182// We split the whole range into three subranges due to different ways of
183// approximation of the first terms.
184// 6.1) -2^63 < x < -6.0 ("negative Stirling" range)
185// lgammal(t) is approximated exactly as in #4. The only difference is that
186// for -13.0 < x < -6.0 subrange instead of Bernoulli numbers we use their
187// minimax approximation on this range.
188// log(t), log(|dT|) are approximated by the log routine mentioned above.
189// 6.2) -6.0 < x <= -0.75, |x + 1|> 2^(-7)
190// log(t), log(|dT|) are approximated by the log routine mentioned above,
191// lgammal(t) is approximated by polynomials of the 25th degree similar
192// to ones from #2. Arguments z of the polynomials are as follows
193// a) 0.75 <= t < 1.0 - 2^(-7), z = 2*t - 1.5
194// b) 1.0 - 2^(-7) < t < 2.0, z = t - 1.5
195// c) 2.0 < t < 3.0, z = t/2 - 1.5
196// d) 3.0 < t < 4.0, z = t/2 - 1.5. Notice, that range reduction is
197// the same as in case c) but the set of coefficients is different
198// e) 4.0 < t < 6.0, z = t/4 - 1.5
199// 6.3) |x + 1| <= 2^(-7)
200// log(1 + (x-1)) is approximated by Taylor series,
201// log(sin(Pi*|dT|)/(Pi*|dT|)) is still approximated by polynomial but
202// it has just 4th degree.
203// log(|dT|) is approximated by the log routine mentioned above.
204// lgammal(-x) is approximated by polynomial of 8th degree from (-x + 1).
205//
206// 7) -20.0 < x < -2.0, x falls in root "neighbourhood".
207// "Neighbourhood" means that |lgammal(x)| < epsilon, where epsilon is
208// different for every root (and it is stored in the table), but typically
209// it is ~ 0.15. There are 35 roots significant from "double extended"
210// point of view. We split all the roots into two subsets: "left" and "right"
211// roots. Considering [-(N+1), -N] range we call root as "left" one if it
212// lies closer to -(N+1) and "right" otherwise. There is no "left" root in
213// the [-20, -19] range (it exists, but is insignificant for double extended
214// precision). To determine if x falls in root "neighbourhood" we store
215// significands of all the 35 roots as well as epsilon values (expressed
216// by the left and right bound).
217// In these ranges we approximate lgammal(x) by polynomial series of 19th
218// degree:
219// lgammal(x) = P19(t) = A0 + A1*t + ...+ A19*t^19, where t = x - EDP_Root,
220// EDP_Root is the exact value of the corresponding root rounded to double
221// extended precision. So, we have 35 different polynomials which make our
222// table rather big. We may hope that x falls in root "neighbourhood"
223// quite rarely -> there might be no need for frequent use of different
224// polynomials.
225// A0, A1, A2, A3 are represented as pairs of double precision numbers,
226// A4, A5 are long doubles, and to decrease the size of the table we
227// keep the rest of coefficients in just double precision
228//
229//*********************************************************************
230// Algorithm for log(X) = (lnHi(X), lnLo(X))
231//
232// ALGORITHM
233//
234// Here we use a table lookup method. The basic idea is that in
235// order to compute logl(Arg) for an argument Arg in [1,2), we
236// construct a value G such that G*Arg is close to 1 and that
237// logl(1/G) is obtainable easily from a table of values calculated
238// beforehand. Thus
239//
240// logl(Arg) = logl(1/G) + logl(G*Arg)
241// = logl(1/G) + logl(1 + (G*Arg - 1))
242//
243// Because |G*Arg - 1| is small, the second term on the right hand
244// side can be approximated by a short polynomial. We elaborate
245// this method in four steps.
246//
247// Step 0: Initialization
248//
249// We need to calculate logl( X ). Obtain N, S_hi such that
250//
251// X = 2^N * S_hi exactly
252//
253// where S_hi in [1,2)
254//
255// Step 1: Argument Reduction
256//
257// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
258//
259// G := G_1 * G_2 * G_3
260// r := (G * S_hi - 1)
261//
262// These G_j's have the property that the product is exactly
263// representable and that |r| < 2^(-12) as a result.
264//
265// Step 2: Approximation
266//
267//
268// logl(1 + r) is approximated by a short polynomial poly(r).
269//
270// Step 3: Reconstruction
271//
272//
273// Finally, logl( X ) is given by
274//
275// logl( X ) = logl( 2^N * S_hi )
276// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
277// ~=~ N*logl(2) + logl(1/G) + poly(r).
278//
279// IMPLEMENTATION
280//
281// Step 0. Initialization
282// ----------------------
283//
284// Z := X
285// N := unbiased exponent of Z
286// S_hi := 2^(-N) * Z
287//
288// Step 1. Argument Reduction
289// --------------------------
290//
291// Let
292//
293// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
294//
295// We obtain G_1, G_2, G_3 by the following steps.
296//
297//
298// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
299// from S_hi.
300//
301// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
302// to lsb = 2^(-4).
303//
304// Define index_1 := [ d_1 d_2 d_3 d_4 ].
305//
306// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
307// fixed point lsb = 2^(-15).
308// Z_1 looks like z_0.z_1 z_2 ... z_15
309// Note that the fetching is done using index_1.
310// A_1 is actually not needed in the implementation
311// and is used here only to explain how is the value
312// Z_1 defined.
313//
314// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
315// floating pt. Again, fetching is done using index_1. A_1
316// explains how G_1 is defined.
317//
318// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
319// = 1.0 0 0 0 d_5 ... d_14
c0c3f78a 320// This is accomplished by integer multiplication.
d5efd131
MF
321// It is proved that X_1 indeed always begins
322// with 1.0000 in fixed point.
323//
324//
325// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
326// truncated to lsb = 2^(-8). Similar to A_1,
327// A_2 is not needed in actual implementation. It
328// helps explain how some of the values are defined.
329//
330// Define index_2 := [ d_5 d_6 d_7 d_8 ].
331//
332// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
333// fixed point lsb = 2^(-15). Fetch done using index_2.
334// Z_2 looks like z_0.z_1 z_2 ... z_15
335//
336// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
337// floating pt.
338//
339// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
340// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
c0c3f78a 341// This is accomplished by integer multiplication.
d5efd131
MF
342// It is proved that X_2 indeed always begins
343// with 1.00000000 in fixed point.
344//
345//
346// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
347// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
348//
349// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
350//
351// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
352// floating pt. Fetch is done using index_3.
353//
354// Compute G := G_1 * G_2 * G_3.
355//
356// This is done exactly since each of G_j only has 21 sig. bits.
357//
358// Compute
359//
360// r := (G*S_hi - 1)
361//
362//
363// Step 2. Approximation
364// ---------------------
365//
366// This step computes an approximation to logl( 1 + r ) where r is the
367// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
368// thus logl(1+r) can be approximated by a short polynomial:
369//
370// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
371//
372//
373// Step 3. Reconstruction
374// ----------------------
375//
376// This step computes the desired result of logl(X):
377//
378// logl(X) = logl( 2^N * S_hi )
379// = N*logl(2) + logl( S_hi )
380// = N*logl(2) + logl(1/G) +
381// logl(1 + G*S_hi - 1 )
382//
383// logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
384// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
385// single-precision numbers and the low parts are double precision
386// numbers. These have the property that
387//
388// N*log2_hi + SUM ( log1byGj_hi )
389//
390// is computable exactly in double-extended precision (64 sig. bits).
391// Finally
392//
393// lnHi(X) := N*log2_hi + SUM ( log1byGj_hi )
394// lnLo(X) := poly_hi + [ poly_lo +
395// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
396//
397//
398//*********************************************************************
399// General Purpose Registers
400// scratch registers
401rPolDataPtr = r2
402rLnSinDataPtr = r3
403rExpX = r8
404rSignifX = r9
405rDelta = r10
406rSignExpX = r11
407GR_ad_z_1 = r14 // NOTE(review): GR_ad_z_1 is assigned again below (to r30) - confirm which register is intended
408r17Ones = r15
409GR_Index1 = r16
410rSignif1andQ = r17
411GR_X_0 = r18
412GR_X_1 = r19
413GR_X_2 = r20
414GR_Z_1 = r21
415GR_Z_2 = r22
416GR_N = r23
417rExpHalf = r24
418rExp8 = r25
419rX0Dx = r25 // same register as rExp8 (r25); NOTE(review): presumably never live at the same time - verify against the code
420GR_ad_tbl_1 = r26
421GR_ad_tbl_2 = r27
422GR_ad_tbl_3 = r28
423GR_ad_q = r29
424GR_ad_z_1 = r30 // second assignment of GR_ad_z_1 (the first one above names r14)
425GR_ad_z_2 = r31
426// stacked registers
427rPFS_SAVED = r32
428GR_ad_z_3 = r33
429rSgnGamAddr = r34
430rSgnGamSize = r35
431rLogDataPtr = r36
432rZ1offsett = r37
433rTmpPtr = r38
434rTmpPtr2 = r39
435rTmpPtr3 = r40
436rExp2 = r41
437rExp2tom7 = r42
438rZ625 = r42
439rExpOne = r43
440rNegSingularity = r44
441rXint = r45
442rTbl1Addr = r46
443rTbl2Addr = r47
444rTbl3Addr = r48
445rZ2Addr = r49
446rRootsAddr = r50
447rRootsBndAddr = r51
448rRoot = r52
449rRightBound = r53
450rLeftBound = r54
451rSignifDx = r55
452rBernulliPtr = r56
453rLnSinTmpPtr = r56
454rIndex1Dx = r57
455rIndexPol = r58
456GR_Index3 = r59
457GR_Index2 = r60
458rSgnGam = r61
459rXRnd = r62
460
461GR_SAVE_B0 = r63
462GR_SAVE_GP = r64
463GR_SAVE_PFS = r65
464// output parameters when calling error handling routine
465GR_Parameter_X = r66
466GR_Parameter_Y = r67
467GR_Parameter_RESULT = r68
468GR_Parameter_TAG = r69
469
470//********************************************************************
471// Floating Point Registers
472// CAUTION: due to the lack of registers there exist (below in the code)
473// sometimes "unconventional" use of declared registers
474//
475fAbsX = f6
476fDelX4 = f6
477fSignifX = f7
478// macros for error handling routine
479FR_X = f10 // first argument
480FR_Y = f1 // second argument (lgammal has just one)
481FR_RESULT = f8 // result
482
483// First 7 Bernoulli numbers
484fB2 = f9
485fLnDeltaL = f9
486fXSqr = f9
487fB4 = f10
488fX4 = f10
489fB6 = f11
490fX6 = f11
491fB8 = f12
492fXSqrL = f12
493fB10 = f13
494fRes7H = f13
495fB12 = f14
496fRes7L = f14
497fB14 = f15
498
499// stack registers
500// Polynomial coefficients: A0, ..., A25
501fA0 = f32
502fA0L = f33
503fInvXL = f33
504fA1 = f34
505fA1L = f35
506fA2 = f36
507fA2L = f37
508fA3 = f38
509fA3L = f39
510fA4 = f40
511fA4L = f41
512fRes6H = f41
513fA5 = f42
514fB2L = f42
515fA5L = f43
516fMinNegStir = f43
517fRes6L = f43
518fA6 = f44
519fMaxNegStir = f44
520fA7 = f45
521fLnDeltaH = f45
522fA8 = f46
523fBrnL = f46
524fA9 = f47
525fBrnH = f47
526fA10 = f48
527fRes5L = f48
528fA11 = f49
529fRes5H = f49
530fA12 = f50
531fDx6 = f50
532fA13 = f51
533fDx8 = f51
534fA14 = f52
535fDx4 = f52
536fA15 = f53
537fYL = f53
538fh3Dx = f53
539fA16 = f54
540fYH = f54
541fH3Dx = f54
542fA17 = f55
543fResLnDxL = f55
544fG3Dx = f55
545fA18 = f56
546fResLnDxH = f56
547fh2Dx = f56
548fA19 = f57
549fFloatNDx = f57
550fA20 = f58
551fPolyHiDx = f58
552fhDx = f58
553fA21 = f59
554fRDxCub = f59
555fHDx = f59
556fA22 = f60
557fRDxSq = f60
558fGDx = f60
559fA23 = f61
560fPolyLoDx = f61
561fInvX3 = f61
562fA24 = f62
563fRDx = f62
564fInvX8 = f62
565fA25 = f63
566fInvX4 = f63
567fPol = f64
568fPolL = f65
569// Coefficients of ln(sin(Pi*x)/(Pi*x))
570fLnSin2 = f66
571fLnSin2L = f67
572fLnSin4 = f68
573fLnSin6 = f69
574fLnSin8 = f70
575fLnSin10 = f71
576fLnSin12 = f72
577fLnSin14 = f73
578fLnSin16 = f74
579fLnSin18 = f75
580fDelX8 = f75
581fLnSin20 = f76
582fLnSin22 = f77
583fDelX6 = f77
584fLnSin24 = f78
585fLnSin26 = f79
586fLnSin28 = f80
587fLnSin30 = f81
588fhDelX = f81
589fLnSin32 = f82
590fLnSin34 = f83
591fLnSin36 = f84
592fXint = f85
593fDxSqr = f85
594fRes3L = f86
595fRes3H = f87
596fRes4H = f88
597fRes4L = f89
598fResH = f90
599fResL = f91
600fDx = f92
601FR_MHalf = f93
602fRes1H = f94
603fRes1L = f95
604fRes2H = f96
605fRes2L = f97
606FR_FracX = f98
607fRcpX = f99
608fLnSinH = f99
609fTwo = f100
610fMOne = f100
611FR_G = f101
612FR_H = f102
613FR_h = f103
614FR_G2 = f104
615FR_H2 = f105
616FR_poly_lo = f106
617FR_poly_hi = f107
618FR_h2 = f108
619FR_rsq = f109
620FR_r = f110
621FR_log2_hi = f111
622FR_log2_lo = f112
623fFloatN = f113
624FR_Q4 = f114
625FR_G3 = f115
626FR_H3 = f116
627FR_h3 = f117
628FR_Q3 = f118
629FR_Q2 = f119
630FR_Q1 = f120
631fThirteen = f121
632fSix = f121
633FR_rcub = f121
634// Last three Bernoulli numbers
635fB16 = f122
636fB18 = f123
637fB20 = f124
638fInvX = f125
639fLnSinL = f125
640fDxSqrL = f126
641fFltIntX = f126
642fRoot = f127
643fNormDx = f127
644
645// Data tables
646//==============================================================
647RODATA
648// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
649.align 16
650LOCAL_OBJECT_START(lgammal_right_roots_data)
651// List of all right roots themselves
652data8 0x9D3FE4B007C360AB, 0x0000C000 // Range [-3, -2]
653data8 0xC9306DE4F2CD7BEE, 0x0000C000 // Range [-4, -3]
654data8 0x814273C2CCAC0618, 0x0000C001 // Range [-5, -4]
655data8 0xA04352BF85B6C865, 0x0000C001 // Range [-6, -5]
656data8 0xC00B592C4BE4676C, 0x0000C001 // Range [-7, -6]
657data8 0xE0019FEF6FF0F5BF, 0x0000C001 // Range [-8, -7]
658data8 0x80001A01459FC9F6, 0x0000C002 // Range [-9, -8]
659data8 0x900002E3BB47D86D, 0x0000C002 // Range [-10, -9]
660data8 0xA0000049F93BB992, 0x0000C002 // Range [-11, -10]
661data8 0xB0000006B9915316, 0x0000C002 // Range [-12, -11]
662data8 0xC00000008F76C773, 0x0000C002 // Range [-13, -12]
663data8 0xD00000000B09230A, 0x0000C002 // Range [-14, -13]
664data8 0xE000000000C9CBA5, 0x0000C002 // Range [-15, -14]
665data8 0xF0000000000D73FA, 0x0000C002 // Range [-16, -15]
666data8 0x8000000000006BA0, 0x0000C003 // Range [-17, -16]
667data8 0x8800000000000655, 0x0000C003 // Range [-18, -17]
668data8 0x900000000000005A, 0x0000C003 // Range [-19, -18]
669data8 0x9800000000000005, 0x0000C003 // Range [-20, -19]
670// List of bounds of ranges with special polynomial approximation near root
671// Only significands of bounds are actually stored
672data8 0xA000000000000000, 0x9800000000000000 // Bounds for root on [-3, -2]
673data8 0xCAB88035C5EFBB41, 0xC7E05E31F4B02115 // Bounds for root on [-4, -3]
674data8 0x817831B899735C72, 0x8114633941B8053A // Bounds for root on [-5, -4]
675data8 0xA04E8B34C6AA9476, 0xA039B4A42978197B // Bounds for root on [-6, -5]
676data8 0xC00D3D5E588A78A9, 0xC009BA25F7E858A6 // Bounds for root on [-7, -6]
677data8 0xE001E54202991EB4, 0xE001648416CE897F // Bounds for root on [-8, -7]
678data8 0x80001E56D13A6B9F, 0x8000164A3BAD888A // Bounds for root on [-9, -8]
679data8 0x9000035F0529272A, 0x9000027A0E3D94F0 // Bounds for root on [-10, -9]
680data8 0xA00000564D705880, 0xA000003F67EA0CC7 // Bounds for root on [-11, -10]
681data8 0xB0000007D87EE0EF, 0xB0000005C3A122A5 // Bounds for root on [-12, -11]
682data8 0xC0000000A75FE8B1, 0xC00000007AF818AC // Bounds for root on [-13, -12]
683data8 0xD00000000CDFFE36, 0xD000000009758BBF // Bounds for root on [-14, -13]
684data8 0xE000000000EB6D96, 0xE000000000ACF7B2 // Bounds for root on [-15, -14]
685data8 0xF0000000000FB1F9, 0xF0000000000B87FB // Bounds for root on [-16, -15]
686data8 0x8000000000007D90, 0x8000000000005C40 // Bounds for root on [-17, -16]
687data8 0x8800000000000763, 0x880000000000056D // Bounds for root on [-18, -17]
688data8 0x9000000000000069, 0x900000000000004D // Bounds for root on [-19, -18]
689data8 0x9800000000000006, 0x9800000000000005 // Bounds for root on [-20, -19]
690// List of all left roots themselves
691data8 0xAFDA0850DEC8065E, 0x0000C000 // Range [-3, -2]
692data8 0xFD238AA3E17F285C, 0x0000C000 // Range [-4, -3]
693data8 0x9FBABBD37757E6A2, 0x0000C001 // Range [-5, -4]
694data8 0xBFF497AC8FA06AFC, 0x0000C001 // Range [-6, -5]
695data8 0xDFFE5FBB5C377FE8, 0x0000C001 // Range [-7, -6]
696data8 0xFFFFCBFC0ACE7879, 0x0000C001 // Range [-8, -7]
697data8 0x8FFFFD1C425E8100, 0x0000C002 // Range [-9, -8]
698data8 0x9FFFFFB606BDFDCD, 0x0000C002 // Range [-10, -9]
699data8 0xAFFFFFF9466E9F1B, 0x0000C002 // Range [-11, -10]
700data8 0xBFFFFFFF70893874, 0x0000C002 // Range [-12, -11]
701data8 0xCFFFFFFFF4F6DCF6, 0x0000C002 // Range [-13, -12]
702data8 0xDFFFFFFFFF36345B, 0x0000C002 // Range [-14, -13]
703data8 0xEFFFFFFFFFF28C06, 0x0000C002 // Range [-15, -14]
704data8 0xFFFFFFFFFFFF28C0, 0x0000C002 // Range [-16, -15]
705data8 0x87FFFFFFFFFFF9AB, 0x0000C003 // Range [-17, -16]
706data8 0x8FFFFFFFFFFFFFA6, 0x0000C003 // Range [-18, -17]
707data8 0x97FFFFFFFFFFFFFB, 0x0000C003 // Range [-19, -18]
708data8 0x0000000000000000, 0x00000000 // pad to keep logic in the main path
709// List of bounds of ranges with special polynomial approximation near root
710// Only significands of bounds are actually stored
711data8 0xB235880944CC758E, 0xADD2F1A9FBE76C8B // Bounds for root on [-3, -2]
712data8 0xFD8E7844F307B07C, 0xFCA655C2152BDE4D // Bounds for root on [-4, -3]
713data8 0x9FC4D876EE546967, 0x9FAEE4AF68BC4292 // Bounds for root on [-5, -4]
714data8 0xBFF641FFBFCC44F1, 0xBFF2A47919F4BA89 // Bounds for root on [-6, -5]
715data8 0xDFFE9C803DEFDD59, 0xDFFE18932EB723FE // Bounds for root on [-7, -6]
716data8 0xFFFFD393FA47AFC3, 0xFFFFC317CF638AE1 // Bounds for root on [-8, -7]
717data8 0x8FFFFD8840279925, 0x8FFFFC9DCECEEE92 // Bounds for root on [-9, -8]
718data8 0x9FFFFFC0D34E2AF8, 0x9FFFFFA9619AA3B7 // Bounds for root on [-10, -9]
719data8 0xAFFFFFFA41C18246, 0xAFFFFFF82025A23C // Bounds for root on [-11, -10]
720data8 0xBFFFFFFF857ACB4E, 0xBFFFFFFF58032378 // Bounds for root on [-12, -11]
721data8 0xCFFFFFFFF6934AB8, 0xCFFFFFFFF313EF0A // Bounds for root on [-13, -12]
722data8 0xDFFFFFFFFF53A9E9, 0xDFFFFFFFFF13B5A5 // Bounds for root on [-14, -13]
723data8 0xEFFFFFFFFFF482CB, 0xEFFFFFFFFFF03F4F // Bounds for root on [-15, -14]
724data8 0xFFFFFFFFFFFF482D, 0xFFFFFFFFFFFF03F5 // Bounds for root on [-16, -15]
725data8 0x87FFFFFFFFFFFA98, 0x87FFFFFFFFFFF896 // Bounds for root on [-17, -16]
726data8 0x8FFFFFFFFFFFFFB3, 0x8FFFFFFFFFFFFF97 // Bounds for root on [-18, -17]
727data8 0x97FFFFFFFFFFFFFC, 0x97FFFFFFFFFFFFFB // Bounds for root on [-19, -18]
728LOCAL_OBJECT_END(lgammal_right_roots_data)
729
730LOCAL_OBJECT_START(lgammal_0_Half_data)
731// Polynomial coefficients for the lgammal(x), 0.0 < |x| < 0.5
732data8 0xBFD9A4D55BEAB2D6, 0xBC8AA3C097746D1F //A3
733data8 0x3FEA51A6625307D3, 0x3C7180E7BD2D0DCC //A2
734data8 0xBFE2788CFC6FB618, 0xBC9E9346C4692BCC //A1
735data8 0x8A8991563EC1BD13, 0x00003FFD //A4
736data8 0xD45CE0BD52C27EF2, 0x0000BFFC //A5
737data8 0xADA06587FA2BBD47, 0x00003FFC //A6
738data8 0x9381D0ED2194902A, 0x0000BFFC //A7
739data8 0x80859B3CF92D4192, 0x00003FFC //A8
740data8 0xE4033517C622A946, 0x0000BFFB //A9
741data8 0xCD00CE67A51FC82A, 0x00003FFB //A10
742data8 0xBA44E2A96C3B5700, 0x0000BFFB //A11
743data8 0xAAAD008FA46DBD99, 0x00003FFB //A12
744data8 0x9D604AC65A41153D, 0x0000BFFB //A13
745data8 0x917CECB864B5A861, 0x00003FFB //A14
746data8 0x85A4810EB730FDE4, 0x0000BFFB //A15
747data8 0xEF2761C38BD21F77, 0x00003FFA //A16
748data8 0xC913043A128367DA, 0x0000BFFA //A17
749data8 0x96A29B71FF7AFFAA, 0x00003FFA //A18
750data8 0xBB9FFA1A5FE649BB, 0x0000BFF9 //A19
751data8 0xB17982CD2DAA0EE3, 0x00003FF8 //A20
752data8 0xDE1DDCBFFB9453F0, 0x0000BFF6 //A21
753data8 0x87FBF5D7ACD9FA9D, 0x00003FF4 //A22
754LOCAL_OBJECT_END(lgammal_0_Half_data)
755
756LOCAL_OBJECT_START(Constants_Q)
757// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
758data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
759data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
760data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
761data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
762data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
763data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
764LOCAL_OBJECT_END(Constants_Q)
765
766LOCAL_OBJECT_START(Constants_Z_1)
767// Z1 - 16 bit fixed
768data4 0x00008000
769data4 0x00007879
770data4 0x000071C8
771data4 0x00006BCB
772data4 0x00006667
773data4 0x00006187
774data4 0x00005D18
775data4 0x0000590C
776data4 0x00005556
777data4 0x000051EC
778data4 0x00004EC5
779data4 0x00004BDB
780data4 0x00004925
781data4 0x0000469F
782data4 0x00004445
783data4 0x00004211
784LOCAL_OBJECT_END(Constants_Z_1)
785
786LOCAL_OBJECT_START(Constants_G_H_h1)
787// G1 and H1 - IEEE single and h1 - IEEE double
788data4 0x3F800000,0x00000000,0x00000000,0x00000000
789data4 0x3F70F0F0,0x3D785196,0x617D741C,0x3DA163A6
790data4 0x3F638E38,0x3DF13843,0xCBD3D5BB,0x3E2C55E6
791data4 0x3F579430,0x3E2FF9A0,0xD86EA5E7,0xBE3EB0BF
792data4 0x3F4CCCC8,0x3E647FD6,0x86B12760,0x3E2E6A8C
793data4 0x3F430C30,0x3E8B3AE7,0x5C0739BA,0x3E47574C
794data4 0x3F3A2E88,0x3EA30C68,0x13E8AF2F,0x3E20E30F
795data4 0x3F321640,0x3EB9CEC8,0xF2C630BD,0xBE42885B
796data4 0x3F2AAAA8,0x3ECF9927,0x97E577C6,0x3E497F34
797data4 0x3F23D708,0x3EE47FC5,0xA6B0A5AB,0x3E3E6A6E
798data4 0x3F1D89D8,0x3EF8947D,0xD328D9BE,0xBDF43E3C
799data4 0x3F17B420,0x3F05F3A1,0x0ADB090A,0x3E4094C3
800data4 0x3F124920,0x3F0F4303,0xFC1FE510,0xBE28FBB2
801data4 0x3F0D3DC8,0x3F183EBF,0x10FDE3FA,0x3E3A7895
802data4 0x3F088888,0x3F20EC80,0x7CC8C98F,0x3E508CE5
803data4 0x3F042108,0x3F29516A,0xA223106C,0xBE534874
804LOCAL_OBJECT_END(Constants_G_H_h1)
805
806LOCAL_OBJECT_START(Constants_Z_2)
807// Z2 - 16 bit fixed
808data4 0x00008000
809data4 0x00007F81
810data4 0x00007F02
811data4 0x00007E85
812data4 0x00007E08
813data4 0x00007D8D
814data4 0x00007D12
815data4 0x00007C98
816data4 0x00007C20
817data4 0x00007BA8
818data4 0x00007B31
819data4 0x00007ABB
820data4 0x00007A45
821data4 0x000079D1
822data4 0x0000795D
823data4 0x000078EB
824LOCAL_OBJECT_END(Constants_Z_2)
825
826LOCAL_OBJECT_START(Constants_G_H_h2)
827// G2 and H2 - IEEE single and h2 - IEEE double
828data4 0x3F800000,0x00000000,0x00000000,0x00000000
829data4 0x3F7F00F8,0x3B7F875D,0x22C42273,0x3DB5A116
830data4 0x3F7E03F8,0x3BFF015B,0x21F86ED3,0x3DE620CF
831data4 0x3F7D08E0,0x3C3EE393,0x484F34ED,0xBDAFA07E
832data4 0x3F7C0FC0,0x3C7E0586,0x3860BCF6,0xBDFE07F0
833data4 0x3F7B1880,0x3C9E75D2,0xA78093D6,0x3DEA370F
834data4 0x3F7A2328,0x3CBDC97A,0x72A753D0,0x3DFF5791
835data4 0x3F792FB0,0x3CDCFE47,0xA7EF896B,0x3DFEBE6C
836data4 0x3F783E08,0x3CFC15D0,0x409ECB43,0x3E0CF156
837data4 0x3F774E38,0x3D0D874D,0xFFEF71DF,0xBE0B6F97
838data4 0x3F766038,0x3D1CF49B,0x5D59EEE8,0xBE080483
839data4 0x3F757400,0x3D2C531D,0xA9192A74,0x3E1F91E9
840data4 0x3F748988,0x3D3BA322,0xBF72A8CD,0xBE139A06
841data4 0x3F73A0D0,0x3D4AE46F,0xF8FBA6CF,0x3E1D9202
842data4 0x3F72B9D0,0x3D5A1756,0xBA796223,0xBE1DCCC4
843data4 0x3F71D488,0x3D693B9D,0xB6B7C239,0xBE049391
844LOCAL_OBJECT_END(Constants_G_H_h2)
845
846LOCAL_OBJECT_START(Constants_G_H_h3)
847// G3 and H3 - IEEE single and h3 - IEEE double
848data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
849data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
850data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
851data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
852data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
853data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
854data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
855data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
856data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
857data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
858data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
859data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
860data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
861data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
862data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
863data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
864data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
865data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
866data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
867data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
868data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
869data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
870data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
871data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
872data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
873data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
874data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
875data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
876data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
877data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
878data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
879data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
880LOCAL_OBJECT_END(Constants_G_H_h3)
881
882LOCAL_OBJECT_START(lgammal_data)
883// Positive overflow value
884data8 0xB8D54C8BFFFDEBF4, 0x00007FF1
885LOCAL_OBJECT_END(lgammal_data)
886
887LOCAL_OBJECT_START(lgammal_Stirling)
888// Coefficients needed for Stirling's formula
889data8 0x3FED67F1C864BEB4 // High part of 0.5*ln(2*Pi)
890data8 0x3C94D252F2400510 // Low part of 0.5*ln(2*Pi)
891//
892// Bernoulli numbers used in Stirling's formula for -2^63 < x < -13.0
893//(B1H, B1L) = 8.3333333333333333333262747254e-02
894data8 0x3FB5555555555555, 0x3C55555555555555
895data8 0xB60B60B60B60B60B, 0x0000BFF6 //B2 = -2.7777777777777777777777777778e-03
896data8 0xD00D00D00D00D00D, 0x00003FF4 //B3 = 7.9365079365079365079365079365e-04
897data8 0x9C09C09C09C09C0A, 0x0000BFF4 //B4 = -5.9523809523809523809523809524e-04
898data8 0xDCA8F158C7F91AB8, 0x00003FF4 //B5 = 8.4175084175084175084175084175e-04
899data8 0xFB5586CCC9E3E410, 0x0000BFF5 //B6 = -1.9175269175269175269175269175e-03
900data8 0xD20D20D20D20D20D, 0x00003FF7 //B7 = 6.4102564102564102564102564103e-03
901data8 0xF21436587A9CBEE1, 0x0000BFF9 //B8 = -2.9550653594771241830065359477e-02
902data8 0xB7F4B1C0F033FFD1, 0x00003FFC //B9 = 1.7964437236883057316493849002e-01
903data8 0xB23B3808C0F9CF6E, 0x0000BFFF //B10 = -1.3924322169059011164274322169e+00
904// Polynomial coefficients for Stirling's formula, -13.0 < x < -6.0
905data8 0x3FB5555555555555, 0x3C4D75060289C58B //A0
906data8 0xB60B60B60B0F0876, 0x0000BFF6 //A1
907data8 0xD00D00CE54B1256C, 0x00003FF4 //A2
908data8 0x9C09BF46B58F75E1, 0x0000BFF4 //A3
909data8 0xDCA8483BC91ACC6D, 0x00003FF4 //A4
910data8 0xFB3965C939CC9FEE, 0x0000BFF5 //A5
911data8 0xD0723ADE3F0BC401, 0x00003FF7 //A6
912data8 0xE1ED7434E81F0B73, 0x0000BFF9 //A7
913data8 0x8069C6982F993283, 0x00003FFC //A8
914data8 0xC271F65BFA5BEE3F, 0x0000BFFD //A9
915LOCAL_OBJECT_END(lgammal_Stirling)
916
917LOCAL_OBJECT_START(lgammal_lnsin_data)
918// polynomial approximation of -ln(sin(Pi*x)/(Pi*x)), 0 < x <= 0.5
919data8 0x3FFA51A6625307D3, 0x3C81873332FAF94C //A2
920data8 0x8A8991563EC241C3, 0x00003FFE //A4
921data8 0xADA06588061805DF, 0x00003FFD //A6
922data8 0x80859B57C338D0F7, 0x00003FFD //A8
923data8 0xCD00F1C2D78754BD, 0x00003FFC //A10
924data8 0xAAB56B1D3A1F4655, 0x00003FFC //A12
925data8 0x924B6F2FBBED12B1, 0x00003FFC //A14
926data8 0x80008E58765F43FC, 0x00003FFC //A16
927data8 0x3FBC718EC115E429//A18
928data8 0x3FB99CE544FE183E//A20
929data8 0x3FB7251C09EAAD89//A22
930data8 0x3FB64A970733628C//A24
931data8 0x3FAC92D6802A3498//A26
932data8 0x3FC47E1165261586//A28
933data8 0xBFCA1BAA434750D4//A30
934data8 0x3FE460001C4D5961//A32
935data8 0xBFE6F06A3E4908AD//A34
936data8 0x3FE300889EBB203A//A36
937LOCAL_OBJECT_END(lgammal_lnsin_data)
938
939LOCAL_OBJECT_START(lgammal_half_3Q_data)
940// Polynomial coefficients for the lgammal(x), 0.5 <= x < 0.75
941data8 0xBFF7A648EE90C62E, 0x3C713F326857E066 // A3, A0L
942data8 0xBFF73E4B8BA780AE, 0xBCA953BC788877EF // A1, A1L
943data8 0x403774DCD58D0291, 0xC0415254D5AE6623 // D0, D1
944data8 0x40B07213855CBFB0, 0xC0B8855E25D2D229 // C20, C21
945data8 0x3FFB359F85FF5000, 0x3C9BAECE6EF9EF3A // A2, A2L
946data8 0x3FD717D498A3A8CC, 0xBC9088E101CFEDFA // A0, A3L
947data8 0xAFEF36CC5AEC3FF0, 0x00004002 // E6
948data8 0xABE2054E1C34E791, 0x00004001 // E4
949data8 0xB39343637B2900D1, 0x00004000 // E2
950data8 0xD74FB710D53F58F6, 0x00003FFF // E0
951data8 0x4070655963BA4256, 0xC078DA9D263C4EA3 // D6, D7
952data8 0x405CD2B6A9B90978, 0xC065B3B9F4F4F171 // D4, D5
953data8 0x4049BC2204CF61FF, 0xC05337227E0BA152 // D2, D3
954data8 0x4095509A50C07A96, 0xC0A0747949D2FB45 // C18, C19
955data8 0x4082ECCBAD709414, 0xC08CD02FB088A702 // C16, C17
956data8 0xFFE4B2A61B508DD5, 0x0000C002 // E7
957data8 0xF461ADB8AE17E0A5, 0x0000C001 // E5
958data8 0xF5BE8B0B90325F20, 0x0000C000 // E3
959data8 0x877B275F3FB78DCA, 0x0000C000 // E1
960LOCAL_OBJECT_END(lgammal_half_3Q_data)
961
962LOCAL_OBJECT_START(lgammal_half_3Q_neg_data)
963// Polynomial coefficients for the lgammal(x), -0.75 < x <= -0.5
964data8 0xC014836EFD94899C, 0x3C9835679663B44F // A3, A0L
965data8 0xBFF276C7B4FB1875, 0xBC92D3D9FA29A1C0 // A1, A1L
966data8 0x40C5178F24E1A435, 0xC0D9DE84FBC5D76A // D0, D1
967data8 0x41D4D1B236BF6E93, 0xC1EBB0445CE58550 // C20, C21
968data8 0x4015718CD67F63D3, 0x3CC5354B6F04B59C // A2, A2L
969data8 0x3FF554493087E1ED, 0xBCB72715E37B02B9 // A0, A3L
970data8 0xE4AC7E915FA72229, 0x00004009 // E6
971data8 0xA28244206395FCC6, 0x00004007 // E4
972data8 0xFB045F19C07B2544, 0x00004004 // E2
973data8 0xE5C8A6E6A9BA7D7B, 0x00004002 // E0
974data8 0x4143943B55BF5118, 0xC158AC05EA675406 // D6, D7
975data8 0x4118F6833D19717C, 0xC12F51A6F375CC80 // D4, D5
976data8 0x40F00C209483481C, 0xC103F1DABF750259 // D2, D3
977data8 0x4191038F2D8F9E40, 0xC1A413066DA8AE4A // C18, C19
978data8 0x4170B537EDD833DE, 0xC1857E79424C61CE // C16, C17
979data8 0x8941D8AB4855DB73, 0x0000C00B // E7
980data8 0xBB822B131BD2E813, 0x0000C008 // E5
981data8 0x852B4C03B83D2D4F, 0x0000C006 // E3
982data8 0xC754CA7E2DDC0F1F, 0x0000C003 // E1
983LOCAL_OBJECT_END(lgammal_half_3Q_neg_data)
984
985LOCAL_OBJECT_START(lgammal_2Q_4_data)
986// Polynomial coefficients for the lgammal(x), 2.25 <= |x| < 4.0
987data8 0xBFCA4D55BEAB2D6F, 0x3C7ABC9DA14141F5 // A3, A0L
988data8 0x3FFD8773039049E7, 0x3C66CB7957A95BA4 // A1, A1L
989data8 0x3F45C3CC79E91E7D, 0xBF3A8E5005937E97 // D0, D1
990data8 0x3EC951E35E1C9203, 0xBEB030A90026C5DF // C20, C21
991data8 0x3FE94699894C1F4C, 0x3C91884D21D123F1 // A2, A2L
992data8 0x3FE62E42FEFA39EF, 0xBC66480CEB70870F // A0, A3L
993data8 0xF1C2EAFF0B3A7579, 0x00003FF5 // E6
994data8 0xB36AF863926B55A3, 0x00003FF7 // E4
995data8 0x9620656185BB44CA, 0x00003FF9 // E2
996data8 0xA264558FB0906AFF, 0x00003FFB // E0
997data8 0x3F03D59E9666C961, 0xBEF91115893D84A6 // D6, D7
998data8 0x3F19333611C46225, 0xBF0F89EB7D029870 // D4, D5
999data8 0x3F3055A96B347AFE, 0xBF243B5153E178A8 // D2, D3
1000data8 0x3ED9A4AEF30C4BB2, 0xBED388138B1CEFF2 // C18, C19
1001data8 0x3EEF7945A3C3A254, 0xBEE36F32A938EF11 // C16, C17
1002data8 0x9028923F47C82118, 0x0000BFF5 // E7
1003data8 0xCE0DAAFB6DC93B22, 0x0000BFF6 // E5
1004data8 0xA0D0983B34AC4C8D, 0x0000BFF8 // E3
1005data8 0x94D6C50FEB8B0CE7, 0x0000BFFA // E1
1006LOCAL_OBJECT_END(lgammal_2Q_4_data)
1007
1008LOCAL_OBJECT_START(lgammal_4_8_data)
1009// Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 8.0
1010data8 0xBFD6626BC9B31B54, 0x3CAA53C82493A92B // A3, A0L
1011data8 0x401B4C420A50AD7C, 0x3C8C6E9929F789A3 // A1, A1L
1012data8 0x3F49410427E928C2, 0xBF3E312678F8C146 // D0, D1
1013data8 0x3ED51065F7CD5848, 0xBED052782A03312F // C20, C21
1014data8 0x3FF735973273D5EC, 0x3C831DFC65BF8CCF // A2, A2L
1015data8 0x401326643C4479C9, 0xBC6FA0498C5548A6 // A0, A3L
1016data8 0x9382D8B3CD4EB7E3, 0x00003FF6 // E6
1017data8 0xE9F92CAD8A85CBCD, 0x00003FF7 // E4
1018data8 0xD58389FE38258CEC, 0x00003FF9 // E2
1019data8 0x81310136363AE8AA, 0x00003FFC // E0
1020data8 0x3F04F0AE38E78570, 0xBEF9E2144BB8F03C // D6, D7
1021data8 0x3F1B5E992A6CBC2A, 0xBF10F3F400113911 // D4, D5
1022data8 0x3F323EE00AAB7DEE, 0xBF2640FDFA9FB637 // D2, D3
1023data8 0x3ED2143EBAFF067A, 0xBEBBDEB92D6FF35D // C18, C19
1024data8 0x3EF173A42B69AAA4, 0xBEE78B9951A2EAA5 // C16, C17
1025data8 0xAB3CCAC6344E52AA, 0x0000BFF5 // E7
1026data8 0x81ACCB8915B16508, 0x0000BFF7 // E5
1027data8 0xDA62C7221102C426, 0x0000BFF8 // E3
1028data8 0xDF1BD44C4083580A, 0x0000BFFA // E1
1029LOCAL_OBJECT_END(lgammal_4_8_data)
1030
1031LOCAL_OBJECT_START(lgammal_loc_min_data)
1032// Polynomial coefficients for the lgammal(x), 1.3125 <= x < 1.5625
1033data8 0xBB16C31AB5F1FB71, 0x00003FFF // xMin - point of local minimum
1034data8 0xBFC2E4278DC6BC23, 0xBC683DA8DDCA9650 // A3, A0L
1035data8 0x3BD4DB7D0CA61D5F, 0x386E719EDD01D801 // A1, A1L
1036data8 0x3F4CC72638E1D93F, 0xBF4228EC9953CCB9 // D0, D1
1037data8 0x3ED222F97A04613E,0xBED3DDD58095CB6C // C20, C21
1038data8 0x3FDEF72BC8EE38AB, 0x3C863AFF3FC48940 // A2, A2L
1039data8 0xBFBF19B9BCC38A41, 0xBC7425F1BFFC1442// A0, A3L
1040data8 0x941890032BEB34C3, 0x00003FF6 // E6
1041data8 0xC7E701591CE534BC, 0x00003FF7 // E4
1042data8 0x93373CBD05138DD4, 0x00003FF9 // E2
1043data8 0x845A14A6A81C05D6, 0x00003FFB // E0
1044data8 0x3F0F6C4DF6D47A13, 0xBF045DCDB5B49E19 // D6, D7
1045data8 0x3F22E23345DDE59C, 0xBF1851159AFB1735 // D4, D5
1046data8 0x3F37101EA4022B78, 0xBF2D721E6323AF13 // D2, D3
1047data8 0x3EE691EBE82DF09D, 0xBEDD42550961F730 // C18, C19
1048data8 0x3EFA793EDE99AD85, 0xBEF14000108E70BE // C16, C17
1049data8 0xB7CBC033ACE0C99C, 0x0000BFF5 // E7
1050data8 0xF178D1F7B1A45E27, 0x0000BFF6 // E5
1051data8 0xA8FCFCA8106F471C, 0x0000BFF8 // E3
1052data8 0x864D46FA898A9AD2, 0x0000BFFA // E1
1053LOCAL_OBJECT_END(lgammal_loc_min_data)
1054
1055LOCAL_OBJECT_START(lgammal_03Q_1Q_data)
1056// Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.3125
1057data8 0x3FD151322AC7D848, 0x3C7184DE0DB7B4EE // A4, A2L
1058data8 0x3FD9A4D55BEAB2D6, 0x3C9E934AAB10845F // A3, A1L
1059data8 0x3FB111289C381259, 0x3FAFFFCFB32AE18D // D2, D3
1060data8 0x3FB3B1D9E0E3E00D, 0x3FB2496F0D3768DF // D0, D1
1061data8 0xBA461972C057D439, 0x00003FFB // E6
1062data8 0x3FEA51A6625307D3, 0x3C76ABC886A72DA2 // A2, A4L
1063data8 0x3FA8EFE46B32A70E, 0x3F8F31B3559576B6 // C17, C20
1064data8 0xE403383700387D85, 0x00003FFB // E4
1065data8 0x9381D0EE74BF7251, 0x00003FFC // E2
1066data8 0x3FAA2177A6D28177, 0x3FA4895E65FBD995 // C18, C19
1067data8 0x3FAAED2C77DBEE5D, 0x3FA94CA59385512C // D6, D7
1068data8 0x3FAE1F522E8A5941, 0x3FAC785EF56DD87E // D4, D5
1069data8 0x3FB556AD5FA56F0A, 0x3FA81F416E87C783 // E7, C16
1070data8 0xCD00F1C2DC2C9F1E, 0x00003FFB // E5
1071data8 0x3FE2788CFC6FB618, 0x3C8E52519B5B17CB // A1, A3L
1072data8 0x80859B57C3E7F241, 0x00003FFC // E3
1073data8 0xADA065880615F401, 0x00003FFC // E1
1074data8 0xD45CE0BD530AB50E, 0x00003FFC // E0
1075LOCAL_OBJECT_END(lgammal_03Q_1Q_data)
1076
1077LOCAL_OBJECT_START(lgammal_13Q_2Q_data)
1078// Polynomial coefficients for the lgammal(x), 1.5625 <= |x| < 2.25
1079data8 0x3F951322AC7D8483, 0x3C71873D88C6539D // A4, A2L
1080data8 0xBFB13E001A557606, 0x3C56CB907018A101 // A3, A1L
1081data8 0xBEC11B2EC1E7F6FC, 0x3EB0064ED9824CC7 // D2, D3
1082data8 0xBEE3CBC963EC103A, 0x3ED2597A330C107D // D0, D1
1083data8 0xBC6F2DEBDFE66F38, 0x0000BFF0 // E6
1084data8 0x3FD4A34CC4A60FA6, 0x3C3AFC9BF775E8A0 // A2, A4L
1085data8 0x3E48B0C542F85B32, 0xBE347F12EAF787AB // C17, C20
1086data8 0xE9FEA63B6984FA1E, 0x0000BFF2 // E4
1087data8 0x9C562E15FC703BBF, 0x0000BFF5 // E2
1088data8 0xBE3C12A50AB0355E, 0xBE1C941626AE4717 // C18, C19
1089data8 0xBE7AFA8714342BC4,0x3E69A12D2B7761CB // D6, D7
1090data8 0xBE9E25EF1D526730, 0x3E8C762291889B99 // D4, D5
1091data8 0x3EF580DCEE754733, 0xBE57C811D070549C // E7, C16
1092data8 0xD093D878BE209C98, 0x00003FF1 // E5
1093data8 0x3FDB0EE6072093CE, 0xBC6024B9E81281C4 // A1, A3L
1094data8 0x859B57C31CB77D96, 0x00003FF4 // E3
1095data8 0xBD6EB756DB617E8D, 0x00003FF6 // E1
1096data8 0xF2027E10C7AF8C38, 0x0000BFF7 // E0
1097LOCAL_OBJECT_END(lgammal_13Q_2Q_data)
1098
1099LOCAL_OBJECT_START(lgammal_8_10_data)
1100// Polynomial coefficients for the lgammal(x), 8.0 <= |x| < 10.0
1101// Multi Precision terms
1102data8 0x40312008A3A23E5C, 0x3CE020B4F2E4083A //A1
1103data8 0x4025358E82FCB70C, 0x3CD4A5A74AF7B99C //A0
1104// Native precision terms
1105data8 0xF0AA239FFBC616D2, 0x00004000 //A2
1106data8 0x96A8EA798FE57D66, 0x0000BFFF //A3
1107data8 0x8D501B7E3B9B9BDB, 0x00003FFE //A4
1108data8 0x9EE062401F4B1DC2, 0x0000BFFD //A5
1109data8 0xC63FD8CD31E93431, 0x00003FFC //A6
1110data8 0x8461101709C23C30, 0x0000BFFC //A7
1111data8 0xB96D7EA7EF3648B2, 0x00003FFB //A8
1112data8 0x86886759D2ACC906, 0x0000BFFB //A9
1113data8 0xC894B6E28265B183, 0x00003FFA //A10
1114data8 0x98C4348CAD821662, 0x0000BFFA //A11
1115data8 0xEC9B092226A94DF2, 0x00003FF9 //A12
1116data8 0xB9F169FF9B98CDDC, 0x0000BFF9 //A13
1117data8 0x9A3A32BB040894D3, 0x00003FF9 //A14
1118data8 0xF9504CCC1003B3C3, 0x0000BFF8 //A15
1119LOCAL_OBJECT_END(lgammal_8_10_data)
1120
1121LOCAL_OBJECT_START(lgammal_03Q_6_data)
1122// Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.0
1123data8 0xBFBC47DCA479E295, 0xBC607E6C1A379D55 //A3
1124data8 0x3FCA051C372609ED, 0x3C7B02D73EB7D831 //A0
1125data8 0xBFE15FAFA86B04DB, 0xBC3F52EE4A8945B5 //A1
1126data8 0x3FD455C4FF28F0BF, 0x3C75F8C6C99F30BB //A2
1127data8 0xD2CF04CD934F03E1, 0x00003FFA //A4
1128data8 0xDB4ED667E29256E1, 0x0000BFF9 //A5
1129data8 0xF155A33A5B6021BF, 0x00003FF8 //A6
1130data8 0x895E9B9D386E0338, 0x0000BFF8 //A7
1131data8 0xA001BE94B937112E, 0x00003FF7 //A8
1132data8 0xBD82846E490ED048, 0x0000BFF6 //A9
1133data8 0xE358D24EC30DBB5D, 0x00003FF5 //A10
1134data8 0x89C4F3652446B78B, 0x0000BFF5 //A11
1135data8 0xA86043E10280193D, 0x00003FF4 //A12
1136data8 0xCF3A2FBA61EB7682, 0x0000BFF3 //A13
1137data8 0x3F300900CC9200EC //A14
1138data8 0xBF23F42264B94AE8 //A15
1139data8 0x3F18EEF29895FE73 //A16
1140data8 0xBF0F3C4563E3EDFB //A17
1141data8 0x3F0387DBBC385056 //A18
1142data8 0xBEF81B4004F92900 //A19
1143data8 0x3EECA6692A9A5B81 //A20
1144data8 0xBEDF61A0059C15D3 //A21
1145data8 0x3ECDA9F40DCA0111 //A22
1146data8 0xBEB60FE788217BAF //A23
1147data8 0x3E9661D795DFC8C6 //A24
1148data8 0xBE66C7756A4EDEE5 //A25
1149// Polynomial coefficients for the lgammal(x), 1.0 <= |x| < 2.0
1150data8 0xBFC1AE55B180726B, 0xBC7DE1BC478453F5 //A3
1151data8 0xBFBEEB95B094C191, 0xBC53456FF6F1C9D9 //A0
1152data8 0x3FA2AED059BD608A, 0x3C0B65CC647D557F //A1
1153data8 0x3FDDE9E64DF22EF2, 0x3C8993939A8BA8E4 //A2
1154data8 0xF07C206D6B100CFF, 0x00003FFA //A4
1155data8 0xED2CEA9BA52FE7FB, 0x0000BFF9 //A5
1156data8 0xFCE51CED52DF3602, 0x00003FF8 //A6
1157data8 0x8D45D27872326619, 0x0000BFF8 //A7
1158data8 0xA2B78D6BCEBE27F7, 0x00003FF7 //A8
1159data8 0xBF6DC0996A895B6F, 0x0000BFF6 //A9
1160data8 0xE4B9AD335AF82D79, 0x00003FF5 //A10
1161data8 0x8A451880195362A1, 0x0000BFF5 //A11
1162data8 0xA8BE35E63089A7A9, 0x00003FF4 //A12
1163data8 0xCF7FA175FA11C40C, 0x0000BFF3 //A13
1164data8 0x3F300C282FAA3B02 //A14
1165data8 0xBF23F6AEBDA68B80 //A15
1166data8 0x3F18F6860E2224DD //A16
1167data8 0xBF0F542B3CE32F28 //A17
1168data8 0x3F039436218C9BF8 //A18
1169data8 0xBEF8AE6307677AEC //A19
1170data8 0x3EF0B55527B3A211 //A20
1171data8 0xBEE576AC995E7605 //A21
1172data8 0x3ED102DDC1365D2D //A22
1173data8 0xBEC442184F97EA54 //A23
1174data8 0x3ED4D2283DFE5FC6 //A24
1175data8 0xBECB9219A9B46787 //A25
1176// Polynomial coefficients for the lgammal(x), 2.0 <= |x| < 3.0
1177data8 0xBFCA4D55BEAB2D6F, 0xBC66F80E5BFD5AF5 //A3
1178data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B347E3D //A0
1179data8 0x3FFD8773039049E7, 0x3C66CB9007C426EA //A1
1180data8 0x3FE94699894C1F4C, 0x3C918726EB111663 //A2
1181data8 0xA264558FB0906209, 0x00003FFB //A4
1182data8 0x94D6C50FEB902ADC, 0x0000BFFA //A5
1183data8 0x9620656184243D17, 0x00003FF9 //A6
1184data8 0xA0D0983B8BCA910B, 0x0000BFF8 //A7
1185data8 0xB36AF8559B222BD3, 0x00003FF7 //A8
1186data8 0xCE0DACB3260AE6E5, 0x0000BFF6 //A9
1187data8 0xF1C2C0BF0437C7DB, 0x00003FF5 //A10
1188data8 0x902A2F2F3AB74A92, 0x0000BFF5 //A11
1189data8 0xAE05009B1B2C6E4C, 0x00003FF4 //A12
1190data8 0xD5B71F6456D7D4CB, 0x0000BFF3 //A13
1191data8 0x3F2F0351D71BC9C6 //A14
1192data8 0xBF2B53BC56A3B793 //A15
1193data8 0xBF18B12DC6F6B861 //A16
1194data8 0xBF43EE6EB5215C2F //A17
1195data8 0xBF5474787CDD455E //A18
1196data8 0xBF642B503C9C060A //A19
1197data8 0xBF6E07D1AA254AA3 //A20
1198data8 0xBF71C785443AAEE8 //A21
1199data8 0xBF6F67BF81B71052 //A22
1200data8 0xBF63E4BCCF4FFABF //A23
1201data8 0xBF50067F8C671D5A //A24
1202data8 0xBF29C770D680A5AC //A25
1203// Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 6.0
1204data8 0xBFD6626BC9B31B54, 0xBC85AABE08680902 //A3
1205data8 0x401326643C4479C9, 0x3CAA53C26F31E364 //A0
1206data8 0x401B4C420A50AD7C, 0x3C8C76D55E57DD8D //A1
1207data8 0x3FF735973273D5EC, 0x3C83A0B78E09188A //A2
1208data8 0x81310136363AAB6D, 0x00003FFC //A4
1209data8 0xDF1BD44C4075C0E6, 0x0000BFFA //A5
1210data8 0xD58389FE38D8D664, 0x00003FF9 //A6
1211data8 0xDA62C7221D5B5F87, 0x0000BFF8 //A7
1212data8 0xE9F92CAD0263E157, 0x00003FF7 //A8
1213data8 0x81ACCB8606C165FE, 0x0000BFF7 //A9
1214data8 0x9382D8D263D1C2A3, 0x00003FF6 //A10
1215data8 0xAB3CCBA4C853B12C, 0x0000BFF5 //A11
1216data8 0xCA0818BBCCC59296, 0x00003FF4 //A12
1217data8 0xF18912691CBB5BD0, 0x0000BFF3 //A13
1218data8 0x3F323EF5D8330339 //A14
1219data8 0xBF2641132EA571F7 //A15
1220data8 0x3F1B5D9576175CA9 //A16
1221data8 0xBF10F56A689C623D //A17
1222data8 0x3F04CACA9141A18D //A18
1223data8 0xBEFA307AC9B4E85D //A19
1224data8 0x3EF4B625939FBE32 //A20
1225data8 0xBECEE6AC1420F86F //A21
1226data8 0xBE9A95AE2E485964 //A22
1227data8 0xBF039EF47F8C09BB //A23
1228data8 0xBF05345957F7B7A9 //A24
1229data8 0xBEF85AE6385D4CCC //A25
1230// Polynomial coefficients for the lgammal(x), 3.0 <= |x| < 4.0
1231data8 0xBFCA4D55BEAB2D6F, 0xBC667B20FF46C6A8 //A3
1232data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B398012 //A0
1233data8 0x3FFD8773039049E7, 0x3C66CB9070238D77 //A1
1234data8 0x3FE94699894C1F4C, 0x3C91873D8839B1CD //A2
1235data8 0xA264558FB0906D7E, 0x00003FFB //A4
1236data8 0x94D6C50FEB8AFD72, 0x0000BFFA //A5
1237data8 0x9620656185B68F14, 0x00003FF9 //A6
1238data8 0xA0D0983B34B7088A, 0x0000BFF8 //A7
1239data8 0xB36AF863964AA440, 0x00003FF7 //A8
1240data8 0xCE0DAAFB5497AFB8, 0x0000BFF6 //A9
1241data8 0xF1C2EAFA79CC2864, 0x00003FF5 //A10
1242data8 0x9028922A839572B8, 0x0000BFF5 //A11
1243data8 0xAE1E62F870BA0278, 0x00003FF4 //A12
1244data8 0xD4726F681E2ABA29, 0x0000BFF3 //A13
1245data8 0x3F30559B9A02FADF //A14
1246data8 0xBF243ADEB1266CAE //A15
1247data8 0x3F19303B6F552603 //A16
1248data8 0xBF0F768C288EC643 //A17
1249data8 0x3F039D5356C21DE1 //A18
1250data8 0xBEF81BCA8168E6BE //A19
1251data8 0x3EEC74A53A06AD54 //A20
1252data8 0xBEDED52D1A5DACDF //A21
1253data8 0x3ECCB4C2C7087342 //A22
1254data8 0xBEB4F1FAFDFF5C2F //A23
1255data8 0x3E94C80B52D58904 //A24
1256data8 0xBE64A328CBE92A27 //A25
1257LOCAL_OBJECT_END(lgammal_03Q_6_data)
1258
1259LOCAL_OBJECT_START(lgammal_1pEps_data)
1260// Polynomial coefficients for the lgammal(x), 1 - 2^(-7) <= |x| < 1 + 2^(-7)
1261data8 0x93C467E37DB0C7A5, 0x00003FFE //A1
1262data8 0xD28D3312983E9919, 0x00003FFE //A2
1263data8 0xCD26AADF559A47E3, 0x00003FFD //A3
1264data8 0x8A8991563EC22E81, 0x00003FFD //A4
1265data8 0x3FCA8B9C168D52FE //A5
1266data8 0x3FC5B40CB0696370 //A6
1267data8 0x3FC270AC2229A65D //A7
1268data8 0x3FC0110AF10FCBFC //A8
1269// Polynomial coefficients for the log1p(x), -2^(-7) <= x < 2^(-7)
1270data8 0x3FBC71C71C71C71C //P8
1271data8 0xBFC0000000000000 //P7
1272data8 0x3FC2492492492492 //P6
1273data8 0xBFC5555555555555 //P5
1274data8 0x3FC999999999999A //P4
1275data8 0xBFD0000000000000 //P3
1276data8 0x3FD5555555555555 //P2
1277data8 0xBFE0000000000000 //P1
1278// short version of "lnsin" polynomial
1279data8 0xD28D3312983E9918, 0x00003FFF //A2
1280data8 0x8A8991563EC241B6, 0x00003FFE //A4
1281data8 0xADA06588061830A5, 0x00003FFD //A6
1282data8 0x80859B57C31CB746, 0x00003FFD //A8
1283LOCAL_OBJECT_END(lgammal_1pEps_data)
1284
1285LOCAL_OBJECT_START(lgammal_neg2andHalf_data)
1286// Polynomial coefficients for the lgammal(x), x between -2.5 and -2.005859375
1287data8 0xBF927781D4BB093A, 0xBC511D86D85B7045 // A3, A0L
1288data8 0x3FF1A68793DEFC15, 0x3C9852AE2DA7DEEF // A1, A1L
1289data8 0x408555562D45FAFD, 0xBF972CDAFE5FEFAD // D0, D1
1290data8 0xC18682331EF492A5, 0xC1845E3E0D29606B // C20, C21
1291data8 0x4013141822E16979, 0x3CCF8718B6E75F6C // A2, A2L
1292data8 0xBFACCBF9F5ED0F15, 0xBBDD1AEB73297401 // A0, A3L
1293data8 0xCCCDB17423046445, 0x00004006 // E6
1294data8 0x800514E230A3A452, 0x00004005 // E4
1295data8 0xAAE9A48EC162E76F, 0x00004003 // E2
1296data8 0x81D4F88B3F3EA0FC, 0x00004002 // E0
1297data8 0x40CF3F3E35238DA0, 0xC0F8B340945F1A7E // D6, D7
1298data8 0x40BF89EC0BD609C6, 0xC095897242AEFEE2 // D4, D5
1299data8 0x40A2482FF01DBC5C, 0xC02095E275FDCF62 // D2, D3
1300data8 0xC1641354F2312A6A, 0xC17B3657F85258E9 // C18, C19
1301data8 0xC11F964E9ECBE2C9, 0xC146D7A90F70696C // C16, C17
1302data8 0xE7AECDE6AF8EA816, 0x0000BFEF // E7
1303data8 0xD711252FEBBE1091, 0x0000BFEB // E5
1304data8 0xE648BD10F8C43391, 0x0000BFEF // E3
1305data8 0x948A1E78AA00A98D, 0x0000BFF4 // E1
1306LOCAL_OBJECT_END(lgammal_neg2andHalf_data)
1307
1308LOCAL_OBJECT_START(lgammal_near_neg_half_data)
1309// Polynomial coefficients for the lgammal(x), -0.5 < x < -0.40625
1310data8 0xBFC1AE55B180726C, 0x3C8053CD734E6A1D // A3, A0L
1311data8 0x3FA2AED059BD608A, 0x3C0CD3D2CDBA17F4 // A1, A1L
1312data8 0x40855554DBCD1E1E, 0x3F96C51AC2BEE9E1 // D0, D1
1313data8 0xC18682331EF4927D, 0x41845E3E0D295DFC // C20, C21
1314data8 0x4011DE9E64DF22EF, 0x3CA692B70DAD6B7B // A2, A2L
1315data8 0x3FF43F89A3F0EDD6, 0xBC4955AED0FA087D // A0, A3L
1316data8 0xCCCD3F1DF4A2C1DD, 0x00004006 // E6
1317data8 0x80028ADE33C7FCD9, 0x00004005 // E4
1318data8 0xAACA474E485507EF, 0x00004003 // E2
1319data8 0x80F07C206D6B0ECD, 0x00004002 // E0
1320data8 0x40CF3F3E33E83056, 0x40F8B340944633D9 // D6, D7
1321data8 0x40BF89EC059931F0, 0x409589723307AD20 // D4, D5
1322data8 0x40A2482FD0054824, 0x402095CE7F19D011 // D2, D3
1323data8 0xC1641354F2313614, 0x417B3657F8525354 // C18, C19
1324data8 0xC11F964E9ECFD21C, 0x4146D7A90F701836 // C16, C17
1325data8 0x86A9C01F0EA11E5A, 0x0000BFF5 // E7
1326data8 0xBF6D8469142881C0, 0x0000BFF6 // E5
1327data8 0x8D45D277BA8255F1, 0x0000BFF8 // E3
1328data8 0xED2CEA9BA528BCC3, 0x0000BFF9 // E1
1329LOCAL_OBJECT_END(lgammal_near_neg_half_data)
1330
1331//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1332////////////// POLYNOMIAL COEFFICIENTS FOR "NEAR ROOTS" RANGES /////////////
1333//////////// THIS PART OF THE TABLE SHOULD BE ADDRESSED VERY RARELY ///////////
1334//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1335LOCAL_OBJECT_START(lgammal_right_roots_polynomial_data)
1336// Polynomial coefficients for right root on [-3, -2]
6f65e668 1337// Lgammal is approximated by polynomial within [-.056244 ; .158208 ] range
d5efd131
MF
1338data8 0xBBBD5E9DCD11030B, 0xB867411D9FF87DD4 //A0
1339data8 0x3FF83FE966AF535E, 0x3CAA21235B8A769A //A1
1340data8 0x40136EEBB002F55C, 0x3CC3959A6029838E //A2
1341data8 0xB4A5302C53C2BEDD, 0x00003FFF //A3
1342data8 0x8B8C6BE504F2DA1C, 0x00004002 //A4
1343data8 0xB99CFF02593B4D98, 0x00004001 //A5
1344data8 0x4038D32F682AA1CF //A6
1345data8 0x403809F04EE6C5B5 //A7
1346data8 0x40548EAA81634CEE //A8
1347data8 0x4059297ADB6BC03D //A9
1348data8 0x407286FB8EC5C9DA //A10
1349data8 0x407A92E05B744CFB //A11
1350data8 0x4091A9D4144258CD //A12
1351data8 0x409C4D01D24F367E //A13
1352data8 0x40B1871B9A426A83 //A14
1353data8 0x40BE51C48BD9A583 //A15
1354data8 0x40D2140D0C6153E7 //A16
1355data8 0x40E0FB2C989CE4A3 //A17
1356data8 0x40E52739AB005641 //A18
1357data8 0x41161E3E6DDF503A //A19
1358// Polynomial coefficients for right root on [-4, -3]
6f65e668 1359// Lgammal is approximated by polynomial within [-.172797 ; .171573 ] range
d5efd131
MF
1360data8 0x3C172712B248E42E, 0x38CB8D17801A5D67 //A0
1361data8 0x401F20A65F2FAC54, 0x3CCB9EA1817A824E //A1
1362data8 0x4039D4D2977150EF, 0x3CDA42E149B6276A //A2
1363data8 0xE089B8926AE2D9CB, 0x00004005 //A3
1364data8 0x933901EBBB586C37, 0x00004008 //A4
1365data8 0xCCD319BED1CFA1CD, 0x0000400A //A5
1366data8 0x40D293C3F78D3C37 //A6
1367data8 0x40FBB97AA0B6DD02 //A7
1368data8 0x41251EA3345E5EB9 //A8
1369data8 0x415057F65C92E7B0 //A9
1370data8 0x41799C865241B505 //A10
1371data8 0x41A445209EFE896B //A11
1372data8 0x41D02D21880C953B //A12
1373data8 0x41F9FFDE8C63E16D //A13
1374data8 0x422504DC8302D2BE //A14
1375data8 0x425111BF18C95414 //A15
1376data8 0x427BCBE74A2B8EF7 //A16
1377data8 0x42A7256F59B286F7 //A17
1378data8 0x42D462D1586DE61F //A18
1379data8 0x42FBB1228D6C5118 //A19
1380// Polynomial coefficients for right root on [-5, -4]
6f65e668 1381// Lgammal is approximated by polynomial within [-.163171 ; .161988 ] range
d5efd131
MF
1382data8 0x3C5840FBAFDEE5BB, 0x38CAC0336E8C490A //A0
1383data8 0x403ACA5CF4921642, 0x3CCEDCDDA5491E56 //A1
1384data8 0x40744415CD813F8E, 0x3CFBFEBC17E39146 //A2
1385data8 0xAACD88D954E3E1BD, 0x0000400B //A3
1386data8 0xCB68C710D75ED802, 0x0000400F //A4
1387data8 0x8130F5AB997277AC, 0x00004014 //A5
1388data8 0x41855E3DBF99EBA7 //A6
1389data8 0x41CD14FE49C49FC2 //A7
1390data8 0x421433DCE281F07D //A8
1391data8 0x425C8399C7A92B6F //A9
1392data8 0x42A45FBE67840F1A //A10
1393data8 0x42ED68D75F9E6C98 //A11
1394data8 0x433567291C27E5BE //A12
1395data8 0x437F5ED7A9D9FD28 //A13
1396data8 0x43C720A65C8AB711 //A14
1397data8 0x441120A6C1D40B9B //A15
1398data8 0x44596F561F2D1CBE //A16
1399data8 0x44A3507DA81D5C01 //A17
1400data8 0x44EF06A31E39EEDF //A18
1401data8 0x45333774C99F523F //A19
1402// Polynomial coefficients for right root on [-6, -5]
6f65e668 1403// Lgammal is approximated by polynomial within [-.156450 ; .156126 ] range
d5efd131
MF
1404data8 0x3C71B82D6B2B3304, 0x3917186E3C0DC231 //A0
1405data8 0x405ED72E0829AE02, 0x3C960C25157980EB //A1
1406data8 0x40BCECC32EC22F9B, 0x3D5D8335A32F019C //A2
1407data8 0x929EC2B1FB931F17, 0x00004012 //A3
1408data8 0xD112EF96D37316DE, 0x00004018 //A4
1409data8 0x9F00BB9BB13416AB, 0x0000401F //A5
1410data8 0x425F7D8D5BDCB223 //A6
1411data8 0x42C9A8D00C776CC6 //A7
1412data8 0x433557FD8C481424 //A8
1413data8 0x43A209221A953EF0 //A9
1414data8 0x440EDC98D5618AB7 //A10
1415data8 0x447AABD25E367378 //A11
1416data8 0x44E73DE20CC3B288 //A12
1417data8 0x455465257B4E0BD8 //A13
1418data8 0x45C2011532085353 //A14
1419data8 0x462FEE4CC191945B //A15
1420data8 0x469C63AEEFEF0A7F //A16
1421data8 0x4709D045390A3810 //A17
1422data8 0x4778D360873C9F64 //A18
1423data8 0x47E26965BE9A682A //A19
1424// Polynomial coefficients for right root on [-7, -6]
6f65e668 1425// Lgammal is approximated by polynomial within [-.154582 ; .154521 ] range
d5efd131
MF
1426data8 0x3C75F103A1B00A48, 0x391C041C190C726D //A0
1427data8 0x40869DE49E3AF2AA, 0x3D1C17E1F813063B //A1
1428data8 0x410FCE23484CFD10, 0x3DB6F38C2F11DAB9 //A2
1429data8 0xEF281D1E1BE2055A, 0x00004019 //A3
1430data8 0xFCE3DA92AC55DFF8, 0x00004022 //A4
1431data8 0x8E9EA838A20BD58E, 0x0000402C //A5
1432data8 0x4354F21E2FB9E0C9 //A6
1433data8 0x43E9500994CD4F09 //A7
1434data8 0x447F3A2C23C033DF //A8
1435data8 0x45139152656606D8 //A9
1436data8 0x45A8D45F8D3BF2E8 //A10
1437data8 0x463FD32110E5BFE5 //A11
1438data8 0x46D490B3BDBAE0BE //A12
1439data8 0x476AC3CAD905DD23 //A13
1440data8 0x48018558217AD473 //A14
1441data8 0x48970AF371D30585 //A15
1442data8 0x492E6273A8BEFFE3 //A16
1443data8 0x49C47CC9AE3F1073 //A17
1444data8 0x4A5D38E8C35EFF45 //A18
1445data8 0x4AF0123E89694CD8 //A19
1446// Polynomial coefficients for right root on [-8, -7]
6f65e668 1447// Lgammal is approximated by polynomial within [-.154217 ; .154208 ] range
d5efd131
MF
1448data8 0xBCD2507D818DDD68, 0xB97F6940EA2871A0 //A0
1449data8 0x40B3B407AA387BCB, 0x3D6320238F2C43D1 //A1
1450data8 0x41683E85DAAFBAC7, 0x3E148D085958EA3A //A2
1451data8 0x9F2A95AF1E10A548, 0x00004022 //A3
1452data8 0x92F21522F482300E, 0x0000402E //A4
1453data8 0x90B51AB03A1F244D, 0x0000403A //A5
1454data8 0x44628E1C70EF534F //A6
1455data8 0x452393E2BC32D244 //A7
1456data8 0x45E5164141F4BA0B //A8
1457data8 0x46A712B3A8AF5808 //A9
1458data8 0x47698FD36CEDD0F2 //A10
1459data8 0x482C9AE6BBAA3637 //A11
1460data8 0x48F023821857C8E9 //A12
1461data8 0x49B2569053FC106F //A13
1462data8 0x4A74F646D5C1604B //A14
1463data8 0x4B3811CF5ABA4934 //A15
1464data8 0x4BFBB5DD6C84E233 //A16
1465data8 0x4CC05021086F637B //A17
1466data8 0x4D8450A345B0FB49 //A18
1467data8 0x4E43825848865DB2 //A19
1468// Polynomial coefficients for right root on [-9, -8]
6f65e668 1469// Lgammal is approximated by polynomial within [-.154160 ; .154158 ] range
d5efd131
MF
1470data8 0x3CDF4358564F2B46, 0x397969BEE6042F81 //A0
1471data8 0x40E3B088FED67721, 0x3D82787BA937EE85 //A1
1472data8 0x41C83A3893550EF4, 0x3E542ED57E244DA8 //A2
1473data8 0x9F003C6DC56E0B8E, 0x0000402B //A3
1474data8 0x92BDF64A3213A699, 0x0000403A //A4
1475data8 0x9074F503AAD417AF, 0x00004049 //A5
1476data8 0x4582843E1313C8CD //A6
1477data8 0x467387BD6A7826C1 //A7
1478data8 0x4765074E788CF440 //A8
1479data8 0x4857004DD9D1E09D //A9
1480data8 0x4949792ED7530EAF //A10
1481data8 0x4A3C7F089A292ED3 //A11
1482data8 0x4B30125BF0AABB86 //A12
1483data8 0x4C224175195E307E //A13
1484data8 0x4D14DC4C8B32C08D //A14
1485data8 0x4E07F1DB2786197E //A15
1486data8 0x4EFB8EA1C336DACB //A16
1487data8 0x4FF03797EACD0F23 //A17
1488data8 0x50E4304A8E68A730 //A18
1489data8 0x51D3618FB2EC9F93 //A19
1490// Polynomial coefficients for right root on [-10, -9]
6f65e668 1491// Lgammal is approximated by polynomial within [-.154152 ; .154152 ] range
d5efd131
MF
1492data8 0x3D42F34DA97ECF0C, 0x39FD1256F345B0D0 //A0
1493data8 0x4116261203919787, 0x3DC12D44055588EB //A1
1494data8 0x422EA8F32FB7FE99, 0x3ED849CE4E7B2D77 //A2
1495data8 0xE25BAF73477A57B5, 0x00004034 //A3
1496data8 0xEB021FD10060504A, 0x00004046 //A4
1497data8 0x8220A208EE206C5F, 0x00004059 //A5
1498data8 0x46B2C3903EC9DA14 //A6
1499data8 0x47D64393744B9C67 //A7
1500data8 0x48FAF79CCDC604DD //A8
1501data8 0x4A20975DB8061EBA //A9
1502data8 0x4B44AB9CBB38DB21 //A10
1503data8 0x4C6A032F60094FE9 //A11
1504data8 0x4D908103927634B4 //A12
1505data8 0x4EB516CA21D30861 //A13
1506data8 0x4FDB1BF12C58D318 //A14
1507data8 0x510180AAE094A553 //A15
1508data8 0x5226A8F2A2D45D57 //A16
1509data8 0x534E00B6B0C8B809 //A17
1510data8 0x5475022FE21215B2 //A18
1511data8 0x5596B02BF6C5E19B //A19
1512// Polynomial coefficients for right root on [-11, -10]
6f65e668 1513// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1514data8 0x3D7AA9C2E2B1029C, 0x3A15FB37578544DB //A0
1515data8 0x414BAF825A0C91D4, 0x3DFB9DA2CE398747 //A1
1516data8 0x4297F3EC8AE0AF03, 0x3F34208B55FB8781 //A2
1517data8 0xDD0C97D3197F56DE, 0x0000403E //A3
1518data8 0x8F6F3AF7A5499674, 0x00004054 //A4
1519data8 0xC68DA1AF6D878EEB, 0x00004069 //A5
1520data8 0x47F1E4E1E2197CE0 //A6
1521data8 0x494A8A28E597C3EB //A7
1522data8 0x4AA4175D0D35D705 //A8
1523data8 0x4BFEE6F0AF69E814 //A9
1524data8 0x4D580FE7B3DBB3C6 //A10
1525data8 0x4EB2ECE60E4608AF //A11
1526data8 0x500E04BE3E2B4F24 //A12
1527data8 0x5167F9450F0FB8FD //A13
1528data8 0x52C342BDE747603F //A14
1529data8 0x541F1699D557268C //A15
1530data8 0x557927C5F079864E //A16
1531data8 0x56D4D10FEEDB030C //A17
1532data8 0x5832385DF86AD28A //A18
1533data8 0x598898914B4D6523 //A19
1534// Polynomial coefficients for right root on [-12, -11]
6f65e668 1535// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1536data8 0xBD96F61647C58B03, 0xBA3ABB0C2A6C755B //A0
1537data8 0x418308A82714B70D, 0x3E1088FC6A104C39 //A1
1538data8 0x4306A493DD613C39, 0x3FB2341ECBF85741 //A2
1539data8 0x8FA8FE98339474AB, 0x00004049 //A3
1540data8 0x802CCDF570BA7942, 0x00004062 //A4
1541data8 0xF3F748AF11A32890, 0x0000407A //A5
1542data8 0x493E3B567EF178CF //A6
1543data8 0x4ACED38F651BA362 //A7
1544data8 0x4C600B357337F946 //A8
1545data8 0x4DF0F71A52B54CCF //A9
1546data8 0x4F8229F3B9FA2C70 //A10
1547data8 0x5113A4C4979B770E //A11
1548data8 0x52A56BC367F298D5 //A12
1549data8 0x543785CF31842DC0 //A13
1550data8 0x55C9FC37E3E40896 //A14
1551data8 0x575CD5D1BA556C82 //A15
1552data8 0x58F00A7AD99A9E08 //A16
1553data8 0x5A824088688B008D //A17
1554data8 0x5C15F75EF7E08EBD //A18
1555data8 0x5DA462EA902F0C90 //A19
1556// Polynomial coefficients for right root on [-13, -12]
6f65e668 1557// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1558data8 0x3DC3191752ACFC9D, 0x3A26CB6629532DBF //A0
1559data8 0x41BC8CFC051191BD, 0x3E68A84DA4E62AF2 //A1
1560data8 0x43797926294A0148, 0x400F345FF3723CFF //A2
1561data8 0xF26D2AF700B82625, 0x00004053 //A3
1562data8 0xA238B24A4B1F7B15, 0x00004070 //A4
1563data8 0xE793B5C0A41A264F, 0x0000408C //A5
1564data8 0x4A9585BDDACE863D //A6
1565data8 0x4C6075953448088A //A7
1566data8 0x4E29B2F38D1FC670 //A8
1567data8 0x4FF4619B079C440F //A9
1568data8 0x51C05DAE118D8AD9 //A10
1569data8 0x538A8C7F87326AD4 //A11
1570data8 0x5555B6937588DAB3 //A12
1571data8 0x5721E1F8B6E6A7DB //A13
1572data8 0x58EDA1D7A77DD6E5 //A14
1573data8 0x5AB8A9616B7DC9ED //A15
1574data8 0x5C84942AA209ED17 //A16
1575data8 0x5E518FC34C6F54EF //A17
1576data8 0x601FB3F17BCCD9A0 //A18
1577data8 0x61E61128D512FE97 //A19
1578// Polynomial coefficients for right root on [-14, -13]
6f65e668 1579// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1580data8 0xBE170D646421B3F5, 0xBAAD95F79FCB5097 //A0
1581data8 0x41F7328CBFCD9AC7, 0x3E743B8B1E8AEDB1 //A1
1582data8 0x43F0D0FA2DBDA237, 0x40A0422D6A227B55 //A2
1583data8 0x82082DF2D32686CC, 0x0000405F //A3
1584data8 0x8D64EE9B42E68B43, 0x0000407F //A4
1585data8 0xA3FFD82E08C5F1F1, 0x0000409F //A5
1586data8 0x4BF8C49D99123454 //A6
1587data8 0x4DFEC79DDF11342F //A7
1588data8 0x50038615A892F6BD //A8
1589data8 0x520929453DB32EF1 //A9
1590data8 0x54106A7808189A7F //A10
1591data8 0x5615A302D03C207B //A11
1592data8 0x581CC175AA736F5E //A12
1593data8 0x5A233E071147C017 //A13
1594data8 0x5C29E81917243F22 //A14
1595data8 0x5E3184B0B5AC4707 //A15
1596data8 0x6037C11DE62D8388 //A16
1597data8 0x6240787C4B1C9D6C //A17
1598data8 0x6448289235E80977 //A18
1599data8 0x664B5352C6C3449E //A19
1600// Polynomial coefficients for right root on [-15, -14]
6f65e668 1601// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1602data8 0x3E562C2E34A9207D, 0x3ADC00DA3DFF7A83 //A0
1603data8 0x42344C3B2F0D90AB, 0x3EB8A2E979F24536 //A1
1604data8 0x4469BFFF28B50D07, 0x41181E3D05C1C294 //A2
1605data8 0xAE38F64DCB24D9F8, 0x0000406A //A3
1606data8 0xA5C3F52C1B350702, 0x0000408E //A4
1607data8 0xA83BC857BCD67A1B, 0x000040B2 //A5
1608data8 0x4D663B4727B4D80A //A6
1609data8 0x4FA82C965B0F7788 //A7
1610data8 0x51EAD58C02908D95 //A8
1611data8 0x542E427970E073D8 //A9
1612data8 0x56714644C558A818 //A10
1613data8 0x58B3EC2040C77BAE //A11
1614data8 0x5AF72AE6A83D45B1 //A12
1615data8 0x5D3B214F611F5D12 //A13
1616data8 0x5F7FF5E49C54E92A //A14
1617data8 0x61C2E917AB765FB2 //A15
1618data8 0x64066FD70907B4C1 //A16
1619data8 0x664B3998D60D0F9B //A17
1620data8 0x689178710782FA8B //A18
1621data8 0x6AD14A66C1C7BEC3 //A19
1622// Polynomial coefficients for right root on [-16, -15]
6f65e668 1623// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1624data8 0xBE6D7E7192615BAE, 0xBB0137677D7CC719 //A0
1625data8 0x4273077763F6628C, 0x3F09250FB8FC8EC9 //A1
1626data8 0x44E6A1BF095B1AB3, 0x4178D5A74F6CB3B3 //A2
1627data8 0x8F8E0D5060FCC76E, 0x00004076 //A3
1628data8 0x800CC1DCFF092A63, 0x0000409E //A4
1629data8 0xF3AB0BA9D14CDA72, 0x000040C5 //A5
1630data8 0x4EDE3000A2F6D54F //A6
1631data8 0x515EC613B9C8E241 //A7
1632data8 0x53E003309FEEEA96 //A8
1633data8 0x5660ED908D7C9A90 //A9
1634data8 0x58E21E9B517B1A50 //A10
1635data8 0x5B639745E4374EE2 //A11
1636data8 0x5DE55BB626B2075D //A12
1637data8 0x606772B7506BA747 //A13
1638data8 0x62E9E581AB2E057B //A14
1639data8 0x656CBAD1CF85D396 //A15
1640data8 0x67EFF4EBD7989872 //A16
1641data8 0x6A722D2B19B7E2F9 //A17
1642data8 0x6CF5DEB3073B0743 //A18
1643data8 0x6F744AC11550B93A //A19
1644// Polynomial coefficients for right root on [-17, -16]
6f65e668 1645// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1646data8 0xBEDCC6291188207E, 0xBB872E3FDD48F5B7 //A0
1647data8 0x42B3076EE7525EF9, 0x3F6687A5038CA81C //A1
1648data8 0x4566A1AAD96EBCB5, 0x421F0FEDFBF548D2 //A2
1649data8 0x8F8D4D3DE9850DBA, 0x00004082 //A3
1650data8 0x800BDD6DA2CE1859, 0x000040AE //A4
1651data8 0xF3A8EC4C9CDC1CE5, 0x000040D9 //A5
1652data8 0x505E2FAFDB812628 //A6
1653data8 0x531EC5B3A7508719 //A7
1654data8 0x55E002F77E99B628 //A8
1655data8 0x58A0ED4C9B4DAE54 //A9
1656data8 0x5B621E4A8240F90C //A10
1657data8 0x5E2396E5C8849814 //A11
1658data8 0x60E55B43D8C5CE71 //A12
1659data8 0x63A7722F5D45D01D //A13
1660data8 0x6669E4E010DCE45A //A14
1661data8 0x692CBA120D5E78F6 //A15
1662data8 0x6BEFF4045350B22E //A16
1663data8 0x6EB22C9807C21819 //A17
1664data8 0x7175DE20D04617C4 //A18
1665data8 0x74344AB87C6D655F //A19
1666// Polynomial coefficients for right root on [-18, -17]
6f65e668 1667// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1668data8 0xBF28AEEE7B61D77C, 0xBBDBBB5FC57ABF79 //A0
1669data8 0x42F436F56B3B8A0C, 0x3FA43EE3C5C576E9 //A1
1670data8 0x45E98A22535D115D, 0x42984678BE78CC48 //A2
1671data8 0xAC176F3775E6FCFC, 0x0000408E //A3
1672data8 0xA3114F53A9FEB922, 0x000040BE //A4
1673data8 0xA4D168A8334ABF41, 0x000040EE //A5
1674data8 0x51E5B0E7EC7182BB //A6
1675data8 0x54E77D67B876EAB6 //A7
1676data8 0x57E9F7C30C09C4B6 //A8
1677data8 0x5AED29B0488614CA //A9
1678data8 0x5DF09486F87E79F9 //A10
1679data8 0x60F30B199979654E //A11
1680data8 0x63F60E02C7DCCC5F //A12
1681data8 0x66F9B8A00EB01684 //A13
1682data8 0x69FE2D3ED0700044 //A14
1683data8 0x6D01C8363C7DCC84 //A15
1684data8 0x700502B29C2F06E3 //A16
1685data8 0x730962B4500F4A61 //A17
1686data8 0x76103C6ED099192A //A18
1687data8 0x79100C7132CFD6E3 //A19
1688// Polynomial coefficients for right root on [-19, -18]
6f65e668 1689// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1690data8 0x3F3C19A53328A0C3, 0x3BE04ADC3FBE1458 //A0
1691data8 0x4336C16C16C16C19, 0x3FE58CE3AC4A7C28 //A1
1692data8 0x46702E85C0898B70, 0x432C922E412CEC6E //A2
1693data8 0xF57B99A1C034335D, 0x0000409A //A3
1694data8 0x82EC9634223DF909, 0x000040CF //A4
1695data8 0x94F66D7557E2EA60, 0x00004103 //A5
1696data8 0x5376118B79AE34D0 //A6
1697data8 0x56BAE7106D52E548 //A7
1698data8 0x5A00BD48CC8E25AB //A8
1699data8 0x5D4529722821B493 //A9
1700data8 0x608B1654AF31BBC1 //A10
1701data8 0x63D182CC98AEA859 //A11
1702data8 0x6716D43D5EEB05E8 //A12
1703data8 0x6A5DF884FC172E1C //A13
1704data8 0x6DA3CA7EBB97976B //A14
1705data8 0x70EA416D0BE6D2EF //A15
1706data8 0x743176C31EBB65F2 //A16
1707data8 0x7777C401A8715CF9 //A17
1708data8 0x7AC1110C6D350440 //A18
1709data8 0x7E02D0971CF84865 //A19
1710// Polynomial coefficients for right root on [-20, -19]
6f65e668 1711// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range
d5efd131
MF
1712data8 0xBFAB767F9BE21803, 0xBC5ACEF5BB1BD8B5 //A0
1713data8 0x4379999999999999, 0x4029241C7F5914C8 //A1
1714data8 0x46F47AE147AE147A, 0x43AC2979B64B9D7E //A2
1715data8 0xAEC33E1F67152993, 0x000040A7 //A3
1716data8 0xD1B71758E219616F, 0x000040DF //A4
1717data8 0x8637BD05AF6CF468, 0x00004118 //A5
1718data8 0x55065E9F80F293DE //A6
1719data8 0x588EADA78C44EE66 //A7
1720data8 0x5C15798EE22DEF09 //A8
1721data8 0x5F9E8ABFD644FA63 //A9
1722data8 0x6325FD7FE29BD7CD //A10
1723data8 0x66AFFC5C57E1F802 //A11
1724data8 0x6A3774CD7D5C0181 //A12
1725data8 0x6DC152724DE2A6FE //A13
1726data8 0x7149BB138EB3D0C2 //A14
1727data8 0x74D32FF8A70896C2 //A15
1728data8 0x785D3749F9C72BD7 //A16
1729data8 0x7BE5CCF65EBC4E40 //A17
1730data8 0x7F641A891B5FC652 //A18
1731data8 0x7FEFFFFFFFFFFFFF //A19
1732LOCAL_OBJECT_END(lgammal_right_roots_polynomial_data)
1733
1734LOCAL_OBJECT_START(lgammal_left_roots_polynomial_data)
1735// Polynomial coefficients for left root on [-3, -2]
6f65e668 1736// Lgammal is approximated by polynomial within [.084641 ; -.059553 ] range
d5efd131
MF
1737data8 0xBC0844590979B82E, 0xB8BC7CE8CE2ECC3B //A0
1738data8 0xBFFEA12DA904B18C, 0xBC91A6B2BAD5EF6E //A1
1739data8 0x4023267F3C265A51, 0x3CD7055481D03AED //A2
1740data8 0xA0C2D618645F8E00, 0x0000C003 //A3
1741data8 0xFA8256664F8CD2BE, 0x00004004 //A4
1742data8 0xC2C422C103F57158, 0x0000C006 //A5
1743data8 0x4084373F7CC70AF5 //A6
1744data8 0xC0A12239BDD6BB95 //A7
1745data8 0x40BDBA65E2709397 //A8
1746data8 0xC0DA2D2504DFB085 //A9
1747data8 0x40F758173CA5BF3C //A10
1748data8 0xC11506C65C267E72 //A11
1749data8 0x413318EE3A6B05FC //A12
1750data8 0xC1517767F247DA98 //A13
1751data8 0x41701237B4754D73 //A14
1752data8 0xC18DB8A03BC5C3D8 //A15
1753data8 0x41AB80953AC14A07 //A16
1754data8 0xC1C9B7B76638D0A4 //A17
1755data8 0x41EA727E3033E2D9 //A18
1756data8 0xC20812C297729142 //A19
1757//
1758// Polynomial coefficients for left root on [-4, -3]
6f65e668 1759// Lgammal is approximated by polynomial within [.147147 ; -.145158 ] range
d5efd131
MF
1760data8 0xBC3130AE5C4F54DB, 0xB8ED23294C13398A //A0
1761data8 0xC034B99D966C5646, 0xBCE2E5FE3BC3DBB9 //A1
1762data8 0x406F76DEAE0436BD, 0x3D14974DDEC057BD //A2
1763data8 0xE929ACEA5979BE96, 0x0000C00A //A3
1764data8 0xF47C14F8A0D52771, 0x0000400E //A4
1765data8 0x88B7BC036937481C, 0x0000C013 //A5
1766data8 0x4173E8F3AB9FC266 //A6
1767data8 0xC1B7DBBE062FB11B //A7
1768data8 0x41FD2F76DE7A47A7 //A8
1769data8 0xC242225FE53B124D //A9
1770data8 0x4286D12AE2FBFA30 //A10
1771data8 0xC2CCFFC267A3C4C0 //A11
1772data8 0x431294E10008E014 //A12
1773data8 0xC357FAC8C9A2DF6A //A13
1774data8 0x439F2190AB9FAE01 //A14
1775data8 0xC3E44C1D8E8C67C3 //A15
1776data8 0x442A8901105D5A38 //A16
1777data8 0xC471C4421E908C3A //A17
1778data8 0x44B92CD4D59D6D17 //A18
1779data8 0xC4FB3A078B5247FA //A19
1780// Polynomial coefficients for left root on [-5, -4]
6f65e668 1781// Lgammal is approximated by polynomial within [.155671 ; -.155300 ] range
d5efd131
MF
1782data8 0xBC57BF3C6E8A94C1, 0xB902FB666934AC9E //A0
1783data8 0xC05D224A3EF9E41F, 0xBCF6F5713913E440 //A1
1784data8 0x40BB533C678A3955, 0x3D688E53E3C72538 //A2
1785data8 0x869FBFF732E99B84, 0x0000C012 //A3
1786data8 0xBA9537AD61392DEC, 0x00004018 //A4
1787data8 0x89EAE8B1DEA06B05, 0x0000C01F //A5
1788data8 0x425A8C5C53458D3C //A6
1789data8 0xC2C5068B3ED6509B //A7
1790data8 0x4330FFA575E99B4E //A8
1791data8 0xC39BEC12DDDF7669 //A9
1792data8 0x44073825725F74F9 //A10
1793data8 0xC47380EBCA299047 //A11
1794data8 0x44E084DD9B666437 //A12
1795data8 0xC54C2DA6BF787ACF //A13
1796data8 0x45B82D65C8D6FA42 //A14
1797data8 0xC624D62113FE950A //A15
1798data8 0x469200CC19B45016 //A16
1799data8 0xC6FFDDC6DD938E2E //A17
1800data8 0x476DD7C07184B9F9 //A18
1801data8 0xC7D554A30085C052 //A19
1802// Polynomial coefficients for left root on [-6, -5]
6f65e668 1803// Lgammal is approximated by polynomial within [.157425 ; -.157360 ] range
d5efd131
MF
1804data8 0x3C9E20A87C8B79F1, 0x39488BE34B2427DB //A0
1805data8 0xC08661F6A43A5E12, 0xBD3D912526D759CC //A1
1806data8 0x410F79DCB794F270, 0x3DB9BEE7CD3C1BF5 //A2
1807data8 0xEB7404450D0005DB, 0x0000C019 //A3
1808data8 0xF7AE9846DFE4D4AB, 0x00004022 //A4
1809data8 0x8AF535855A95B6DA, 0x0000C02C //A5
1810data8 0x43544D54E9FE240E //A6
1811data8 0xC3E8684E40CE6CFC //A7
1812data8 0x447DF44C1D803454 //A8
1813data8 0xC512AC305439B2BA //A9
1814data8 0x45A79226AF79211A //A10
1815data8 0xC63E0DFF7244893A //A11
1816data8 0x46D35216C3A83AF3 //A12
1817data8 0xC76903BE0C390E28 //A13
1818data8 0x48004A4DECFA4FD5 //A14
1819data8 0xC8954FBD243DB8BE //A15
1820data8 0x492BF3A31EB18DDA //A16
1821data8 0xC9C2C6A864521F3A //A17
1822data8 0x4A5AB127C62E8DA1 //A18
1823data8 0xCAECF60EF3183C57 //A19
1824// Polynomial coefficients for left root on [-7, -6]
6f65e668 1825// Lgammal is approximated by polynomial within [.157749 ; -.157739 ] range
d5efd131
MF
1826data8 0x3CC9B9E8B8D551D6, 0x3961813C8E1E10DB //A0
1827data8 0xC0B3ABF7A5CEA91F, 0xBD55638D4BCB4CC4 //A1
1828data8 0x4168349A25504236, 0x3E0287ECE50CCF76 //A2
1829data8 0x9EC8ED6E4C219E67, 0x0000C022 //A3
1830data8 0x9279EB1B799A3FF3, 0x0000402E //A4
1831data8 0x90213EF8D9A5DBCF, 0x0000C03A //A5
1832data8 0x4462775E857FB71C //A6
1833data8 0xC52377E70B45FDBF //A7
1834data8 0x45E4F3D28EDA8C28 //A8
1835data8 0xC6A6E85571BD2D0B //A9
1836data8 0x47695BB17E74DF74 //A10
1837data8 0xC82C5AC0ED6A662F //A11
1838data8 0x48EFF8159441C2E3 //A12
1839data8 0xC9B22602C1B68AE5 //A13
1840data8 0x4A74BA8CE7B34100 //A14
1841data8 0xCB37C7E208482E4B //A15
1842data8 0x4BFB5A1D57352265 //A16
1843data8 0xCCC01CB3021212FF //A17
1844data8 0x4D841613AC3431D1 //A18
1845data8 0xCE431C9E9EE43AD9 //A19
1846// Polynomial coefficients for left root on [-8, -7]
6f65e668 1847// Lgammal is approximated by polynomial within [.157799 ; -.157798 ] range
d5efd131
MF
1848data8 0xBCF9C7A33AD9478C, 0xB995B0470F11E5ED //A0
1849data8 0xC0E3AF76FE4C2F8B, 0xBD8DBCD503250511 //A1
1850data8 0x41C838E76CAAF0D5, 0x3E5D79F5E2E069C3 //A2
1851data8 0x9EF345992B262CE0, 0x0000C02B //A3
1852data8 0x92AE0292985FD559, 0x0000403A //A4
1853data8 0x90615420C08F7D8C, 0x0000C049 //A5
1854data8 0x45828139342CEEB7 //A6
1855data8 0xC67384066C31E2D3 //A7
1856data8 0x476502BC4DAC2C35 //A8
1857data8 0xC856FAADFF22ADC6 //A9
1858data8 0x49497243255AB3CE //A10
1859data8 0xCA3C768489520F6B //A11
1860data8 0x4B300D1EA47AF838 //A12
1861data8 0xCC223B0508AC620E //A13
1862data8 0x4D14D46583338CD8 //A14
1863data8 0xCE07E7A87AA068E4 //A15
1864data8 0x4EFB811AD2F8BEAB //A16
1865data8 0xCFF0351B51508523 //A17
1866data8 0x50E4364CCBF53100 //A18
1867data8 0xD1D33CFD0BF96FA6 //A19
1868// Polynomial coefficients for left root on [-9, -8]
6f65e668 1869// Lgammal is approximated by polynomial within [.157806 ; -.157806 ] range
d5efd131
MF
1870data8 0x3D333E4438B1B9D4, 0x39E7B956B83964C1 //A0
1871data8 0xC11625EDFC63DCD8, 0xBDCF39625709EFAC //A1
1872data8 0x422EA8C150480F16, 0x3EC16ED908AB7EDD //A2
1873data8 0xE2598725E2E11646, 0x0000C034 //A3
1874data8 0xEAFF2346DE3EBC98, 0x00004046 //A4
1875data8 0x821E90DE12A0F05F, 0x0000C059 //A5
1876data8 0x46B2C334AE5366FE //A6
1877data8 0xC7D64314B43191B6 //A7
1878data8 0x48FAF6ED5899E01B //A8
1879data8 0xCA2096E4472AF37D //A9
1880data8 0x4B44AAF49FB7E4C8 //A10
1881data8 0xCC6A02469F2BD920 //A11
1882data8 0x4D9080626D2EFC07 //A12
1883data8 0xCEB515EDCF0695F7 //A13
1884data8 0x4FDB1AC69BF36960 //A14
1885data8 0xD1017F8274339270 //A15
1886data8 0x5226A684961BAE2F //A16
1887data8 0xD34E085C088404A5 //A17
1888data8 0x547511892FF8960E //A18
1889data8 0xD5968FA3B1ED67A9 //A19
1890// Polynomial coefficients for left root on [-10, -9]
6f65e668 1891// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
1892data8 0xBD355818A2B42BA2, 0xB9B7320B6A0D61EA //A0
1893data8 0xC14BAF7DA5F3770E, 0xBDE64AF9A868F719 //A1
1894data8 0x4297F3E8791F9CD3, 0x3F2A553E59B4835E //A2
1895data8 0xDD0C5F7E551BD13C, 0x0000C03E //A3
1896data8 0x8F6F0A3B2EB08BBB, 0x00004054 //A4
1897data8 0xC68D4D5AD230BA08, 0x0000C069 //A5
1898data8 0x47F1E4D8C35D1A3E //A6
1899data8 0xC94A8A191DB0A466 //A7
1900data8 0x4AA4174F65FE6AE8 //A8
1901data8 0xCBFEE6D90F94E9DD //A9
1902data8 0x4D580FD3438BE16C //A10
1903data8 0xCEB2ECD456D50224 //A11
1904data8 0x500E049F7FE64546 //A12
1905data8 0xD167F92D9600F378 //A13
1906data8 0x52C342AE2B43261A //A14
1907data8 0xD41F15DEEDA4B67E //A15
1908data8 0x55792638748AFB7D //A16
1909data8 0xD6D4D760074F6E6B //A17
1910data8 0x5832469D58ED3FA9 //A18
1911data8 0xD988769F3DC76642 //A19
1912// Polynomial coefficients for left root on [-11, -10]
6f65e668 1913// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
1914data8 0xBDA050601F39778A, 0xBA0D4D1CE53E8241 //A0
1915data8 0xC18308A7D8EA4039, 0xBE370C379D3EAD41 //A1
1916data8 0x4306A49380644E6C, 0x3FBBB143C0E7B5C8 //A2
1917data8 0x8FA8FB233E4AA6D2, 0x0000C049 //A3
1918data8 0x802CC9D8AEAC207D, 0x00004062 //A4
1919data8 0xF3F73EE651A37A13, 0x0000C07A //A5
1920data8 0x493E3B550A7B9568 //A6
1921data8 0xCACED38DAA060929 //A7
1922data8 0x4C600B346BAB3BC6 //A8
1923data8 0xCDF0F719193E3D26 //A9
1924data8 0x4F8229F24528B151 //A10
1925data8 0xD113A4C2D32FBBE2 //A11
1926data8 0x52A56BC13DC4474D //A12
1927data8 0xD43785CFAF5E3CE3 //A13
1928data8 0x55C9FC3EA5941202 //A14
1929data8 0xD75CD545A3341AF5 //A15
1930data8 0x58F009911F77C282 //A16
1931data8 0xDA8246294D210BEC //A17
1932data8 0x5C1608AAC32C3A8E //A18
1933data8 0xDDA446E570A397D5 //A19
1934// Polynomial coefficients for left root on [-12, -11]
6f65e668 1935// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
1936data8 0x3DEACBB3081C502E, 0x3A8AA6F01DEDF745 //A0
1937data8 0xC1BC8CFBFB0A9912, 0xBE6556B6504A2AE6 //A1
1938data8 0x43797926206941D7, 0x40289A9644C2A216 //A2
1939data8 0xF26D2A78446D0839, 0x0000C053 //A3
1940data8 0xA238B1D937FFED38, 0x00004070 //A4
1941data8 0xE793B4F6DE470538, 0x0000C08C //A5
1942data8 0x4A9585BDC44DC45D //A6
1943data8 0xCC60759520342C47 //A7
1944data8 0x4E29B2F3694C0404 //A8
1945data8 0xCFF4619AE7B6BBAB //A9
1946data8 0x51C05DADF52B89E8 //A10
1947data8 0xD38A8C7F48819A4A //A11
1948data8 0x5555B6932D687860 //A12
1949data8 0xD721E1FACB6C1B5B //A13
1950data8 0x58EDA1E2677C8F91 //A14
1951data8 0xDAB8A8EC523C1F71 //A15
1952data8 0x5C84930133F30411 //A16
1953data8 0xDE51952FDFD1EC49 //A17
1954data8 0x601FCCEC1BBD25F1 //A18
1955data8 0xE1E5F2D76B610920 //A19
1956// Polynomial coefficients for left root on [-13, -12]
6f65e668 1957// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
1958data8 0xBE01612F373268ED, 0xBA97B7A18CDF103B //A0
1959data8 0xC1F7328CBF7A4FAC, 0xBE89A25A6952F481 //A1
1960data8 0x43F0D0FA2DBDA237, 0x40A0422EC1CE6084 //A2
1961data8 0x82082DF2D32686C5, 0x0000C05F //A3
1962data8 0x8D64EE9B42E68B36, 0x0000407F //A4
1963data8 0xA3FFD82E08C630C9, 0x0000C09F //A5
1964data8 0x4BF8C49D99123466 //A6
1965data8 0xCDFEC79DDF1119ED //A7
1966data8 0x50038615A892D242 //A8
1967data8 0xD20929453DC8B537 //A9
1968data8 0x54106A78083BA1EE //A10
1969data8 0xD615A302C69E27B2 //A11
1970data8 0x581CC175870FF16F //A12
1971data8 0xDA233E0979E12B74 //A13
1972data8 0x5C29E822BC568C80 //A14
1973data8 0xDE31845DB5340FBC //A15
1974data8 0x6037BFC6D498D5F9 //A16
1975data8 0xE2407D92CD613E82 //A17
1976data8 0x64483B9B62367EB7 //A18
1977data8 0xE64B2DC830E8A799 //A19
1978// Polynomial coefficients for left root on [-14, -13]
6f65e668 1979// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
1980data8 0x3E563D0B930B371F, 0x3AE779957E14F012 //A0
1981data8 0xC2344C3B2F083767, 0xBEC0B7769AA3DD66 //A1
1982data8 0x4469BFFF28B50D07, 0x41181E3F13ED2401 //A2
1983data8 0xAE38F64DCB24D9EE, 0x0000C06A //A3
1984data8 0xA5C3F52C1B3506F2, 0x0000408E //A4
1985data8 0xA83BC857BCD6BA92, 0x0000C0B2 //A5
1986data8 0x4D663B4727B4D81A //A6
1987data8 0xCFA82C965B0F62E9 //A7
1988data8 0x51EAD58C02905B71 //A8
1989data8 0xD42E427970FA56AD //A9
1990data8 0x56714644C57D8476 //A10
1991data8 0xD8B3EC2037EC95F2 //A11
1992data8 0x5AF72AE68BBA5B3D //A12
1993data8 0xDD3B2152C67AA6B7 //A13
1994data8 0x5F7FF5F082861B8B //A14
1995data8 0xE1C2E8BE125A5B7A //A15
1996data8 0x64066E92FE9EBE7D //A16
1997data8 0xE64B4201CDF9F138 //A17
1998data8 0x689186351E58AA88 //A18
1999data8 0xEAD132A585DFC60A //A19
2000// Polynomial coefficients for left root on [-15, -14]
6f65e668 2001// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
2002data8 0xBE6D7DDE12700AC1, 0xBB1E025BF1667FB5 //A0
2003data8 0xC273077763F60AD5, 0xBF2A1698184C7A9A //A1
2004data8 0x44E6A1BF095B1AB3, 0x4178D5AE8A4A2874 //A2
2005data8 0x8F8E0D5060FCC767, 0x0000C076 //A3
2006data8 0x800CC1DCFF092A57, 0x0000409E //A4
2007data8 0xF3AB0BA9D14D37D1, 0x0000C0C5 //A5
2008data8 0x4EDE3000A2F6D565 //A6
2009data8 0xD15EC613B9C8C800 //A7
2010data8 0x53E003309FEECCAA //A8
2011data8 0xD660ED908D8B15C4 //A9
2012data8 0x58E21E9B51A1C4AE //A10
2013data8 0xDB639745DB82210D //A11
2014data8 0x5DE55BB60C68FCF6 //A12
2015data8 0xE06772BA3FCA23C6 //A13
2016data8 0x62E9E58B4F702C31 //A14
2017data8 0xE56CBA49B071ABE2 //A15
2018data8 0x67EFF31E4F2BA36A //A16
2019data8 0xEA7232C8804F32C3 //A17
2020data8 0x6CF5EFEE929A0928 //A18
2021data8 0xEF742EE03EC3E8FF //A19
2022// Polynomial coefficients for left root on [-16, -15]
6f65e668 2023// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
2024data8 0xBEDCC628FEAC7A1B, 0xBB80582C8BEBB198 //A0
2025data8 0xC2B3076EE752595E, 0xBF5388F55AFAE53E //A1
2026data8 0x4566A1AAD96EBCB5, 0x421F0FEFE2444293 //A2
2027data8 0x8F8D4D3DE9850DB2, 0x0000C082 //A3
2028data8 0x800BDD6DA2CE184C, 0x000040AE //A4
2029data8 0xF3A8EC4C9CDC7A43, 0x0000C0D9 //A5
2030data8 0x505E2FAFDB81263F //A6
2031data8 0xD31EC5B3A7506CD9 //A7
2032data8 0x55E002F77E999810 //A8
2033data8 0xD8A0ED4C9B5C2900 //A9
2034data8 0x5B621E4A8267C401 //A10
2035data8 0xDE2396E5BFCFDA7A //A11
2036data8 0x60E55B43BE6F9A79 //A12
2037data8 0xE3A772324C7405FA //A13
2038data8 0x6669E4E9B7E57A2D //A14
2039data8 0xE92CB989F8A8FB37 //A15
2040data8 0x6BEFF2368849A36E //A16
2041data8 0xEEB23234FE191D55 //A17
2042data8 0x7175EF5D1080B105 //A18
2043data8 0xF4342ED7B1B7BE31 //A19
2044// Polynomial coefficients for left root on [-17, -16]
6f65e668 2045// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
2046data8 0xBF28AEEE7B58C790, 0xBBC4448DE371FA0A //A0
2047data8 0xC2F436F56B3B89B1, 0xBF636755245AC63A //A1
2048data8 0x45E98A22535D115D, 0x4298467DA93DB784 //A2
2049data8 0xAC176F3775E6FCF2, 0x0000C08E //A3
2050data8 0xA3114F53A9FEB908, 0x000040BE //A4
2051data8 0xA4D168A8334AFE5A, 0x0000C0EE //A5
2052data8 0x51E5B0E7EC7182CF //A6
2053data8 0xD4E77D67B876D6B4 //A7
2054data8 0x57E9F7C30C098C83 //A8
2055data8 0xDAED29B0489EF7A7 //A9
2056data8 0x5DF09486F8A524B8 //A10
2057data8 0xE0F30B19910A2393 //A11
2058data8 0x63F60E02AB3109F4 //A12
2059data8 0xE6F9B8A3431854D5 //A13
2060data8 0x69FE2D4A6D94218E //A14
2061data8 0xED01C7E272A73560 //A15
2062data8 0x7005017D82B186B6 //A16
2063data8 0xF3096A81A69BD8AE //A17
2064data8 0x76104951BAD67D5C //A18
2065data8 0xF90FECC99786FD5B //A19
2066// Polynomial coefficients for left root on [-18, -17]
6f65e668 2067// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
2068data8 0x3F3C19A53328E26A, 0x3BE238D7BA036B3B //A0
2069data8 0xC336C16C16C16C13, 0xBFEACE245DEC56F3 //A1
2070data8 0x46702E85C0898B70, 0x432C922B64FD1DA4 //A2
2071data8 0xF57B99A1C0343350, 0x0000C09A //A3
2072data8 0x82EC9634223DF90D, 0x000040CF //A4
2073data8 0x94F66D7557E3237D, 0x0000C103 //A5
2074data8 0x5376118B79AE34D6 //A6
2075data8 0xD6BAE7106D52CE49 //A7
2076data8 0x5A00BD48CC8E11AB //A8
2077data8 0xDD4529722833E2DF //A9
2078data8 0x608B1654AF5F46AF //A10
2079data8 0xE3D182CC90D8723F //A11
2080data8 0x6716D43D46706AA0 //A12
2081data8 0xEA5DF888C5B428D3 //A13
2082data8 0x6DA3CA85888931A6 //A14
2083data8 0xF0EA40EF2AC7E070 //A15
2084data8 0x743175D1A251AFCD //A16
2085data8 0xF777CB6E2B550D73 //A17
2086data8 0x7AC11E468A134A51 //A18
2087data8 0xFE02B6BDD0FC40AA //A19
2088// Polynomial coefficients for left root on [-19, -18]
6f65e668 2089// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range
d5efd131
MF
2090data8 0xBFAB767F9BE217FC, 0xBC4A5541CE0D8D0D //A0
2091data8 0xC379999999999999, 0xC01A84981B490BE8 //A1
2092data8 0x46F47AE147AE147A, 0x43AC2987BBC466EB //A2
2093data8 0xAEC33E1F67152987, 0x0000C0A7 //A3
2094data8 0xD1B71758E2196153, 0x000040DF //A4
2095data8 0x8637BD05AF6D420E, 0x0000C118 //A5
2096data8 0x55065E9F80F293B2 //A6
2097data8 0xD88EADA78C44BFA7 //A7
2098data8 0x5C15798EE22EC6CD //A8
2099data8 0xDF9E8ABFD67895CF //A9
2100data8 0x6325FD7FE13B0DE0 //A10
2101data8 0xE6AFFC5C3DE70858 //A11
2102data8 0x6A3774CE81C70D43 //A12
2103data8 0xEDC1527412D8129F //A13
2104data8 0x7149BABCDA8B7A72 //A14
2105data8 0xF4D330AD49071BB5 //A15
2106data8 0x785D4046F4C5F1FD //A16
2107data8 0xFBE59BFEDBA73FAF //A17
2108data8 0x7F64BEF2B2EC8DA1 //A18
2109data8 0xFFEFFFFFFFFFFFFF //A19
2110LOCAL_OBJECT_END(lgammal_left_roots_polynomial_data)
2111
2112
2113//==============================================================
2114// Code
2115//==============================================================
2116
2117.section .text
2118GLOBAL_LIBM_ENTRY(__libm_lgammal)
2119{ .mfi
2120 getf.exp rSignExpX = f8
2121 // Test x for NaTVal, NaN, +/-0, +/-INF, denormals
2122 fclass.m p6,p0 = f8,0x1EF
2123 addl r17Ones = 0x1FFFF, r0 // exponent mask
2124}
2125{ .mfi
2126 addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp
2127 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR)
2128 adds rDelta = 0x3FC, r0
2129}
2130;;
2131{ .mfi
2132 getf.sig rSignifX = f8
2133 fcmp.lt.s1 p15, p14 = f8, f0
2134 shl rDelta = rDelta, 20 // single precision 1.5
2135}
2136{ .mfi
2137 ld8 GR_ad_z_1 = [GR_ad_z_1]// get pointer to Constants_Z_1
2138 fma.s1 fTwo = f1, f1, f1 // 2.0
2139 addl rExp8 = 0x10002, r0 // exponent of 8.0
2140}
2141;;
2142{ .mfi
2143 alloc rPFS_SAVED = ar.pfs, 0, 34, 4, 0 // get some registers
2144 fmerge.s fAbsX = f1, f8 // |x|
2145 and rExpX = rSignExpX, r17Ones // mask sign bit
2146}
2147{ .mib
2148 addl rExpHalf = 0xFFFE, r0 // exponent of 0.5
2149 addl rExp2 = 0x10000, r0 // exponent of 2.0
2150 // branch out if x is NaTVal, NaN, +/-0, +/-INF, or denormalized number
2151(p6) br.cond.spnt lgammal_spec
2152}
2153;;
2154_deno_back_to_main_path:
2155{ .mfi
2156 // Point to Constants_G_H_h1
2157 add rTbl1Addr = 0x040, GR_ad_z_1
2158 frcpa.s1 fRcpX, p0 = f1, f8 // initial approximation of 1/x
2159 extr.u GR_Index1 = rSignifX, 59, 4
2160}
2161{ .mib
2162(p14) cmp.ge.unc p8, p0 = rExpX, rExp8 // p8 = 1 if x >= 8.0
2163 adds rZ625 = 0x3F2, r0
2164(p8) br.cond.spnt lgammal_big_positive // branch out if x >= 8.0
2165}
2166;;
2167{ .mfi
2168 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
2169 fmerge.se fSignifX = f1, f8 // significand of x
2170 // Get high 15 bits of significand
2171 extr.u GR_X_0 = rSignifX, 49, 15
2172}
2173{ .mib
2174 cmp.lt.unc p9, p0 = rExpX, rExpHalf // p9 = 1 if |x| < 0.5
2175 // set p11 if 2 <= x < 4
2176(p14) cmp.eq.unc p11, p0 = rExpX, rExp2
2177(p9) br.cond.spnt lgammal_0_half // branch out if |x| < 0.5
2178}
2179;;
2180{ .mfi
2181 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
2182 fms.s1 fA5L = f1, f1, f8 // for 0.75 <= x < 1.3125 path
2183 shl rZ625 = rZ625, 20 // single precision 0.625
2184}
2185{ .mib
2186 setf.s FR_MHalf = rDelta
2187 // set p10 if x >= 4.0
2188(p14) cmp.gt.unc p10, p0 = rExpX, rExp2
2189 // branch to special path for 4.0 <= x < 8
2190(p10) br.cond.spnt lgammal_4_8
2191}
2192;;
2193{ .mfi
2194 // for 1.3125 <= x < 1.5625 path
2195 addl rPolDataPtr= @ltoff(lgammal_loc_min_data),gp
2196 // argument of polynomial approximation for 1.5625 <= x < 2.25
2197 fms.s1 fB4 = f8, f1, fTwo
2198 cmp.eq p12, p0 = rExpX, rExpHalf
2199}
2200{ .mib
2201 addl rExpOne = 0xFFFF, r0 // exponent of 1.0
2202 // keep p11 set only if significand of x >= 1.125 (i.e. 2.25 <= x < 4)
2203(p11) cmp.le p11, p0 = 2, GR_Index1
2204(p11) br.cond.spnt lgammal_2Q_4
2205}
2206;;
2207{ .mfi
2208 // point to xMin for 1.3125 <= x < 1.5625 path
2209 ld8 rPolDataPtr = [rPolDataPtr]
2210 fcvt.xf fFltIntX = fXint // RTN(x)
2211(p14) cmp.eq.unc p13, p7 = rExpX, rExpOne // p13 set if 1.0 <= x < 2.0
2212}
2213{ .mib
2214 setf.s FR_FracX = rZ625
2215 // set p12 if |x| < 0.75
2216(p12) cmp.gt.unc p12, p0 = 8, GR_Index1
2217 // branch out to special path for |x| < 0.75
2218(p12) br.cond.spnt lgammal_half_3Q
2219}
2220;;
2221.pred.rel "mutex", p7, p13
2222{ .mfi
2223 getf.sig rXRnd = fXint // integer part of the input value
2224 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration
2225 // Get bits 30-15 of X_0 * Z_1
2226 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
2227}
2228{ .mib
2229(p7) cmp.eq p6, p0 = rExpX, rExp2 // p6 set if 2.0 <= x < 2.25
2230(p13) cmp.le p6, p0 = 9, GR_Index1
2231 // branch to special path 1.5625 <= x < 2.25
2232(p6) br.cond.spnt lgammal_13Q_2Q
2233}
2234;;
2235//
2236// For performance, don't use result of pmpyshr2.u for 4 cycles.
2237//
2238{ .mfi
2239 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr // Point to G_1
2240 fma.s1 fSix = fTwo, fTwo, fTwo // 6.0
2241 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
2242}
2243{ .mib
2244 add rTmpPtr3 = -0x50, GR_ad_z_1
2245(p13) cmp.gt p7, p0 = 5, GR_Index1
2246 // branch to special path 0.75 <= x < 1.3125
2247(p7) br.cond.spnt lgammal_03Q_1Q
2248}
2249;;
2250{ .mfi
2251 add rTmpPtr = 8, GR_ad_tbl_1
2252 fma.s1 fRoot = f8, f1, f1 // x + 1
2253 // Absolute value of int arg. Will be used as index in table with roots
2254 sub rXRnd = r0, rXRnd
2255}
2256{ .mib
2257 ldfe fA5L = [rPolDataPtr], 16 // xMin
2258 addl rNegSingularity = 0x3003E, r0
2259(p14) br.cond.spnt lgammal_loc_min
2260}
2261;;
2262{ .mfi
2263 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1
2264 nop.f 0
2265 add rZ2Addr = 0x140, GR_ad_z_1 // Point to Constants_Z_2
2266}
2267{ .mib
2268 ldfd FR_h = [rTmpPtr] // Load h_1
2269 // If arg is less or equal to -2^63
2270 cmp.geu.unc p8,p0 = rSignExpX, rNegSingularity
2271 // Singularity for x < -2^63 since all such arguments are integers
2272 // branch to special code which deals with singularity
2273(p8) br.cond.spnt lgammal_singularity
2274}
2275;;
2276{ .mfi
2277 ldfe FR_log2_hi = [GR_ad_q], 32 // Load log2_hi
2278 nop.f 0
2279 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
2280}
2281{ .mfi
2282 ldfe FR_log2_lo = [rTmpPtr3], 32 // Load log2_lo
2283 fms.s1 fDx = f8, f1, fFltIntX // x - RTN(x)
2284 // index in table with roots and bounds
2285 adds rXint = -2, rXRnd
2286}
2287;;
2288{ .mfi
2289 ldfe FR_Q4 = [GR_ad_q], 32 // Load Q4
2290 nop.f 0
2291 // set p12 if x may be close to negative root: -19.5 < x < -2.0
2292 cmp.gtu p12, p0 = 18, rXint
2293}
2294{ .mfi
2295 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
2296 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration
2297 // Point to Constants_G_H_h2
2298 add rTbl2Addr = 0x180, GR_ad_z_1
2299}
2300;;
2301{ .mfi
2302 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
2303 // set p9 if x is integer and negative
2304 fcmp.eq.s1 p9, p0 = f8,fFltIntX
2305 // Point to Constants_G_H_h3
2306 add rTbl3Addr = 0x280, GR_ad_z_1
2307}
2308{ .mfi
2309 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
2310 nop.f 0
2311 sub GR_N = rExpX, rExpHalf, 1
2312}
2313;;
2314{ .mfi
2315 ldfe FR_Q3 = [rTmpPtr3], 32 // Load Q3
2316 nop.f 0
2317 // Point to lnsin polynomial coefficients
2318 adds rLnSinDataPtr = 864, rTbl3Addr
2319}
2320{ .mfi
2321 ldfe FR_Q2 = [GR_ad_q],32 // Load Q2
2322 nop.f 0
2323 add rTmpPtr = 8, GR_ad_tbl_2
2324}
2325;;
2326{ .mfi
2327 ldfe FR_Q1 = [rTmpPtr3] // Load Q1
2328 fcmp.lt.s1 p0, p15 = fAbsX, fSix // p15 is set when x < -6.0
2329 // point to table with roots and bounds
2330 adds rRootsBndAddr = -1296, GR_ad_z_1
2331}
2332{ .mfb
2333 // Put integer N into rightmost significand
2334 setf.sig fFloatN = GR_N
2335 fma.s1 fThirteen = fSix, fTwo, f1 // 13.0
2336 // Singularity if -2^63 < x < 0 and x is integer
2337 // branch to special code which deals with singularity
2338(p9) br.cond.spnt lgammal_singularity
2339}
2340;;
2341{ .mfi
2342 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2
2343 // y = |x|/2^(exponent(x)) - 1.5
2344 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf
2345 // Get bits 30-15 of X_1 * Z_2
2346 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
2347}
2348{ .mfi
2349 ldfd FR_h2 = [rTmpPtr] // Load h_2
2350 fma.s1 fDxSqr = fDx, fDx, f0 // deltaX^2
2351 adds rTmpPtr3 = 128, rLnSinDataPtr
2352}
2353;;
2354//
2355// For performance, don't use result of pmpyshr2.u for 4 cycles.
2356//
2357{ .mfi
2358 getf.exp rRoot = fRoot // sign and biased exponent of (x + 1)
2359 nop.f 0
2360 // set p6 if -4 < x <= -2
2361 cmp.eq p6, p0 = rExpX, rExp2
2362}
2363{ .mfi
2364 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16
2365 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration
2366 sub rIndexPol = rExpX, rExpHalf // index of polynom
2367}
2368;;
2369{ .mfi
2370 ldfe fLnSin4 = [rLnSinDataPtr], 96
2371 // p10 is set if x is potential "right" root
2372 // p11 set for possible "left" root
2373 fcmp.lt.s1 p10, p11 = fDx, f0
2374 shl rIndexPol = rIndexPol, 6 // (i*16)*4
2375}
2376{ .mfi
2377 ldfpd fLnSin18, fLnSin20 = [rTmpPtr3], 16
2378 nop.f 0
2379 mov rExp2tom7 = 0x0fff8 // Exponent of 2^-7
2380}
2381;;
2382{ .mfi
2383 getf.sig rSignifDx = fDx // Get significand of RTN(x)
2384 nop.f 0
2385 // set p6 if -4 < x <= -3.0
2386(p6) cmp.le.unc p6, p0 = 0x8, GR_Index1
2387}
2388{ .mfi
2389 ldfpd fLnSin22, fLnSin24 = [rTmpPtr3], 16
2390 nop.f 0
2391 // mask sign bit in the exponent of (x + 1)
2392 and rRoot = rRoot, r17Ones
2393}
2394;;
2395{ .mfi
2396 ldfe fLnSin16 = [rLnSinDataPtr], -80
2397 nop.f 0
2398 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
2399}
2400{ .mfi
2401 ldfpd fLnSin26, fLnSin28 = [rTmpPtr3], 16
2402 nop.f 0
2403 and rXRnd = 1, rXRnd
2404}
2405;;
2406{ .mfi
2407 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
2408 fms.s1 fDxSqrL = fDx, fDx, fDxSqr // low part of deltaX^2
2409 // potential "left" root
2410(p11) adds rRootsBndAddr = 560, rRootsBndAddr
2411}
2412{ .mib
2413 ldfpd fLnSin30, fLnSin32 = [rTmpPtr3], 16
2414 // set p7 if |x+1| < 2^-7
2415 cmp.lt p7, p0 = rRoot, rExp2tom7
2416 // branch to special path for |x+1| < 2^-7
2417(p7) br.cond.spnt _closeToNegOne
2418}
2419;;
2420{ .mfi
2421 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
2422 fcmp.lt.s1 p14, p0 = fAbsX, fThirteen // set p14 if x > -13.0
2423 // base address of polynomial on range [-6.0, -0.75]
2424 adds rPolDataPtr = 3440, rTbl3Addr
2425}
2426{ .mfi
2427 // (i*16)*4 + (i*16)*16 = i*320 - offset of polynomial on range [-6.0, -0.75]
2428 shladd rTmpPtr = rIndexPol, 2, rIndexPol
2429 fma.s1 fXSqr = FR_FracX, FR_FracX, f0 // y^2
2430 // point to left "near root" bound
2431(p12) shladd rRootsBndAddr = rXint, 4, rRootsBndAddr
2432}
2433;;
2434{ .mfi
2435 ldfpd fLnSin34, fLnSin36 = [rTmpPtr3], 16
2436 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration
2437 // add special offset if -4 < x <= -3.0
2438(p6) adds rPolDataPtr = 640, rPolDataPtr
2439}
2440{ .mfi
2441 // point to right "near root" bound
2442 adds rTmpPtr2 = 8, rRootsBndAddr
2443 fnma.s1 fMOne = f1, f1, f0 // -1.0
2444 // Point to Bernoulli numbers
2445 adds rBernulliPtr = 544, rTbl3Addr
2446}
2447;;
2448{ .mfi
2449 // left bound of "near root" range
2450(p12) ld8 rLeftBound = [rRootsBndAddr]
2451 fmerge.se fNormDx = f1, fDx // significand of DeltaX
2452 // base address + offset for polynomial coeff. on range [-6.0, -0.75]
2453 add rPolDataPtr = rPolDataPtr, rTmpPtr
2454}
2455{ .mfi
2456 // right bound of "near root" range
2457(p12) ld8 rRightBound = [rTmpPtr2]
2458 fcvt.xf fFloatN = fFloatN
2459 // special "Bernoulli" numbers for Stirling's formula for -13 < x < -6
2460(p14) adds rBernulliPtr = 160, rBernulliPtr
2461}
2462;;
2463{ .mfi
2464 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
2465 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
2466 adds rTmpPtr3 = -160, rTmpPtr3
2467}
2468{ .mfb
2469 adds rTmpPtr = 80, rPolDataPtr
2470 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
2471 // p15 is set if -2^63 < x < -6.0 and x is not an integer
2472 // branch to path with implementation using Stirling's formula for neg. x
2473(p15) br.cond.spnt _negStirling
2474}
2475;;
2476{ .mfi
2477 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3
2478 fma.s1 fDelX4 = fDxSqr, fDxSqr, f0 // deltaX^4
2479 // Get high 4 bits of signif
2480 extr.u rIndex1Dx = rSignifDx, 59, 4
2481}
2482{ .mfi
2483 ldfe fA5 = [rTmpPtr], -16 // A5
2484 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
2485 adds rLnSinTmpPtr = 16, rLnSinDataPtr
2486}
2487;;
2488{ .mfi
2489 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
2490 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin18
2491 // Get high 15 bits of significand
2492 extr.u rX0Dx = rSignifDx, 49, 15
2493}
2494{ .mfi
2495 ldfe fA4 = [rTmpPtr], 192 // A4
2496 fms.s1 fXSqrL = FR_FracX, FR_FracX, fXSqr // low part of y^2
2497 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
2498}
2499;;
2500{ .mfi
2501 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
2502 fma.s1 fX4 = fXSqr, fXSqr, f0 // y^4
2503 adds rTmpPtr2 = 32, rTmpPtr
2504}
2505{ .mfi
2506 ldfpd fA18, fA19 = [rTmpPtr], 16 // A18, A19
2507 fma.s1 fLnSin24 = fLnSin24, fDxSqr, fLnSin22
2508 nop.i 0
2509}
2510;;
2511{ .mfi
2512 ldfe fLnSin6 = [rLnSinDataPtr], 32
2513 fma.s1 fLnSin28 = fLnSin28, fDxSqr, fLnSin26
2514 nop.i 0
2515}
2516{ .mfi
2517 ldfe fLnSin8 = [rLnSinTmpPtr], 32
2518 nop.f 0
2519 nop.i 0
2520}
2521;;
2522{ .mfi
2523 ldfpd fA20, fA21 = [rTmpPtr], 16 // A20, A21
2524 fma.s1 fLnSin32 = fLnSin32, fDxSqr, fLnSin30
2525 nop.i 0
2526}
2527{ .mfi
2528 ldfpd fA22, fA23 = [rTmpPtr2], 16 // A22, A23
2529 fma.s1 fB20 = f1, f1, FR_MHalf // 2.5
2530(p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound
2531}
2532;;
2533{ .mfi
2534 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
2535 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
2536 // set p6 if x falls in "near root" range
2537(p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound
2538}
2539{ .mfb
2540 adds rTmpPtr3 = -64, rTmpPtr
2541 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
2542 // branch to special path if x falls in "near root" range
2543(p6) br.cond.spnt _negRoots
2544}
2545;;
2546{ .mfi
2547 ldfpd fA24, fA25 = [rTmpPtr2], 16 // A24, A25
2548 fma.s1 fLnSin36 = fLnSin36, fDxSqr, fLnSin34
2549(p11) cmp.eq.unc p7, p0 = 1,rXint // p7 set if -3.0 < x < -2.5
2550}
2551{ .mfi
2552 adds rTmpPtr = -48, rTmpPtr
2553 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin16
2554 addl rDelta = 0x5338, r0 // significand of -2.605859375
2555}
2556;;
2557{ .mfi
2558 getf.exp GR_N = fDx // Get N = exponent of DeltaX
2559 fma.s1 fX6 = fX4, fXSqr, f0 // y^6
2560 // p7 set if -2.605859375 <= x < -2.5
2561(p7) cmp.gt.unc p7, p0 = rDelta, GR_X_0
2562}
2563{ .mfb
2564 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
2565 fma.s1 fDelX8 = fDelX4, fDelX4, f0 // deltaX^8
2566 // branch to special path for -2.605859375 <= x < -2.5
2567(p7) br.cond.spnt _neg2andHalf
2568}
2569;;
2570{ .mfi
2571 ldfpd fA14, fA15 = [rTmpPtr3], 16 // A14, A15
2572 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
2573 adds rTmpPtr2 = 128 , rPolDataPtr
2574}
2575{ .mfi
2576 ldfpd fA16, fA17 = [rTmpPtr], 16 // A16, A17
2577 fma.s1 fLnSin28 = fLnSin28, fDelX4, fLnSin24
2578 adds rPolDataPtr = 144 , rPolDataPtr
2579}
2580;;
2581{ .mfi
2582 ldfe fLnSin10 = [rLnSinDataPtr], 32
2583 fma.s1 fRes1H = fA3, FR_FracX, f0 // (A3*y)hi
2584 and GR_N = GR_N, r17Ones // mask sign bit
2585}
2586{ .mfi
2587 ldfe fLnSin12 = [rLnSinTmpPtr]
2588 fma.s1 fDelX6 = fDxSqr, fDelX4, f0 // DeltaX^6
2589 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
2590}
2591;;
2592{ .mfi
2593 ldfe fA13 = [rPolDataPtr], -32 // A13
2594 fma.s1 fA4 = fA5, FR_FracX, fA4 // A5*y + A4
2595 // Get bits 30-15 of X_0 * Z_1
2596 pmpyshr2.u GR_X_1 = rX0Dx, GR_Z_1, 15
2597}
2598{ .mfi
2599 ldfe fA12 = [rTmpPtr2], -32 // A12
2600 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
2601 sub GR_N = GR_N, rExpHalf, 1 // unbisaed exponent of DeltaX
2602}
2603;;
2604//
2605// For performance, don't use result of pmpyshr2.u for 4 cycles.
2606//
2607.pred.rel "mutex",p10,p11
2608{ .mfi
2609 ldfe fA11 = [rPolDataPtr], -32 // A11
2610 // High part of log(|x|) = Y_hi = N * log2_hi + H
2611 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H
2612(p10) cmp.eq p8, p9 = rXRnd, r0
2613}
2614{ .mfi
2615 ldfe fA10 = [rTmpPtr2], -32 // A10
2616 fma.s1 fRes6H = fA1, FR_FracX, f0 // (A1*y)hi
2617(p11) cmp.eq p9, p8 = rXRnd, r0
2618}
2619;;
2620{ .mfi
2621 ldfe fA9 = [rPolDataPtr], -32 // A9
2622 fma.s1 fB14 = fLnSin6, fDxSqr, f0 // (LnSin6*deltaX^2)hi
2623 cmp.eq p6, p7 = 4, rSgnGamSize
2624}
2625{ .mfi
2626 ldfe fA8 = [rTmpPtr2], -32 // A8
2627 fma.s1 fA18 = fA19, FR_FracX, fA18
2628 nop.i 0
2629}
2630;;
2631{ .mfi
2632 ldfe fA7 = [rPolDataPtr] // A7
2633 fma.s1 fA23 = fA23, FR_FracX, fA22
2634 nop.i 0
2635}
2636{ .mfi
2637 ldfe fA6 = [rTmpPtr2] // A6
2638 fma.s1 fA21 = fA21, FR_FracX, fA20
2639 nop.i 0
2640}
2641;;
2642{ .mfi
2643 ldfe fLnSin14 = [rLnSinDataPtr]
2644 fms.s1 fRes1L = fA3, FR_FracX, fRes1H // delta((A3*y)hi)
2645 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
2646}
2647{ .mfi
2648 setf.sig fFloatNDx = GR_N
2649 fadd.s1 fPol = fRes1H, fA2 // (A3*y + A2)hi
2650 nop.i 0
2651}
2652;;
2653{ .mfi
2654 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1
2655 fma.s1 fRes2H = fA4, fXSqr, f0 // ((A5 + A4*y)*y^2)hi
2656 nop.i 0
2657}
2658{ .mfi
2659 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
2660 fma.s1 fA25 = fA25, FR_FracX, fA24
2661 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
2662}
2663;;
2664.pred.rel "mutex",p8,p9
2665{ .mfi
2666 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
2667 fms.s1 fRes6L = fA1, FR_FracX, fRes6H // delta((A1*y)hi)
2668 // sign of GAMMA(x) is negative
2669(p8) adds rSgnGam = -1, r0
2670}
2671{ .mfi
2672 adds rTmpPtr = 8, GR_ad_tbl_2
2673 fadd.s1 fRes3H = fRes6H, fA0 // (A1*y + A0)hi
2674 // sign of GAMMA(x) is positive
2675(p9) adds rSgnGam = 1, r0
2676}
2677;;
2678{ .mfi
2679 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2
2680 // (LnSin6*deltaX^2 + LnSin4)hi
2681 fadd.s1 fLnSinH = fB14, fLnSin4
2682 nop.i 0
2683}
2684{ .mfi
2685 ldfd FR_h2 = [rTmpPtr] // Load h_2
2686 fms.s1 fB16 = fLnSin6, fDxSqr, fB14 // delta(LnSin6*deltaX^2)
2687 nop.i 0
2688}
2689;;
2690{ .mfi
2691 ldfd fhDelX = [GR_ad_tbl_1] // Load h_1
2692 fma.s1 fA21 = fA21, fXSqr, fA18
2693 nop.i 0
2694}
2695{ .mfi
2696 nop.m 0
2697 fma.s1 fLnSin36 = fLnSin36, fDelX4, fLnSin32
2698 nop.i 0
2699}
2700;;
2701{ .mfi
2702 nop.m 0
2703 fma.s1 fRes1L = fA3L, FR_FracX, fRes1L // (A3*y)lo
2704 // Get bits 30-15 of X_1 * Z_2
2705 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
2706}
2707{ .mfi
2708 nop.m 0
2709 fsub.s1 fPolL = fA2, fPol
2710 nop.i 0
2711}
2712;;
2713//
2714// For performance, don't use result of pmpyshr2.u for 4 cycles.
2715//
2716{ .mfi
2717 nop.m 0
2718 // delta(((A5 + A4*y)*y^2)hi)
2719 fms.s1 fRes2L = fA4, fXSqr, fRes2H
2720 nop.i 0
2721}
2722{ .mfi
2723 nop.m 0
2724 // (((A5 + A4*y)*y^2) + A3*y + A2)hi
2725 fadd.s1 fRes4H = fRes2H, fPol
2726 nop.i 0
2727}
2728;;
2729{ .mfi
2730 // store signgam if size of variable is 4 bytes
2731(p6) st4 [rSgnGamAddr] = rSgnGam
2732 fma.s1 fRes6L = fA1L, FR_FracX, fRes6L // (A1*y)lo
2733 nop.i 0
2734}
2735{ .mfi
2736 // store signgam if size of variable is 8 bytes
2737(p7) st8 [rSgnGamAddr] = rSgnGam
2738 fsub.s1 fRes3L = fA0, fRes3H
2739 nop.i 0
2740}
2741;;
2742{ .mfi
2743 nop.m 0
2744 fsub.s1 fLnSinL = fLnSin4, fLnSinH
2745 nop.i 0
2746}
2747{ .mfi
2748 nop.m 0
2749 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)hi
2750 fma.s1 fB18 = fLnSinH, fDxSqr, f0
2751 nop.i 0
2752}
2753;;
2754{ .mfi
2755 adds rTmpPtr = 8, rTbl3Addr
2756 fma.s1 fB16 = fLnSin6, fDxSqrL, fB16 // (LnSin6*deltaX^2)lo
2757 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
2758}
2759{ .mfi
2760 nop.m 0
2761 fma.s1 fA25 = fA25, fXSqr, fA23
2762 nop.i 0
2763}
2764;;
2765{ .mfi
2766 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
2767 fadd.s1 fPolL = fPolL, fRes1H
2768 nop.i 0
2769}
2770{ .mfi
2771 shladd rTmpPtr = GR_Index3, 4, rTmpPtr // Point to G_3
2772 fadd.s1 fRes1L = fRes1L, fA2L // (A3*y)lo + A2lo
2773 nop.i 0
2774}
2775;;
2776{ .mfi
2777 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3] // Load G_3, H_3
2778 fma.s1 fRes2L = fA4, fXSqrL, fRes2L // ((A5 + A4*y)*y^2)lo
2779 nop.i 0
2780}
2781{ .mfi
2782 ldfd FR_h3 = [rTmpPtr] // Load h_3
2783 fsub.s1 fRes4L = fPol, fRes4H
2784 nop.i 0
2785}
2786;;
2787{ .mfi
2788 nop.m 0
2789 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)hi
2790 fma.s1 fRes7H = fRes4H, fXSqr, f0
2791 nop.i 0
2792}
2793{ .mfi
2794 nop.m 0
2795 fma.s1 fA15 = fA15, FR_FracX, fA14
2796 nop.i 0
2797}
2798;;
2799{ .mfi
2800 nop.m 0
2801 fadd.s1 fRes3L = fRes3L, fRes6H
2802 nop.i 0
2803}
2804{ .mfi
2805 nop.m 0
2806 fadd.s1 fRes6L = fRes6L, fA0L // (A1*y)lo + A0lo
2807 nop.i 0
2808}
2809;;
2810{ .mfi
2811 nop.m 0
2812 fadd.s1 fLnSinL = fLnSinL, fB14
2813
2814 nop.i 0
2815}
2816{ .mfi
2817 nop.m 0
2818 // delta((LnSin6*deltaX^2 + LnSin4)*deltaX^2)
2819 fms.s1 fB20 = fLnSinH, fDxSqr, fB18
2820 nop.i 0
2821}
2822;;
2823{ .mfi
2824 nop.m 0
2825 fadd.s1 fPolL = fPolL, fRes1L // (A3*y + A2)lo
2826
2827 nop.i 0
2828}
2829{ .mfi
2830 nop.m 0
2831 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)hi
2832 fadd.s1 fLnSin6 = fB18, fLnSin2
2833 nop.i 0
2834}
2835;;
2836{ .mfi
2837 nop.m 0
2838 fadd.s1 fRes4L = fRes4L, fRes2H
2839 nop.i 0
2840}
2841{ .mfi
2842 nop.m 0
2843 fma.s1 fA17 = fA17, FR_FracX, fA16
2844 nop.i 0
2845}
2846;;
2847{ .mfi
2848 nop.m 0
2849 // delta(((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)
2850 fms.s1 fRes7L = fRes4H, fXSqr, fRes7H
2851 nop.i 0
2852}
2853{ .mfi
2854 nop.m 0
2855 fadd.s1 fPol = fRes7H, fRes3H
2856 nop.i 0
2857}
2858;;
2859{ .mfi
2860 nop.m 0
2861 fadd.s1 fRes3L = fRes3L, fRes6L // (A1*y + A0)lo
2862 nop.i 0
2863}
2864{ .mfi
2865 nop.m 0
2866 fma.s1 fA25 = fA25, fX4, fA21
2867 nop.i 0
2868}
2869;;
2870{ .mfi
2871 nop.m 0
2872 // (LnSin6*deltaX^2 + LnSin4)lo
2873 fadd.s1 fLnSinL = fLnSinL, fB16
2874 nop.i 0
2875}
2876{ .mfi
2877 nop.m 0
2878 fma.s1 fB20 = fLnSinH, fDxSqrL, fB20
2879 nop.i 0
2880}
2881;;
2882{ .mfi
2883 nop.m 0
2884 fsub.s1 fLnSin4 = fLnSin2, fLnSin6
2885 nop.i 0
2886}
2887{ .mfi
2888 nop.m 0
2889 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)hi
2890 fma.s1 fLnSinH = fLnSin6, fDxSqr, f0
2891 nop.i 0
2892}
2893;;
2894{ .mfi
2895 nop.m 0
2896 // ((A5 + A4*y)*y^2)lo + (A3*y + A2)lo
2897 fadd.s1 fRes2L = fRes2L, fPolL
2898 nop.i 0
2899}
2900{ .mfi
2901 nop.m 0
2902 fma.s1 fA17 = fA17, fXSqr, fA15
2903 nop.i 0
2904}
2905;;
2906{ .mfi
2907 nop.m 0
2908 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo
2909 fma.s1 fRes7L = fRes4H, fXSqrL, fRes7L
2910 nop.i 0
2911}
2912{ .mfi
2913 nop.m 0
2914 fsub.s1 fPolL = fRes3H, fPol
2915 nop.i 0
2916}
2917;;
2918{ .mfi
2919 nop.m 0
2920 fma.s1 fA13 = fA13, FR_FracX, fA12
2921 nop.i 0
2922}
2923{ .mfi
2924 nop.m 0
2925 fma.s1 fA11 = fA11, FR_FracX, fA10
2926 nop.i 0
2927}
2928;;
2929{ .mfi
2930 nop.m 0
2931 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo
2932 fma.s1 fB20 = fLnSinL, fDxSqr, fB20
2933 nop.i 0
2934}
2935{ .mfi
2936 nop.m 0
2937 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
2938 nop.i 0
2939}
2940;;
2941{ .mfi
2942 nop.m 0
2943 fadd.s1 fLnSin4 = fLnSin4, fB18
2944 nop.i 0
2945}
2946{ .mfi
2947 nop.m 0
2948 fms.s1 fLnSinL = fLnSin6, fDxSqr, fLnSinH
2949 nop.i 0
2950}
2951;;
2952{ .mfi
2953 nop.m 0
2954 // (((A5 + A4*y)*y^2) + A3*y + A2)lo
2955 fadd.s1 fRes4L = fRes4L, fRes2L
2956 nop.i 0
2957}
2958{ .mfi
2959 nop.m 0
2960 fadd.s1 fhDelX = fhDelX, FR_h2 // h = h_1 + h_2
2961 nop.i 0
2962}
2963;;
2964{ .mfi
2965 nop.m 0
2966 fadd.s1 fRes7L = fRes7L, fRes3L
2967 nop.i 0
2968}
2969{ .mfi
2970 nop.m 0
2971 fadd.s1 fPolL = fPolL, fRes7H
2972 nop.i 0
2973}
2974;;
2975{ .mfi
2976 nop.m 0
2977 fcvt.xf fFloatNDx = fFloatNDx
2978 nop.i 0
2979}
2980{ .mfi
2981 nop.m 0
2982 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
2983 nop.i 0
2984}
2985;;
2986{ .mfi
2987 nop.m 0
2988 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
2989 nop.i 0
2990}
2991{ .mfi
2992 nop.m 0
2993 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo + (LnSin2)lo
2994 fadd.s1 fLnSin2L = fLnSin2L, fB20
2995 nop.i 0
2996}
2997;;
2998{ .mfi
2999 nop.m 0
3000 fma.s1 fA25 = fA25, fX4, fA17
3001 nop.i 0
3002}
3003{ .mfi
3004 nop.m 0
3005 fma.s1 fA13 = fA13, fXSqr, fA11
3006 nop.i 0
3007}
3008;;
3009{ .mfi
3010 nop.m 0
3011 fma.s1 fA9 = fA9, FR_FracX, fA8
3012 nop.i 0
3013}
3014{ .mfi
3015 nop.m 0
3016 fma.s1 fA7 = fA7, FR_FracX, fA6
3017 nop.i 0
3018}
3019;;
3020{ .mfi
3021 nop.m 0
3022 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin28
3023 nop.i 0
3024}
3025{ .mfi
3026 nop.m 0
3027 fma.s1 fLnSin14 = fLnSin14, fDxSqr, fLnSin12
3028 nop.i 0
3029}
3030;;
3031{ .mfi
3032 nop.m 0
3033 fma.s1 fLnSin10 = fLnSin10, fDxSqr, fLnSin8
3034 nop.i 0
3035}
3036{ .mfi
3037 nop.m 0
3038 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3039 nop.i 0
3040}
3041;;
3042{ .mfi
3043 nop.m 0
3044 fms.s1 fRDx = FR_G, fNormDx, f1 // r = G * S_hi - 1
3045 nop.i 0
3046}
3047{ .mfi
3048 nop.m 0
3049 // poly_lo = r * Q4 + Q3
3050 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3051 nop.i 0
3052}
3053;;
3054{ .mfi
3055 nop.m 0
3056 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3057 nop.i 0
3058}
3059{ .mfi
3060 nop.m 0
3061 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo + (A1*y + A0)lo
3062 fma.s1 fRes7L = fRes4L, fXSqr, fRes7L
3063 nop.i 0
3064}
3065;;
3066{ .mfi
3067 nop.m 0
3068 fma.s1 fA25 = fA25, fX4, fA13
3069 nop.i 0
3070}
3071{ .mfi
3072 nop.m 0
3073 fma.s1 fA9 = fA9, fXSqr, fA7
3074 nop.i 0
3075}
3076;;
3077{ .mfi
3078 nop.m 0
3079 // h = N * log2_lo + h
3080 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
3081 nop.i 0
3082}
3083{ .mfi
3084 nop.m 0
3085 fadd.s1 fhDelX = fhDelX, FR_h3 // h = (h_1 + h_2) + h_3
3086 nop.i 0
3087}
3088;;
3089{ .mfi
3090 nop.m 0
3091 fma.s1 fLnSin36 = fLnSin36, fDelX6, fLnSin20
3092 nop.i 0
3093}
3094{ .mfi
3095 nop.m 0
3096 fma.s1 fLnSin14 = fLnSin14, fDelX4, fLnSin10
3097 nop.i 0
3098}
3099;;
3100{ .mfi
3101 nop.m 0
3102 // poly_lo = r * Q4 + Q3
3103 fma.s1 fPolyLoDx = fRDx, FR_Q4, FR_Q3
3104 nop.i 0
3105}
3106{ .mfi
3107 nop.m 0
3108 fmpy.s1 fRDxSq = fRDx, fRDx // rsq = r * r
3109 nop.i 0
3110}
3111;;
3112{ .mfi
3113 nop.m 0
3114 // Y_hi = N * log2_hi + H
3115 fma.s1 fResLnDxH = fFloatNDx, FR_log2_hi, FR_H
3116 nop.i 0
3117}
3118{ .mfi
3119 nop.m 0
3120 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
3121 nop.i 0
3122}
3123;;
3124{ .mfi
3125 nop.m 0
3126 fma.s1 fA9 = fA25, fX4, fA9
3127 nop.i 0
3128}
3129{ .mfi
3130 nop.m 0
3131 fadd.s1 fPolL = fPolL, fRes7L
3132 nop.i 0
3133}
3134;;
3135{ .mfi
3136 nop.m 0
3137 fadd.s1 fLnSin4 = fLnSin4, fLnSin2L
3138 nop.i 0
3139}
3140{ .mfi
3141 nop.m 0
3142 // h = N * log2_lo + h
3143 fma.s1 fhDelX = fFloatNDx, FR_log2_lo, fhDelX
3144 nop.i 0
3145}
3146;;
3147{ .mfi
3148 nop.m 0
3149 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin14
3150 nop.i 0
3151}
3152{ .mfi
3153 nop.m 0
3154 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)lo
3155 fma.s1 fLnSinL = fLnSin6, fDxSqrL, fLnSinL
3156 nop.i 0
3157}
3158;;
3159{ .mfi
3160 nop.m 0
3161 // poly_lo = poly_lo * r + Q2
3162 fma.s1 fPolyLoDx = fPolyLoDx, fRDx, FR_Q2
3163 nop.i 0
3164}
3165{ .mfi
3166 nop.m 0
3167 fma.s1 fRDxCub = fRDxSq, fRDx, f0 // rcub = r^3
3168 nop.i 0
3169}
3170;;
3171{ .mfi
3172 nop.m 0
3173 famax.s0 fRes5H = fPol, fResH
3174 nop.i 0
3175}
3176{ .mfi
3177 nop.m 0
3178 // High part of (lgammal(|x|) + log(|x|))
3179 fadd.s1 fRes1H = fPol, fResH
3180 nop.i 0
3181}
3182;;
3183{ .mfi
3184 nop.m 0
3185 // poly_lo = poly_lo * r + Q2
3186 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
3187 nop.i 0
3188}
3189{ .mfi
3190 nop.m 0
3191 fma.s1 fPolL = fA9, fX6, fPolL // P25lo
3192 nop.i 0
3193}
3194;;
3195
3196{ .mfi
3197 nop.m 0
3198 famin.s0 fRes5L = fPol, fResH
3199 nop.i 0
3200}
3201{ .mfi
3202 nop.m 0
3203 // High part of -(LnSin + log(|DeltaX|))
3204 fnma.s1 fRes2H = fResLnDxH, f1, fLnSinH
3205 nop.i 0
3206}
3207;;
3208
3209{ .mfi
3210 nop.m 0
3211 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)lo
3212 fma.s1 fLnSinL = fLnSin4, fDxSqr, fLnSinL
3213 nop.i 0
3214}
3215{ .mfi
3216 nop.m 0
3217 fma.s1 fLnSin36 = fLnSin36, fDelX6, f0
3218 nop.i 0
3219}
3220;;
3221{ .mfi
3222 nop.m 0
3223 // poly_hi = Q1 * rsq + r
3224 fma.s1 fPolyHiDx = FR_Q1, fRDxSq, fRDx
3225 nop.i 0
3226}
3227{ .mfi
3228 nop.m 0
3229 // poly_lo = poly_lo*r^3 + h
3230 fma.s1 fPolyLoDx = fPolyLoDx, fRDxCub, fhDelX
3231 nop.i 0
3232}
3233;;
3234{ .mfi
3235 nop.m 0
3236 fsub.s1 fRes1L = fRes5H, fRes1H
3237 nop.i 0
3238}
3239{ .mfi
3240 nop.m 0
3241 // -(lgammal(|x|) + log(|x|))hi
3242 fnma.s1 fRes1H = fRes1H, f1, f0
3243
3244 nop.i 0
3245}
3246;;
3247{ .mfi
3248 nop.m 0
3249 // poly_hi = Q1 * rsq + r
3250 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
3251 nop.i 0
3252}
3253{ .mfi
3254 nop.m 0
3255 // poly_lo = poly_lo*r^3 + h
3256 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
3257 nop.i 0
3258}
3259;;
3260{ .mfi
3261 nop.m 0
3262 fms.s1 fRes2L = fResLnDxH, fMOne, fRes2H
3263 nop.i 0
3264}
3265;;
3266{ .mfi
3267 nop.m 0
3268 fma.s1 fLnSinL = fLnSin36, fDxSqr, fLnSinL
3269 nop.i 0
3270}
3271{ .mfi
3272 nop.m 0
3273 // Y_lo = poly_hi + poly_lo
3274 fadd.s1 fResLnDxL = fPolyHiDx, fPolyLoDx
3275 nop.i 0
3276}
3277;;
3278{ .mfi
3279 nop.m 0
3280 fadd.s1 fRes1L = fRes1L, fRes5L
3281 nop.i 0
3282}
3283{ .mfi
3284 nop.m 0
3285 // high part of the final result
3286 fadd.s1 fYH = fRes2H, fRes1H
3287 nop.i 0
3288}
3289;;
3290{ .mfi
3291 nop.m 0
3292 // Y_lo = poly_hi + poly_lo
3293 fadd.s1 fResL = FR_poly_hi, FR_poly_lo
3294 nop.i 0
3295}
3296;;
3297{ .mfi
3298 nop.m 0
3299 famax.s0 fRes4H = fRes2H, fRes1H
3300 nop.i 0
3301}
3302;;
3303{ .mfi
3304 nop.m 0
3305 famin.s0 fRes4L = fRes2H, fRes1H
3306 nop.i 0
3307}
3308;;
3309{ .mfi
3310 nop.m 0
3311 // (LnSin)lo + (log(|DeltaX|))lo
3312 fsub.s1 fLnSinL = fLnSinL, fResLnDxL
3313 nop.i 0
3314}
3315{ .mfi
3316 nop.m 0
3317 fadd.s1 fRes2L = fRes2L, fLnSinH
3318 nop.i 0
3319}
3320;;
3321{ .mfi
3322 nop.m 0
3323 //(lgammal(|x|))lo + (log(|x|))lo
3324 fadd.s1 fPolL = fResL, fPolL
3325 nop.i 0
3326}
3327;;
3328{ .mfi
3329 nop.m 0
3330 fsub.s1 fYL = fRes4H, fYH
3331 nop.i 0
3332}
3333;;
3334{ .mfi
3335 nop.m 0
3336 // Low part of -(LnSin + log(|DeltaX|))
3337 fadd.s1 fRes2L = fRes2L, fLnSinL
3338 nop.i 0
3339}
3340{ .mfi
3341 nop.m 0
3342 // Low part of (lgammal(|x|) + log(|x|))
3343 fadd.s1 fRes1L = fRes1L, fPolL
3344 nop.i 0
3345}
3346;;
3347{ .mfi
3348 nop.m 0
3349 fadd.s1 fYL = fYL, fRes4L
3350 nop.i 0
3351}
3352{ .mfi
3353 nop.m 0
3354 fsub.s1 fRes2L = fRes2L, fRes1L
3355 nop.i 0
3356}
3357;;
3358{ .mfi
3359 nop.m 0
3360 // low part of the final result
3361 fadd.s1 fYL = fYL, fRes2L
3362 nop.i 0
3363}
3364;;
3365{ .mfb
3366 nop.m 0
3367 // final result for -6.0 < x <= -0.75, non-integer, "far" from roots
3368 fma.s0 f8 = fYH, f1, fYL
3369 // exit here for -6.0 < x <= -0.75, non-integer, "far" from roots
3370 br.ret.sptk b0
3371}
3372;;
3373
3374 // here if |x+1| < 2^(-7): path for x close to -1; deltaX below denotes x+1 (held in fDx)
3375.align 32
3376_closeToNegOne:
3377{ .mfi
3378 getf.exp GR_N = fDx // Get N = sign and biased exponent of deltaX
3379 fmerge.se fAbsX = f1, fDx // significand of deltaX with sign and exponent of 1.0
3380 // Get high 4 bits of significand of deltaX
3381 extr.u rIndex1Dx = rSignifDx, 59, 4
3382}
3383{ .mfi
3384 addl rPolDataPtr= @ltoff(lgammal_1pEps_data),gp // linkage table entry of lgammal_1pEps_data
3385 fma.s1 fA0L = fDxSqr, fDxSqr, f0 // deltaX^4
3386 // sign of GAMMA is positive if p10 is set to 1
3387(p10) adds rSgnGam = 1, r0
3388}
3389;;
3390{ .mfi
3391 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
3392 fnma.s1 fResL = fDx, f1, f0 // -(x+1)
3393 // Get high 15 bits of significand
3394 extr.u GR_X_0 = rSignifDx, 49, 15
3395}
3396{ .mfi
3397 ld8 rPolDataPtr = [rPolDataPtr] // address of lgammal_1pEps_data
3398 nop.f 0
3399 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
3400}
3401;;
3402{ .mfi
3403 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
3404 nop.f 0
3405 and GR_N = GR_N, r17Ones // mask sign bit
3406}
3407{ .mfi
3408 adds rTmpPtr = 8, GR_ad_tbl_1 // Point to h_1
3409 nop.f 0
3410 cmp.eq p6, p7 = 4, rSgnGamSize // p6: signgam is 4 bytes wide, p7: otherwise
3411}
3412;;
3413{ .mfi
3414 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
3415 nop.f 0
3416 adds rTmpPtr2 = 96, rPolDataPtr // Point to P8, P7 (log polynomial)
3417}
3418{ .mfi
3419 ldfd FR_h = [rTmpPtr] // Load h_1
3420 nop.f 0
3421 // unbiased exponent of deltaX
3422 sub GR_N = GR_N, rExpHalf, 1
3423}
3424;;
3425{ .mfi
3426 adds rTmpPtr3 = 192, rPolDataPtr // Point to LnSin6
3427 nop.f 0
3428 // sign of GAMMA is negative if p11 is set to 1
3429(p11) adds rSgnGam = -1, r0
3430}
3431{ .mfi
3432 ldfe fA1 = [rPolDataPtr], 16 // A1
3433 nop.f 0
3434 nop.i 0
3435}
3436;;
3437{.mfi
3438 ldfe fA2 = [rPolDataPtr], 16 // A2
3439 nop.f 0
3440 // Get bits 30-15 of X_0 * Z_1
3441 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
3442}
3443{ .mfi
3444 ldfpd fA20, fA19 = [rTmpPtr2], 16 // P8, P7
3445 nop.f 0
3446 nop.i 0
3447}
3448;;
3449//
3450// For performance, don't use result of pmpyshr2.u for 4 cycles.
3451//
3452{ .mfi
3453 ldfe fA3 = [rPolDataPtr], 16 // A3
3454 nop.f 0
3455 nop.i 0
3456}
3457{ .mfi
3458 ldfpd fA18, fA17 = [rTmpPtr2], 16 // P6, P5
3459 nop.f 0
3460 nop.i 0
3461}
3462;;
3463{ .mfi
3464 ldfe fA4 = [rPolDataPtr], 16 // A4
3465 nop.f 0
3466 nop.i 0
3467}
3468{ .mfi
3469 ldfpd fA16, fA15 = [rTmpPtr2], 16 // P4, P3
3470 nop.f 0
3471 nop.i 0
3472}
3473;;
3474{ .mfi
3475 ldfpd fA5L, fA6 = [rPolDataPtr], 16 // A5 (kept in fA5L), A6
3476 nop.f 0
3477 nop.i 0
3478}
3479{ .mfi
3480 ldfpd fA14, fA13 = [rTmpPtr2], 16 // P2, P1
3481 nop.f 0
3482 nop.i 0
3483}
3484;;
3485{ .mfi
3486 ldfpd fA7, fA8 = [rPolDataPtr], 16 // A7, A8
3487 nop.f 0
3488 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
3489}
3490{ .mfi
3491 ldfe fLnSin2 = [rTmpPtr2], 16 // LnSin2
3492 nop.f 0
3493 nop.i 0
3494}
3495;;
3496{ .mfi
3497 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
3498 nop.f 0
3499 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
3500}
3501{ .mfi
3502 ldfe fLnSin4 = [rTmpPtr2], 32 // LnSin4
3503 nop.f 0
3504 nop.i 0
3505}
3506;;
3507{ .mfi
3508 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
3509 nop.f 0
3510 adds rTmpPtr = 8, GR_ad_tbl_2 // Point to h_2
3511}
3512{ .mfi
3513 // Put integer N into rightmost significand
3514 setf.sig fFloatN = GR_N
3515 nop.f 0
3516 nop.i 0
3517}
3518;;
3519{ .mfi
3520 ldfe fLnSin6 = [rTmpPtr3] // LnSin6
3521 nop.f 0
3522 nop.i 0
3523}
3524{ .mfi
3525 ldfe fLnSin8 = [rTmpPtr2] // LnSin8
3526 nop.f 0
3527 nop.i 0
3528}
3529;;
3530{ .mfi
3531 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
3532 nop.f 0
3533 nop.i 0
3534}
3535{ .mfi
3536 ldfd FR_h2 = [rTmpPtr] // Load h_2
3537 nop.f 0
3538 nop.i 0
3539}
3540;;
3541{ .mfi
3542 // store signgam if size of variable is 4 bytes
3543(p6) st4 [rSgnGamAddr] = rSgnGam
3544 fma.s1 fResH = fA20, fResL, fA19 //polynomial for log(|x|)
3545 // Get bits 30-15 of X_1 * Z_2
3546 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
3547}
3548{ .mfi
3549 // store signgam if size of variable is 8 bytes
3550(p7) st8 [rSgnGamAddr] = rSgnGam
3551 fma.s1 fA2 = fA2, fDx, fA1 // polynomial for lgammal(|x|)
3552 nop.i 0
3553}
3554;;
3555//
3556// For performance, don't use result of pmpyshr2.u for 4 cycles.
3557//
3558{ .mfi
3559 nop.m 0
3560 fma.s1 fA18 = fA18, fResL, fA17 //polynomial for log(|x|)
3561 nop.i 0
3562}
3563;;
3564{ .mfi
3565 nop.m 0
3566 fma.s1 fA16 = fA16, fResL, fA15 //polynomial for log(|x|)
3567 nop.i 0
3568}
3569{ .mfi
3570 nop.m 0
3571 fma.s1 fA4 = fA4, fDx, fA3 // polynomial for lgammal(|x|)
3572 nop.i 0
3573}
3574;;
3575{ .mfi
3576 nop.m 0
3577 fma.s1 fA14 = fA14, fResL, fA13 //polynomial for log(|x|)
3578 nop.i 0
3579}
3580{ .mfi
3581 nop.m 0
3582 fma.s1 fA6 = fA6, fDx, fA5L // polynomial for lgammal(|x|)
3583 nop.i 0
3584}
3585;;
3586{ .mfi
3587 nop.m 0
3588 fma.s1 fPol = fA8, fDx, fA7 // polynomial for lgammal(|x|)
3589 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
3590}
3591;;
3592{ .mfi
3593 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
3594 // low part of lnsin polynomial
3595 fma.s1 fRes3L = fLnSin4, fDxSqr, fLnSin2
3596 nop.i 0
3597}
3598;;
3599{ .mfi
3600 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
3601 fcvt.xf fFloatN = fFloatN // N as FP number
3602 nop.i 0
3603}
3604{ .mfi
3605 nop.m 0
3606 fma.s1 fResH = fResH, fDxSqr, fA18 // High part of log(|x|)
3607 nop.i 0
3608}
3609;;
3610{ .mfi
3611 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
3612 fma.s1 fA4 = fA4, fDxSqr, fA2 // Low part of lgammal(|x|)
3613 nop.i 0
3614}
3615{ .mfi
3616 nop.m 0
3617 // high part of lnsin polynomial
3618 fma.s1 fRes3H = fLnSin8, fDxSqr, fLnSin6
3619 nop.i 0
3620}
3621;;
3622{ .mfi
3623 nop.m 0
3624 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
3625 nop.i 0
3626}
3627{ .mfi
3628 nop.m 0
3629 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
3630 nop.i 0
3631}
3632;;
3633{ .mfi
3634 nop.m 0
3635 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
3636 nop.i 0
3637}
3638{ .mfi
3639 nop.m 0
3640 fma.s1 fA16 = fA16, fDxSqr, fA14 // Low part of log(|x|)
3641 nop.i 0
3642}
3643;;
3644{ .mfi
3645 nop.m 0
3646 fma.s1 fPol = fPol, fDxSqr, fA6 // High part of lgammal(|x|)
3647 nop.i 0
3648}
3649;;
3650{ .mfi
3651 nop.m 0
3652 fma.s1 fResH = fResH, fA0L, fA16 // (log(|x|) + deltaX)/deltaX^2
3653 nop.i 0
3654}
3655;;
3656{ .mfi
3657 nop.m 0
3658 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
3659 nop.i 0
3660}
3661{ .mfi
3662 nop.m 0
3663 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3664 nop.i 0
3665}
3666;;
3667{ .mfi
3668 nop.m 0
3669 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
3670 nop.i 0
3671}
3672;;
3673{ .mfi
3674 nop.m 0
3675 fma.s1 fResH = fResH, fDxSqr, fResL // log(|x|)
3676 nop.i 0
3677}
3678{ .mfi
3679 nop.m 0
3680 fma.s1 fPol = fPol, fA0L, fA4 // lgammal(|x|)/deltaX
3681 nop.i 0
3682}
3683;;
3684{ .mfi
3685 nop.m 0
3686 fms.s1 FR_r = FR_G, fAbsX, f1 // r = G * S_hi - 1
3687 nop.i 0
3688}
3689{ .mfi
3690 nop.m 0
3691 // high part of log(|deltaX|) = Y_hi = N * log2_hi + H
3692 fma.s1 fRes4H = fFloatN, FR_log2_hi, FR_H
3693 nop.i 0
3694}
3695;;
3696{ .mfi
3697 nop.m 0
3698 // h = N * log2_lo + h
3699 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
3700 nop.i 0
3701}
3702;;
3703{ .mfi
3704 nop.m 0
3705 fma.s1 fResH = fPol, fDx, fResH // lgammal(|x|) + log(|x|)
3706 nop.i 0
3707}
3708{ .mfi
3709 nop.m 0
3710 // lnsin/deltaX^2
3711 fma.s1 fRes3H = fRes3H, fA0L, fRes3L
3712 nop.i 0
3713}
3714;;
3715{ .mfi
3716 nop.m 0
3717 // poly_lo = r * Q4 + Q3
3718 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3719 nop.i 0
3720}
3721{ .mfi
3722 nop.m 0
3723 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3724 nop.i 0
3725}
3726;;
3727{ .mfi
3728 nop.m 0
3729 // lnSin - log(|x|) - lgammal(|x|)
3730 fms.s1 fResH = fRes3H, fDxSqr, fResH
3731 nop.i 0
3732}
3733;;
3734
3735{ .mfi
3736 nop.m 0
3737 // poly_lo = poly_lo * r + Q2
3738 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
3739 nop.i 0
3740}
3741{ .mfi
3742 nop.m 0
3743 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
3744 nop.i 0
3745}
3746;;
3747
3748{ .mfi
3749 nop.m 0
3750 // poly_hi = Q1 * rsq + r
3751 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
3752 nop.i 0
3753}
3754;;
3755
3756{ .mfi
3757 nop.m 0
3758 // poly_lo = poly_lo*r^3 + h
3759 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
3760 nop.i 0
3761}
3762;;
3763
3764{ .mfi
3765 nop.m 0
3766 // low part of log(|deltaX|) = Y_lo = poly_hi + poly_lo
3767 fadd.s1 fRes4L = FR_poly_hi, FR_poly_lo
3768 nop.i 0
3769}
3770;;
3771{ .mfi
3772 nop.m 0
3773 fsub.s1 fResH = fResH, fRes4L // subtract low part of log(|deltaX|)
3774 nop.i 0
3775}
3776;;
3777{ .mfb
3778 nop.m 0
3779 // final result for |x+1|< 2^(-7) path
3780 fsub.s0 f8 = fResH, fRes4H
3781 // exit for |x+1|< 2^(-7) path
3782 br.ret.sptk b0
3783}
3784;;
3785
3786
3787// here if -2^63 < x < -6.0 and x is not an integer
3788// Also we are going to filter out cases when x falls in
3789// range which is "close enough" to negative root. This case
3790// may occur only for -19.5 < x since other roots of lgamma are
3791// insignificant from double extended point of view (they are closer
3792// to RTN(x) than one ulp(x)).
3793.align 32
3794_negStirling:
// _negStirling: result path for -2^63 < x < -6.0 (see the exit comments at
// the end of this routine).
// As the inline comments below spell out, the result is the sum of two
// double-length (hi + lo) quantities,
//     (-|x|*(ln(|x|)-1) - 0.5*ln(|x|) - C - S(1/x))
//   + (-ln(|DeltaX|) + LnSin)
// rounded once by the final fma.s0 into f8.
// Side effects visible here: rSgnGam (+1 or -1) is stored through
// rSgnGamAddr as a 4- or 8-byte value, selected by rSgnGamSize.
// Control flow: branches to _negRoots when p6 flags the "near root" range,
// otherwise returns through b0.
// Many inputs (fRcpX, FR_h*, FR_G*, FR_H*, fDxSqr, fDxSqrL, fLnSin*, fFloatN,
// the table pointers and predicates p10-p14) are prepared before this label,
// outside the visible part of the file.
3795{ .mfi
3796 ldfe fLnSin6 = [rLnSinDataPtr], 32
3797 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration
3798 // Get high 4 bits of significand of deltaX
3799 extr.u rIndex1Dx = rSignifDx, 59, 4
3800}
3801{ .mfi
3802 ldfe fLnSin8 = [rTmpPtr3], 32
3803 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
// first half of the "near root" test (only under p12):
// p6 = (rSignifX < rLeftBound), unsigned compare
3804(p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound
3805}
3806;;
3807{ .mfi
3808 ldfe fLnSin10 = [rLnSinDataPtr], 32
3809 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
3810 // Get high 15 bits of significand
3811 extr.u GR_X_0 = rSignifDx, 49, 15
3812}
3813{ .mfi
3814 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
3815 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
3816 // set p6 if x falls in "near root" range
// (second half of the test: p6 stays set only if rSignifX >= rRightBound)
3817(p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound
3818}
3819;;
3820{ .mfi
3821 getf.exp GR_N = fDx // Get N = sign/exponent field of deltaX (fDx)
3822 fma.s1 fDx4 = fDxSqr, fDxSqr, f0 // deltaX^4
// rTmpPtr = rBernulliPtr + 96: second load stream, used for fB14..fB20
3823 adds rTmpPtr = 96, rBernulliPtr
3824}
3825{ .mfb
3826 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
3827 fma.s1 fLnSin34 = fLnSin34, fDxSqr, fLnSin32
3828 // branch to special path if x falls in "near root" range
3829(p6) br.cond.spnt _negRoots
3830}
3831;;
// p8/p9 select the sign of GAMMA(x) (consumed by the rSgnGam moves below);
// which of them is set depends on p10/p11 and on whether rXRnd is zero.
3832.pred.rel "mutex",p10,p11
3833{ .mfi
3834 ldfe fLnSin12 = [rTmpPtr3]
3835 fma.s1 fLnSin26 = fLnSin26, fDxSqr, fLnSin24
3836(p10) cmp.eq p8, p9 = rXRnd, r0
3837}
3838{ .mfi
3839 ldfe fLnSin14 = [rLnSinDataPtr]
3840 fma.s1 fLnSin30 = fLnSin30, fDxSqr, fLnSin28
3841(p11) cmp.eq p9, p8 = rXRnd, r0
3842}
3843;;
3844{ .mfi
3845 ldfpd fB2, fB2L = [rBernulliPtr], 16
3846 fma.s1 fLnSin18 = fLnSin18, fDxSqr, fLnSin16
3847 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
3848
3849}
3850{ .mfi
3851 ldfe fB14 = [rTmpPtr], 16
3852 fma.s1 fLnSin22 = fLnSin22, fDxSqr, fLnSin20
3853 and GR_N = GR_N, r17Ones // mask sign bit
3854}
3855;;
3856{ .mfi
3857 ldfe fB4 = [rBernulliPtr], 16
3858 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration
3859 // Get bits 30-15 of X_0 * Z_1
3860 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
3861}
3862{ .mfi
3863 ldfe fB16 = [rTmpPtr], 16
3864 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
3865 adds rTmpPtr2 = 8, GR_ad_tbl_1
3866}
3867;;
3868//
3869// For performance, don't use result of pmpyshr2.u for 4 cycles.
3870//
3871{ .mfi
3872 ldfe fB6 = [rBernulliPtr], 16
3873 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
// rTmpPtr3 = rTmpPtr - 48; fB12 is loaded through it below
3874 adds rTmpPtr3 = -48, rTmpPtr
3875}
3876{ .mfi
3877 ldfe fB18 = [rTmpPtr], 16
3878 // High part of the log(|x|) = Y_hi = N * log2_hi + H
3879 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H
3880 sub GR_N = GR_N, rExpHalf, 1 // unbiased exponent of deltaX
3881}
3882;;
3883.pred.rel "mutex",p8,p9
3884{ .mfi
3885 ldfe fB8 = [rBernulliPtr], 16
3886 fma.s1 fLnSin36 = fLnSin36, fDx4, fLnSin34
3887 // sign of GAMMA(x) is negative
3888(p8) adds rSgnGam = -1, r0
3889}
3890{ .mfi
// the post-decrement by 160 rewinds rTmpPtr toward the constant C that is
// loaded through it below
3891 ldfe fB20 = [rTmpPtr], -160
3892 fma.s1 fRes5H = fLnSin4, fDxSqr, f0
3893 // sign of GAMMA(x) is positive
3894(p9) adds rSgnGam = 1, r0
3895
3896}
3897;;
3898{ .mfi
3899 ldfe fB10 = [rBernulliPtr], 16
3900 fma.s1 fLnSin30 = fLnSin30, fDx4, fLnSin26
// under p14 step back another 160 bytes before loading C
// NOTE(review): presumably selects an alternate constant set for the p14
// case; p14 is set outside this chunk - confirm
3901(p14) adds rTmpPtr = -160, rTmpPtr
3902}
3903{ .mfi
3904 ldfe fB12 = [rTmpPtr3], 16
3905 fma.s1 fDx8 = fDx4, fDx4, f0 // deltaX^8
// p6: signgam object is 4 bytes wide; p7: otherwise (stored as 8 bytes)
3906 cmp.eq p6, p7 = 4, rSgnGamSize
3907}
3908;;
// From here on the G/H/h table walk is repeated for deltaX (fGDx, fHDx,
// fhDx ...) to obtain ln(|DeltaX|), interleaved with the ln(|x|) polynomial.
3909{ .mfi
3910 ldfps fGDx, fHDx = [GR_ad_tbl_1], 8 // Load G_1, H_1
3911 fma.s1 fDx6 = fDx4, fDxSqr, f0 // deltaX^6
3912 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
3913}
3914{ .mfi
3915 ldfd fhDx = [rTmpPtr2] // Load h_1
3916 fma.s1 fLnSin22 = fLnSin22, fDx4, fLnSin18
3917 nop.i 0
3918}
3919;;
3920{ .mfi
3921 // Load two parts of C
3922 ldfpd fRes1H, fRes1L = [rTmpPtr], 16
// fRcpX is reused from here on to hold (1/x)^2
3923 fma.s1 fRcpX = fInvX, fInvX, f0 // (1/x)^2
3924 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
3925}
3926{ .mfi
3927 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
3928 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h// h = N * log2_lo + h
3929 nop.i 0
3930}
3931;;
3932{ .mfi
3933 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
3934 fnma.s1 fInvXL = f8, fInvX, f1 // relative error of 1/x
3935 nop.i 0
3936}
3937{ .mfi
3938 adds rTmpPtr2 = 8, GR_ad_tbl_2
3939 fma.s1 fLnSin8 = fLnSin8, fDxSqr, fLnSin6
3940 nop.i 0
3941}
3942;;
3943{ .mfi
3944 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
3945 // poly_lo = r * Q4 + Q3
3946 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
3947 nop.i 0
3948}
3949{ .mfi
3950 ldfd fh2Dx = [rTmpPtr2] // Load h_2
3951 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
3952 nop.i 0
3953}
3954;;
3955{ .mfi
3956 nop.m 0
3957 fma.s1 fA1L = fB2, fInvX, f0 // (B2*(1/x))hi
3958 nop.i 0
3959}
3960{ .mfi
3961 // Put integer N into rightmost significand
3962 setf.sig fFloatNDx = GR_N
3963 fms.s1 fRes4H = fResH, f1, f1 // ln(|x|)hi - 1
3964 nop.i 0
3965}
3966;;
3967{ .mfi
3968 nop.m 0
3969 fadd.s1 fRes2H = fRes5H, fLnSin2//(lnSin4*DeltaX^2 + lnSin2)hi
3970 // Get bits 30-15 of X_1 * Z_2
3971 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
3972}
3973{ .mfi
3974 nop.m 0
// rounding error of the product lnSin4*DeltaX^2 (exact via fused multiply-subtract)
3975 fms.s1 fRes5L = fLnSin4, fDxSqr, fRes5H
3976 nop.i 0
3977}
3978;;
3979//
3980// For performance, don't use result of pmpyshr2.u for 4 cycles.
3981//
3982{ .mfi
3983 nop.m 0
3984 fma.s1 fInvX4 = fRcpX, fRcpX, f0 // (1/x)^4
3985 nop.i 0
3986}
3987{ .mfi
3988 nop.m 0
// Horner steps of the B4..B20 series in powers of (1/x)^2 start here
3989 fma.s1 fB6 = fB6, fRcpX, fB4
3990 nop.i 0
3991}
3992;;
3993{ .mfi
3994 // store signgam if size of variable is 4 bytes
3995(p6) st4 [rSgnGamAddr] = rSgnGam
3996 fma.s1 fB18 = fB18, fRcpX, fB16
3997 nop.i 0
3998}
3999{ .mfi
4000 // store signgam if size of variable is 8 bytes
4001(p7) st8 [rSgnGamAddr] = rSgnGam
4002 fma.s1 fInvXL = fInvXL, fInvX, f0 // low part of 1/x
4003 nop.i 0
4004}
4005;;
4006{ .mfi
4007 nop.m 0
4008 // poly_lo = poly_lo * r + Q2
4009 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
4010 nop.i 0
4011}
4012{ .mfi
4013 nop.m 0
4014 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
4015 nop.i 0
4016}
4017;;
4018{ .mfi
4019 nop.m 0
4020 fma.s1 fRes3H = fRes4H, f8, f0 // (-|x|*(ln(|x|)-1))hi
4021 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
4022}
4023{ .mfi
4024 nop.m 0
4025 // poly_hi = Q1 * rsq + r
4026 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
4027 nop.i 0
4028}
4029;;
4030{ .mfi
4031 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
4032 fms.s1 fA2L = fB2, fInvX, fA1L // delta(B2*(1/x))
4033 nop.i 0
4034}
4035{ .mfi
4036 nop.m 0
4037 fnma.s1 fBrnH = fRes1H, f1, fA1L // (-C - S(1/x))hi
4038 nop.i 0
4039}
4040;;
4041{ .mfi
4042 ldfps fG3Dx, fH3Dx = [GR_ad_tbl_3],8 // Load G_3, H_3
4043 fma.s1 fInvX8 = fInvX4, fInvX4, f0 // (1/x)^8
4044 nop.i 0
4045}
4046{ .mfi
4047 nop.m 0
4048 fma.s1 fB10 = fB10, fRcpX, fB8
4049 nop.i 0
4050}
4051;;
4052
4053{ .mfi
4054 ldfd fh3Dx = [GR_ad_tbl_3] // Load h_3
4055 fma.s1 fB20 = fB20, fInvX4, fB18
4056 nop.i 0
4057}
4058{ .mfi
4059 nop.m 0
4060 fma.s1 fB14 = fB14, fRcpX, fB12
4061 nop.i 0
4062}
4063;;
4064{ .mfi
4065 nop.m 0
4066 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin30
4067 nop.i 0
4068}
4069{ .mfi
4070 nop.m 0
4071 fma.s1 fLnSin12 = fLnSin12, fDxSqr, fLnSin10
4072 nop.i 0
4073}
4074;;
4075{ .mfi
4076 nop.m 0
// start of the low part of (lnSin4*DeltaX^2 + lnSin2): lnSin2 - hi ...
4077 fsub.s1 fRes2L = fLnSin2, fRes2H
4078 nop.i 0
4079}
4080{ .mfi
4081 nop.m 0
4082 fma.s1 fPol = fRes2H, fDxSqr, f0 // high part of LnSin
4083 nop.i 0
4084}
4085;;
4086{ .mfi
4087 nop.m 0
// NOTE(review): fnma yields fResH*(1 - FR_MHalf); this equals the
// documented -0.5*ln(|x|) only if FR_MHalf holds 1.5. FR_MHalf is set
// outside this chunk - confirm its value.
4088 fnma.s1 fResH = fResH, FR_MHalf, fResH // -0.5*ln(|x|)hi
4089 nop.i 0
4090}
4091{ .mfi
4092 nop.m 0
4093 fmpy.s1 fGDx = fGDx, FR_G2 // G = G_1 * G_2
4094 nop.i 0
4095}
4096;;
4097{ .mfi
4098 nop.m 0
4099 // poly_lo = poly_lo*r^3 + h
4100 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
4101 nop.i 0
4102}
4103{ .mfi
4104 nop.m 0
4105 // B2lo*(1/x)hi+ delta(B2*(1/x))
4106 fma.s1 fA2L = fB2L, fInvX, fA2L
4107 nop.i 0
4108}
4109;;
4110{ .mfi
4111 nop.m 0
4112 fma.s1 fB20 = fB20, fInvX4, fB14
4113 nop.i 0
4114}
4115{ .mfi
4116 nop.m 0
4117 fma.s1 fB10 = fB10, fInvX4, fB6
4118 nop.i 0
4119}
4120;;
4121{ .mfi
4122 nop.m 0
// convert the integer N of deltaX (placed by setf.sig above) to floating point
4123 fcvt.xf fFloatNDx = fFloatNDx
4124 nop.i 0
4125}
4126{ .mfi
4127 nop.m 0
4128 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin12
4129 nop.i 0
4130}
4131;;
4132{ .mfi
4133 nop.m 0
4134 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin22
4135 nop.i 0
4136}
4137{ .mfi
4138 nop.m 0
4139 fms.s1 fRes3L = fRes4H, f8, fRes3H // delta(-|x|*(ln(|x|)-1))
4140 nop.i 0
4141}
4142;;
4143{ .mfi
4144 nop.m 0
4145 fmpy.s1 fGDx = fGDx, fG3Dx // G = (G_1 * G_2) * G_3
4146 nop.i 0
4147}
4148{ .mfi
4149 nop.m 0
4150 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))hi
4151 fadd.s1 fRes4H = fRes3H, fResH
4152 nop.i 0
4153}
4154;;
4155{ .mfi
4156 nop.m 0
4157 fma.s1 fA2L = fInvXL, fB2, fA2L //(B2*(1/x))lo
4158 nop.i 0
4159}
4160{ .mfi
4161 nop.m 0
4162 // low part of log(|x|) = Y_lo = poly_hi + poly_lo
4163 fadd.s1 fResL = FR_poly_hi, FR_poly_lo
4164 nop.i 0
4165}
4166;;
4167{ .mfi
4168 nop.m 0
4169 fma.s1 fB20 = fB20, fInvX8, fB10
4170 nop.i 0
4171}
4172{ .mfi
4173 nop.m 0
4174 fma.s1 fInvX3 = fInvX, fRcpX, f0 // (1/x)^3
4175 nop.i 0
4176}
4177;;
4178{ .mfi
4179 nop.m 0
4180 fadd.s1 fHDx = fHDx, FR_H2 // H = H_1 + H_2
4181 nop.i 0
4182}
4183{ .mfi
4184 nop.m 0
4185 fadd.s1 fRes5L = fRes5L, fLnSin2L
4186 nop.i 0
4187}
4188;;
4189{ .mfi
4190 nop.m 0
4191 fadd.s1 fRes2L = fRes2L, fRes5H
4192 nop.i 0
4193}
4194{ .mfi
4195 nop.m 0
4196 fadd.s1 fhDx = fhDx, fh2Dx // h = h_1 + h_2
4197 nop.i 0
4198}
4199;;
4200{ .mfi
4201 nop.m 0
// start of (-C - S(1/x))lo: (-C) - hi, with fMOne used as the multiplier
4202 fms.s1 fBrnL = fRes1H, fMOne, fBrnH
4203 nop.i 0
4204}
4205{ .mfi
4206 nop.m 0
// FR_r is reused here for the deltaX logarithm (inputs fGDx, fNormDx)
4207 fms.s1 FR_r = fGDx, fNormDx, f1 // r = G * S_hi - 1
4208 nop.i 0
4209}
4210;;
4211{ .mfi
4212 nop.m 0
4213 fma.s1 fRes3L = fResL, f8 , fRes3L // (-|x|*(ln(|x|)-1))lo
4214 nop.i 0
4215}
4216{ .mfi
4217 nop.m 0
4218 fsub.s1 fRes4L = fRes3H, fRes4H
4219 nop.i 0
4220}
4221;;
4222{ .mfi
4223 nop.m 0
4224 // low part of "Bernoulli" polynomial
4225 fma.s1 fB20 = fB20, fInvX3, fA2L
4226 nop.i 0
4227}
4228{ .mfi
4229 nop.m 0
// NOTE(review): same form as the hi part above - fResL*(1 - FR_MHalf);
// confirm FR_MHalf against the "-0.5" in the comment.
4230 fnma.s1 fResL = fResL, FR_MHalf, fResL // -0.5*ln(|x|)lo
4231 nop.i 0
4232}
4233;;
4234{ .mfi
4235 nop.m 0
4236 fadd.s1 fHDx = fHDx, fH3Dx // H = (H_1 + H_2) + H_3
4237 nop.i 0
4238}
4239{ .mfi
4240 nop.m 0
// rounding error of fPol = fRes2H*fDxSqr
4241 fms.s1 fPolL = fRes2H, fDxSqr, fPol
4242 nop.i 0
4243}
4244;;
4245{ .mfi
4246 nop.m 0
4247 fadd.s1 fhDx = fhDx, fh3Dx // h = (h_1 + h_2) + h_3
4248 nop.i 0
4249}
4250{ .mfi
4251 nop.m 0
4252 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))hi
4253 fadd.s1 fB14 = fRes4H, fBrnH
4254 nop.i 0
4255}
4256;;
// Second evaluation of the log polynomial, now on the r computed for deltaX;
// FR_poly_lo, FR_rsq, FR_rcub and FR_poly_hi are reused.
4257{ .mfi
4258 nop.m 0
4259 // poly_lo = r * Q4 + Q3
4260 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
4261 nop.i 0
4262}
4263{ .mfi
4264 nop.m 0
4265 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
4266 nop.i 0
4267}
4268;;
4269{ .mfi
4270 nop.m 0
4271 fadd.s1 fRes4L = fRes4L, fResH
4272 nop.i 0
4273}
4274{ .mfi
4275 nop.m 0
4276 fadd.s1 fBrnL = fBrnL, fA1L
4277 nop.i 0
4278}
4279;;
4280{ .mfi
4281 nop.m 0
4282 // (-|x|*(ln(|x|)-1))lo + (-0.5ln(|x|))lo
4283 fadd.s1 fRes3L = fRes3L, fResL
4284 nop.i 0
4285}
4286{ .mfi
4287 nop.m 0
4288 fnma.s1 fB20 = fRes1L, f1, fB20 // -Clo - S(1/x)lo
4289 nop.i 0
4290}
4291;;
4292{ .mfi
4293 nop.m 0
4294 fadd.s1 fRes2L = fRes2L, fRes5L // (lnSin4*DeltaX^2 + lnSin2)lo
4295 nop.i 0
4296}
4297{ .mfi
4298 nop.m 0
4299 fma.s1 fPolL = fDxSqrL, fRes2H, fPolL
4300 nop.i 0
4301}
4302;;
4303{ .mfi
4304 nop.m 0
4305 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin8
4306 nop.i 0
4307}
4308{ .mfi
4309 nop.m 0
4310 fma.s1 fLnSin36 = fLnSin36, fDx8, f0
4311 nop.i 0
4312}
4313;;
4314{ .mfi
4315 nop.m 0
4316 // poly_lo = poly_lo * r + Q2
4317 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
4318 nop.i 0
4319}
4320{ .mfi
4321 nop.m 0
4322 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
4323 nop.i 0
4324}
4325;;
4326{ .mfi
4327 nop.m 0
4328 // poly_hi = Q1 * rsq + r
4329 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
4330 nop.i 0
4331}
4332{ .mfi
4333 nop.m 0
// start of the low part of fB14 = fRes4H + fBrnH: fRes4H - fB14 ...
4334 fsub.s1 fB12 = fRes4H, fB14
4335 nop.i 0
4336}
4337;;
4338{ .mfi
4339 nop.m 0
4340 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo
4341 fadd.s1 fRes4L = fRes4L, fRes3L
4342 nop.i 0
4343}
4344{ .mfi
4345 nop.m 0
4346 fadd.s1 fBrnL = fBrnL, fB20 // (-C - S(1/x))lo
4347 nop.i 0
4348}
4349;;
4350{ .mfi
4351 nop.m 0
4352 // high part of log(|DeltaX|) = Y_hi = N * log2_hi + H
4353 fma.s1 fLnDeltaH = fFloatNDx, FR_log2_hi, fHDx
4354 nop.i 0
4355}
4356{ .mfi
4357 nop.m 0
4358 // h = N * log2_lo + h
4359 fma.s1 fhDx = fFloatNDx, FR_log2_lo, fhDx
4360 nop.i 0
4361}
4362;;
4363{ .mfi
4364 nop.m 0
4365 fma.s1 fPolL = fRes2L, fDxSqr, fPolL
4366 nop.i 0
4367}
4368{ .mfi
4369 nop.m 0
// join the upper lnSin terms (already scaled by deltaX^8) with the lower group
4370 fma.s1 fLnSin14 = fLnSin36, fDxSqr, fLnSin14
4371 nop.i 0
4372}
4373;;
4374{ .mfi
4375 nop.m 0
4376 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo + (- C - S(1/x))lo
4377 fadd.s1 fBrnL = fBrnL, fRes4L
4378 nop.i 0
4379}
4380{ .mfi
4381 nop.m 0
4382 fadd.s1 fB12 = fB12, fBrnH
4383 nop.i 0
4384}
4385;;
4386{ .mfi
4387 nop.m 0
4388 // poly_lo = poly_lo*r^3 + h
4389 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, fhDx
4390 nop.i 0
4391}
4392{ .mfi
4393 nop.m 0
4394 fnma.s1 fRes1H = fLnDeltaH, f1, fPol//(-ln(|DeltaX|) + LnSin)hi
4395 nop.i 0
4396}
4397;;
4398{ .mfi
4399 nop.m 0
4400 fma.s1 fPolL = fDxSqrL, fRes2L, fPolL
4401 nop.i 0
4402}
4403{ .mfi
4404 nop.m 0
// remaining lnSin terms (lnSin6 and above) scaled by deltaX^6
4405 fma.s1 fLnSin36 = fLnSin14, fDx6, f0
4406 nop.i 0
4407}
4408;;
4409{ .mfi
4410 nop.m 0
4411 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))lo
4412 fadd.s1 fB12 = fB12, fBrnL
4413 nop.i 0
4414}
4415;;
4416{ .mfi
4417 nop.m 0
4418 // low part of log(|DeltaX|) = Y_lo = poly_hi + poly_lo
4419 fadd.s1 fLnDeltaL= FR_poly_hi, FR_poly_lo
4420 nop.i 0
4421}
4422{ .mfi
4423 nop.m 0
// start of (-ln(|DeltaX|) + LnSin)lo: (-fLnDeltaH) - hi ...
4424 fms.s1 fRes1L = fLnDeltaH, fMOne, fRes1H
4425 nop.i 0
4426}
4427;;
4428{ .mfi
4429 nop.m 0
4430 fadd.s1 fPolL = fPolL, fLnSin36
4431 nop.i 0
4432}
4433{ .mfi
4434 nop.m 0
4435 //(-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi + (-ln(|DeltaX|) + LnSin)hi
4436 fadd.s1 f8 = fRes1H, fB14
4437 nop.i 0
4438}
4439;;
// famax/famin pick the operand of larger/smaller magnitude so that the
// rounding error of the hi+hi sum can be recovered as (max - sum) + min below.
4440{ .mfi
4441 nop.m 0
4442 //max((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi,
4443 // (-ln(|DeltaX|) + LnSin)hi)
4444 famax.s1 fMaxNegStir = fRes1H, fB14
4445 nop.i 0
4446}
4447{ .mfi
4448 nop.m 0
4449 //min((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi,
4450 // (-ln(|DeltaX|) + LnSin)hi)
4451 famin.s1 fMinNegStir = fRes1H, fB14
4452 nop.i 0
4453}
4454;;
4455{ .mfi
4456 nop.m 0
4457 fadd.s1 fRes1L = fRes1L, fPol
4458 nop.i 0
4459}
4460{ .mfi
4461 nop.m 0
4462 // (-ln(|DeltaX|))lo + (LnSin)lo
4463 fnma.s1 fPolL = fLnDeltaL, f1, fPolL
4464 nop.i 0
4465}
4466;;
4467{ .mfi
4468 nop.m 0
4469 fsub.s1 f9 = fMaxNegStir, f8 // delta1
4470 nop.i 0
4471}
4472;;
4473{ .mfi
4474 nop.m 0
4475 fadd.s1 fRes1L = fRes1L, fPolL // (-ln(|DeltaX|) + LnSin)lo
4476 nop.i 0
4477}
4478;;
4479{ .mfi
4480 nop.m 0
// f9 = (max - sum) + min: rounding error of the hi+hi addition
4481 fadd.s1 f9 = f9, fMinNegStir
4482 nop.i 0
4483}
4484;;
4485{ .mfi
4486 nop.m 0
// combine the low parts of the two double-length quantities
4487 fadd.s1 fRes1L = fRes1L, fB12
4488 nop.i 0
4489}
4490;;
// NOTE(review): the next bundle is declared .mfi but lists no M-slot
// instruction (no nop.m); presumably the assembler pads the slot - confirm.
4491{ .mfi
4492 // low part of the result
4493 fadd.s1 f9 = f9, fRes1L
4494 nop.i 0
4495}
4496;;
4497{ .mfb
4498 nop.m 0
4499 // final result for -2^63 < x < -6.0 path
4500 fma.s0 f8 = f8, f1, f9
4501 // exit here for -2^63 < x < -6.0 path
4502 br.ret.sptk b0
4503}
4504;;
4505
4506// here if x falls in neighbourhood of any negative root
4507// "neighbourhood" typically means that |lgammal(x)| < 0.17
4508// on the [-3.0,-2.0] range |lgammal(x)| has even less
4509// magnitude
4510// rXint contains index of the root
4511// p10 is set if root belongs to "right" ones
4512// p11 is set if root belongs to "left" ones
4513// lgammal(x) is approximated by polynomial of
4514// 19th degree from (x - root) argument
4515.align 32
4516_negRoots:
// _negRoots: result path for x close to a negative root of lgammal.
// Forms FR_FracX = x - fRoot and evaluates a polynomial in FR_FracX with the
// coefficients A0..A19 loaded below ("x" in the comments of this routine
// means FR_FracX). A0, A1 and A2 come with explicit low parts (fA0L, fA1L,
// fA2L) and ((A2*x + A1)*x + A0) is carried as a hi/lo pair; A3..A19 are
// evaluated in working precision and folded into the low part, scaled by x^3.
// Side effects: stores rSgnGam (+1 or -1) through rSgnGamAddr as a 4- or
// 8-byte value, selected by rSgnGamSize.
// If x equals the stored root exactly, fA0 + fA0L is returned immediately.
// Table layout implied by the offsets used below: one record per root index
// rXint, 13*16 = 208 bytes long:
//   +0 A0,A0L  +16 A1,A1L  +32 A2,A2L  +48 A3  +64 A4  +80 A5
//   +96 A6,A7  +112 A8,A9  +128 A10,A11  +144 A12,A13  +160 A14,A15
//   +176 A16,A17  +192 A18,A19
4517{ .mfi
// address of the linkage-table slot holding the coefficient table address
// (dereferenced by the ld8 below)
4518 addl rPolDataPtr= @ltoff(lgammal_right_roots_polynomial_data),gp
4519 nop.f 0
4520 shl rTmpPtr2 = rXint, 7 // (i*16)*8
4521}
4522{ .mfi
// the root value is read from 288 bytes below rRootsBndAddr
4523 adds rRootsAddr = -288, rRootsBndAddr
4524 nop.f 0
4525 nop.i 0
4526}
4527;;
4528{ .mfi
4529 ldfe fRoot = [rRootsAddr] // FP representation of root
4530 nop.f 0
4531 shl rTmpPtr = rXint, 6 // (i*16)*4
4532}
4533{ .mfi
// under p11 the record offset is advanced by 3536 bytes
// NOTE(review): presumably this skips the "right" root records to reach the
// "left" root ones in the same table; confirm against the data layout
4534(p11) adds rTmpPtr2 = 3536, rTmpPtr2
4535 nop.f 0
4536 nop.i 0
4537}
4538;;
4539{ .mfi
4540 ld8 rPolDataPtr = [rPolDataPtr]
4541 nop.f 0
4542 shladd rTmpPtr = rXint, 4, rTmpPtr // (i*16) + (i*16)*4
4543}
4544{ .mfi
// second load stream starts 32 bytes into the record (at A2)
4545 adds rTmpPtr3 = 32, rTmpPtr2
4546 nop.f 0
4547 nop.i 0
4548}
4549;;
// p8/p9 select the sign of GAMMA(x) (consumed by the rSgnGam moves below);
// which of them is set depends on p10/p11 and on whether rXRnd is zero.
4550.pred.rel "mutex",p10,p11
4551{ .mfi
4552 add rTmpPtr3 = rTmpPtr, rTmpPtr3
4553 nop.f 0
4554(p10) cmp.eq p8, p9 = rXRnd, r0
4555}
4556{ .mfi
4557 // (i*16) + (i*16)*4 + (i*16)*8
4558 add rTmpPtr = rTmpPtr, rTmpPtr2
4559 nop.f 0
4560(p11) cmp.eq p9, p8 = rXRnd, r0
4561}
4562;;
4563{ .mfi
4564 add rTmpPtr2 = rPolDataPtr, rTmpPtr3
4565 nop.f 0
4566 nop.i 0
4567}
4568{ .mfi
4569 add rPolDataPtr = rPolDataPtr, rTmpPtr // begin + offset
4570 nop.f 0
4571 nop.i 0
4572}
4573;;
4574{ .mfi
4575 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
4576 nop.f 0
// rTmpPtr = record + 144 (rTmpPtr2 is still record + 32 here): A12..A15 stream
4577 adds rTmpPtr = 112, rTmpPtr2
4578}
4579{ .mfi
4580 ldfpd fA2, fA2L = [rTmpPtr2], 16 // A2
4581 nop.f 0
// p12: signgam object is 4 bytes wide; p13: otherwise (stored as 8 bytes)
4582 cmp.eq p12, p13 = 4, rSgnGamSize
4583}
4584;;
4585{ .mfi
4586 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
4587 nop.f 0
4588 nop.i 0
4589}
4590{ .mfi
// post-increment by 128 moves rTmpPtr2 on to the A16..A19 pairs
4591 ldfe fA3 = [rTmpPtr2], 128 // A3
4592 nop.f 0
4593 nop.i 0
4594}
4595;;
4596{ .mfi
4597 ldfpd fA12, fA13 = [rTmpPtr], 16 // A12, A13
4598 nop.f 0
// rTmpPtr3 = record + 96: A6..A11 stream
4599 adds rTmpPtr3 = 64, rPolDataPtr
4600}
4601{ .mfi
4602 ldfpd fA16, fA17 = [rTmpPtr2], 16 // A16, A17
4603 nop.f 0
// rPolDataPtr = record + 64: A4, A5
4604 adds rPolDataPtr = 32, rPolDataPtr
4605}
4606;;
4607.pred.rel "mutex",p8,p9
4608{ .mfi
4609 ldfpd fA14, fA15 = [rTmpPtr], 16 // A14, A15
4610 nop.f 0
4611 // sign of GAMMA(x) is negative
4612(p8) adds rSgnGam = -1, r0
4613}
4614{ .mfi
4615 ldfpd fA18, fA19 = [rTmpPtr2], 16 // A18, A19
4616 nop.f 0
4617 // sign of GAMMA(x) is positive
4618(p9) adds rSgnGam = 1, r0
4619}
4620;;
4621{ .mfi
4622 ldfe fA4 = [rPolDataPtr], 16 // A4
4623 nop.f 0
4624 nop.i 0
4625}
4626{ .mfi
4627 ldfpd fA6, fA7 = [rTmpPtr3], 16 // A6, A7
4628 nop.f 0
4629 nop.i 0
4630}
4631;;
4632{ .mfi
4633 ldfe fA5 = [rPolDataPtr], 16 // A5
4634 // if x equals to (rounded) root exactly
4635 fcmp.eq.s1 p6, p0 = f8, fRoot
4636 nop.i 0
4637}
4638{ .mfi
4639 ldfpd fA8, fA9 = [rTmpPtr3], 16 // A8, A9
// polynomial argument: FR_FracX = x - root
4640 fms.s1 FR_FracX = f8, f1, fRoot
4641 nop.i 0
4642}
4643;;
4644{ .mfi
4645 // store signgam if size of variable is 4 bytes
4646(p12) st4 [rSgnGamAddr] = rSgnGam
4647 nop.f 0
4648 nop.i 0
4649}
4650{ .mfb
4651 // store signgam if size of variable is 8 bytes
4652(p13) st8 [rSgnGamAddr] = rSgnGam
4653 // answer if x equals to (rounded) root exactly
4654(p6) fadd.s0 f8 = fA0, fA0L
4655 // exit if x equals to (rounded) root exactly
4656(p6) br.ret.spnt b0
4657}
4658;;
4659{ .mmf
4660 ldfpd fA10, fA11 = [rTmpPtr3], 16 // A10, A11
4661 nop.m 0
4662 nop.f 0
4663}
4664;;
4665{ .mfi
4666 nop.m 0
4667 fma.s1 fResH = fA2, FR_FracX, f0 // (A2*x)hi
4668 nop.i 0
4669}
4670{ .mfi
4671 nop.m 0
4672 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2
4673 nop.i 0
4674}
4675;;
// Pairwise first-degree steps for the high-order coefficients:
// fA17 = A17*x + A16, fA13 = A13*x + A12, fA19 = A19*x + A18, ...
4676{ .mfi
4677 nop.m 0
4678 fma.s1 fA17 = fA17, FR_FracX, fA16
4679 nop.i 0
4680}
4681{.mfi
4682 nop.m 0
4683 fma.s1 fA13 = fA13, FR_FracX, fA12
4684 nop.i 0
4685}
4686;;
4687{ .mfi
4688 nop.m 0
4689 fma.s1 fA19 = fA19, FR_FracX, fA18
4690 nop.i 0
4691}
4692{.mfi
4693 nop.m 0
4694 fma.s1 fA15 = fA15, FR_FracX, fA14
4695 nop.i 0
4696}
4697;;
4698{.mfi
4699 nop.m 0
// fPol starts the Horner chain for A7..A3
4700 fma.s1 fPol = fA7, FR_FracX, fA6
4701 nop.i 0
4702}
4703;;
4704{.mfi
4705 nop.m 0
4706 fma.s1 fA9 = fA9, FR_FracX, fA8
4707 nop.i 0
4708}
4709;;
4710{ .mfi
4711 nop.m 0
4712 fms.s1 fResL = fA2, FR_FracX, fResH // delta(A2*x)
4713 nop.i 0
4714}
4715{.mfi
4716 nop.m 0
4717 fadd.s1 fRes1H = fResH, fA1 // (A2*x + A1)hi
4718 nop.i 0
4719}
4720;;
4721{ .mfi
4722 nop.m 0
4723 fma.s1 fA11 = fA11, FR_FracX, fA10
4724 nop.i 0
4725}
4726{.mfi
4727 nop.m 0
4728 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
4729 nop.i 0
4730}
4731;;
4732{ .mfi
4733 nop.m 0
// combine pairs with x^2: A16..A19 and A12..A15 groups
4734 fma.s1 fA19 = fA19, fA4L, fA17
4735 nop.i 0
4736}
4737{.mfi
4738 nop.m 0
4739 fma.s1 fA15 = fA15, fA4L, fA13
4740 nop.i 0
4741}
4742;;
4743{ .mfi
4744 nop.m 0
4745 fma.s1 fPol = fPol, FR_FracX, fA5
4746 nop.i 0
4747}
4748{.mfi
4749 nop.m 0
4750 fma.s1 fA3L = fA4L, FR_FracX, f0 // x^3
4751 nop.i 0
4752}
4753;;
4754{ .mfi
4755 nop.m 0
4756 // delta(A2*x) + A2L*x = (A2*x)lo
4757 fma.s1 fResL = fA2L, FR_FracX, fResL
4758 nop.i 0
4759}
4760{.mfi
4761 nop.m 0
// start of (A2*x + A1)lo: A1 - hi ...
4762 fsub.s1 fRes1L = fA1, fRes1H
4763 nop.i 0
4764}
4765;;
4766{ .mfi
4767 nop.m 0
4768 fma.s1 fA11 = fA11, fA4L, fA9
4769 nop.i 0
4770}
4771{.mfi
4772 nop.m 0
// combine groups with x^4: A12..A19
4773 fma.s1 fA19 = fA19, fA5L, fA15
4774 nop.i 0
4775}
4776;;
4777{.mfi
4778 nop.m 0
4779 fma.s1 fPol = fPol, FR_FracX, fA4
4780 nop.i 0
4781}
4782;;
4783{ .mfi
4784 nop.m 0
4785 fadd.s1 fResL = fResL, fA1L // (A2*x)lo + A1
4786 nop.i 0
4787}
4788{.mfi
4789 nop.m 0
4790 fadd.s1 fRes1L = fRes1L, fResH
4791 nop.i 0
4792}
4793;;
4794{ .mfi
4795 nop.m 0
4796 fma.s1 fRes2H = fRes1H, FR_FracX, f0 // ((A2*x + A1)*x)hi
4797 nop.i 0
4798}
4799;;
4800{.mfi
4801 nop.m 0
// A8..A19 group
4802 fma.s1 fA19 = fA19, fA5L, fA11
4803 nop.i 0
4804}
4805;;
4806{.mfi
4807 nop.m 0
// fPol = A3 + A4*x + A5*x^2 + A6*x^3 + A7*x^4
4808 fma.s1 fPol = fPol, FR_FracX, fA3
4809 nop.i 0
4810}
4811;;
4812{ .mfi
4813 nop.m 0
4814 fadd.s1 fRes1L = fRes1L, fResL // (A2*x + A1)lo
4815 nop.i 0
4816}
4817;;
4818{ .mfi
4819 nop.m 0
4820 // delta((A2*x + A1)*x)
4821 fms.s1 fRes2L = fRes1H, FR_FracX, fRes2H
4822 nop.i 0
4823}
4824{.mfi
4825 nop.m 0
4826 fadd.s1 fRes3H = fRes2H, fA0 // ((A2*x + A1)*x + A0)hi
4827 nop.i 0
4828}
4829;;
4830{ .mfi
4831 nop.m 0
// scale the A8..A19 group by x^4 (one more factor x is applied when it is
// added into fPol below)
4832 fma.s1 fA19 = fA19, fA5L, f0
4833 nop.i 0
4834}
4835
4836;;
4837{ .mfi
4838 nop.m 0
4839 fma.s1 fRes2L = fRes1L, FR_FracX, fRes2L // ((A2*x + A1)*x)lo
4840 nop.i 0
4841}
4842{.mfi
4843 nop.m 0
// start of ((A2*x + A1)*x + A0)lo: hi part of the product minus the sum ...
4844 fsub.s1 fRes3L = fRes2H, fRes3H
4845 nop.i 0
4846}
4847;;
4848{.mfi
4849 nop.m 0
// fPol now holds the A3..A19 part of the polynomial divided by x^3
4850 fma.s1 fPol = fA19, FR_FracX, fPol
4851 nop.i 0
4852}
4853;;
4854{ .mfi
4855 nop.m 0
4856 fadd.s1 fRes3L = fRes3L, fA0
4857 nop.i 0
4858}
4859{.mfi
4860 nop.m 0
4861 fadd.s1 fRes2L = fRes2L, fA0L // ((A2*x + A1)*x)lo + A0L
4862 nop.i 0
4863}
4864;;
4865{ .mfi
4866 nop.m 0
4867 fadd.s1 fRes3L = fRes3L, fRes2L // (((A2*x + A1)*x) + A0)lo
4868 nop.i 0
4869}
4870;;
4871{.mfi
4872 nop.m 0
// add the A3..A19 contribution: fPol * x^3 (fA3L holds x^3)
4873 fma.s1 fRes3L = fPol, fA3L, fRes3L
4874 nop.i 0
4875}
4876;;
4877{ .mfb
4878 nop.m 0
4879 // final result for arguments which are close to negative roots
4880 fma.s0 f8 = fRes3H, f1, fRes3L
4881 // exit here for arguments which are close to negative roots
4882 br.ret.sptk b0
4883}
4884;;
4885
4886// here if |x| < 0.5
4887.align 32
4888lgammal_0_half:
4889{ .mfi
4890 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
4891 fma.s1 fA4L = f8, f8, f0 // x^2
4892 addl rPolDataPtr = @ltoff(lgammal_0_Half_data), gp
4893}
4894{ .mfi
4895 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1
4896 nop.f 0
4897 addl rLnSinDataPtr = @ltoff(lgammal_lnsin_data), gp
4898}
4899;;
4900{ .mfi
4901 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
4902 nop.f 0
4903 // Point to Constants_Z_2
4904 add GR_ad_z_2 = 0x140, GR_ad_z_1
4905}
4906{ .mfi
4907 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
4908 nop.f 0
4909 // Point to Constants_G_H_h2
4910 add GR_ad_tbl_2 = 0x180, GR_ad_z_1
4911}
4912;;
4913{ .mfi
4914 ld8 rPolDataPtr = [rPolDataPtr]
4915 nop.f 0
4916 // Point to Constants_G_H_h3
4917 add GR_ad_tbl_3 = 0x280, GR_ad_z_1
4918}
4919{ .mfi
4920 ldfd FR_h = [GR_ad_tbl_1] // Load h_1
4921 nop.f 0
4922 sub GR_N = rExpX, rExpHalf, 1
4923}
4924;;
4925{ .mfi
4926 ld8 rLnSinDataPtr = [rLnSinDataPtr]
4927 nop.f 0
4928 // Get bits 30-15 of X_0 * Z_1
4929 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
4930}
4931{ .mfi
4932 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
4933 nop.f 0
4934 sub GR_N = r0, GR_N
4935}
4936;;
4937//
4938// For performance, don't use result of pmpyshr2.u for 4 cycles.
4939//
4940{ .mfi
4941 ldfe FR_log2_lo = [GR_ad_q], 16 // Load log2_lo
4942 nop.f 0
4943 add rTmpPtr2 = 320, rPolDataPtr
4944}
4945{ .mfi
4946 add rTmpPtr = 32, rPolDataPtr
4947 nop.f 0
4948 // exponent of 0.25
4949 adds rExp2 = -1, rExpHalf
4950}
4951;;
4952{ .mfi
4953 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3
4954 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
4955 nop.i 0
4956}
4957{ .mfi
4958 ldfpd fA1, fA1L = [rTmpPtr], 16 // A1
4959 fms.s1 fB8 = f8, f8, fA4L // x^2 - <x^2>
4960 // set p6 if -0.5 < x <= -0.25
4961(p15) cmp.eq.unc p6, p0 = rExpX, rExp2
4962}
4963;;
4964{ .mfi
4965 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
4966 nop.f 0
4967 // set p6 if -0.5 < x <= -0.40625
4968(p6) cmp.le.unc p6, p0 = 10, GR_Index1
4969}
4970{ .mfi
4971 ldfe fA21 = [rTmpPtr2], -16 // A21
4972 // Put integer N into rightmost significand
4973 nop.f 0
4974 adds rTmpPtr = 240, rTmpPtr
4975}
4976;;
4977{ .mfi
4978 setf.sig fFloatN = GR_N
4979 nop.f 0
4980 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
4981}
4982{ .mfi
4983 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4
4984 nop.f 0
4985 adds rPolDataPtr = 304, rPolDataPtr
4986}
4987;;
4988{ .mfi
4989 ldfe fA20 = [rTmpPtr2], -32 // A20
4990 nop.f 0
4991 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
4992}
4993{ .mfi
4994 ldfe fA19 = [rTmpPtr], -32 // A19
4995 nop.f 0
4996 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2
4997}
4998;;
4999{ .mfi
5000 ldfe fA17 = [rTmpPtr], -32 // A17
5001 nop.f 0
5002 adds rTmpPtr3 = 8, GR_ad_tbl_2
5003}
5004{ .mfb
5005 ldfe fA18 = [rTmpPtr2], -32 // A18
5006 nop.f 0
5007 // branch to special path for -0.5 < x <= -0.40625
5008(p6) br.cond.spnt lgammal_near_neg_half
5009}
5010;;
5011{ .mmf
5012 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
5013 ldfe fA15 = [rTmpPtr], -32 // A15
5014 fma.s1 fB20 = fA5L, fA5L, f0 // x^8
5015}
5016;;
5017{ .mmf
5018 ldfe fA16 = [rTmpPtr2], -32 // A16
5019 ldfe fA13 = [rTmpPtr], -32 // A13
5020 fms.s1 fB16 = fA4L, fA4L, fA5L
5021}
5022;;
5023{ .mmf
5024 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2
5025 ldfd FR_h2 = [rTmpPtr3] // Load h_2
5026 fmerge.s fB10 = f8, fA5L // sign(x) * x^4
5027}
5028;;
5029{ .mmi
5030 ldfe fA14 = [rTmpPtr2], -32 // A14
5031 ldfe fA11 = [rTmpPtr], -32 // A11
5032 // Get bits 30-15 of X_1 * Z_2
5033 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
5034}
5035;;
5036//
5037// For performance, don't use result of pmpyshr2.u for 4 cycles.
5038//
5039{ .mfi
5040 ldfe fA12 = [rTmpPtr2], -32 // A12
5041 fma.s1 fRes4H = fA3, fAbsX, f0
5042 adds rTmpPtr3 = 16, GR_ad_q
5043}
5044{ .mfi
5045 ldfe fA9 = [rTmpPtr], -32 // A9
5046 nop.f 0
5047 nop.i 0
5048}
5049;;
5050{ .mmf
5051 ldfe fA10 = [rTmpPtr2], -32 // A10
5052 ldfe fA7 = [rTmpPtr], -32 // A7
5053 fma.s1 fB18 = fB20, fB20, f0 // x^16
5054}
5055;;
5056{ .mmf
5057 ldfe fA8 = [rTmpPtr2], -32 // A8
5058 ldfe fA22 = [rPolDataPtr], 16 // A22
5059 fcvt.xf fFloatN = fFloatN
5060}
5061;;
5062{ .mfi
5063 ldfe fA5 = [rTmpPtr], -32 // A5
5064 fma.s1 fA21 = fA21, fAbsX, fA20 // v16
5065 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
5066}
5067{ .mfi
5068 ldfe fA6 = [rTmpPtr2], -32 // A6
5069 nop.f 0
5070 nop.i 0
5071}
5072;;
5073{ .mmf
5074 // Point to G_3
5075 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3
5076 ldfe fA4 = [rTmpPtr2], -32 // A4
5077 fma.s1 fA19 = fA19, fAbsX, fA18 // v13
5078}
5079;;
5080.pred.rel "mutex",p14,p15
5081{ .mfi
5082 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
5083 fms.s1 fRes4L = fA3, fAbsX, fRes4H
5084(p14) adds rSgnGam = 1, r0
5085}
5086{ .mfi
5087 cmp.eq p6, p7 = 4, rSgnGamSize
5088 fadd.s1 fRes2H = fRes4H, fA2
5089(p15) adds rSgnGam = -1, r0
5090}
5091;;
5092
5093{ .mfi
5094 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
5095 fma.s1 fA17 = fA17, fAbsX, fA16 // v12
5096 nop.i 0
5097}
5098;;
5099{ .mfi
5100 ldfe FR_Q3 = [GR_ad_q], 32 // Load Q3
5101 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
5102 nop.i 0
5103}
5104{ .mfi
5105 ldfe FR_Q2 = [rTmpPtr3], 16 // Load Q2
5106 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
5107 nop.i 0
5108}
5109;;
5110{ .mfi
5111 ldfe FR_Q1 = [GR_ad_q] // Load Q1
5112 fma.s1 fA15 = fA15, fAbsX, fA14 // v8
5113 nop.i 0
5114}
5115{ .mfi
5116 adds rTmpPtr3 = 32, rLnSinDataPtr
5117 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
5118 nop.i 0
5119}
5120;;
5121{ .mmf
5122 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16
5123 ldfe fLnSin6 = [rTmpPtr3], 32
5124 fma.s1 fA13 = fA13, fAbsX, fA12 // v7
5125
5126}
5127;;
5128{ .mfi
5129 ldfe fLnSin4 = [rLnSinDataPtr], 32
5130 fma.s1 fRes4L = fA3L, fAbsX, fRes4L
5131 nop.i 0
5132}
5133{ .mfi
5134 ldfe fLnSin10 = [rTmpPtr3], 32
5135 fsub.s1 fRes2L = fA2, fRes2H
5136 nop.i 0
5137}
5138;;
5139{ .mfi
5140 ldfe fLnSin8 = [rLnSinDataPtr], 32
5141 fma.s1 fResH = fRes2H, fAbsX, f0
5142 nop.i 0
5143}
5144{ .mfi
5145 ldfe fLnSin14 = [rTmpPtr3], 32
5146 fma.s1 fA22 = fA22, fA4L, fA21 // v15
5147 nop.i 0
5148}
5149;;
5150{ .mfi
5151 ldfe fLnSin12 = [rLnSinDataPtr], 32
5152 fma.s1 fA9 = fA9, fAbsX, fA8 // v4
5153 nop.i 0
5154}
5155{ .mfi
5156 ldfd fLnSin18 = [rTmpPtr3], 16
5157 fma.s1 fA11 = fA11, fAbsX, fA10 // v5
5158 nop.i 0
5159}
5160;;
5161{ .mfi
5162 ldfe fLnSin16 = [rLnSinDataPtr], 24
5163 fma.s1 fA19 = fA19, fA4L, fA17 // v11
5164 nop.i 0
5165}
5166{ .mfi
5167 ldfd fLnSin22 = [rTmpPtr3], 16
5168 fma.s1 fPolL = fA7, fAbsX, fA6
5169 nop.i 0
5170}
5171;;
5172{ .mfi
5173 ldfd fLnSin20 = [rLnSinDataPtr], 16
5174 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
5175 nop.i 0
5176}
5177{ .mfi
5178 ldfd fLnSin26 = [rTmpPtr3], 16
5179 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
5180 nop.i 0
5181}
5182;;
5183{ .mfi
5184 ldfd fLnSin24 = [rLnSinDataPtr], 16
5185 fadd.s1 fRes2L = fRes2L, fRes4H
5186 nop.i 0
5187}
5188{ .mfi
5189 ldfd fLnSin30 = [rTmpPtr3], 16
5190 fadd.s1 fA2L = fA2L, fRes4L
5191 nop.i 0
5192}
5193;;
5194{ .mfi
5195 ldfd fLnSin28 = [rLnSinDataPtr], 16
5196 fms.s1 fResL = fRes2H, fAbsX, fResH
5197 nop.i 0
5198}
5199{ .mfi
5200 ldfd fLnSin34 = [rTmpPtr3], 8
5201 fadd.s1 fRes2H = fResH, fA1
5202 nop.i 0
5203}
5204;;
5205{ .mfi
5206 ldfd fLnSin32 = [rLnSinDataPtr]
5207 fma.s1 fA11 = fA11, fA4L, fA9 // v3
5208 nop.i 0
5209}
5210{ .mfi
5211 ldfd fLnSin36 = [rTmpPtr3]
5212 fma.s1 fA15 = fA15, fA4L, fA13 // v6
5213 nop.i 0
5214}
5215;;
5216
5217{ .mfi
5218 // store signgam if size of variable is 4 bytes
5219(p6) st4 [rSgnGamAddr] = rSgnGam
5220 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
5221 nop.i 0
5222}
5223{ .mfi
5224 // store signgam if size of variable is 8 bytes
5225(p7) st8 [rSgnGamAddr] = rSgnGam
5226 fma.s1 fA5 = fA5, fAbsX, fA4
5227 nop.i 0
5228}
5229;;
5230{ .mfi
5231 nop.m 0
5232 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1
5233 nop.i 0
5234}
5235{ .mfi
5236 nop.m 0
5237 // High part of the log(|x|): Y_hi = N * log2_hi + H
5238 fms.s1 FR_log2_hi = fFloatN, FR_log2_hi, FR_H
5239 nop.i 0
5240}
5241;;
5242{ .mfi
5243 nop.m 0
5244 fadd.s1 fA3L = fRes2L, fA2L
5245 nop.i 0
5246}
5247{ .mfi
5248 nop.m 0
5249 fma.s1 fA22 = fA22, fA5L, fA19
5250 nop.i 0
5251}
5252;;
5253{ .mfi
5254 nop.m 0
5255 fsub.s1 fRes2L = fA1, fRes2H
5256 nop.i 0
5257}
5258{ .mfi
5259 nop.m 0
5260 fma.s1 fRes3H = fRes2H, f8, f0
5261 nop.i 0
5262}
5263;;
5264{ .mfi
5265 nop.m 0
5266 fma.s1 fA15 = fA15, fA5L, fA11 // v2
5267 nop.i 0
5268}
5269{ .mfi
5270 nop.m 0
5271 fma.s1 fLnSin18 = fLnSin18, fA4L, fLnSin16
5272 nop.i 0
5273}
5274;;
5275{ .mfi
5276 nop.m 0
5277 // h = N * log2_lo + h
5278 fms.s1 FR_h = fFloatN, FR_log2_lo, FR_h
5279 nop.i 0
5280}
5281{ .mfi
5282 nop.m 0
5283 fma.s1 fPolL = fPolL, fA4L, fA5
5284 nop.i 0
5285}
5286;;
5287{ .mfi
5288 nop.m 0
5289 // poly_lo = r * Q4 + Q3
5290 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
5291 nop.i 0
5292}
5293{ .mfi
5294 nop.m 0
5295 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
5296 nop.i 0
5297}
5298;;
5299{ .mfi
5300 nop.m 0
5301 fma.s1 fResL = fA3L, fAbsX, fResL
5302 nop.i 0
5303}
5304{ .mfi
5305 nop.m 0
5306 fma.s1 fLnSin30 = fLnSin30, fA4L, fLnSin28
5307 nop.i 0
5308}
5309;;
5310{ .mfi
5311 nop.m 0
5312 fadd.s1 fRes2L = fRes2L, fResH
5313 nop.i 0
5314}
5315{ .mfi
5316 nop.m 0
5317 fms.s1 fRes3L = fRes2H, f8, fRes3H
5318 nop.i 0
5319}
5320;;
5321{ .mfi
5322 nop.m 0
5323 fadd.s1 fRes1H = fRes3H, FR_log2_hi
5324 nop.i 0
5325}
5326{ .mfi
5327 nop.m 0
5328 fma.s1 fPol = fB20, fA22, fA15
5329 nop.i 0
5330}
5331;;
5332{ .mfi
5333 nop.m 0
5334 fma.s1 fLnSin34 = fLnSin34, fA4L, fLnSin32
5335 nop.i 0
5336}
5337{ .mfi
5338 nop.m 0
5339 fma.s1 fLnSin14 = fLnSin14, fA4L, fLnSin12
5340 nop.i 0
5341}
5342;;
5343
5344{ .mfi
5345 nop.m 0
5346 // poly_lo = poly_lo * r + Q2
5347 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
5348 nop.i 0
5349}
5350{ .mfi
5351 nop.m 0
5352 fnma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
5353 nop.i 0
5354}
5355;;
5356{ .mfi
5357 nop.m 0
5358 // poly_hi = Q1 * rsq + r
5359 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
5360 nop.i 0
5361}
5362{ .mfi
5363 nop.m 0
5364 fadd.s1 fA1L = fA1L, fResL
5365 nop.i 0
5366}
5367;;
5368
5369{ .mfi
5370 nop.m 0
5371 fma.s1 fLnSin22 = fLnSin22, fA4L, fLnSin20
5372 nop.i 0
5373}
5374{ .mfi
5375 nop.m 0
5376 fma.s1 fLnSin26 = fLnSin26, fA4L, fLnSin24
5377 nop.i 0
5378}
5379;;
5380
5381{ .mfi
5382 nop.m 0
5383 fsub.s1 fRes1L = FR_log2_hi, fRes1H
5384 nop.i 0
5385}
5386{ .mfi
5387 nop.m 0
5388 fma.s1 fPol = fPol, fA5L, fPolL
5389 nop.i 0
5390}
5391;;
5392{ .mfi
5393 nop.m 0
5394 fma.s1 fLnSin34 = fLnSin36, fA5L, fLnSin34
5395 nop.i 0
5396}
5397{ .mfi
5398 nop.m 0
5399 fma.s1 fLnSin18 = fLnSin18, fA5L, fLnSin14
5400 nop.i 0
5401}
5402;;
5403{ .mfi
5404 nop.m 0
5405 fma.s1 fLnSin6 = fLnSin6, fA4L, fLnSin4
5406 nop.i 0
5407}
5408{ .mfi
5409 nop.m 0
5410 fma.s1 fLnSin10 = fLnSin10, fA4L, fLnSin8
5411 nop.i 0
5412}
5413;;
5414{ .mfi
5415 nop.m 0
5416 // poly_hi = Q1 * rsq + r
5417 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
5418 nop.i 0
5419}
5420{ .mfi
5421 nop.m 0
5422 fadd.s1 fRes2L = fRes2L, fA1L
5423 nop.i 0
5424}
5425;;
5426{ .mfi
5427 nop.m 0
5428 // poly_lo = poly_lo*r^3 + h
5429 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
5430 nop.i 0
5431}
5432{ .mfi
5433 nop.m 0
5434 fma.s1 fB2 = fLnSin2, fA4L, f0
5435 nop.i 0
5436}
5437;;
5438{ .mfi
5439 nop.m 0
5440 fadd.s1 fRes1L = fRes1L, fRes3H
5441 nop.i 0
5442}
5443{ .mfi
5444 nop.m 0
5445 fma.s1 fPol = fPol, fB10, f0
5446 nop.i 0
5447}
5448;;
5449{ .mfi
5450 nop.m 0
5451 fma.s1 fLnSin26 = fLnSin26, fA5L, fLnSin22
5452 nop.i 0
5453}
5454{ .mfi
5455 nop.m 0
5456 fma.s1 fLnSin34 = fLnSin34, fA5L, fLnSin30
5457 nop.i 0
5458}
5459;;
5460{ .mfi
5461 nop.m 0
5462 fma.s1 fLnSin10 = fLnSin10, fA5L, fLnSin6
5463 nop.i 0
5464}
5465{ .mfi
5466 nop.m 0
5467 fma.s1 fLnSin2L = fLnSin2L, fA4L, f0
5468 nop.i 0
5469}
5470;;
5471
5472{ .mfi
5473 nop.m 0
5474 fma.s1 fRes3L = fRes2L, f8, fRes3L
5475 nop.i 0
5476}
5477;;
5478{ .mfi
5479 nop.m 0
5480 // Y_lo = poly_hi + poly_lo
5481 fsub.s1 FR_log2_lo = FR_poly_lo, FR_poly_hi
5482 nop.i 0
5483}
5484{ .mfi
5485 nop.m 0
5486 fms.s1 fB4 = fLnSin2, fA4L, fB2
5487 nop.i 0
5488}
5489;;
5490{ .mfi
5491 nop.m 0
5492 fadd.s1 fRes2H = fRes1H, fPol
5493 nop.i 0
5494}
5495;;
5496{ .mfi
5497 nop.m 0
5498 fma.s1 fLnSin34 = fLnSin34, fB20, fLnSin26
5499 nop.i 0
5500}
5501;;
5502{ .mfi
5503 nop.m 0
5504 fma.s1 fLnSin18 = fLnSin18, fB20, fLnSin10
5505 nop.i 0
5506}
5507{ .mfi
5508 nop.m 0
5509 fma.s1 fLnSin2L = fB8, fLnSin2, fLnSin2L
5510 nop.i 0
5511}
5512;;
5513
5514{ .mfi
5515 nop.m 0
5516 fadd.s1 FR_log2_lo = FR_log2_lo, fRes3L
5517 nop.i 0
5518}
5519;;
5520{ .mfi
5521 nop.m 0
5522 fsub.s1 fRes2L = fRes1H, fRes2H
5523 nop.i 0
5524}
5525;;
5526{ .mfi
5527 nop.m 0
5528 fma.s1 fB6 = fLnSin34, fB18, fLnSin18
5529 nop.i 0
5530}
5531{ .mfi
5532 nop.m 0
5533 fadd.s1 fB4 = fLnSin2L, fB4
5534 nop.i 0
5535}
5536;;
5537
5538{ .mfi
5539 nop.m 0
5540 fadd.s1 fRes1L = fRes1L, FR_log2_lo
5541 nop.i 0
5542}
5543;;
5544{ .mfi
5545 nop.m 0
5546 fadd.s1 fRes2L = fRes2L, fPol
5547 nop.i 0
5548}
5549;;
5550{ .mfi
5551 nop.m 0
5552 fma.s1 fB12 = fB6, fA5L, f0
5553 nop.i 0
5554}
5555;;
5556{ .mfi
5557 nop.m 0
5558 fadd.s1 fRes2L = fRes2L, fRes1L
5559 nop.i 0
5560}
5561;;
5562
5563{ .mfi
5564 nop.m 0
5565 fms.s1 fB14 = fB6, fA5L, fB12
5566 nop.i 0
5567}
5568{ .mfb
5569 nop.m 0
5570 fadd.s1 fLnSin30 = fB2, fB12
5571 // branch out if x is negative
5572(p15) br.cond.spnt _O_Half_neg
5573}
5574;;
5575{ .mfb
5576 nop.m 0
5577 // sign(x)*Pol(|x|) - log(|x|)
5578 fma.s0 f8 = fRes2H, f1, fRes2L
5579 // it's an answer already for positive x
5580 // exit if 0 < x < 0.5
5581 br.ret.sptk b0
5582}
5583;;
5584
5585 // here if x is negative and |x| < 0.5: adds fLnSin30 (= fB2 + fB12, formed just before the branch) and the low-order terms to the positive-x result fRes2H + fRes2L, returns hi + lo in f8
5586 .align 32
5587 _O_Half_neg:
5588 { .mfi
5589 nop.m 0
5590 fma.s1 fB14 = fB16, fB6, fB14 // fB14 = fB16*fB6 + fB14, folded into fB4 below
5591 nop.i 0
5592 }
5593 { .mfi
5594 nop.m 0
5595 fsub.s1 fLnSin16 = fB2, fLnSin30 // start of rounding error of fLnSin30 = fB2 + fB12
5596 nop.i 0
5597 }
5598 ;;
5599 { .mfi
5600 nop.m 0
5601 fadd.s1 fResH = fLnSin30, fRes2H // high part of the result
5602 nop.i 0
5603 }
5604 ;;
5605 { .mfi
5606 nop.m 0
5607 fadd.s1 fLnSin16 = fLnSin16, fB12 // (fB2 - fLnSin30) + fB12
5608 nop.i 0
5609 }
5610 { .mfi
5611 nop.m 0
5612 fadd.s1 fB4 = fB14, fB4 // collect low-order terms
5613 nop.i 0
5614 }
5615 ;;
5616 { .mfi
5617 nop.m 0
5618 fadd.s1 fLnSin16 = fB4, fLnSin16
5619 nop.i 0
5620 }
5621 { .mfi
5622 nop.m 0
5623 fsub.s1 fResL = fRes2H, fResH // start of rounding error of fResH
5624 nop.i 0
5625 }
5626 ;;
5627 { .mfi
5628 nop.m 0
5629 fadd.s1 fResL = fResL, fLnSin30 // (fRes2H - fResH) + fLnSin30
5630 nop.i 0
5631 }
5632 { .mfi
5633 nop.m 0
5634 fadd.s1 fLnSin16 = fLnSin16, fRes2L // add low part of the positive-x result
5635 nop.i 0
5636 }
5637 ;;
5638 { .mfi
5639 nop.m 0
5640 fadd.s1 fResL = fResL, fLnSin16 // low part of the result
5641 nop.i 0
5642 }
5643 ;;
5644 { .mfb
5645 nop.m 0
5646 // final result for -0.5 < x < 0
5647 fma.s0 f8 = fResH, f1, fResL
5648 // exit for -0.5 < x < 0
5649 br.ret.sptk b0
5650 }
5651 ;;
5652
5653 // here if x >= 8.0
5654 // there are two computational paths:
5655 // 1) For x >10.0 Stirling's formula is used
5656 // 2) Polynomial approximation for 8.0 <= x <= 10.0 (branches to lgammal_8_10)
5657 .align 32
5658 lgammal_big_positive:
5659 { .mfi
5660 addl rPolDataPtr = @ltoff(lgammal_data), gp
5661 fmerge.se fSignifX = f1, f8 // sign and exponent of 1.0, significand of x
5662 // Get high 15 bits of significand
5663 extr.u GR_X_0 = rSignifX, 49, 15
5664 }
5665 {.mfi
5666 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
5667 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration
5668 adds rSignif1andQ = 0x5, r0 // 5 (binary 101), shifted left by 61 below
5669 }
5670 ;;
5671 {.mfi
5672 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1
5673 nop.f 0
5674 shl rSignif1andQ = rSignif1andQ, 61 // significand of 1.25
5675 }
5676 { .mfi
5677 cmp.eq p8, p0 = rExpX, rExp8 // p8 = 1 if 8.0 <= x < 16
5678 nop.f 0
5679 adds rSgnGam = 1, r0 // gamma is positive at this range
5680 }
5681 ;;
5682 { .mfi
5683 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1
5684 nop.f 0
5685 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q
5686 }
5687 { .mlx
5688 ld8 rPolDataPtr = [rPolDataPtr]
5689 movl rDelta = 0x3FF2000000000000 // 1.125 in DP
5690 }
5691 ;;
5692 { .mfi
5693 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
5694 nop.f 0
5695 add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
5696 }
5697 { .mfi
5698 // Point to Constants_G_H_h2
5699 add GR_ad_tbl_2 = 0x180, GR_ad_z_1
5700 nop.f 0
5701 // p8 = 1 if 8.0 <= x <= 10.0
5702 (p8) cmp.leu.unc p8, p0 = rSignifX, rSignif1andQ
5703 }
5704 ;;
5705 { .mfi
5706 ldfd FR_h = [GR_ad_tbl_1] // Load h_1
5707 nop.f 0
5708 // Get bits 30-15 of X_0 * Z_1
5709 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
5710 }
5711 { .mfb
5712 (p8) setf.d FR_MHalf = rDelta // FR_MHalf = 1.125 for the 8.0 <= x <= 10.0 path
5713 nop.f 0
5714 (p8) br.cond.spnt lgammal_8_10 // branch out if 8.0 <= x <= 10.0
5715 }
5716 ;;
5717 //
5718 // For performance, don't use result of pmpyshr2.u for 4 cycles.
5719 //
5720 { .mfi
5721 ldfe fA1 = [rPolDataPtr], 16 // Load overflow threshold
5722 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration
5723 // Point to Constants_G_H_h3
5724 add GR_ad_tbl_3 = 0x280, GR_ad_z_1
5725 }
5726 { .mlx
5727 nop.m 0
5728 movl rDelta = 0xBFE0000000000000 // -0.5 in DP
5729 }
5730 ;;
5731 { .mfi
5732 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
5733 nop.f 0
5734 sub GR_N = rExpX, rExpHalf, 1 // unbiased exponent of x
5735 }
5736 ;;
5737 { .mfi
5738 ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
5739 nop.f 0
5740 nop.i 0
5741 }
5742 { .mfi
5743 setf.d FR_MHalf = rDelta // FR_MHalf = -0.5 on the Stirling path
5744 nop.f 0
5745 nop.i 0
5746 }
5747 ;;
5748 { .mfi
5749 // Put integer N into rightmost significand
5750 setf.sig fFloatN = GR_N
5751 nop.f 0
5752 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
5753 }
5754 { .mfi
5755 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4
5756 nop.f 0
5757 nop.i 0
5758 }
5759 ;;
5760 { .mfi
5761 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
5762 nop.f 0
5763 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2
5764 }
5765 { .mfi
5766 ldfe FR_Q3 = [GR_ad_q], 16 // Load Q3
5767 nop.f 0
5768 nop.i 0
5769 }
5770 ;;
5771 { .mfi
5772 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
5773 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration
5774 nop.i 0
5775 }
5776 ;;
5777 { .mfi
5778 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2
5779 nop.f 0
5780 nop.i 0
5781 }
5782 ;;
5783 { .mfi
5784 ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
5785 nop.f 0
5786 nop.i 0
5787 }
5788 ;;
5789 { .mfi
5790 ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
5791 nop.f 0
5792 // Get bits 30-15 of X_1 * Z_2
5793 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15
5794 }
5795 ;;
5796 //
5797 // For performance, don't use result of pmpyshr2.u for 4 cycles.
5798 //
5799 { .mfi
5800 ldfe FR_Q1 = [GR_ad_q] // Load Q1
5801 fcmp.gt.s1 p7,p0 = f8, fA1 // check if x > overflow threshold
5802 nop.i 0
5803 }
5804 ;;
5805 {.mfi
5806 ldfpd fA0, fA0L = [rPolDataPtr], 16 // Load two parts of C
5807 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration
5808 nop.i 0
5809 }
5810 ;;
5811 { .mfb
5812 ldfpd fB2, fA1 = [rPolDataPtr], 16 // two doubles, summed into fB2 below
5813 nop.f 0
5814 (p7) br.cond.spnt lgammal_overflow // branch if x > overflow threshold
5815 }
5816 ;;
5817 {.mfi
5818 ldfe fB4 = [rPolDataPtr], 16
5819 fcvt.xf fFloatN = fFloatN // convert integer N to floating point
5820 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
5821 }
5822 ;;
5823 { .mfi
5824 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3// Point to G_3
5825 nop.f 0
5826 nop.i 0
5827 }
5828 { .mfi
5829 ldfe fB6 = [rPolDataPtr], 16
5830 nop.f 0
5831 nop.i 0
5832 }
5833 ;;
5834 { .mfi
5835 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
5836 nop.f 0
5837 nop.i 0
5838 }
5839 ;;
5840 { .mfi
5841 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
5842 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
5843 nop.i 0
5844 }
5845 { .mfi
5846 nop.m 0
5847 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
5848 nop.i 0
5849 }
5850 ;;
5851
5852 { .mfi
5853 ldfe fB8 = [rPolDataPtr], 16
5854 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
5855 nop.i 0
5856 }
5857 { .mfi
5858 nop.m 0
5859 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration
5860 nop.i 0
5861 }
5862 ;;
5863 { .mfi
5864 ldfe fB10 = [rPolDataPtr], 16
5865 nop.f 0
5866 cmp.eq p6, p7 = 4, rSgnGamSize // p6 = 1 if signgam is 4 bytes, otherwise p7 = 1
5867 }
5868 ;;
5869 { .mfi
5870 ldfe fB12 = [rPolDataPtr], 16
5871 nop.f 0
5872 nop.i 0
5873 }
5874 ;;
5875 { .mfi
5876 ldfe fB14 = [rPolDataPtr], 16
5877 nop.f 0
5878 nop.i 0
5879 }
5880 ;;
5881
5882 { .mfi
5883 ldfe fB16 = [rPolDataPtr], 16
5884 // get double extended coefficients from two doubles
5885 // two doubles are needed in Stirling's formula for negative x
5886 fadd.s1 fB2 = fB2, fA1
5887 nop.i 0
5888 }
5889 ;;
5890 { .mfi
5891 ldfe fB18 = [rPolDataPtr], 16
5892 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration
5893 nop.i 0
5894 }
5895 ;;
5896 { .mfi
5897 ldfe fB20 = [rPolDataPtr], 16
5898 nop.f 0
5899 nop.i 0
5900 }
5901 ;;
5902 { .mfi
5903 // store signgam if size of variable is 4 bytes
5904 (p6) st4 [rSgnGamAddr] = rSgnGam
5905 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
5906 nop.i 0
5907 }
5908 { .mfi
5909 // store signgam if size of variable is 8 bytes
5910 (p7) st8 [rSgnGamAddr] = rSgnGam
5911 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
5912 nop.i 0
5913 }
5914 ;;
5915 { .mfi
5916 nop.m 0
5917 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
5918 nop.i 0
5919 }
5920 ;;
5921 { .mfi
5922 nop.m 0
5923 fma.s1 fRcpX = fInvX, fInvX, f0 // 1/x^2
5924 nop.i 0
5925 }
5926 { .mfi
5927 nop.m 0
5928 fma.s1 fA0L = fB2, fInvX, fA0L // fB2*(1/x) + Clo
5929 nop.i 0
5930 }
5931 ;;
5932 { .mfi
5933 nop.m 0
5934 fms.s1 FR_r = fSignifX, FR_G, f1 // r = G * S_hi - 1
5935 nop.i 0
5936 }
5937 { .mfi
5938 nop.m 0
5939 // High part of the log(x): Y_hi = N * log2_hi + H
5940 fma.s1 fRes2H = fFloatN, FR_log2_hi, FR_H
5941 nop.i 0
5942 }
5943 ;;
5944
5945 { .mfi
5946 nop.m 0
5947 // h = N * log2_lo + h
5948 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h
5949 nop.i 0
5950 }
5951 { .mfi
5952 nop.m 0
5953 // High part of the log(x): Y_hi = N * log2_hi + H
5954 fma.s1 fRes1H = fFloatN, FR_log2_hi, FR_H
5955 nop.i 0
5956 }
5957 ;;
5958 {.mfi
5959 nop.m 0
5960 fma.s1 fPol = fB18, fRcpX, fB16 // v9
5961 nop.i 0
5962 }
5963 { .mfi
5964 nop.m 0
5965 fma.s1 fA2L = fRcpX, fRcpX, f0 // v10
5966 nop.i 0
5967 }
5968 ;;
5969 {.mfi
5970 nop.m 0
5971 fma.s1 fA3 = fB6, fRcpX, fB4 // v3
5972 nop.i 0
5973 }
5974 { .mfi
5975 nop.m 0
5976 fma.s1 fA4 = fB10, fRcpX, fB8 // v4
5977 nop.i 0
5978 }
5979 ;;
5980 { .mfi
5981 nop.m 0
5982 fms.s1 fRes2H =fRes2H, f1, f1 // log_Hi(x) -1
5983 nop.i 0
5984 }
5985 { .mfi
5986 nop.m 0
5987 // poly_lo = r * Q4 + Q3
5988 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
5989 nop.i 0
5990 }
5991 ;;
5992 { .mfi
5993 nop.m 0
5994 fma.s1 fRes1H = fRes1H, FR_MHalf, f0 // -0.5*log_Hi(x)
5995 nop.i 0
5996 }
5997 { .mfi
5998 nop.m 0
5999 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
6000 nop.i 0
6001 }
6002 ;;
6003 { .mfi
6004 nop.m 0
6005 fma.s1 fA7 = fB14, fRcpX, fB12 // v7
6006 nop.i 0
6007 }
6008 { .mfi
6009 nop.m 0
6010 fma.s1 fA8 = fA2L, fB20, fPol // v8
6011 nop.i 0
6012 }
6013 ;;
6014 { .mfi
6015 nop.m 0
6016 fma.s1 fA2 = fA4, fA2L, fA3 // v2
6017 nop.i 0
6018 }
6019 { .mfi
6020 nop.m 0
6021 fma.s1 fA4L = fA2L, fA2L, f0 // v5
6022 nop.i 0
6023 }
6024 ;;
6025 { .mfi
6026 nop.m 0
6027 fma.s1 fResH = fRes2H, f8, f0 // (x*(ln(x)-1))hi
6028 nop.i 0
6029 }
6030 { .mfi
6031 nop.m 0
6032 // poly_lo = poly_lo * r + Q2
6033 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
6034 nop.i 0
6035 }
6036 ;;
6037 { .mfi
6038 nop.m 0
6039 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
6040 nop.i 0
6041 }
6042 { .mfi
6043 nop.m 0
6044 // poly_hi = Q1 * rsq + r
6045 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
6046 nop.i 0
6047 }
6048 ;;
6049 { .mfi
6050 nop.m 0
6051 fma.s1 fA11 = fRcpX, fInvX, f0 // 1/x^3
6052 nop.i 0
6053 }
6054 { .mfi
6055 nop.m 0
6056 fma.s1 fA6 = fA8, fA2L, fA7 // v6
6057 nop.i 0
6058 }
6059 ;;
6060 { .mfi
6061 nop.m 0
6062 fms.s1 fResL = fRes2H, f8, fResH // d(x*(ln(x)-1))
6063 nop.i 0
6064 }
6065 { .mfi
6066 nop.m 0
6067 fadd.s1 fRes3H = fResH, fRes1H // (x*(ln(x)-1) -0.5ln(x))hi
6068 nop.i 0
6069 }
6070 ;;
6071 { .mfi
6072 nop.m 0
6073 // poly_lo = poly_lo*r^3 + h
6074 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
6075 nop.i 0
6076 }
6077 ;;
6078 { .mfi
6079 nop.m 0
6080 fma.s1 fPol = fA4L, fA6, fA2 // v1
6081 nop.i 0
6082 }
6083 { .mfi
6084 nop.m 0
6085 // raise inexact exception
6086 fma.s0 FR_log2_lo = FR_log2_lo, FR_log2_lo, f0
6087 nop.i 0
6088 }
6089 ;;
6090 { .mfi
6091 nop.m 0
6092 fadd.s1 fRes4H = fRes3H, fA0 // (x*(ln(x)-1) -0.5ln(x))hi + Chi
6093 nop.i 0
6094 }
6095 { .mfi
6096 nop.m 0
6097 fsub.s1 fRes3L = fResH, fRes3H // start of rounding error of fRes3H
6098 nop.i 0
6099 }
6100 ;;
6101 { .mfi
6102 nop.m 0
6103 // Y_lo = poly_hi + poly_lo
6104 fadd.s1 fRes2L = FR_poly_hi, FR_poly_lo
6105 nop.i 0
6106 }
6107 ;;
6108
6109 { .mfi
6110 nop.m 0
6111 fma.s1 fA0L = fPol, fA11, fA0L // S(1/x) + Clo
6112 nop.i 0
6113 }
6114 ;;
6115 { .mfi
6116 nop.m 0
6117 fadd.s1 fRes3L = fRes3L, fRes1H // (fResH - fRes3H) + fRes1H
6118 nop.i 0
6119 }
6120 { .mfi
6121 nop.m 0
6122 fsub.s1 fRes4L = fRes3H, fRes4H // start of rounding error of fRes4H
6123 nop.i 0
6124 }
6125 ;;
6126 { .mfi
6127 nop.m 0
6128 fma.s1 fResL = fRes2L, f8 , fResL // lo part of x*(ln(x)-1)
6129 nop.i 0
6130 }
6131 ;;
6132 { .mfi
6133 nop.m 0
6134 // Clo + S(1/x) - 0.5*logLo(x)
6135 fma.s1 fA0L = fRes2L, FR_MHalf, fA0L
6136 nop.i 0
6137 }
6138 ;;
6139 { .mfi
6140 nop.m 0
6141 fadd.s1 fRes4L = fRes4L, fA0 // (fRes3H - fRes4H) + Chi
6142 nop.i 0
6143 }
6144 ;;
6145 { .mfi
6146 nop.m 0
6147 // Clo + S(1/x) - 0.5*logLo(x) + (x*(ln(x)-1))lo
6148 fadd.s1 fA0L = fA0L, fResL
6149 nop.i 0
6150 }
6151 ;;
6152 { .mfi
6153 nop.m 0
6154 fadd.s1 fRes4L = fRes4L, fRes3L
6155 nop.i 0
6156 }
6157 ;;
6158 { .mfi
6159 nop.m 0
6160 fadd.s1 fRes4L = fRes4L, fA0L // complete low part of the result
6161 nop.i 0
6162 }
6163 ;;
6164 { .mfb
6165 nop.m 0
6166 fma.s0 f8 = fRes4H, f1, fRes4L // final result = high part + low part
6167 // exit for x > 10.0
6168 br.ret.sptk b0
6169 }
6170 ;;
6171 // here if 8.0 <= x <= 10.0
6172 // Result = P15(y), where y = significand(x) - FR_MHalf; NOTE(review): the branch from lgammal_big_positive sets FR_MHalf = 0x3FF2000000000000 = 1.125, so y = x/8.0 - 1.125 (comment previously said 1.5) - confirm
6173 .align 32
6174 lgammal_8_10:
6175 { .mfi
6176 addl rPolDataPtr = @ltoff(lgammal_8_10_data), gp
6177 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // y = fSignifX - FR_MHalf (x/8.0 - 1.125 when entered from lgammal_big_positive)
6178 cmp.eq p6, p7 = 4, rSgnGamSize // p6 = 1 if signgam is 4 bytes, otherwise p7 = 1
6179 }
6180 ;;
6181 { .mfi
6182 ld8 rLnSinDataPtr = [rPolDataPtr] // same table address as rPolDataPtr; used as a second load pointer
6183 nop.f 0
6184 nop.i 0
6185 }
6186 { .mfi
6187 ld8 rPolDataPtr = [rPolDataPtr]
6188 nop.f 0
6189 nop.i 0
6190 }
6191 ;;
6192 { .mfi
6193 adds rZ1offsett = 32, rLnSinDataPtr // table + 32: even-indexed coefficients, stride 32
6194 nop.f 0
6195 nop.i 0
6196 }
6197 { .mfi
6198 adds rLnSinDataPtr = 48, rLnSinDataPtr // table + 48: odd-indexed coefficients, stride 32
6199 nop.f 0
6200 nop.i 0
6201 }
6202 ;;
6203 { .mfi
6204 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
6205 nop.f 0
6206 nop.i 0
6207 }
6208 { .mfi
6209 ldfe fA2 = [rZ1offsett], 32 // A2 (comment previously said A5; presumably a typo)
6210 nop.f 0
6211 nop.i 0
6212 }
6213 ;;
6214 { .mfi
6215 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0
6216 fma.s1 FR_rsq = FR_FracX, FR_FracX, f0 // y^2
6217 nop.i 0
6218 }
6219 { .mfi
6220 ldfe fA3 = [rLnSinDataPtr],32 // A3 (comment previously said A5; presumably a typo)
6221 nop.f 0
6222 nop.i 0
6223 }
6224 ;;
6225 { .mmf
6226 ldfe fA4 = [rZ1offsett], 32 // A4
6227 ldfe fA5 = [rLnSinDataPtr], 32 // A5
6228 nop.f 0
6229 }
6230 ;;
6231 { .mmf
6232 ldfe fA6 = [rZ1offsett], 32 // A6
6233 ldfe fA7 = [rLnSinDataPtr], 32 // A7
6234 nop.f 0
6235 }
6236 ;;
6237 { .mmf
6238 ldfe fA8 = [rZ1offsett], 32 // A8
6239 ldfe fA9 = [rLnSinDataPtr], 32 // A9
6240 nop.f 0
6241 }
6242 ;;
6243 { .mmf
6244 ldfe fA10 = [rZ1offsett], 32 // A10
6245 ldfe fA11 = [rLnSinDataPtr], 32 // A11
6246 nop.f 0
6247 }
6248 ;;
6249 { .mmf
6250 ldfe fA12 = [rZ1offsett], 32 // A12
6251 ldfe fA13 = [rLnSinDataPtr], 32 // A13
6252 fma.s1 FR_Q4 = FR_rsq, FR_rsq, f0 // y^4
6253 }
6254 ;;
6255 { .mmf
6256 ldfe fA14 = [rZ1offsett], 32 // A14
6257 ldfe fA15 = [rLnSinDataPtr], 32 // A15
6258 nop.f 0
6259 }
6260 ;;
6261 { .mfi
6262 nop.m 0
6263 fma.s1 fRes1H = FR_FracX, fA1, f0 // (A1*y)hi
6264 nop.i 0
6265 }
6266 ;;
6267 { .mfi
6268 nop.m 0
6269 fma.s1 fA3 = fA3, FR_FracX, fA2 // v4
6270 nop.i 0
6271 }
6272 ;;
6273 { .mfi
6274 nop.m 0
6275 fma.s1 fA5 = fA5, FR_FracX, fA4 // v5
6276 nop.i 0
6277 }
6278 ;;
6279 { .mfi
6280 // store sign of GAMMA(x) if size of variable is 4 bytes
6281 (p6) st4 [rSgnGamAddr] = rSgnGam
6282 fma.s1 fA3L = FR_Q4, FR_Q4, f0 // v9 = y^8
6283 nop.i 0
6284 }
6285 { .mfi
6286 // store sign of GAMMA(x) if size of variable is 8 bytes
6287 (p7) st8 [rSgnGamAddr] = rSgnGam
6288 fma.s1 fA7 = fA7, FR_FracX, fA6 // v7
6289 nop.i 0
6290 }
6291 ;;
6292 { .mfi
6293 nop.m 0
6294 fma.s1 fA9 = fA9, FR_FracX, fA8 // v8
6295 nop.i 0
6296 }
6297 ;;
6298 { .mfi
6299 nop.m 0
6300 fms.s1 fRes1L = FR_FracX, fA1, fRes1H // rounding error of (A1*y)hi
6301 nop.i 0
6302 }
6303 { .mfi
6304 nop.m 0
6305 fma.s1 fA11 = fA11, FR_FracX, fA10 // v12
6306 nop.i 0
6307 }
6308 ;;
6309 { .mfi
6310 nop.m 0
6311 fma.s1 fA13 = fA13, FR_FracX, fA12 // v13
6312 nop.i 0
6313 }
6314 { .mfi
6315 nop.m 0
6316 fma.s1 fRes2H = fRes1H, f1, fA0 // (A1*y + A0)hi
6317 nop.i 0
6318 }
6319 ;;
6320 { .mfi
6321 nop.m 0
6322 fma.s1 fA15 = fA15, FR_FracX, fA14 // v16
6323 nop.i 0
6324 }
6325 { .mfi
6326 nop.m 0
6327 fma.s1 fA5 = fA5, FR_rsq, fA3 // v3
6328 nop.i 0
6329 }
6330 ;;
6331 { .mfi
6332 nop.m 0
6333 fma.s1 fA9 = fA9, FR_rsq, fA7 // v6
6334 nop.i 0
6335 }
6336 ;;
6337 { .mfi
6338 nop.m 0
6339 fma.s1 fRes1L = FR_FracX, fA1L, fRes1L // add A1L*y
6340 nop.i 0
6341 }
6342 ;;
6343 { .mfi
6344 nop.m 0
6345 fms.s1 fRes2L = fA0, f1, fRes2H // start of rounding error of fRes2H
6346 nop.i 0
6347 }
6348 { .mfi
6349 nop.m 0
6350 fma.s1 fA13 = fA13, FR_rsq, fA11 // v11
6351 nop.i 0
6352 }
6353 ;;
6354 { .mfi
6355 nop.m 0
6356 fma.s1 fA9 = fA9, FR_Q4, fA5 // v2
6357 nop.i 0
6358 }
6359 ;;
6360 { .mfi
6361 nop.m 0
6362 fma.s1 fRes1L = fRes1L, f1, fA0L // add A0L
6363 nop.i 0
6364 }
6365 ;;
6366 { .mfi
6367 nop.m 0
6368 fma.s1 fRes2L = fRes2L, f1, fRes1H // (A0 - fRes2H) + (A1*y)hi
6369 nop.i 0
6370 }
6371 { .mfi
6372 nop.m 0
6373 fma.s1 fA15 = fA15, FR_Q4, fA13 // v10
6374 nop.i 0
6375 }
6376 ;;
6377 { .mfi
6378 nop.m 0
6379 fma.s1 fRes2L = fRes1L, f1, fRes2L // (A1*y + A0)lo
6380 nop.i 0
6381 }
6382 { .mfi
6383 nop.m 0
6384 fma.s1 fPol = fA3L, fA15, fA9 // fA15*y^8 + fA9: polynomial in A2..A15
6385 nop.i 0
6386 }
6387 ;;
6388 { .mfi
6389 nop.m 0
6390 fma.s1 f8 = FR_rsq , fPol, fRes2H // high part of result (reads fPol before the update in this group)
6391 nop.i 0
6392 }
6393 { .mfi
6394 nop.m 0
6395 fma.s1 fPol = fPol, FR_rsq, f0 // fPol * y^2
6396 nop.i 0
6397 }
6398 ;;
6399 { .mfi
6400 nop.m 0
6401 fms.s1 fRes1L = fRes2H, f1, f8 // start of rounding error of the high part
6402 nop.i 0
6403 }
6404 ;;
6405 { .mfi
6406 nop.m 0
6407 fma.s1 fRes1L = fRes1L, f1, fPol
6408 nop.i 0
6409 }
6410 ;;
6411 {.mfi
6412 nop.m 0
6413 fma.s1 fRes1L = fRes1L, f1, fRes2L // low part of result
6414 nop.i 0
6415 }
6416 ;;
6417 { .mfb
6418 nop.m 0
6419 fma.s0 f8 = f8, f1, fRes1L // final result = high part + low part
6420 // exit for 8.0 <= x <= 10.0
6421 br.ret.sptk b0
6422 }
6423 ;;
6424
6425 // here if 4.0 <=x < 8.0: selects lgammal_4_8_data, forms the polynomial argument and jumps to lgamma_polynom25
6426 .align 32
6427 lgammal_4_8:
6428 { .mfi
6429 addl rPolDataPtr= @ltoff(lgammal_4_8_data),gp
6430 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // argument = fSignifX - FR_MHalf (both set before entry; values not visible here)
6431 adds rSgnGam = 1, r0 // sign of gamma = +1
6432 }
6433 ;;
6434 { .mfi
6435 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
6436 nop.f 0
6437 nop.i 0
6438 }
6439 ;;
6440
6441 { .mfb
6442 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6443 nop.f 0
6444 // branch to special path which computes polynomial of 25th degree
6445 br.sptk lgamma_polynom25
6446 }
6447 ;;
6448
6449 // here if 2.25 <=x < 4.0: selects lgammal_2Q_4_data, forms the polynomial argument and jumps to lgamma_polynom25
6450 .align 32
6451 lgammal_2Q_4:
6452 { .mfi
6453 addl rPolDataPtr= @ltoff(lgammal_2Q_4_data),gp
6454 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // argument = fSignifX - FR_MHalf (both set before entry; values not visible here)
6455 adds rSgnGam = 1, r0 // sign of gamma = +1
6456 }
6457 ;;
6458 { .mfi
6459 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
6460 nop.f 0
6461 nop.i 0
6462 }
6463 ;;
6464
6465 { .mfb
6466 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6467 nop.f 0
6468 // branch to special path which computes polynomial of 25th degree
6469 br.sptk lgamma_polynom25
6470 }
6471 ;;
6472
6473 // here if 0.5 <= |x| < 0.75: p14 selects the positive-x table and sign, p15 the negative-x ones, then jumps to lgamma_polynom25
6474 .align 32
6475 lgammal_half_3Q:
6476 .pred.rel "mutex", p14, p15
6477 { .mfi
6478 (p14) addl rPolDataPtr= @ltoff(lgammal_half_3Q_data),gp
6479 // FR_FracX = x - 0.625 for positive x
6480 (p14) fms.s1 FR_FracX = f8, f1, FR_FracX
6481 (p14) adds rSgnGam = 1, r0 // sign of gamma = +1
6482 }
6483 { .mfi
6484 (p15) addl rPolDataPtr= @ltoff(lgammal_half_3Q_neg_data),gp
6485 // FR_FracX = x + 0.625 for negative x
6486 (p15) fma.s1 FR_FracX = f8, f1, FR_FracX
6487 (p15) adds rSgnGam = -1, r0 // sign of gamma = -1
6488 }
6489 ;;
6490 { .mfi
6491 ld8 rPolDataPtr = [rPolDataPtr] // load the address of the selected table
6492 nop.f 0
6493 nop.i 0
6494 }
6495 ;;
6496 { .mfb
6497 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6498 nop.f 0
6499 // branch to special path which computes polynomial of 25th degree
6500 br.sptk lgamma_polynom25
6501 }
6502 ;;
6503 // here if 1.3125 <= x < 1.5625; rPolDataPtr is not loaded here, so it must already point to the coefficient table on entry
6504 .align 32
6505 lgammal_loc_min:
6506 { .mfi
6507 adds rSgnGam = 1, r0 // sign of gamma = +1
6508 nop.f 0
6509 nop.i 0
6510 }
6511 { .mfb
6512 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6513 fms.s1 FR_FracX = f8, f1, fA5L // argument = x - fA5L (fA5L set before entry; presumably the expansion point - confirm)
6514 br.sptk lgamma_polynom25 // result is computed by the 25th degree polynomial path
6515 }
6516 ;;
6517 // here if -2.605859375 <= x < -2.5
6518 // special polynomial approximation used since neither "near root"
6519 // approximation nor reflection formula give satisfactory accuracy on
6520 // this range
6521 .align 32
6522 _neg2andHalf:
6523 { .mfi
6524 addl rPolDataPtr= @ltoff(lgammal_neg2andHalf_data),gp
6525 fma.s1 FR_FracX = fB20, f1, f8 // 2.5 + x (fB20 is expected to hold 2.5 on entry)
6526 adds rSgnGam = -1, r0 // sign of gamma = -1
6527 }
6528 ;;
6529 {.mfi
6530 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
6531 nop.f 0
6532 nop.i 0
6533 }
6534 ;;
6535 { .mfb
6536 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6537 nop.f 0
6538 // branch to special path which computes polynomial of 25th degree
6539 br.sptk lgamma_polynom25
6540 }
6541 ;;
6542
6543 // here if -0.5 < x <= -0.40625: selects lgammal_near_neg_half_data, forms the argument and jumps to lgamma_polynom25
6544 .align 32
6545 lgammal_near_neg_half:
6546 { .mmf
6547 addl rPolDataPtr= @ltoff(lgammal_near_neg_half_data),gp
6548 setf.exp FR_FracX = rExpHalf // FR_FracX = 2^(rExpHalf - bias); presumably 0.5, as the name suggests
6549 nop.f 0
6550 }
6551 ;;
6552 { .mfi
6553 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
6554 nop.f 0
6555 adds rSgnGam = -1, r0 // sign of gamma = -1
6556 }
6557 ;;
6558 { .mfb
6559 adds rTmpPtr = 160, rPolDataPtr // second coefficient pointer, 160 bytes into the table
6560 fma.s1 FR_FracX = FR_FracX, f1, f8 // argument = FR_FracX + x (x + 0.5 if the above holds)
6561 // branch to special path which computes polynomial of 25th degree
6562 br.sptk lgamma_polynom25
6563 }
6564 ;;
6565
6566 // here if the answer is P25(x); stores signgam and returns the result in f8
6567 // rPolDataPtr, rTmpPtr point to coefficients
6568 // x is in FR_FracX register
6569 .align 32
6570 lgamma_polynom25:
6571 { .mfi
6572 ldfpd fA3, fA0L = [rPolDataPtr], 16 // A3
6573 nop.f 0
6574 cmp.eq p6, p7 = 4, rSgnGamSize // p6 = 1 if signgam is 4 bytes, otherwise p7 = 1
6575 }
6576 { .mfi
6577 ldfpd fA18, fA19 = [rTmpPtr], 16 // D7, D6
6578 nop.f 0
6579 nop.i 0
6580 }
6581 ;;
6582 { .mfi
6583 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1
6584 nop.f 0
6585 nop.i 0
6586 }
6587 { .mfi
6588 ldfpd fA16, fA17 = [rTmpPtr], 16 // D4, D5
6589 nop.f 0
6590 }
6591 ;;
6592 { .mfi
6593 ldfpd fA12, fA13 = [rPolDataPtr], 16 // D0, D1
6594 nop.f 0
6595 nop.i 0
6596 }
6597 { .mfi
6598 ldfpd fA14, fA15 = [rTmpPtr], 16 // D2, D3
6599 nop.f 0
6600 nop.i 0
6601 }
6602 ;;
6603 { .mfi
6604 ldfpd fA24, fA25 = [rPolDataPtr], 16 // C21, C20
6605 nop.f 0
6606 nop.i 0
6607 }
6608 { .mfi
6609 ldfpd fA22, fA23 = [rTmpPtr], 16 // C19, C18
6610 nop.f 0
6611 nop.i 0
6612 }
6613 ;;
6614 { .mfi
6615 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2
6616 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2
6617 nop.i 0
6618 }
6619 { .mfi
6620 ldfpd fA20, fA21 = [rTmpPtr], 16 // C17, C16
6621 nop.f 0
6622 nop.i 0
6623 }
6624 ;;
6625 { .mfi
6626 ldfe fA11 = [rTmpPtr], 16 // E7
6627 nop.f 0
6628 nop.i 0
6629 }
6630 { .mfi
6631 ldfpd fA0, fA3L = [rPolDataPtr], 16 // A0
6632 nop.f 0
6633 nop.i 0
6634 };;
6635 { .mfi
6636 ldfe fA10 = [rPolDataPtr], 16 // E6
6637 nop.f 0
6638 nop.i 0
6639 }
6640 { .mfi
6641 ldfe fA9 = [rTmpPtr], 16 // E5
6642 nop.f 0
6643 nop.i 0
6644 }
6645 ;;
6646 { .mmf
6647 ldfe fA8 = [rPolDataPtr], 16 // E4
6648 ldfe fA7 = [rTmpPtr], 16 // E3
6649 nop.f 0
6650 }
6651 ;;
6652 { .mmf
6653 ldfe fA6 = [rPolDataPtr], 16 // E2
6654 ldfe fA5 = [rTmpPtr], 16 // E1
6655 nop.f 0
6656 }
6657 ;;
6658 { .mfi
6659 ldfe fA4 = [rPolDataPtr], 16 // E0
6660 fma.s1 fA5L = fA4L, fA4L, f0 // x^4
6661 nop.i 0
6662 }
6663 { .mfi
6664 nop.m 0
6665 fms.s1 fB2 = FR_FracX, FR_FracX, fA4L // x^2 - <x^2>
6666 nop.i 0
6667 }
6668 ;;
6669 { .mfi
6670 // store signgam if size of variable is 4 bytes
6671 (p6) st4 [rSgnGamAddr] = rSgnGam
6672 fma.s1 fRes4H = fA3, FR_FracX, f0 // (A3*x)hi
6673 nop.i 0
6674 }
6675 { .mfi
6676 // store signgam if size of variable is 8 bytes
6677 (p7) st8 [rSgnGamAddr] = rSgnGam
6678 fma.s1 fA19 = fA19, FR_FracX, fA18 // D7*x + D6
6679 nop.i 0
6680 }
6681 ;;
6682 { .mfi
6683 nop.m 0
6684 fma.s1 fResH = fA1, FR_FracX, f0 // (A1*x)hi
6685 nop.i 0
6686 }
6687 { .mfi
6688 nop.m 0
6689 fma.s1 fB6 = fA1L, FR_FracX, fA0L // A1L*x + A0L
6690 nop.i 0
6691 }
6692 ;;
6693 { .mfi
6694 nop.m 0
6695 fma.s1 fA17 = fA17, FR_FracX, fA16 // D5*x + D4
6696 nop.i 0
6697 }
6698 { .mfi
6699 nop.m 0
6700 fma.s1 fA15 = fA15, FR_FracX, fA14 // D3*x + D2
6701 nop.i 0
6702 }
6703 ;;
6704 { .mfi
6705 nop.m 0
6706 fma.s1 fA25 = fA25, FR_FracX, fA24 // C21*x + C20
6707 nop.i 0
6708 }
6709 { .mfi
6710 nop.m 0
6711 fma.s1 fA13 = fA13, FR_FracX, fA12 // D1*x + D0
6712 nop.i 0
6713 }
6714 ;;
6715 { .mfi
6716 nop.m 0
6717 fma.s1 fA23 = fA23, FR_FracX, fA22 // C19*x + C18
6718 nop.i 0
6719 }
6720 { .mfi
6721 nop.m 0
6722 fma.s1 fA21 = fA21, FR_FracX, fA20 // C17*x + C16
6723 nop.i 0
6724 }
6725 ;;
6726 { .mfi
6727 nop.m 0
6728 fms.s1 fRes4L = fA3, FR_FracX, fRes4H // delta((A3*x)hi)
6729 nop.i 0
6730 }
6731 { .mfi
6732 nop.m 0
6733 fadd.s1 fRes2H = fRes4H, fA2 // (A3*x + A2)hi
6734 nop.i 0
6735 }
6736 ;;
6737 { .mfi
6738 nop.m 0
6739 fms.s1 fResL = fA1, FR_FracX, fResH // d(A1*x)
6740 nop.i 0
6741 }
6742 { .mfi
6743 nop.m 0
6744 fadd.s1 fRes1H = fResH, fA0 // (A1*x + A0)hi
6745 nop.i 0
6746 }
6747 ;;
6748 { .mfi
6749 nop.m 0
6750 fma.s1 fA19 = fA19, fA4L, fA17 // Dhi
6751 nop.i 0
6752 }
6753 { .mfi
6754 nop.m 0
6755 fma.s1 fA11 = fA11, FR_FracX, fA10 // E7*x + E6
6756 nop.i 0
6757 }
6758 ;;
6759 { .mfi
6760 nop.m 0
6761 // Doing this to raise inexact flag
6762 fma.s0 fA10 = fA0, fA0, f0
6763 nop.i 0
6764 }
6765 ;;
6766 { .mfi
6767 nop.m 0
6768 fma.s1 fA15 = fA15, fA4L, fA13 // Dlo
6769 nop.i 0
6770 }
6771 { .mfi
6772 nop.m 0
6773 // (C21*x + C20)*x^2 + C19*x + C18
6774 fma.s1 fA25 = fA25, fA4L, fA23
6775 nop.i 0
6776 }
6777 ;;
6778 { .mfi
6779 nop.m 0
6780 fma.s1 fA9 = fA9, FR_FracX, fA8 // E5*x + E4
6781 nop.i 0
6782 }
6783 { .mfi
6784 nop.m 0
6785 fma.s1 fA7 = fA7, FR_FracX, fA6 // E3*x + E2
6786 nop.i 0
6787 }
6788 ;;
6789 { .mfi
6790 nop.m 0
6791 fma.s1 fRes4L = fA3L, FR_FracX, fRes4L // (A3*x)lo
6792 nop.i 0
6793 }
6794 { .mfi
6795 nop.m 0
6796 fsub.s1 fRes2L = fA2, fRes2H // start of rounding error of (A3*x + A2)hi
6797 nop.i 0
6798 }
6799 ;;
6800 { .mfi
6801 nop.m 0
6802 fadd.s1 fResL = fResL, fB6 // (A1L*x + A0L) + d(A1*x)
6803 nop.i 0
6804 }
6805 { .mfi
6806 nop.m 0
6807 fsub.s1 fRes1L = fA0, fRes1H // start of rounding error of (A1*x + A0)hi
6808 nop.i 0
6809 }
6810 ;;
6811 { .mfi
6812 nop.m 0
6813 fma.s1 fA5 = fA5, FR_FracX, fA4 // E1*x + E0
6814 nop.i 0
6815 }
6816 { .mfi
6817 nop.m 0
6818 fma.s1 fB8 = fA5L, fA5L, f0 // x^8
6819 nop.i 0
6820 }
6821 ;;
6822 { .mfi
6823 nop.m 0
6824 // ((C21*x + C20)*x^2 + C19*x + C18)*x^2 + C17*x + C16
6825 fma.s1 fA25 = fA25, fA4L, fA21
6826 nop.i 0
6827 }
6828 { .mfi
6829 nop.m 0
6830 fma.s1 fA19 = fA19, fA5L, fA15 // D
6831 nop.i 0
6832 }
6833 ;;
6834 { .mfi
6835 nop.m 0
6836 fma.s1 fA11 = fA11, fA4L, fA9 // Ehi
6837 nop.i 0
6838 }
6839 ;;
6840 { .mfi
6841 nop.m 0
6842 fadd.s1 fRes2L = fRes2L, fRes4H // (A2 - (A3*x + A2)hi) + (A3*x)hi
6843 nop.i 0
6844 }
6845 { .mfi
6846 nop.m 0
6847 fadd.s1 fRes4L = fRes4L, fA2L // (A3*x)lo + A2L
6848 nop.i 0
6849 }
6850 ;;
6851 { .mfi
6852 nop.m 0
6853 fma.s1 fRes3H = fRes2H, fA4L, f0 // ((A3*x + A2)*x^2)hi
6854 nop.i 0
6855 }
6856 { .mfi
6857 nop.m 0
6858 fadd.s1 fRes1L = fRes1L, fResH // (A0 - (A1*x + A0)hi) + (A1*x)hi
6859 nop.i 0
6860 }
6861 ;;
6862 { .mfi
6863 nop.m 0
6864 fma.s1 fRes3L = fRes2H, fB2, f0 // (A3*x + A2)hi*d(x^2)
6865 nop.i 0
6866 }
6867 { .mfi
6868 nop.m 0
6869 fma.s1 fA7 = fA7, fA4L, fA5 // Elo
6870 nop.i 0
6871 }
6872 ;;
6873 { .mfi
6874 nop.m 0
6875 fma.s1 fA25 = fA25, fB8, fA19 // C*x^8 + D
6876 nop.i 0
6877 }
6878 ;;
6879 { .mfi
6880 nop.m 0
6881 fadd.s1 fRes2L = fRes2L, fRes4L // (A3*x + A2)lo
6882 nop.i 0
6883 }
6884 ;;
6885 { .mfi
6886 nop.m 0
6887 fms.s1 fB4 = fRes2H, fA4L, fRes3H // d((A3*x + A2)*x^2))
6888 nop.i 0
6889 }
6890 { .mfi
6891 nop.m 0
6892 fadd.s1 fRes1L = fRes1L, fResL // (A1*x + A0)lo
6893 nop.i 0
6894 }
6895 ;;
6896 { .mfi
6897 nop.m 0
6898 fadd.s1 fB20 = fRes3H, fRes1H // Phi
6899 nop.i 0
6900 }
6901 { .mfi
6902 nop.m 0
6903 fma.s1 fA11 = fA11, fA5L, fA7 // E
6904 nop.i 0
6905 }
6906 ;;
6907 { .mfi
6908 nop.m 0
6909 // ( (A3*x + A2)lo*<x^2> + (A3*x + A2)hi*d(x^2))
6910 fma.s1 fRes3L = fRes2L, fA4L, fRes3L
6911 nop.i 0
6912 }
6913 ;;
6914 { .mfi
6915 nop.m 0
6916 // d((A3*x + A2)*x^2)) + (A1*x + A0)lo
6917 fadd.s1 fRes1L = fRes1L, fB4
6918 nop.i 0
6919 }
6920 ;;
6921 { .mfi
6922 nop.m 0
6923 fsub.s1 fB18 = fRes1H, fB20 // start of rounding error of Phi
6924 nop.i 0
6925 }
6926 { .mfi
6927 nop.m 0
6928 fma.s1 fPol = fA25, fB8, fA11 // (C*x^8 + D)*x^8 + E
6929 nop.i 0
6930 }
6931 ;;
6932 { .mfi
6933 nop.m 0
6934 fadd.s1 fRes1L = fRes1L, fRes3L
6935 nop.i 0
6936 }
6937 ;;
6938 { .mfi
6939 nop.m 0
6940 fadd.s1 fB18 = fB18, fRes3H // (fRes1H - Phi) + fRes3H
6941 nop.i 0
6942 }
6943 { .mfi
6944 nop.m 0
6945 fma.s1 fRes4H = fPol, fA5L, fB20 // high part of result: fPol*x^4 + Phi
6946 nop.i 0
6947 }
6948 ;;
6949 { .mfi
6950 nop.m 0
6951 fma.s1 fPolL = fPol, fA5L, f0 // fPol*x^4
6952 nop.i 0
6953 }
6954 ;;
6955 { .mfi
6956 nop.m 0
6957 fadd.s1 fB18 = fB18, fRes1L // Plo
6958 nop.i 0
6959 }
6960 { .mfi
6961 nop.m 0
6962 fsub.s1 fRes4L = fB20, fRes4H // start of rounding error of the high part
6963 nop.i 0
6964 }
6965 ;;
6966 { .mfi
6967 nop.m 0
6968 fadd.s1 fB18 = fB18, fPolL // Plo + fPol*x^4
6969 nop.i 0
6970 }
6971 ;;
6972 { .mfi
6973 nop.m 0
6974 fadd.s1 fRes4L = fRes4L, fB18 // low part of result
6975 nop.i 0
6976 }
6977 ;;
6978 { .mfb
6979 nop.m 0
6980 fma.s0 f8 = fRes4H, f1, fRes4L // final result = high part + low part
6981 // P25(x) computed, exit here
6982 br.ret.sptk b0
6983 }
6984 ;;
6985
6986
6987 // here if 0.75 <= x < 1.3125: selects lgammal_03Q_1Q_data, sets FR_FracX and fB4 from fA5L and jumps to lgamma_polynom24x
6988 .align 32
6989 lgammal_03Q_1Q:
6990 { .mfi
6991 addl rPolDataPtr= @ltoff(lgammal_03Q_1Q_data),gp
6992 fma.s1 FR_FracX = fA5L, f1, f0 // x (polynomial argument, taken from fA5L as prepared before entry)
6993 adds rSgnGam = 1, r0 // sign of gamma = +1
6994 }
6995 { .mfi
6996 nop.m 0
6997 fma.s1 fB4 = fA5L, fA5L, f0 // x^2
6998 nop.i 0
6999 }
7000 ;;
7001 { .mfi
7002 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
7003 nop.f 0
7004 nop.i 0
7005 }
7006 ;;
7007 { .mfb
7008 adds rTmpPtr = 144, rPolDataPtr // second coefficient pointer, 144 bytes into the table
7009 nop.f 0
7010 br.sptk lgamma_polynom24x // result is computed by the Pol24(x) path
7011 }
7012 ;;
7013
7014 // here if 1.5625 <= x < 2.25: selects lgammal_13Q_2Q_data, sets FR_FracX and fB4 from fB4 and jumps to lgamma_polynom24x
7015 .align 32
7016 lgammal_13Q_2Q:
7017 { .mfi
7018 addl rPolDataPtr= @ltoff(lgammal_13Q_2Q_data),gp
7019 fma.s1 FR_FracX = fB4, f1, f0 // x (polynomial argument, taken from fB4 as prepared before entry)
7020 adds rSgnGam = 1, r0 // sign of gamma = +1
7021 }
7022 { .mfi
7023 nop.m 0
7024 fma.s1 fB4 = fB4, fB4, f0 // x^2 (same instruction group as the read above, so that read sees the old fB4)
7025 nop.i 0
7026 }
7027 ;;
7028 { .mfi
7029 ld8 rPolDataPtr = [rPolDataPtr] // load the table address
7030 nop.f 0
7031 nop.i 0
7032 }
7033 ;;
7034 { .mfb
7035 adds rTmpPtr = 144, rPolDataPtr // second coefficient pointer, 144 bytes into the table
7036 nop.f 0
7037 br.sptk lgamma_polynom24x // result is computed by the Pol24(x) path
7038 }
7039 ;;
7040
7041// here if result is Pol24(x)
7042// x is in FR_FracX,
7043// rPolDataPtr, rTmpPtr point to coefficients
7044.align 32
7045lgamma_polynom24x:
7046{ .mfi
7047 ldfpd fA4, fA2L = [rPolDataPtr], 16
7048 nop.f 0
7049 cmp.eq p6, p7 = 4, rSgnGamSize
7050}
7051{ .mfi
7052 ldfpd fA23, fA24 = [rTmpPtr], 16 // C18, C19
7053 nop.f 0
7054 nop.i 0
7055}
7056;;
7057{ .mfi
7058 ldfpd fA3, fA1L = [rPolDataPtr], 16
7059 fma.s1 fA5L = fB4, fB4, f0 // x^4
7060 nop.i 0
7061}
7062{ .mfi
7063 ldfpd fA19, fA20 = [rTmpPtr], 16 // D6, D7
7064 fms.s1 fB2 = FR_FracX, FR_FracX, fB4 // x^2 - <x^2>
7065 nop.i 0
7066}
7067;;
7068{ .mmf
7069 ldfpd fA15, fA16 = [rPolDataPtr], 16 // D2, D3
7070 ldfpd fA17, fA18 = [rTmpPtr], 16 // D4, D5
7071 nop.f 0
7072}
7073;;
7074{ .mmf
7075 ldfpd fA13, fA14 = [rPolDataPtr], 16 // D0, D1
7076 ldfpd fA12, fA21 = [rTmpPtr], 16 // E7, C16
7077 nop.f 0
7078}
7079;;
7080{ .mfi
7081 ldfe fA11 = [rPolDataPtr], 16 // E6
7082 nop.f 0
7083 nop.i 0
7084}
7085{ .mfi
7086 ldfe fA10 = [rTmpPtr], 16 // E5
7087 nop.f 0
7088 nop.i 0
7089}
7090;;
7091{ .mfi
7092 ldfpd fA2, fA4L = [rPolDataPtr], 16
7093 nop.f 0
7094 nop.i 0
7095}
7096{ .mfi
7097 ldfpd fA1, fA3L = [rTmpPtr], 16
7098 nop.f 0
7099 nop.i 0
7100}
7101;;
7102{ .mfi
7103 ldfpd fA22, fA25 = [rPolDataPtr], 16 // C17, C20
7104 fma.s1 fA0 = fA5L, fA5L, f0 // x^8
7105 nop.i 0
7106}
7107{ .mfi
7108 nop.m 0
7109 fma.s1 fA0L = fA5L, FR_FracX, f0 // x^5
7110 nop.i 0
7111}
7112;;
7113{ .mmf
7114 ldfe fA9 = [rPolDataPtr], 16 // E4
7115 ldfe fA8 = [rTmpPtr], 16 // E3
7116 nop.f 0
7117}
7118;;
7119{ .mmf
7120 ldfe fA7 = [rPolDataPtr], 16 // E2
7121 ldfe fA6 = [rTmpPtr], 16 // E1
7122 nop.f 0
7123}
7124;;
7125{ .mfi
7126 ldfe fA5 = [rTmpPtr], 16 // E0
7127 fma.s1 fRes4H = fA4, fB4, f0 // A4*<x^2>
7128 nop.i 0
7129}
7130{ .mfi
7131 nop.m 0
7132 fma.s1 fPol = fA24, FR_FracX, fA23 // C19*x + C18
7133 nop.i 0
7134}
7135;;
7136{ .mfi
7137 // store signgam if size of variable is 4 bytes
7138(p6) st4 [rSgnGamAddr] = rSgnGam
7139 fma.s1 fRes1H = fA3, fB4, f0 // A3*<x^2>
7140 nop.i 0
7141}
7142{ .mfi
7143 // store signgam if size of variable is 8 bytes
7144(p7) st8 [rSgnGamAddr] = rSgnGam
7145 fma.s1 fA1L = fA3, fB2,fA1L // A3*d(x^2) + A1L
7146 nop.i 0
7147}
7148;;
7149{ .mfi
7150 nop.m 0
7151 fma.s1 fA20 = fA20, FR_FracX, fA19 // D7*x + D6
7152 nop.i 0
7153}
7154{ .mfi
7155 nop.m 0
7156 fma.s1 fA18 = fA18, FR_FracX, fA17 // D5*x + D4
7157 nop.i 0
7158}
7159;;
7160{ .mfi
7161 nop.m 0
7162 fma.s1 fA16 = fA16, FR_FracX, fA15 // D3*x + D2
7163 nop.i 0
7164}
7165{ .mfi
7166 nop.m 0
7167 fma.s1 fA14 = fA14, FR_FracX, fA13 // D1*x + D0
7168 nop.i 0
7169}
7170;;
7171{ .mfi
7172 nop.m 0
7173 fma.s1 fA2L = fA4, fB2,fA2L // A4*d(x^2) + A2L
7174 nop.i 0
7175}
7176{ .mfi
7177 nop.m 0
7178 fma.s1 fA12 = fA12, FR_FracX, fA11 // E7*x + E6
7179 nop.i 0
7180}
7181;;
7182{ .mfi
7183 nop.m 0
7184 fms.s1 fRes2L = fA4, fB4, fRes4H // delta(A4*<x^2>)
7185 nop.i 0
7186}
7187{ .mfi
7188 nop.m 0
7189 fadd.s1 fRes2H = fRes4H, fA2 // A4*<x^2> + A2
7190 nop.i 0
7191}
7192;;
7193{ .mfi
7194 nop.m 0
7195 fms.s1 fRes3L = fA3, fB4, fRes1H // delta(A3*<x^2>)
7196 nop.i 0
7197}
7198{ .mfi
7199 nop.m 0
7200 fadd.s1 fRes3H = fRes1H, fA1 // A3*<x^2> + A1
7201 nop.i 0
7202}
7203;;
7204{ .mfi
7205 nop.m 0
7206 fma.s1 fA20 = fA20, fB4, fA18 // (D7*x + D6)*x^2 + D5*x + D4
7207 nop.i 0
7208}
7209{ .mfi
7210 nop.m 0
7211 fma.s1 fA22 = fA22, FR_FracX, fA21 // C17*x + C16
7212 nop.i 0
7213}
7214;;
7215{ .mfi
7216 nop.m 0
7217 fma.s1 fA16 = fA16, fB4, fA14 // (D3*x + D2)*x^2 + D1*x + D0
7218 nop.i 0
7219}
7220{ .mfi
7221 nop.m 0
7222 fma.s1 fPol = fA25, fB4, fPol // C20*x^2 + C19*x + C18
7223 nop.i 0
7224}
7225;;
7226{ .mfi
7227 nop.m 0
7228 fma.s1 fA2L = fA4L, fB4, fA2L // A4L*<x^2> + A4*d(x^2) + A2L
7229 nop.i 0
7230}
7231{ .mfi
7232 nop.m 0
7233 fma.s1 fA1L = fA3L, fB4, fA1L // A3L*<x^2> + A3*d(x^2) + A1L
7234 nop.i 0
7235}
7236;;
7237{ .mfi
7238 nop.m 0
7239 fsub.s1 fRes4L = fA2, fRes2H // d1
7240 nop.i 0
7241}
7242{ .mfi
7243 nop.m 0
7244 fma.s1 fResH = fRes2H, fB4, f0 // (A4*<x^2> + A2)*x^2
7245 nop.i 0
7246}
7247;;
7248{ .mfi
7249 nop.m 0
7250 fsub.s1 fRes1L = fA1, fRes3H // d1
7251 nop.i 0
7252}
7253{ .mfi
7254 nop.m 0
7255 fma.s1 fB6 = fRes3H, FR_FracX, f0 // (A3*<x^2> + A1)*x
7256 nop.i 0
7257}
7258;;
7259{ .mfi
7260 nop.m 0
7261 fma.s1 fA10 = fA10, FR_FracX, fA9 // E5*x + E4
7262 nop.i 0
7263}
7264{ .mfi
7265 nop.m 0
7266 fma.s1 fA8 = fA8, FR_FracX, fA7 // E3*x + E2
7267 nop.i 0
7268}
7269;;
7270{ .mfi
7271 nop.m 0
7272 // (C20*x^2 + C19*x + C18)*x^2 + C17*x + C16
7273 fma.s1 fPol = fPol, fB4, fA22
7274 nop.i 0
7275}
7276{ .mfi
7277 nop.m 0
7278 fma.s1 fA6 = fA6, FR_FracX, fA5 // E1*x + E0
7279 nop.i 0
7280}
7281;;
7282{ .mfi
7283 nop.m 0
7284 // A4L*<x^2> + A4*d(x^2) + A2L + delta(A4*<x^2>)
7285 fadd.s1 fRes2L = fA2L, fRes2L
7286 nop.i 0
7287}
7288{ .mfi
7289 nop.m 0
7290 // A3L*<x^2> + A3*d(x^2) + A1L + delta(A3*<x^2>)
7291 fadd.s1 fRes3L = fA1L, fRes3L
7292 nop.i 0
7293}
7294;;
7295{ .mfi
7296 nop.m 0
7297 fadd.s1 fRes4L = fRes4L, fRes4H // d2
7298 nop.i 0
7299}
7300{ .mfi
7301 nop.m 0
7302 fms.s1 fResL = fRes2H, fB4, fResH // d((A4*<x^2> + A2)*x^2)
7303 nop.i 0
7304}
7305;;
7306{ .mfi
7307 nop.m 0
7308 fadd.s1 fRes1L = fRes1L, fRes1H // d2
7309 nop.i 0
7310}
7311{ .mfi
7312 nop.m 0
7313 fms.s1 fB8 = fRes3H, FR_FracX, fB6 // d((A3*<x^2> + A1)*x)
7314 nop.i 0
7315}
7316;;
7317{ .mfi
7318 nop.m 0
7319 fadd.s1 fB10 = fResH, fB6 // (A4*x^4 + .. + A1*x)hi
7320 nop.i 0
7321}
7322{ .mfi
7323 nop.m 0
7324 fma.s1 fA12 = fA12, fB4, fA10 // Ehi
7325 nop.i 0
7326}
7327;;
7328{ .mfi
7329 nop.m 0
7330 // ((D7*x + D6)*x^2 + D5*x + D4)*x^4 + (D3*x + D2)*x^2 + D1*x + D0
7331 fma.s1 fA20 = fA20, fA5L, fA16
7332 nop.i 0
7333}
7334{ .mfi
7335 nop.m 0
7336 fma.s1 fA8 = fA8, fB4, fA6 // Elo
7337 nop.i 0
7338}
7339;;
7340{ .mfi
7341 nop.m 0
7342 fadd.s1 fRes2L = fRes2L, fRes4L // (A4*<x^2> + A2)lo
7343 nop.i 0
7344}
7345{ .mfi
7346 nop.m 0
7347 // d((A4*<x^2> + A2)*x^2) + (A4*<x^2> + A2)*d(x^2)
7348 fma.s1 fResL = fRes2H, fB2, fResL
7349 nop.i 0
7350}
7351;;
7352{ .mfi
7353 nop.m 0
7354 fadd.s1 fRes3L = fRes3L, fRes1L // (A3*<x^2> + A1)lo
7355 nop.i 0
7356}
7357;;
7358{ .mfi
7359 nop.m 0
7360 fsub.s1 fB12 = fB6, fB10
7361 nop.i 0
7362}
7363;;
7364{ .mfi
7365 nop.m 0
7366 fma.s1 fPol = fPol, fA0, fA20 // PolC*x^8 + PolD
7367 nop.i 0
7368}
7369{ .mfi
7370 nop.m 0
7371 fma.s1 fPolL = fA12, fA5L, fA8 // E
7372 nop.i 0
7373}
7374;;
7375{ .mfi
7376 nop.m 0
7377 fma.s1 fResL = fB4, fRes2L, fResL // ((A4*<x^2> + A2)*x^2)lo
7378 nop.i 0
7379}
7380;;
7381{ .mfi
7382 nop.m 0
7383 fma.s1 fRes3L = fRes3L, FR_FracX, fB8 // ((A3*<x^2> + A1)*x)lo
7384 nop.i 0
7385}
7386;;
7387{ .mfi
7388 nop.m 0
7389 fadd.s1 fB12 = fB12, fResH
7390 nop.i 0
7391}
7392;;
7393{ .mfi
7394 nop.m 0
7395 fma.s1 fPol = fPol, fA0, fPolL
7396 nop.i 0
7397}
7398;;
7399{ .mfi
7400 nop.m 0
7401 fadd.s1 fRes3L = fRes3L, fResL
7402 nop.i 0
7403}
7404;;
7405{ .mfi
7406 nop.m 0
7407 fma.s1 fRes2H = fPol, fA0L, fB10
7408 nop.i 0
7409}
7410;;
7411{ .mfi
7412 nop.m 0
7413 fadd.s1 fRes3L = fB12, fRes3L
7414 nop.i 0
7415}
7416;;
7417{ .mfi
7418 nop.m 0
7419 fsub.s1 fRes4L = fB10, fRes2H
7420 nop.i 0
7421}
7422;;
7423{ .mfi
7424 nop.m 0
7425 fma.s1 fRes4L = fPol, fA0L, fRes4L
7426 nop.i 0
7427}
7428;;
7429{ .mfi
7430 nop.m 0
7431 fadd.s1 fRes4L = fRes4L, fRes3L
7432 nop.i 0
7433}
7434;;
7435{ .mfb
7436 nop.m 0
7437 // final result for all paths for which the result is Pol24(x)
7438 fma.s0 f8 = fRes2H, f1, fRes4L
7439 // here is the exit for all paths for which the result is Pol24(x)
7440 br.ret.sptk b0
7441}
7442;;
7443
7444
7445// here if x is natval, nan, +/-inf, +/-0, or denormal
// Special-input dispatcher. Three fclass tests on f8 select a handler:
//   p9 (+/-denormal)        -> lgammal_denormal_input
//   p6 (NaTVal, NaN, +inf)  -> lgammal_nan_pinf
//   p7 (+/-0)               -> lgammal_singularity
// The only input left after the three branches is -inf, which is handled
// inline at the bottom: signgam is set to 1 and +inf is returned.
7446.align 32
7447lgammal_spec:
7448{ .mfi
7449 nop.m 0
7450 fclass.m p9, p0 = f8, 0xB // +/-denormals
7451 nop.i 0
7452};;
7453{ .mfi
7454 nop.m 0
7455 fclass.m p6, p0 = f8, 0x1E1 // Test x for natval, nan, +inf
7456 nop.i 0
7457};;
7458{ .mfb
7459 nop.m 0
7460 fclass.m p7, p0 = f8, 0x7 // +/-0
// denormal test (p9) was computed two groups earlier, so it is branched on first
7461(p9) br.cond.sptk lgammal_denormal_input
7462};;
7463{ .mfb
7464 nop.m 0
7465 nop.f 0
7466 // branch out if x is natval, nan, +inf
7467(p6) br.cond.spnt lgammal_nan_pinf
7468};;
7469{ .mfb
7470 nop.m 0
7471 nop.f 0
// branch out if x is +/-0 (handled together with negative integers)
7472(p7) br.cond.spnt lgammal_singularity
7473};;
7474// if we are still here then x = -inf
7475{ .mfi
7476 cmp.eq p6, p7 = 4, rSgnGamSize // p6: 4-byte signgam, p7: otherwise (8-byte store)
7477 nop.f 0
7478 adds rSgnGam = 1, r0 // signgam = 1
7479};;
7480{ .mfi
7481 // store signgam if size of variable is 4 bytes
7482(p6) st4 [rSgnGamAddr] = rSgnGam
7483 nop.f 0
7484 nop.i 0
7485}
7486{ .mfb
7487 // store signgam if size of variable is 8 bytes
7488(p7) st8 [rSgnGamAddr] = rSgnGam
// (-inf)*(-inf) + 0 = +inf; computed in s0 so the user status field is used
7489 fma.s0 f8 = f8,f8,f0 // return +inf, no call to error support
7490 br.ret.spnt b0
7491};;
7492
7493// here if x is NaN, NatVal or +INF
// Stores signgam = 1 (4- or 8-byte store selected by rSgnGamSize) and
// returns x + x in f8, then returns to the caller without invoking the
// error-support routine.
7494.align 32
7495lgammal_nan_pinf:
7496{ .mfi
7497 cmp.eq p6, p7 = 4, rSgnGamSize // p6: 4-byte signgam, p7: otherwise (8-byte store)
7498 nop.f 0
7499 adds rSgnGam = 1, r0 // signgam = 1
7500}
7501;;
7502{ .mfi
7503 // store signgam if size of variable is 4 bytes
7504(p6) st4 [rSgnGamAddr] = rSgnGam
// f8*1 + f8, evaluated in status field s0
7505 fma.s0 f8 = f8,f1,f8 // return x+x if x is natval, nan, +inf
7506 nop.i 0
7507}
7508{ .mfb
7509 // store signgam if size of variable is 8 bytes
7510(p7) st8 [rSgnGamAddr] = rSgnGam
7511 nop.f 0
7512 br.ret.sptk b0
7513}
7514;;
7515
7516// here if x denormal or unnormal
// Raises the denormal exception, normalizes f8 in place, and then
// recomputes from the normalized value the quantities written below
// (rSignifX, fSignifX, rSignExpX, fXint, p15/p14, rExpX, fAbsX) before
// jumping to _deno_back_to_main_path.
// NOTE(review): presumably these mirror what the main path computes before
// reaching that label -- confirm against the entry code, which is outside
// this view.
7517.align 32
7518lgammal_denormal_input:
7519{ .mfi
7520 nop.m 0
// result in fResH is only a side-effect carrier; s0 is used so the flag is raised
7521 fma.s0 fResH = f1, f1, f8 // raise denormal exception
7522 nop.i 0
7523}
7524{ .mfi
7525 nop.m 0
7526 fnorm.s1 f8 = f8 // normalize input value
7527 nop.i 0
7528}
7529;;
7530{ .mfi
7531 getf.sig rSignifX = f8 // significand bits of normalized x
7532 fmerge.se fSignifX = f1, f8 // sign/exponent of 1.0, significand of x
7533 nop.i 0
7534}
7535{ .mfi
7536 getf.exp rSignExpX = f8 // sign + exponent field of normalized x
7537 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR)
7538 nop.i 0
7539}
7540;;
{ .mfi
// The original re-issued "getf.exp rSignExpX = f8" in this slot. f8 is not
// written between the two reads, so the second copy produced the same value
// and only lengthened the dependency seen by the "and" below. It is replaced
// with a nop so the bundle template is unchanged.
7542 nop.m 0
7543 fcmp.lt.s1 p15, p14 = f8, f0 // p15: x < 0, p14: otherwise
7544 nop.i 0
7545}
7546;;
7547{ .mfb
// NOTE(review): r17Ones is presumably a 17-bit all-ones mask that strips the
// sign bit from the getf.exp result -- confirm where it is initialized.
7548 and rExpX = rSignExpX, r17Ones
7549 fmerge.s fAbsX = f1, f8 // |x|
7550 br.cond.sptk _deno_back_to_main_path
7551}
7552;;
7553
7554
7555// here if overflow (x > overflow_bound)
// Stores signgam = 1, copies the argument to FR_X, and produces the
// overflow result by squaring a value whose exponent field is 0x1FFFE.
// It then transfers to __libm_error_region with tag 102.
7556.align 32
7557lgammal_overflow:
7558{ .mfi
7559 addl r8 = 0x1FFFE, r0 // exponent field used to build a huge value in f9
7560 nop.f 0
7561 cmp.eq p6, p7 = 4, rSgnGamSize // p6: 4-byte signgam, p7: otherwise (8-byte store)
7562}
7563{ .mfi
7564 adds rSgnGam = 1, r0 // signgam = 1
7565 nop.f 0
7566 nop.i 0
7567}
7568;;
7569{ .mfi
7570 setf.exp f9 = r8 // f9 = value with exponent field 0x1FFFE
7571 fmerge.s FR_X = f8,f8 // FR_X = x, the argument passed to the error region
7572 mov GR_Parameter_TAG = 102 // overflow
7573};;
7574{ .mfi
7575 // store signgam if size of variable is 4 bytes
7576(p6) st4 [rSgnGamAddr] = rSgnGam
7577 nop.f 0
7578 nop.i 0
7579}
7580{ .mfb
7581 // store signgam if size of variable is 8 bytes
7582(p7) st8 [rSgnGamAddr] = rSgnGam
// f9*f9 overflows in the user status field s0
7583 fma.s0 FR_RESULT = f9,f9,f0 // Set I,O and +INF result
7584 br.cond.sptk __libm_error_region
7585};;
7586
7587// here if x is negative integer or +/-0 (SINGULARITY)
// Sets signgam to 1, or to -1 when x is -0, copies x to FR_X, and builds the
// result with frcpa(1, 0). It then transfers to __libm_error_region with
// tag 103.
7588.align 32
7589lgammal_singularity:
7590{ .mfi
7591 adds rSgnGam = 1, r0 // default signgam = 1
7592 fclass.m p8,p0 = f8,0x6 // is x -0?
7593 mov GR_Parameter_TAG = 103 // negative
7594}
7595{ .mfi
7596 cmp.eq p6, p7 = 4, rSgnGamSize // p6: 4-byte signgam, p7: otherwise (8-byte store)
7597 fma.s1 FR_X = f0,f0,f8 // FR_X = 0*0 + x, the argument passed to the error region
7598 nop.i 0
7599};;
7600{ .mfi
// x == -0: signgam = 0 - 1 = -1
7601(p8) sub rSgnGam = r0, rSgnGam
7602 nop.f 0
7603 nop.i 0
7604}
7605{ .mfi
7606 nop.m 0
7607 nop.f 0
7608 nop.i 0
7609};;
7610{ .mfi
7611 // store signgam if size of variable is 4 bytes
7612(p6) st4 [rSgnGamAddr] = rSgnGam
7613 nop.f 0
7614 nop.i 0
7615}
7616{ .mfb
7617 // store signgam if size of variable is 8 bytes
7618(p7) st8 [rSgnGamAddr] = rSgnGam
// reciprocal approximation of 1/0 in s0: presumably yields +inf and raises
// the divide-by-zero flag -- confirm against the frcpa definition
7619 frcpa.s0 FR_RESULT, p0 = f1, f0
7620 br.cond.sptk __libm_error_region
7621};;
7622
7623GLOBAL_LIBM_END(__libm_lgammal)
7624
7625
7626
// Common error exit. Expects FR_X, FR_Y and FR_RESULT to be set by the code
// that branches here (the visible callers also set GR_Parameter_TAG).
// Allocates a 64-byte frame and spills the three FP values to it:
//   new sp + 16 : FR_X       (GR_Parameter_X)
//   new sp + 32 : FR_Y       (GR_Parameter_Y)
//   new sp + 48 : FR_RESULT  (GR_Parameter_RESULT)
// Calls __libm_error_support, reloads f8 from the RESULT slot, restores
// sp, b0, gp and ar.pfs, and returns.
7627LOCAL_LIBM_ENTRY(__libm_error_region)
7628.prologue
7629{ .mfi
7630 add GR_Parameter_Y=-32,sp // Parameter 2 value
7631 nop.f 0
7632.save ar.pfs,GR_SAVE_PFS
7633 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
7634}
7635{ .mfi
7636.fframe 64
7637 add sp=-64,sp // Create new stack
7638 nop.f 0
7639 mov GR_SAVE_GP=gp // Save gp
7640};;
7641{ .mmi
// post-increment leaves GR_Parameter_Y pointing at new sp + 48
7642 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
7643 add GR_Parameter_X = 16,sp // Parameter 1 address
7644.save b0, GR_SAVE_B0
7645 mov GR_SAVE_B0=b0 // Save b0
7646};;
7647.body
7648{ .mib
7649 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
7650 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
7651 nop.b 0
7652}
7653{ .mib
// the store uses GR_Parameter_Y before the add in this group rewinds it
// by 16 so that it points at the FR_Y slot again
7654 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
7655 add GR_Parameter_Y = -16,GR_Parameter_Y
7656 br.call.sptk b0=__libm_error_support# // Call error handling function
7657};;
7658{ .mmi
// recompute the RESULT slot address from sp after the call
7659 add GR_Parameter_RESULT = 48,sp
7660 nop.m 999
7661 nop.i 999
7662};;
7663{ .mmi
7664 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
7665.restore sp
7666 add sp = 64,sp // Restore stack pointer
7667 mov b0 = GR_SAVE_B0 // Restore return address
7668};;
7669{ .mib
7670 mov gp = GR_SAVE_GP // Restore gp
7671 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
7672 br.ret.sptk b0 // Return
7673};;
7674
7675LOCAL_LIBM_END(__libm_error_region#)
7676
7677.type __libm_error_support#,@function
7678.global __libm_error_support#