]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | .file "libm_lgammal.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2002 - 2005, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
7 | // Contributed 2002 by the Intel Numerics Group, Intel Corporation | |
8 | // | |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | ||
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT | |
26 | // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, | |
29 | // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code,and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
39 | // | |
40 | //********************************************************************* | |
41 | // | |
42 | // History: | |
43 | // 03/28/02 Original version | |
44 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
45 | // 08/21/02 Added support of SIGN(GAMMA(x)) calculation | |
46 | // 09/26/02 Algorithm description improved | |
47 | // 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero | |
48 | // 02/10/03 Reordered header: .section, .global, .proc, .align | |
49 | // 03/31/05 Reformatted delimiters between data tables | |
50 | // | |
51 | //********************************************************************* | |
52 | // | |
53 | // Function: __libm_lgammal(long double x, int* signgam, int szsigngam) | |
54 | // computes the principal value of the logarithm of the GAMMA function | |
55 | // of x. Signum of GAMMA(x) is stored to memory starting at the address | |
56 | // specified by the signgam. | |
57 | // | |
58 | //********************************************************************* | |
59 | // | |
60 | // Resources Used: | |
61 | // | |
62 | // Floating-Point Registers: f8 (Input and Return Value) | |
63 | // f9-f15 | |
64 | // f32-f127 | |
65 | // | |
66 | // General Purpose Registers: | |
67 | // r2, r3, r8-r11, r14-r31 | |
68 | // r32-r65 | |
69 | // r66-r69 (Used to pass arguments to error handling routine) | |
70 | // | |
71 | // Predicate Registers: p6-p15 | |
72 | // | |
73 | //********************************************************************* | |
74 | // | |
75 | // IEEE Special Conditions: | |
76 | // | |
77 | // __libm_lgammal(+inf) = +inf | |
78 | // __libm_lgammal(-inf) = QNaN | |
79 | // __libm_lgammal(+/-0) = +inf | |
80 | // __libm_lgammal(x<0, x - integer) = QNaN | |
81 | // __libm_lgammal(SNaN) = QNaN | |
82 | // __libm_lgammal(QNaN) = QNaN | |
83 | // | |
84 | //********************************************************************* | |
85 | // | |
86 | // ALGORITHM DESCRIPTION | |
87 | // | |
88 | // Below we suppose that there is log(z) function which takes an long | |
89 | // double argument and returns result as a pair of long double numbers | |
90 | // lnHi and lnLo (such that sum lnHi + lnLo provides ~80 correct bits | |
91 | // of significand). Algorithm description for such log(z) function | |
92 | // see below. | |
93 | // Also, it this algorithm description we use the following notational | |
94 | // conventions: | |
95 | // a) pair A = (Ahi, Alo) means number A represented as sum of Ahi and Alo | |
96 | // b) C = A + B = (Ahi, Alo) + (Bhi, Blo) means multi-precision addition. | |
97 | // The result would be C = (Chi, Clo). Notice, that Clo shouldn't be | |
98 | // equal to Alo + Blo | |
99 | // c) D = A*B = (Ahi, Alo)*(Bhi, Blo) = (Dhi, Dlo) multi-precisiion | |
100 | // multiplication. | |
101 | // | |
102 | // So, lgammal has the following computational paths: | |
103 | // 1) |x| < 0.5 | |
104 | // P = A1*|x| + A2*|x|^2 + ... + A22*|x|^22 | |
105 | // A1, A2, A3 represented as a sum of two double precision | |
106 | // numbers and multi-precision computations are used for 3 higher | |
107 | // terms of the polynomial. We get polynomial as a sum of two | |
108 | // double extended numbers: P = (Phi, Plo) | |
109 | // 1.1) x > 0 | |
110 | // lgammal(x) = P - log(|x|) = (Phi, Plo) - (lnHi(|x|), lnLo(|x|)) | |
111 | // 1.2) x < 0 | |
112 | // lgammal(x) = -P - log(|x|) - log(sin(Pi*x)/(Pi*x)) | |
113 | // P and log(|x|) are computed by the same way as in 1.1; | |
114 | // - log(sin(Pi*x)/(Pi*x)) is approximated by a polynomial Plnsin. | |
115 | // Plnsin:= fLnSin2*|x|^2 + fLnSin4*|x|^4 + ... + fLnSin36*|x|^36 | |
116 | // The first coefficient of Plnsin is represented as sum of two | |
117 | // double precision numbers (fLnSin2, fLnSin2L). Multi-precision | |
118 | // computations for higher two terms of Plnsin are used. | |
119 | // So, the final result is reconstructed by the following formula | |
120 | // lgammal(x) = (-(Phi, Plo) - (lnHi(|x|), lnLo(|x|))) - | |
121 | // - (PlnsinHi,PlnsinLo) | |
122 | // | |
123 | // 2) 0.5 <= x < 0.75 -> t = x - 0.625 | |
124 | // -0.75 < x <= -0.5 -> t = x + 0.625 | |
125 | // 2.25 <= x < 4.0 -> t = x/2 - 1.5 | |
126 | // 4.0 <= x < 8.0 -> t = x/4 - 1.5 | |
127 | // -0.5 < x <= -0.40625 -> t = x + 0.5 | |
128 | // -2.6005859375 < x <= -2.5 -> t = x + 2.5 | |
129 | // 1.3125 <= x < 1.5625 -> t = x - LOC_MIN, where LOC_MIN is point in | |
130 | // which lgammal has local minimum. Exact | |
131 | // value can be found in the table below, | |
132 | // approximate value is ~1.46 | |
133 | // | |
134 | // lgammal(x) is approximated by the polynomial of 25th degree: P25(t) | |
135 | // P25(t) = A0 + A1*t + ... + A25*t^25 = (Phi, Plo) + t^4*P21(t), | |
136 | // where | |
137 | // (Phi, Plo) is sum of four highest terms of the polynomial P25(t): | |
138 | // (Phi, Plo) = ((A0, A0L) + (A1, A1L)*t) + t^2 *((A2, A2L) + (A3, A3L)*t), | |
139 | // (Ai, AiL) - coefficients represented as pairs of DP numbers. | |
140 | // | |
141 | // P21(t) = (PolC(t)*t^8 + PolD(t))*t^8 + PolE(t), | |
142 | // where | |
143 | // PolC(t) = C21*t^5 + C20*t^4 + ... + C16, | |
144 | // C21 = A25, C20 = A24, ..., C16 = A20 | |
145 | // | |
146 | // PolD(t) = D7*t^7 + D6*t^6 + ... + D0, | |
147 | // D7 = A19, D6 = A18, ..., D0 = A12 | |
148 | // | |
149 | // PolE(t) = E7*t^7 + E6*t^6 + ... + E0, | |
150 | // E7 = A11, E6 = A10, ..., E0 = A4 | |
151 | // | |
152 | // Cis and Dis are represented as double precision numbers, | |
153 | // Eis are represented as double extended numbers. | |
154 | // | |
155 | // 3) 0.75 <= x < 1.3125 -> t = x - 1.0 | |
156 | // 1.5625 <= x < 2.25 -> t = x - 2.0 | |
157 | // lgammal(x) is approximated by the polynomial of 25th degree: P25(t) | |
158 | // P25(t) = A1*t + ... + A25*t^25, and computations are carried out | |
159 | // by similar way as in the previous case | |
160 | // | |
161 | // 4) 10.0 < x <= Overflow Bound ("positive Sterling" range) | |
162 | // lgammal(x) is approximated using Sterling's formula: | |
163 | // lgammal(x) ~ ((x*(lnHi(x) - 1, lnLo(x))) - 0.5*(lnHi(x), lnLo(x))) + | |
164 | // + ((Chi, Clo) + S(1/x)) | |
165 | // where | |
166 | // C = (Chi, Clo) - pair of double precision numbers representing constant | |
167 | // 0.5*ln(2*Pi); | |
168 | // S(1/x) = 1/x * (B2 + B4*(1/x)^2 + ... + B20*(1/x)^18), B2, ..., B20 are | |
169 | // Bernulli numbers. S is computed in native precision and then added to | |
170 | // Clo; | |
171 | // lnHi(x) - 1 is computed in native precision and the multiprecision | |
172 | // multiplication (x, 0) *(lnHi(x) - 1, lnLo(x)) is used. | |
173 | // | |
174 | // 5) -INF < x <= -2^63, any negative integer < 0 | |
175 | // All numbers in this range are integers -> error handler is called | |
176 | // | |
177 | // 6) -2^63 < x <= -0.75 ("negative Sterling" range), x is "far" from root, | |
178 | // lgammal(-t) for positive t is approximated using the following formula: | |
179 | // lgammal(-t) = -lgammal(t)-log(t)-log(|dT|)+log(sin(Pi*|dT|)/(Pi*|dT|)) | |
180 | // where dT = -t -round_to_nearest_integer(-t) | |
181 | // Last item is approximated by the same polynomial as described in 1.2. | |
182 | // We split the whole range into three subranges due to different ways of | |
183 | // approximation of the first terms. | |
184 | // 6.1) -2^63 < x < -6.0 ("negative Sterling" range) | |
185 | // lgammal(t) is approximated exactly as in #4. The only difference that | |
186 | // for -13.0 < x < -6.0 subrange instead of Bernulli numbers we use their | |
187 | // minimax approximation on this range. | |
188 | // log(t), log(|dT|) are approximated by the log routine mentioned above. | |
189 | // 6.2) -6.0 < x <= -0.75, |x + 1|> 2^(-7) | |
190 | // log(t), log(|dT|) are approximated by the log routine mentioned above, | |
191 | // lgammal(t) is approximated by polynomials of the 25th degree similar | |
192 | // to ones from #2. Arguments z of the polynomials are as follows | |
193 | // a) 0.75 <= t < 1.0 - 2^(-7), z = 2*t - 1.5 | |
194 | // b) 1.0 - 2^(-7) < t < 2.0, z = t - 1.5 | |
195 | // c) 2.0 < t < 3.0, z = t/2 - 1.5 | |
196 | // d) 3.0 < t < 4.0, z = t/2 - 1.5. Notice, that range reduction is | |
197 | // the same as in case c) but the set of coefficients is different | |
198 | // e) 4.0 < t < 6.0, z = t/4 - 1.5 | |
199 | // 6.3) |x + 1| <= 2^(-7) | |
200 | // log(1 + (x-1)) is approximated by Taylor series, | |
201 | // log(sin(Pi*|dT|)/(Pi*|dT|)) is still approximated by polynomial but | |
202 | // it has just 4th degree. | |
203 | // log(|dT|) is approximated by the log routine mentioned above. | |
204 | // lgammal(-x) is approximated by polynomial of 8th degree from (-x + 1). | |
205 | // | |
206 | // 7) -20.0 < x < -2.0, x falls in root "neighbourhood". | |
207 | // "Neighbourhood" means that |lgammal(x)| < epsilon, where epsilon is | |
208 | // different for every root (and it is stored in the table), but typically | |
209 | // it is ~ 0.15. There are 35 roots significant from "double extended" | |
210 | // point of view. We split all the roots into two subsets: "left" and "right" | |
211 | // roots. Considering [-(N+1), -N] range we call root as "left" one if it | |
212 | // lies closer to -(N+1) and "right" otherwise. There is no "left" root in | |
213 | // the [-20, -19] range (it exists, but is insignificant for double extended | |
214 | // precision). To determine if x falls in root "neighbourhood" we store | |
215 | // significands of all the 35 roots as well as epsilon values (expressed | |
216 | // by the left and right bound). | |
217 | // In these ranges we approximate lgammal(x) by polynomial series of 19th | |
218 | // degree: | |
219 | // lgammal(x) = P19(t) = A0 + A1*t + ...+ A19*t^19, where t = x - EDP_Root, | |
220 | // EDP_Root is the exact value of the corresponding root rounded to double | |
221 | // extended precision. So, we have 35 different polynomials which make our | |
222 | // table rather big. We may hope that x falls in root "neighbourhood" | |
223 | // quite rarely -> ther might be no need in frequent use of different | |
224 | // polynomials. | |
225 | // A0, A1, A2, A3 are represented as pairs of double precision numbers, | |
226 | // A4, A5 are long doubles, and to decrease the size of the table we | |
227 | // keep the rest of coefficients in just double precision | |
228 | // | |
229 | //********************************************************************* | |
230 | // Algorithm for log(X) = (lnHi(X), lnLo(X)) | |
231 | // | |
232 | // ALGORITHM | |
233 | // | |
234 | // Here we use a table lookup method. The basic idea is that in | |
235 | // order to compute logl(Arg) for an argument Arg in [1,2), we | |
236 | // construct a value G such that G*Arg is close to 1 and that | |
237 | // logl(1/G) is obtainable easily from a table of values calculated | |
238 | // beforehand. Thus | |
239 | // | |
240 | // logl(Arg) = logl(1/G) + logl(G*Arg) | |
241 | // = logl(1/G) + logl(1 + (G*Arg - 1)) | |
242 | // | |
243 | // Because |G*Arg - 1| is small, the second term on the right hand | |
244 | // side can be approximated by a short polynomial. We elaborate | |
245 | // this method in four steps. | |
246 | // | |
247 | // Step 0: Initialization | |
248 | // | |
249 | // We need to calculate logl( X ). Obtain N, S_hi such that | |
250 | // | |
251 | // X = 2^N * S_hi exactly | |
252 | // | |
253 | // where S_hi in [1,2) | |
254 | // | |
255 | // Step 1: Argument Reduction | |
256 | // | |
257 | // Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate | |
258 | // | |
259 | // G := G_1 * G_2 * G_3 | |
260 | // r := (G * S_hi - 1) | |
261 | // | |
262 | // These G_j's have the property that the product is exactly | |
263 | // representable and that |r| < 2^(-12) as a result. | |
264 | // | |
265 | // Step 2: Approximation | |
266 | // | |
267 | // | |
268 | // logl(1 + r) is approximated by a short polynomial poly(r). | |
269 | // | |
270 | // Step 3: Reconstruction | |
271 | // | |
272 | // | |
273 | // Finally, logl( X ) is given by | |
274 | // | |
275 | // logl( X ) = logl( 2^N * S_hi ) | |
276 | // ~=~ N*logl(2) + logl(1/G) + logl(1 + r) | |
277 | // ~=~ N*logl(2) + logl(1/G) + poly(r). | |
278 | // | |
279 | // IMPLEMENTATION | |
280 | // | |
281 | // Step 0. Initialization | |
282 | // ---------------------- | |
283 | // | |
284 | // Z := X | |
285 | // N := unbaised exponent of Z | |
286 | // S_hi := 2^(-N) * Z | |
287 | // | |
288 | // Step 1. Argument Reduction | |
289 | // -------------------------- | |
290 | // | |
291 | // Let | |
292 | // | |
293 | // Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63 | |
294 | // | |
295 | // We obtain G_1, G_2, G_3 by the following steps. | |
296 | // | |
297 | // | |
298 | // Define X_0 := 1.d_1 d_2 ... d_14. This is extracted | |
299 | // from S_hi. | |
300 | // | |
301 | // Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated | |
302 | // to lsb = 2^(-4). | |
303 | // | |
304 | // Define index_1 := [ d_1 d_2 d_3 d_4 ]. | |
305 | // | |
306 | // Fetch Z_1 := (1/A_1) rounded UP in fixed point with | |
307 | // fixed point lsb = 2^(-15). | |
308 | // Z_1 looks like z_0.z_1 z_2 ... z_15 | |
309 | // Note that the fetching is done using index_1. | |
310 | // A_1 is actually not needed in the implementation | |
311 | // and is used here only to explain how is the value | |
312 | // Z_1 defined. | |
313 | // | |
314 | // Fetch G_1 := (1/A_1) truncated to 21 sig. bits. | |
315 | // floating pt. Again, fetching is done using index_1. A_1 | |
316 | // explains how G_1 is defined. | |
317 | // | |
318 | // Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14) | |
319 | // = 1.0 0 0 0 d_5 ... d_14 | |
c0c3f78a | 320 | // This is accomplished by integer multiplication. |
d5efd131 MF |
321 | // It is proved that X_1 indeed always begin |
322 | // with 1.0000 in fixed point. | |
323 | // | |
324 | // | |
325 | // Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 | |
326 | // truncated to lsb = 2^(-8). Similar to A_1, | |
327 | // A_2 is not needed in actual implementation. It | |
328 | // helps explain how some of the values are defined. | |
329 | // | |
330 | // Define index_2 := [ d_5 d_6 d_7 d_8 ]. | |
331 | // | |
332 | // Fetch Z_2 := (1/A_2) rounded UP in fixed point with | |
333 | // fixed point lsb = 2^(-15). Fetch done using index_2. | |
334 | // Z_2 looks like z_0.z_1 z_2 ... z_15 | |
335 | // | |
336 | // Fetch G_2 := (1/A_2) truncated to 21 sig. bits. | |
337 | // floating pt. | |
338 | // | |
339 | // Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14) | |
340 | // = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14 | |
c0c3f78a | 341 | // This is accomplished by integer multiplication. |
d5efd131 MF |
342 | // It is proved that X_2 indeed always begin |
343 | // with 1.00000000 in fixed point. | |
344 | // | |
345 | // | |
346 | // Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1. | |
347 | // This is 2^(-14) + X_2 truncated to lsb = 2^(-13). | |
348 | // | |
349 | // Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ]. | |
350 | // | |
351 | // Fetch G_3 := (1/A_3) truncated to 21 sig. bits. | |
352 | // floating pt. Fetch is done using index_3. | |
353 | // | |
354 | // Compute G := G_1 * G_2 * G_3. | |
355 | // | |
356 | // This is done exactly since each of G_j only has 21 sig. bits. | |
357 | // | |
358 | // Compute | |
359 | // | |
360 | // r := (G*S_hi - 1) | |
361 | // | |
362 | // | |
363 | // Step 2. Approximation | |
364 | // --------------------- | |
365 | // | |
366 | // This step computes an approximation to logl( 1 + r ) where r is the | |
367 | // reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13); | |
368 | // thus logl(1+r) can be approximated by a short polynomial: | |
369 | // | |
370 | // logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5 | |
371 | // | |
372 | // | |
373 | // Step 3. Reconstruction | |
374 | // ---------------------- | |
375 | // | |
376 | // This step computes the desired result of logl(X): | |
377 | // | |
378 | // logl(X) = logl( 2^N * S_hi ) | |
379 | // = N*logl(2) + logl( S_hi ) | |
380 | // = N*logl(2) + logl(1/G) + | |
381 | // logl(1 + G*S_hi - 1 ) | |
382 | // | |
383 | // logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers: | |
384 | // log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are | |
385 | // single-precision numbers and the low parts are double precision | |
386 | // numbers. These have the property that | |
387 | // | |
388 | // N*log2_hi + SUM ( log1byGj_hi ) | |
389 | // | |
390 | // is computable exactly in double-extended precision (64 sig. bits). | |
391 | // Finally | |
392 | // | |
393 | // lnHi(X) := N*log2_hi + SUM ( log1byGj_hi ) | |
394 | // lnLo(X) := poly_hi + [ poly_lo + | |
395 | // ( SUM ( log1byGj_lo ) + N*log2_lo ) ] | |
396 | // | |
397 | // | |
398 | //********************************************************************* | |
399 | // General Purpose Registers | |
400 | // scratch registers | |
401 | rPolDataPtr = r2 | |
402 | rLnSinDataPtr = r3 | |
403 | rExpX = r8 | |
404 | rSignifX = r9 | |
405 | rDelta = r10 | |
406 | rSignExpX = r11 | |
407 | GR_ad_z_1 = r14 | |
408 | r17Ones = r15 | |
409 | GR_Index1 = r16 | |
410 | rSignif1andQ = r17 | |
411 | GR_X_0 = r18 | |
412 | GR_X_1 = r19 | |
413 | GR_X_2 = r20 | |
414 | GR_Z_1 = r21 | |
415 | GR_Z_2 = r22 | |
416 | GR_N = r23 | |
417 | rExpHalf = r24 | |
418 | rExp8 = r25 | |
419 | rX0Dx = r25 | |
420 | GR_ad_tbl_1 = r26 | |
421 | GR_ad_tbl_2 = r27 | |
422 | GR_ad_tbl_3 = r28 | |
423 | GR_ad_q = r29 | |
424 | GR_ad_z_1 = r30 | |
425 | GR_ad_z_2 = r31 | |
426 | // stacked registers | |
427 | rPFS_SAVED = r32 | |
428 | GR_ad_z_3 = r33 | |
429 | rSgnGamAddr = r34 | |
430 | rSgnGamSize = r35 | |
431 | rLogDataPtr = r36 | |
432 | rZ1offsett = r37 | |
433 | rTmpPtr = r38 | |
434 | rTmpPtr2 = r39 | |
435 | rTmpPtr3 = r40 | |
436 | rExp2 = r41 | |
437 | rExp2tom7 = r42 | |
438 | rZ625 = r42 | |
439 | rExpOne = r43 | |
440 | rNegSingularity = r44 | |
441 | rXint = r45 | |
442 | rTbl1Addr = r46 | |
443 | rTbl2Addr = r47 | |
444 | rTbl3Addr = r48 | |
445 | rZ2Addr = r49 | |
446 | rRootsAddr = r50 | |
447 | rRootsBndAddr = r51 | |
448 | rRoot = r52 | |
449 | rRightBound = r53 | |
450 | rLeftBound = r54 | |
451 | rSignifDx = r55 | |
452 | rBernulliPtr = r56 | |
453 | rLnSinTmpPtr = r56 | |
454 | rIndex1Dx = r57 | |
455 | rIndexPol = r58 | |
456 | GR_Index3 = r59 | |
457 | GR_Index2 = r60 | |
458 | rSgnGam = r61 | |
459 | rXRnd = r62 | |
460 | ||
461 | GR_SAVE_B0 = r63 | |
462 | GR_SAVE_GP = r64 | |
463 | GR_SAVE_PFS = r65 | |
464 | // output parameters when calling error handling routine | |
465 | GR_Parameter_X = r66 | |
466 | GR_Parameter_Y = r67 | |
467 | GR_Parameter_RESULT = r68 | |
468 | GR_Parameter_TAG = r69 | |
469 | ||
470 | //******************************************************************** | |
471 | // Floating Point Registers | |
472 | // CAUTION: due to the lack of registers there exist (below in the code) | |
473 | // sometimes "unconventional" use of declared registers | |
474 | // | |
475 | fAbsX = f6 | |
476 | fDelX4 = f6 | |
477 | fSignifX = f7 | |
478 | // macros for error handling routine | |
479 | FR_X = f10 // first argument | |
480 | FR_Y = f1 // second argument (lgammal has just one) | |
481 | FR_RESULT = f8 // result | |
482 | ||
483 | // First 7 Bernulli numbers | |
484 | fB2 = f9 | |
485 | fLnDeltaL = f9 | |
486 | fXSqr = f9 | |
487 | fB4 = f10 | |
488 | fX4 = f10 | |
489 | fB6 = f11 | |
490 | fX6 = f11 | |
491 | fB8 = f12 | |
492 | fXSqrL = f12 | |
493 | fB10 = f13 | |
494 | fRes7H = f13 | |
495 | fB12 = f14 | |
496 | fRes7L = f14 | |
497 | fB14 = f15 | |
498 | ||
499 | // stack registers | |
500 | // Polynomial coefficients: A0, ..., A25 | |
501 | fA0 = f32 | |
502 | fA0L = f33 | |
503 | fInvXL = f33 | |
504 | fA1 = f34 | |
505 | fA1L = f35 | |
506 | fA2 = f36 | |
507 | fA2L = f37 | |
508 | fA3 = f38 | |
509 | fA3L = f39 | |
510 | fA4 = f40 | |
511 | fA4L = f41 | |
512 | fRes6H = f41 | |
513 | fA5 = f42 | |
514 | fB2L = f42 | |
515 | fA5L = f43 | |
516 | fMinNegStir = f43 | |
517 | fRes6L = f43 | |
518 | fA6 = f44 | |
519 | fMaxNegStir = f44 | |
520 | fA7 = f45 | |
521 | fLnDeltaH = f45 | |
522 | fA8 = f46 | |
523 | fBrnL = f46 | |
524 | fA9 = f47 | |
525 | fBrnH = f47 | |
526 | fA10 = f48 | |
527 | fRes5L = f48 | |
528 | fA11 = f49 | |
529 | fRes5H = f49 | |
530 | fA12 = f50 | |
531 | fDx6 = f50 | |
532 | fA13 = f51 | |
533 | fDx8 = f51 | |
534 | fA14 = f52 | |
535 | fDx4 = f52 | |
536 | fA15 = f53 | |
537 | fYL = f53 | |
538 | fh3Dx = f53 | |
539 | fA16 = f54 | |
540 | fYH = f54 | |
541 | fH3Dx = f54 | |
542 | fA17 = f55 | |
543 | fResLnDxL = f55 | |
544 | fG3Dx = f55 | |
545 | fA18 = f56 | |
546 | fResLnDxH = f56 | |
547 | fh2Dx = f56 | |
548 | fA19 = f57 | |
549 | fFloatNDx = f57 | |
550 | fA20 = f58 | |
551 | fPolyHiDx = f58 | |
552 | fhDx = f58 | |
553 | fA21 = f59 | |
554 | fRDxCub = f59 | |
555 | fHDx = f59 | |
556 | fA22 = f60 | |
557 | fRDxSq = f60 | |
558 | fGDx = f60 | |
559 | fA23 = f61 | |
560 | fPolyLoDx = f61 | |
561 | fInvX3 = f61 | |
562 | fA24 = f62 | |
563 | fRDx = f62 | |
564 | fInvX8 = f62 | |
565 | fA25 = f63 | |
566 | fInvX4 = f63 | |
567 | fPol = f64 | |
568 | fPolL = f65 | |
569 | // Coefficients of ln(sin(Pi*x)/Pi*x) | |
570 | fLnSin2 = f66 | |
571 | fLnSin2L = f67 | |
572 | fLnSin4 = f68 | |
573 | fLnSin6 = f69 | |
574 | fLnSin8 = f70 | |
575 | fLnSin10 = f71 | |
576 | fLnSin12 = f72 | |
577 | fLnSin14 = f73 | |
578 | fLnSin16 = f74 | |
579 | fLnSin18 = f75 | |
580 | fDelX8 = f75 | |
581 | fLnSin20 = f76 | |
582 | fLnSin22 = f77 | |
583 | fDelX6 = f77 | |
584 | fLnSin24 = f78 | |
585 | fLnSin26 = f79 | |
586 | fLnSin28 = f80 | |
587 | fLnSin30 = f81 | |
588 | fhDelX = f81 | |
589 | fLnSin32 = f82 | |
590 | fLnSin34 = f83 | |
591 | fLnSin36 = f84 | |
592 | fXint = f85 | |
593 | fDxSqr = f85 | |
594 | fRes3L = f86 | |
595 | fRes3H = f87 | |
596 | fRes4H = f88 | |
597 | fRes4L = f89 | |
598 | fResH = f90 | |
599 | fResL = f91 | |
600 | fDx = f92 | |
601 | FR_MHalf = f93 | |
602 | fRes1H = f94 | |
603 | fRes1L = f95 | |
604 | fRes2H = f96 | |
605 | fRes2L = f97 | |
606 | FR_FracX = f98 | |
607 | fRcpX = f99 | |
608 | fLnSinH = f99 | |
609 | fTwo = f100 | |
610 | fMOne = f100 | |
611 | FR_G = f101 | |
612 | FR_H = f102 | |
613 | FR_h = f103 | |
614 | FR_G2 = f104 | |
615 | FR_H2 = f105 | |
616 | FR_poly_lo = f106 | |
617 | FR_poly_hi = f107 | |
618 | FR_h2 = f108 | |
619 | FR_rsq = f109 | |
620 | FR_r = f110 | |
621 | FR_log2_hi = f111 | |
622 | FR_log2_lo = f112 | |
623 | fFloatN = f113 | |
624 | FR_Q4 = f114 | |
625 | FR_G3 = f115 | |
626 | FR_H3 = f116 | |
627 | FR_h3 = f117 | |
628 | FR_Q3 = f118 | |
629 | FR_Q2 = f119 | |
630 | FR_Q1 = f120 | |
631 | fThirteen = f121 | |
632 | fSix = f121 | |
633 | FR_rcub = f121 | |
634 | // Last three Bernulli numbers | |
635 | fB16 = f122 | |
636 | fB18 = f123 | |
637 | fB20 = f124 | |
638 | fInvX = f125 | |
639 | fLnSinL = f125 | |
640 | fDxSqrL = f126 | |
641 | fFltIntX = f126 | |
642 | fRoot = f127 | |
643 | fNormDx = f127 | |
644 | ||
645 | // Data tables | |
646 | //============================================================== | |
647 | RODATA | |
648 | // ************* DO NOT CHANGE THE ORDER OF THESE TABLES ************* | |
649 | .align 16 | |
650 | LOCAL_OBJECT_START(lgammal_right_roots_data) | |
651 | // List of all right roots themselves | |
652 | data8 0x9D3FE4B007C360AB, 0x0000C000 // Range [-3, -2] | |
653 | data8 0xC9306DE4F2CD7BEE, 0x0000C000 // Range [-4, -3] | |
654 | data8 0x814273C2CCAC0618, 0x0000C001 // Range [-5, -4] | |
655 | data8 0xA04352BF85B6C865, 0x0000C001 // Range [-6, -5] | |
656 | data8 0xC00B592C4BE4676C, 0x0000C001 // Range [-7, -6] | |
657 | data8 0xE0019FEF6FF0F5BF, 0x0000C001 // Range [-8, -7] | |
658 | data8 0x80001A01459FC9F6, 0x0000C002 // Range [-9, -8] | |
659 | data8 0x900002E3BB47D86D, 0x0000C002 // Range [-10, -9] | |
660 | data8 0xA0000049F93BB992, 0x0000C002 // Range [-11, -10] | |
661 | data8 0xB0000006B9915316, 0x0000C002 // Range [-12, -11] | |
662 | data8 0xC00000008F76C773, 0x0000C002 // Range [-13, -12] | |
663 | data8 0xD00000000B09230A, 0x0000C002 // Range [-14, -13] | |
664 | data8 0xE000000000C9CBA5, 0x0000C002 // Range [-15, -14] | |
665 | data8 0xF0000000000D73FA, 0x0000C002 // Range [-16, -15] | |
666 | data8 0x8000000000006BA0, 0x0000C003 // Range [-17, -16] | |
667 | data8 0x8800000000000655, 0x0000C003 // Range [-18, -17] | |
668 | data8 0x900000000000005A, 0x0000C003 // Range [-19, -18] | |
669 | data8 0x9800000000000005, 0x0000C003 // Range [-20, -19] | |
670 | // List of bounds of ranges with special polynomial approximation near root | |
671 | // Only significands of bounds are actually stored | |
672 | data8 0xA000000000000000, 0x9800000000000000 // Bounds for root on [-3, -2] | |
673 | data8 0xCAB88035C5EFBB41, 0xC7E05E31F4B02115 // Bounds for root on [-4, -3] | |
674 | data8 0x817831B899735C72, 0x8114633941B8053A // Bounds for root on [-5, -4] | |
675 | data8 0xA04E8B34C6AA9476, 0xA039B4A42978197B // Bounds for root on [-6, -5] | |
676 | data8 0xC00D3D5E588A78A9, 0xC009BA25F7E858A6 // Bounds for root on [-7, -6] | |
677 | data8 0xE001E54202991EB4, 0xE001648416CE897F // Bounds for root on [-8, -7] | |
678 | data8 0x80001E56D13A6B9F, 0x8000164A3BAD888A // Bounds for root on [-9, -8] | |
679 | data8 0x9000035F0529272A, 0x9000027A0E3D94F0 // Bounds for root on [-10, -9] | |
680 | data8 0xA00000564D705880, 0xA000003F67EA0CC7 // Bounds for root on [-11, -10] | |
681 | data8 0xB0000007D87EE0EF, 0xB0000005C3A122A5 // Bounds for root on [-12, -11] | |
682 | data8 0xC0000000A75FE8B1, 0xC00000007AF818AC // Bounds for root on [-13, -12] | |
683 | data8 0xD00000000CDFFE36, 0xD000000009758BBF // Bounds for root on [-14, -13] | |
684 | data8 0xE000000000EB6D96, 0xE000000000ACF7B2 // Bounds for root on [-15, -14] | |
685 | data8 0xF0000000000FB1F9, 0xF0000000000B87FB // Bounds for root on [-16, -15] | |
686 | data8 0x8000000000007D90, 0x8000000000005C40 // Bounds for root on [-17, -16] | |
687 | data8 0x8800000000000763, 0x880000000000056D // Bounds for root on [-18, -17] | |
688 | data8 0x9000000000000069, 0x900000000000004D // Bounds for root on [-19, -18] | |
689 | data8 0x9800000000000006, 0x9800000000000005 // Bounds for root on [-20, -19] | |
690 | // List of all left roots themselves | |
691 | data8 0xAFDA0850DEC8065E, 0x0000C000 // Range [-3, -2] | |
692 | data8 0xFD238AA3E17F285C, 0x0000C000 // Range [-4, -3] | |
693 | data8 0x9FBABBD37757E6A2, 0x0000C001 // Range [-5, -4] | |
694 | data8 0xBFF497AC8FA06AFC, 0x0000C001 // Range [-6, -5] | |
695 | data8 0xDFFE5FBB5C377FE8, 0x0000C001 // Range [-7, -6] | |
696 | data8 0xFFFFCBFC0ACE7879, 0x0000C001 // Range [-8, -7] | |
697 | data8 0x8FFFFD1C425E8100, 0x0000C002 // Range [-9, -8] | |
698 | data8 0x9FFFFFB606BDFDCD, 0x0000C002 // Range [-10, -9] | |
699 | data8 0xAFFFFFF9466E9F1B, 0x0000C002 // Range [-11, -10] | |
700 | data8 0xBFFFFFFF70893874, 0x0000C002 // Range [-12, -11] | |
701 | data8 0xCFFFFFFFF4F6DCF6, 0x0000C002 // Range [-13, -12] | |
702 | data8 0xDFFFFFFFFF36345B, 0x0000C002 // Range [-14, -13] | |
703 | data8 0xEFFFFFFFFFF28C06, 0x0000C002 // Range [-15, -14] | |
704 | data8 0xFFFFFFFFFFFF28C0, 0x0000C002 // Range [-16, -15] | |
705 | data8 0x87FFFFFFFFFFF9AB, 0x0000C003 // Range [-17, -16] | |
706 | data8 0x8FFFFFFFFFFFFFA6, 0x0000C003 // Range [-18, -17] | |
707 | data8 0x97FFFFFFFFFFFFFB, 0x0000C003 // Range [-19, -18] | |
708 | data8 0x0000000000000000, 0x00000000 // pad to keep logic in the main path | |
709 | // List of bounds of ranges with special polynomial approximation near root | |
710 | // Only significands of bounds are actually stored | |
711 | data8 0xB235880944CC758E, 0xADD2F1A9FBE76C8B // Bounds for root on [-3, -2] | |
712 | data8 0xFD8E7844F307B07C, 0xFCA655C2152BDE4D // Bounds for root on [-4, -3] | |
713 | data8 0x9FC4D876EE546967, 0x9FAEE4AF68BC4292 // Bounds for root on [-5, -4] | |
714 | data8 0xBFF641FFBFCC44F1, 0xBFF2A47919F4BA89 // Bounds for root on [-6, -5] | |
715 | data8 0xDFFE9C803DEFDD59, 0xDFFE18932EB723FE // Bounds for root on [-7, -6] | |
716 | data8 0xFFFFD393FA47AFC3, 0xFFFFC317CF638AE1 // Bounds for root on [-8, -7] | |
717 | data8 0x8FFFFD8840279925, 0x8FFFFC9DCECEEE92 // Bounds for root on [-9, -8] | |
718 | data8 0x9FFFFFC0D34E2AF8, 0x9FFFFFA9619AA3B7 // Bounds for root on [-10, -9] | |
719 | data8 0xAFFFFFFA41C18246, 0xAFFFFFF82025A23C // Bounds for root on [-11, -10] | |
720 | data8 0xBFFFFFFF857ACB4E, 0xBFFFFFFF58032378 // Bounds for root on [-12, -11] | |
721 | data8 0xCFFFFFFFF6934AB8, 0xCFFFFFFFF313EF0A // Bounds for root on [-13, -12] | |
722 | data8 0xDFFFFFFFFF53A9E9, 0xDFFFFFFFFF13B5A5 // Bounds for root on [-14, -13] | |
723 | data8 0xEFFFFFFFFFF482CB, 0xEFFFFFFFFFF03F4F // Bounds for root on [-15, -14] | |
724 | data8 0xFFFFFFFFFFFF482D, 0xFFFFFFFFFFFF03F5 // Bounds for root on [-16, -15] | |
725 | data8 0x87FFFFFFFFFFFA98, 0x87FFFFFFFFFFF896 // Bounds for root on [-17, -16] | |
726 | data8 0x8FFFFFFFFFFFFFB3, 0x8FFFFFFFFFFFFF97 // Bounds for root on [-18, -17] | |
727 | data8 0x97FFFFFFFFFFFFFC, 0x97FFFFFFFFFFFFFB // Bounds for root on [-19, -18] | |
728 | LOCAL_OBJECT_END(lgammal_right_roots_data) | |
729 | ||
730 | LOCAL_OBJECT_START(lgammal_0_Half_data) | |
731 | // Polynomial coefficients for the lgammal(x), 0.0 < |x| < 0.5 | |
732 | data8 0xBFD9A4D55BEAB2D6, 0xBC8AA3C097746D1F //A3 | |
733 | data8 0x3FEA51A6625307D3, 0x3C7180E7BD2D0DCC //A2 | |
734 | data8 0xBFE2788CFC6FB618, 0xBC9E9346C4692BCC //A1 | |
735 | data8 0x8A8991563EC1BD13, 0x00003FFD //A4 | |
736 | data8 0xD45CE0BD52C27EF2, 0x0000BFFC //A5 | |
737 | data8 0xADA06587FA2BBD47, 0x00003FFC //A6 | |
738 | data8 0x9381D0ED2194902A, 0x0000BFFC //A7 | |
739 | data8 0x80859B3CF92D4192, 0x00003FFC //A8 | |
740 | data8 0xE4033517C622A946, 0x0000BFFB //A9 | |
741 | data8 0xCD00CE67A51FC82A, 0x00003FFB //A10 | |
742 | data8 0xBA44E2A96C3B5700, 0x0000BFFB //A11 | |
743 | data8 0xAAAD008FA46DBD99, 0x00003FFB //A12 | |
744 | data8 0x9D604AC65A41153D, 0x0000BFFB //A13 | |
745 | data8 0x917CECB864B5A861, 0x00003FFB //A14 | |
746 | data8 0x85A4810EB730FDE4, 0x0000BFFB //A15 | |
747 | data8 0xEF2761C38BD21F77, 0x00003FFA //A16 | |
748 | data8 0xC913043A128367DA, 0x0000BFFA //A17 | |
749 | data8 0x96A29B71FF7AFFAA, 0x00003FFA //A18 | |
750 | data8 0xBB9FFA1A5FE649BB, 0x0000BFF9 //A19 | |
751 | data8 0xB17982CD2DAA0EE3, 0x00003FF8 //A20 | |
752 | data8 0xDE1DDCBFFB9453F0, 0x0000BFF6 //A21 | |
753 | data8 0x87FBF5D7ACD9FA9D, 0x00003FF4 //A22 | |
754 | LOCAL_OBJECT_END(lgammal_0_Half_data) | |
755 | ||
756 | LOCAL_OBJECT_START(Constants_Q) | |
757 | // log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 | |
758 | data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 | |
759 | data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 | |
760 | data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 | |
761 | data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 | |
762 | data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 | |
763 | data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 | |
764 | LOCAL_OBJECT_END(Constants_Q) | |
765 | ||
766 | LOCAL_OBJECT_START(Constants_Z_1) | |
767 | // Z1 - 16 bit fixed | |
768 | data4 0x00008000 | |
769 | data4 0x00007879 | |
770 | data4 0x000071C8 | |
771 | data4 0x00006BCB | |
772 | data4 0x00006667 | |
773 | data4 0x00006187 | |
774 | data4 0x00005D18 | |
775 | data4 0x0000590C | |
776 | data4 0x00005556 | |
777 | data4 0x000051EC | |
778 | data4 0x00004EC5 | |
779 | data4 0x00004BDB | |
780 | data4 0x00004925 | |
781 | data4 0x0000469F | |
782 | data4 0x00004445 | |
783 | data4 0x00004211 | |
784 | LOCAL_OBJECT_END(Constants_Z_1) | |
785 | ||
786 | LOCAL_OBJECT_START(Constants_G_H_h1) | |
787 | // G1 and H1 - IEEE single and h1 - IEEE double | |
788 | data4 0x3F800000,0x00000000,0x00000000,0x00000000 | |
789 | data4 0x3F70F0F0,0x3D785196,0x617D741C,0x3DA163A6 | |
790 | data4 0x3F638E38,0x3DF13843,0xCBD3D5BB,0x3E2C55E6 | |
791 | data4 0x3F579430,0x3E2FF9A0,0xD86EA5E7,0xBE3EB0BF | |
792 | data4 0x3F4CCCC8,0x3E647FD6,0x86B12760,0x3E2E6A8C | |
793 | data4 0x3F430C30,0x3E8B3AE7,0x5C0739BA,0x3E47574C | |
794 | data4 0x3F3A2E88,0x3EA30C68,0x13E8AF2F,0x3E20E30F | |
795 | data4 0x3F321640,0x3EB9CEC8,0xF2C630BD,0xBE42885B | |
796 | data4 0x3F2AAAA8,0x3ECF9927,0x97E577C6,0x3E497F34 | |
797 | data4 0x3F23D708,0x3EE47FC5,0xA6B0A5AB,0x3E3E6A6E | |
798 | data4 0x3F1D89D8,0x3EF8947D,0xD328D9BE,0xBDF43E3C | |
799 | data4 0x3F17B420,0x3F05F3A1,0x0ADB090A,0x3E4094C3 | |
800 | data4 0x3F124920,0x3F0F4303,0xFC1FE510,0xBE28FBB2 | |
801 | data4 0x3F0D3DC8,0x3F183EBF,0x10FDE3FA,0x3E3A7895 | |
802 | data4 0x3F088888,0x3F20EC80,0x7CC8C98F,0x3E508CE5 | |
803 | data4 0x3F042108,0x3F29516A,0xA223106C,0xBE534874 | |
804 | LOCAL_OBJECT_END(Constants_G_H_h1) | |
805 | ||
806 | LOCAL_OBJECT_START(Constants_Z_2) | |
807 | // Z2 - 16 bit fixed | |
808 | data4 0x00008000 | |
809 | data4 0x00007F81 | |
810 | data4 0x00007F02 | |
811 | data4 0x00007E85 | |
812 | data4 0x00007E08 | |
813 | data4 0x00007D8D | |
814 | data4 0x00007D12 | |
815 | data4 0x00007C98 | |
816 | data4 0x00007C20 | |
817 | data4 0x00007BA8 | |
818 | data4 0x00007B31 | |
819 | data4 0x00007ABB | |
820 | data4 0x00007A45 | |
821 | data4 0x000079D1 | |
822 | data4 0x0000795D | |
823 | data4 0x000078EB | |
824 | LOCAL_OBJECT_END(Constants_Z_2) | |
825 | ||
826 | LOCAL_OBJECT_START(Constants_G_H_h2) | |
827 | // G2 and H2 - IEEE single and h2 - IEEE double | |
828 | data4 0x3F800000,0x00000000,0x00000000,0x00000000 | |
829 | data4 0x3F7F00F8,0x3B7F875D,0x22C42273,0x3DB5A116 | |
830 | data4 0x3F7E03F8,0x3BFF015B,0x21F86ED3,0x3DE620CF | |
831 | data4 0x3F7D08E0,0x3C3EE393,0x484F34ED,0xBDAFA07E | |
832 | data4 0x3F7C0FC0,0x3C7E0586,0x3860BCF6,0xBDFE07F0 | |
833 | data4 0x3F7B1880,0x3C9E75D2,0xA78093D6,0x3DEA370F | |
834 | data4 0x3F7A2328,0x3CBDC97A,0x72A753D0,0x3DFF5791 | |
835 | data4 0x3F792FB0,0x3CDCFE47,0xA7EF896B,0x3DFEBE6C | |
836 | data4 0x3F783E08,0x3CFC15D0,0x409ECB43,0x3E0CF156 | |
837 | data4 0x3F774E38,0x3D0D874D,0xFFEF71DF,0xBE0B6F97 | |
838 | data4 0x3F766038,0x3D1CF49B,0x5D59EEE8,0xBE080483 | |
839 | data4 0x3F757400,0x3D2C531D,0xA9192A74,0x3E1F91E9 | |
840 | data4 0x3F748988,0x3D3BA322,0xBF72A8CD,0xBE139A06 | |
841 | data4 0x3F73A0D0,0x3D4AE46F,0xF8FBA6CF,0x3E1D9202 | |
842 | data4 0x3F72B9D0,0x3D5A1756,0xBA796223,0xBE1DCCC4 | |
843 | data4 0x3F71D488,0x3D693B9D,0xB6B7C239,0xBE049391 | |
844 | LOCAL_OBJECT_END(Constants_G_H_h2) | |
845 | ||
846 | LOCAL_OBJECT_START(Constants_G_H_h3) | |
847 | // G3 and H3 - IEEE single and h3 - IEEE double | |
848 | data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595 | |
849 | data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2 | |
850 | data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D | |
851 | data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291 | |
852 | data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8 | |
853 | data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707 | |
854 | data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9 | |
855 | data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47 | |
856 | data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E | |
857 | data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D | |
858 | data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441 | |
859 | data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95 | |
860 | data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC | |
861 | data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337 | |
862 | data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B | |
863 | data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B | |
864 | data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21 | |
865 | data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4 | |
866 | data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070 | |
867 | data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC | |
868 | data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83 | |
869 | data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40 | |
870 | data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7 | |
871 | data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B | |
872 | data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E | |
873 | data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06 | |
874 | data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1 | |
875 | data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103 | |
876 | data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B | |
877 | data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19 | |
878 | data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502 | |
879 | data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17 | |
880 | LOCAL_OBJECT_END(Constants_G_H_h3) | |
881 | ||
882 | LOCAL_OBJECT_START(lgammal_data) | |
883 | // Positive overflow value | |
884 | data8 0xB8D54C8BFFFDEBF4, 0x00007FF1 | |
885 | LOCAL_OBJECT_END(lgammal_data) | |
886 | ||
887 | LOCAL_OBJECT_START(lgammal_Stirling) | |
888 | // Coefficients needed for Strirling's formula | |
889 | data8 0x3FED67F1C864BEB4 // High part of 0.5*ln(2*Pi) | |
890 | data8 0x3C94D252F2400510 // Low part of 0.5*ln(2*Pi) | |
891 | // | |
892 | // Bernulli numbers used in Striling's formula for -2^63 < |x| < -13.0 | |
893 | //(B1H, B1L) = 8.3333333333333333333262747254e-02 | |
894 | data8 0x3FB5555555555555, 0x3C55555555555555 | |
895 | data8 0xB60B60B60B60B60B, 0x0000BFF6 //B2 = -2.7777777777777777777777777778e-03 | |
896 | data8 0xD00D00D00D00D00D, 0x00003FF4 //B3 = 7.9365079365079365079365079365e-04 | |
897 | data8 0x9C09C09C09C09C0A, 0x0000BFF4 //B4 = -5.9523809523809523809523809524e-04 | |
898 | data8 0xDCA8F158C7F91AB8, 0x00003FF4 //B5 = 8.4175084175084175084175084175e-04 | |
899 | data8 0xFB5586CCC9E3E410, 0x0000BFF5 //B6 = -1.9175269175269175269175269175e-03 | |
900 | data8 0xD20D20D20D20D20D, 0x00003FF7 //B7 = 6.4102564102564102564102564103e-03 | |
901 | data8 0xF21436587A9CBEE1, 0x0000BFF9 //B8 = -2.9550653594771241830065359477e-02 | |
902 | data8 0xB7F4B1C0F033FFD1, 0x00003FFC //B9 = 1.7964437236883057316493849002e-01 | |
903 | data8 0xB23B3808C0F9CF6E, 0x0000BFFF //B10 = -1.3924322169059011164274322169e+00 | |
904 | // Polynomial coefficients for Stirling's formula, -13.0 < x < -6.0 | |
905 | data8 0x3FB5555555555555, 0x3C4D75060289C58B //A0 | |
906 | data8 0xB60B60B60B0F0876, 0x0000BFF6 //A1 | |
907 | data8 0xD00D00CE54B1256C, 0x00003FF4 //A2 | |
908 | data8 0x9C09BF46B58F75E1, 0x0000BFF4 //A3 | |
909 | data8 0xDCA8483BC91ACC6D, 0x00003FF4 //A4 | |
910 | data8 0xFB3965C939CC9FEE, 0x0000BFF5 //A5 | |
911 | data8 0xD0723ADE3F0BC401, 0x00003FF7 //A6 | |
912 | data8 0xE1ED7434E81F0B73, 0x0000BFF9 //A7 | |
913 | data8 0x8069C6982F993283, 0x00003FFC //A8 | |
914 | data8 0xC271F65BFA5BEE3F, 0x0000BFFD //A9 | |
915 | LOCAL_OBJECT_END(lgammal_Stirling) | |
916 | ||
917 | LOCAL_OBJECT_START(lgammal_lnsin_data) | |
918 | // polynomial approximation of -ln(sin(Pi*x)/(Pi*x)), 0 < x <= 0.5 | |
919 | data8 0x3FFA51A6625307D3, 0x3C81873332FAF94C //A2 | |
920 | data8 0x8A8991563EC241C3, 0x00003FFE //A4 | |
921 | data8 0xADA06588061805DF, 0x00003FFD //A6 | |
922 | data8 0x80859B57C338D0F7, 0x00003FFD //A8 | |
923 | data8 0xCD00F1C2D78754BD, 0x00003FFC //A10 | |
924 | data8 0xAAB56B1D3A1F4655, 0x00003FFC //A12 | |
925 | data8 0x924B6F2FBBED12B1, 0x00003FFC //A14 | |
926 | data8 0x80008E58765F43FC, 0x00003FFC //A16 | |
927 | data8 0x3FBC718EC115E429//A18 | |
928 | data8 0x3FB99CE544FE183E//A20 | |
929 | data8 0x3FB7251C09EAAD89//A22 | |
930 | data8 0x3FB64A970733628C//A24 | |
931 | data8 0x3FAC92D6802A3498//A26 | |
932 | data8 0x3FC47E1165261586//A28 | |
933 | data8 0xBFCA1BAA434750D4//A30 | |
934 | data8 0x3FE460001C4D5961//A32 | |
935 | data8 0xBFE6F06A3E4908AD//A34 | |
936 | data8 0x3FE300889EBB203A//A36 | |
937 | LOCAL_OBJECT_END(lgammal_lnsin_data) | |
938 | ||
939 | LOCAL_OBJECT_START(lgammal_half_3Q_data) | |
940 | // Polynomial coefficients for the lgammal(x), 0.5 <= x < 0.75 | |
941 | data8 0xBFF7A648EE90C62E, 0x3C713F326857E066 // A3, A0L | |
942 | data8 0xBFF73E4B8BA780AE, 0xBCA953BC788877EF // A1, A1L | |
943 | data8 0x403774DCD58D0291, 0xC0415254D5AE6623 // D0, D1 | |
944 | data8 0x40B07213855CBFB0, 0xC0B8855E25D2D229 // C20, C21 | |
945 | data8 0x3FFB359F85FF5000, 0x3C9BAECE6EF9EF3A // A2, A2L | |
946 | data8 0x3FD717D498A3A8CC, 0xBC9088E101CFEDFA // A0, A3L | |
947 | data8 0xAFEF36CC5AEC3FF0, 0x00004002 // E6 | |
948 | data8 0xABE2054E1C34E791, 0x00004001 // E4 | |
949 | data8 0xB39343637B2900D1, 0x00004000 // E2 | |
950 | data8 0xD74FB710D53F58F6, 0x00003FFF // E0 | |
951 | data8 0x4070655963BA4256, 0xC078DA9D263C4EA3 // D6, D7 | |
952 | data8 0x405CD2B6A9B90978, 0xC065B3B9F4F4F171 // D4, D5 | |
953 | data8 0x4049BC2204CF61FF, 0xC05337227E0BA152 // D2, D3 | |
954 | data8 0x4095509A50C07A96, 0xC0A0747949D2FB45 // C18, C19 | |
955 | data8 0x4082ECCBAD709414, 0xC08CD02FB088A702 // C16, C17 | |
956 | data8 0xFFE4B2A61B508DD5, 0x0000C002 // E7 | |
957 | data8 0xF461ADB8AE17E0A5, 0x0000C001 // E5 | |
958 | data8 0xF5BE8B0B90325F20, 0x0000C000 // E3 | |
959 | data8 0x877B275F3FB78DCA, 0x0000C000 // E1 | |
960 | LOCAL_OBJECT_END(lgammal_half_3Q_data) | |
961 | ||
962 | LOCAL_OBJECT_START(lgammal_half_3Q_neg_data) | |
963 | // Polynomial coefficients for the lgammal(x), -0.75 < x <= -0.5 | |
964 | data8 0xC014836EFD94899C, 0x3C9835679663B44F // A3, A0L | |
965 | data8 0xBFF276C7B4FB1875, 0xBC92D3D9FA29A1C0 // A1, A1L | |
966 | data8 0x40C5178F24E1A435, 0xC0D9DE84FBC5D76A // D0, D1 | |
967 | data8 0x41D4D1B236BF6E93, 0xC1EBB0445CE58550 // C20, C21 | |
968 | data8 0x4015718CD67F63D3, 0x3CC5354B6F04B59C // A2, A2L | |
969 | data8 0x3FF554493087E1ED, 0xBCB72715E37B02B9 // A0, A3L | |
970 | data8 0xE4AC7E915FA72229, 0x00004009 // E6 | |
971 | data8 0xA28244206395FCC6, 0x00004007 // E4 | |
972 | data8 0xFB045F19C07B2544, 0x00004004 // E2 | |
973 | data8 0xE5C8A6E6A9BA7D7B, 0x00004002 // E0 | |
974 | data8 0x4143943B55BF5118, 0xC158AC05EA675406 // D6, D7 | |
975 | data8 0x4118F6833D19717C, 0xC12F51A6F375CC80 // D4, D5 | |
976 | data8 0x40F00C209483481C, 0xC103F1DABF750259 // D2, D3 | |
977 | data8 0x4191038F2D8F9E40, 0xC1A413066DA8AE4A // C18, C19 | |
978 | data8 0x4170B537EDD833DE, 0xC1857E79424C61CE // C16, C17 | |
979 | data8 0x8941D8AB4855DB73, 0x0000C00B // E7 | |
980 | data8 0xBB822B131BD2E813, 0x0000C008 // E5 | |
981 | data8 0x852B4C03B83D2D4F, 0x0000C006 // E3 | |
982 | data8 0xC754CA7E2DDC0F1F, 0x0000C003 // E1 | |
983 | LOCAL_OBJECT_END(lgammal_half_3Q_neg_data) | |
984 | ||
985 | LOCAL_OBJECT_START(lgammal_2Q_4_data) | |
986 | // Polynomial coefficients for the lgammal(x), 2.25 <= |x| < 4.0 | |
987 | data8 0xBFCA4D55BEAB2D6F, 0x3C7ABC9DA14141F5 // A3, A0L | |
988 | data8 0x3FFD8773039049E7, 0x3C66CB7957A95BA4 // A1, A1L | |
989 | data8 0x3F45C3CC79E91E7D, 0xBF3A8E5005937E97 // D0, D1 | |
990 | data8 0x3EC951E35E1C9203, 0xBEB030A90026C5DF // C20, C21 | |
991 | data8 0x3FE94699894C1F4C, 0x3C91884D21D123F1 // A2, A2L | |
992 | data8 0x3FE62E42FEFA39EF, 0xBC66480CEB70870F // A0, A3L | |
993 | data8 0xF1C2EAFF0B3A7579, 0x00003FF5 // E6 | |
994 | data8 0xB36AF863926B55A3, 0x00003FF7 // E4 | |
995 | data8 0x9620656185BB44CA, 0x00003FF9 // E2 | |
996 | data8 0xA264558FB0906AFF, 0x00003FFB // E0 | |
997 | data8 0x3F03D59E9666C961, 0xBEF91115893D84A6 // D6, D7 | |
998 | data8 0x3F19333611C46225, 0xBF0F89EB7D029870 // D4, D5 | |
999 | data8 0x3F3055A96B347AFE, 0xBF243B5153E178A8 // D2, D3 | |
1000 | data8 0x3ED9A4AEF30C4BB2, 0xBED388138B1CEFF2 // C18, C19 | |
1001 | data8 0x3EEF7945A3C3A254, 0xBEE36F32A938EF11 // C16, C17 | |
1002 | data8 0x9028923F47C82118, 0x0000BFF5 // E7 | |
1003 | data8 0xCE0DAAFB6DC93B22, 0x0000BFF6 // E5 | |
1004 | data8 0xA0D0983B34AC4C8D, 0x0000BFF8 // E3 | |
1005 | data8 0x94D6C50FEB8B0CE7, 0x0000BFFA // E1 | |
1006 | LOCAL_OBJECT_END(lgammal_2Q_4_data) | |
1007 | ||
1008 | LOCAL_OBJECT_START(lgammal_4_8_data) | |
1009 | // Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 8.0 | |
1010 | data8 0xBFD6626BC9B31B54, 0x3CAA53C82493A92B // A3, A0L | |
1011 | data8 0x401B4C420A50AD7C, 0x3C8C6E9929F789A3 // A1, A1L | |
1012 | data8 0x3F49410427E928C2, 0xBF3E312678F8C146 // D0, D1 | |
1013 | data8 0x3ED51065F7CD5848, 0xBED052782A03312F // C20, C21 | |
1014 | data8 0x3FF735973273D5EC, 0x3C831DFC65BF8CCF // A2, A2L | |
1015 | data8 0x401326643C4479C9, 0xBC6FA0498C5548A6 // A0, A3L | |
1016 | data8 0x9382D8B3CD4EB7E3, 0x00003FF6 // E6 | |
1017 | data8 0xE9F92CAD8A85CBCD, 0x00003FF7 // E4 | |
1018 | data8 0xD58389FE38258CEC, 0x00003FF9 // E2 | |
1019 | data8 0x81310136363AE8AA, 0x00003FFC // E0 | |
1020 | data8 0x3F04F0AE38E78570, 0xBEF9E2144BB8F03C // D6, D7 | |
1021 | data8 0x3F1B5E992A6CBC2A, 0xBF10F3F400113911 // D4, D5 | |
1022 | data8 0x3F323EE00AAB7DEE, 0xBF2640FDFA9FB637 // D2, D3 | |
1023 | data8 0x3ED2143EBAFF067A, 0xBEBBDEB92D6FF35D // C18, C19 | |
1024 | data8 0x3EF173A42B69AAA4, 0xBEE78B9951A2EAA5 // C16, C17 | |
1025 | data8 0xAB3CCAC6344E52AA, 0x0000BFF5 // E7 | |
1026 | data8 0x81ACCB8915B16508, 0x0000BFF7 // E5 | |
1027 | data8 0xDA62C7221102C426, 0x0000BFF8 // E3 | |
1028 | data8 0xDF1BD44C4083580A, 0x0000BFFA // E1 | |
1029 | LOCAL_OBJECT_END(lgammal_4_8_data) | |
1030 | ||
1031 | LOCAL_OBJECT_START(lgammal_loc_min_data) | |
1032 | // Polynomial coefficients for the lgammal(x), 1.3125 <= x < 1.5625 | |
1033 | data8 0xBB16C31AB5F1FB71, 0x00003FFF // xMin - point of local minimum | |
1034 | data8 0xBFC2E4278DC6BC23, 0xBC683DA8DDCA9650 // A3, A0L | |
1035 | data8 0x3BD4DB7D0CA61D5F, 0x386E719EDD01D801 // A1, A1L | |
1036 | data8 0x3F4CC72638E1D93F, 0xBF4228EC9953CCB9 // D0, D1 | |
1037 | data8 0x3ED222F97A04613E,0xBED3DDD58095CB6C // C20, C21 | |
1038 | data8 0x3FDEF72BC8EE38AB, 0x3C863AFF3FC48940 // A2, A2L | |
1039 | data8 0xBFBF19B9BCC38A41, 0xBC7425F1BFFC1442// A0, A3L | |
1040 | data8 0x941890032BEB34C3, 0x00003FF6 // E6 | |
1041 | data8 0xC7E701591CE534BC, 0x00003FF7 // E4 | |
1042 | data8 0x93373CBD05138DD4, 0x00003FF9 // E2 | |
1043 | data8 0x845A14A6A81C05D6, 0x00003FFB // E0 | |
1044 | data8 0x3F0F6C4DF6D47A13, 0xBF045DCDB5B49E19 // D6, D7 | |
1045 | data8 0x3F22E23345DDE59C, 0xBF1851159AFB1735 // D4, D5 | |
1046 | data8 0x3F37101EA4022B78, 0xBF2D721E6323AF13 // D2, D3 | |
1047 | data8 0x3EE691EBE82DF09D, 0xBEDD42550961F730 // C18, C19 | |
1048 | data8 0x3EFA793EDE99AD85, 0xBEF14000108E70BE // C16, C17 | |
1049 | data8 0xB7CBC033ACE0C99C, 0x0000BFF5 // E7 | |
1050 | data8 0xF178D1F7B1A45E27, 0x0000BFF6 // E5 | |
1051 | data8 0xA8FCFCA8106F471C, 0x0000BFF8 // E3 | |
1052 | data8 0x864D46FA898A9AD2, 0x0000BFFA // E1 | |
1053 | LOCAL_OBJECT_END(lgammal_loc_min_data) | |
1054 | ||
1055 | LOCAL_OBJECT_START(lgammal_03Q_1Q_data) | |
1056 | // Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.3125 | |
1057 | data8 0x3FD151322AC7D848, 0x3C7184DE0DB7B4EE // A4, A2L | |
1058 | data8 0x3FD9A4D55BEAB2D6, 0x3C9E934AAB10845F // A3, A1L | |
1059 | data8 0x3FB111289C381259, 0x3FAFFFCFB32AE18D // D2, D3 | |
1060 | data8 0x3FB3B1D9E0E3E00D, 0x3FB2496F0D3768DF // D0, D1 | |
1061 | data8 0xBA461972C057D439, 0x00003FFB // E6 | |
1062 | data8 0x3FEA51A6625307D3, 0x3C76ABC886A72DA2 // A2, A4L | |
1063 | data8 0x3FA8EFE46B32A70E, 0x3F8F31B3559576B6 // C17, C20 | |
1064 | data8 0xE403383700387D85, 0x00003FFB // E4 | |
1065 | data8 0x9381D0EE74BF7251, 0x00003FFC // E2 | |
1066 | data8 0x3FAA2177A6D28177, 0x3FA4895E65FBD995 // C18, C19 | |
1067 | data8 0x3FAAED2C77DBEE5D, 0x3FA94CA59385512C // D6, D7 | |
1068 | data8 0x3FAE1F522E8A5941, 0x3FAC785EF56DD87E // D4, D5 | |
1069 | data8 0x3FB556AD5FA56F0A, 0x3FA81F416E87C783 // E7, C16 | |
1070 | data8 0xCD00F1C2DC2C9F1E, 0x00003FFB // E5 | |
1071 | data8 0x3FE2788CFC6FB618, 0x3C8E52519B5B17CB // A1, A3L | |
1072 | data8 0x80859B57C3E7F241, 0x00003FFC // E3 | |
1073 | data8 0xADA065880615F401, 0x00003FFC // E1 | |
1074 | data8 0xD45CE0BD530AB50E, 0x00003FFC // E0 | |
1075 | LOCAL_OBJECT_END(lgammal_03Q_1Q_data) | |
1076 | ||
1077 | LOCAL_OBJECT_START(lgammal_13Q_2Q_data) | |
1078 | // Polynomial coefficients for the lgammal(x), 1.5625 <= |x| < 2.25 | |
1079 | data8 0x3F951322AC7D8483, 0x3C71873D88C6539D // A4, A2L | |
1080 | data8 0xBFB13E001A557606, 0x3C56CB907018A101 // A3, A1L | |
1081 | data8 0xBEC11B2EC1E7F6FC, 0x3EB0064ED9824CC7 // D2, D3 | |
1082 | data8 0xBEE3CBC963EC103A, 0x3ED2597A330C107D // D0, D1 | |
1083 | data8 0xBC6F2DEBDFE66F38, 0x0000BFF0 // E6 | |
1084 | data8 0x3FD4A34CC4A60FA6, 0x3C3AFC9BF775E8A0 // A2, A4L | |
1085 | data8 0x3E48B0C542F85B32, 0xBE347F12EAF787AB // C17, C20 | |
1086 | data8 0xE9FEA63B6984FA1E, 0x0000BFF2 // E4 | |
1087 | data8 0x9C562E15FC703BBF, 0x0000BFF5 // E2 | |
1088 | data8 0xBE3C12A50AB0355E, 0xBE1C941626AE4717 // C18, C19 | |
1089 | data8 0xBE7AFA8714342BC4,0x3E69A12D2B7761CB // D6, D7 | |
1090 | data8 0xBE9E25EF1D526730, 0x3E8C762291889B99 // D4, D5 | |
1091 | data8 0x3EF580DCEE754733, 0xBE57C811D070549C // E7, C16 | |
1092 | data8 0xD093D878BE209C98, 0x00003FF1 // E5 | |
1093 | data8 0x3FDB0EE6072093CE, 0xBC6024B9E81281C4 // A1, A3L | |
1094 | data8 0x859B57C31CB77D96, 0x00003FF4 // E3 | |
1095 | data8 0xBD6EB756DB617E8D, 0x00003FF6 // E1 | |
1096 | data8 0xF2027E10C7AF8C38, 0x0000BFF7 // E0 | |
1097 | LOCAL_OBJECT_END(lgammal_13Q_2Q_data) | |
1098 | ||
1099 | LOCAL_OBJECT_START(lgammal_8_10_data) | |
1100 | // Polynomial coefficients for the lgammal(x), 8.0 <= |x| < 10.0 | |
1101 | // Multi Precision terms | |
1102 | data8 0x40312008A3A23E5C, 0x3CE020B4F2E4083A //A1 | |
1103 | data8 0x4025358E82FCB70C, 0x3CD4A5A74AF7B99C //A0 | |
1104 | // Native precision terms | |
1105 | data8 0xF0AA239FFBC616D2, 0x00004000 //A2 | |
1106 | data8 0x96A8EA798FE57D66, 0x0000BFFF //A3 | |
1107 | data8 0x8D501B7E3B9B9BDB, 0x00003FFE //A4 | |
1108 | data8 0x9EE062401F4B1DC2, 0x0000BFFD //A5 | |
1109 | data8 0xC63FD8CD31E93431, 0x00003FFC //A6 | |
1110 | data8 0x8461101709C23C30, 0x0000BFFC //A7 | |
1111 | data8 0xB96D7EA7EF3648B2, 0x00003FFB //A8 | |
1112 | data8 0x86886759D2ACC906, 0x0000BFFB //A9 | |
1113 | data8 0xC894B6E28265B183, 0x00003FFA //A10 | |
1114 | data8 0x98C4348CAD821662, 0x0000BFFA //A11 | |
1115 | data8 0xEC9B092226A94DF2, 0x00003FF9 //A12 | |
1116 | data8 0xB9F169FF9B98CDDC, 0x0000BFF9 //A13 | |
1117 | data8 0x9A3A32BB040894D3, 0x00003FF9 //A14 | |
1118 | data8 0xF9504CCC1003B3C3, 0x0000BFF8 //A15 | |
1119 | LOCAL_OBJECT_END(lgammal_8_10_data) | |
1120 | ||
1121 | LOCAL_OBJECT_START(lgammal_03Q_6_data) | |
1122 | // Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.0 | |
1123 | data8 0xBFBC47DCA479E295, 0xBC607E6C1A379D55 //A3 | |
1124 | data8 0x3FCA051C372609ED, 0x3C7B02D73EB7D831 //A0 | |
1125 | data8 0xBFE15FAFA86B04DB, 0xBC3F52EE4A8945B5 //A1 | |
1126 | data8 0x3FD455C4FF28F0BF, 0x3C75F8C6C99F30BB //A2 | |
1127 | data8 0xD2CF04CD934F03E1, 0x00003FFA //A4 | |
1128 | data8 0xDB4ED667E29256E1, 0x0000BFF9 //A5 | |
1129 | data8 0xF155A33A5B6021BF, 0x00003FF8 //A6 | |
1130 | data8 0x895E9B9D386E0338, 0x0000BFF8 //A7 | |
1131 | data8 0xA001BE94B937112E, 0x00003FF7 //A8 | |
1132 | data8 0xBD82846E490ED048, 0x0000BFF6 //A9 | |
1133 | data8 0xE358D24EC30DBB5D, 0x00003FF5 //A10 | |
1134 | data8 0x89C4F3652446B78B, 0x0000BFF5 //A11 | |
1135 | data8 0xA86043E10280193D, 0x00003FF4 //A12 | |
1136 | data8 0xCF3A2FBA61EB7682, 0x0000BFF3 //A13 | |
1137 | data8 0x3F300900CC9200EC //A14 | |
1138 | data8 0xBF23F42264B94AE8 //A15 | |
1139 | data8 0x3F18EEF29895FE73 //A16 | |
1140 | data8 0xBF0F3C4563E3EDFB //A17 | |
1141 | data8 0x3F0387DBBC385056 //A18 | |
1142 | data8 0xBEF81B4004F92900 //A19 | |
1143 | data8 0x3EECA6692A9A5B81 //A20 | |
1144 | data8 0xBEDF61A0059C15D3 //A21 | |
1145 | data8 0x3ECDA9F40DCA0111 //A22 | |
1146 | data8 0xBEB60FE788217BAF //A23 | |
1147 | data8 0x3E9661D795DFC8C6 //A24 | |
1148 | data8 0xBE66C7756A4EDEE5 //A25 | |
1149 | // Polynomial coefficients for the lgammal(x), 1.0 <= |x| < 2.0 | |
1150 | data8 0xBFC1AE55B180726B, 0xBC7DE1BC478453F5 //A3 | |
1151 | data8 0xBFBEEB95B094C191, 0xBC53456FF6F1C9D9 //A0 | |
1152 | data8 0x3FA2AED059BD608A, 0x3C0B65CC647D557F //A1 | |
1153 | data8 0x3FDDE9E64DF22EF2, 0x3C8993939A8BA8E4 //A2 | |
1154 | data8 0xF07C206D6B100CFF, 0x00003FFA //A4 | |
1155 | data8 0xED2CEA9BA52FE7FB, 0x0000BFF9 //A5 | |
1156 | data8 0xFCE51CED52DF3602, 0x00003FF8 //A6 | |
1157 | data8 0x8D45D27872326619, 0x0000BFF8 //A7 | |
1158 | data8 0xA2B78D6BCEBE27F7, 0x00003FF7 //A8 | |
1159 | data8 0xBF6DC0996A895B6F, 0x0000BFF6 //A9 | |
1160 | data8 0xE4B9AD335AF82D79, 0x00003FF5 //A10 | |
1161 | data8 0x8A451880195362A1, 0x0000BFF5 //A11 | |
1162 | data8 0xA8BE35E63089A7A9, 0x00003FF4 //A12 | |
1163 | data8 0xCF7FA175FA11C40C, 0x0000BFF3 //A13 | |
1164 | data8 0x3F300C282FAA3B02 //A14 | |
1165 | data8 0xBF23F6AEBDA68B80 //A15 | |
1166 | data8 0x3F18F6860E2224DD //A16 | |
1167 | data8 0xBF0F542B3CE32F28 //A17 | |
1168 | data8 0x3F039436218C9BF8 //A18 | |
1169 | data8 0xBEF8AE6307677AEC //A19 | |
1170 | data8 0x3EF0B55527B3A211 //A20 | |
1171 | data8 0xBEE576AC995E7605 //A21 | |
1172 | data8 0x3ED102DDC1365D2D //A22 | |
1173 | data8 0xBEC442184F97EA54 //A23 | |
1174 | data8 0x3ED4D2283DFE5FC6 //A24 | |
1175 | data8 0xBECB9219A9B46787 //A25 | |
1176 | // Polynomial coefficients for the lgammal(x), 2.0 <= |x| < 3.0 | |
1177 | data8 0xBFCA4D55BEAB2D6F, 0xBC66F80E5BFD5AF5 //A3 | |
1178 | data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B347E3D //A0 | |
1179 | data8 0x3FFD8773039049E7, 0x3C66CB9007C426EA //A1 | |
1180 | data8 0x3FE94699894C1F4C, 0x3C918726EB111663 //A2 | |
1181 | data8 0xA264558FB0906209, 0x00003FFB //A4 | |
1182 | data8 0x94D6C50FEB902ADC, 0x0000BFFA //A5 | |
1183 | data8 0x9620656184243D17, 0x00003FF9 //A6 | |
1184 | data8 0xA0D0983B8BCA910B, 0x0000BFF8 //A7 | |
1185 | data8 0xB36AF8559B222BD3, 0x00003FF7 //A8 | |
1186 | data8 0xCE0DACB3260AE6E5, 0x0000BFF6 //A9 | |
1187 | data8 0xF1C2C0BF0437C7DB, 0x00003FF5 //A10 | |
1188 | data8 0x902A2F2F3AB74A92, 0x0000BFF5 //A11 | |
1189 | data8 0xAE05009B1B2C6E4C, 0x00003FF4 //A12 | |
1190 | data8 0xD5B71F6456D7D4CB, 0x0000BFF3 //A13 | |
1191 | data8 0x3F2F0351D71BC9C6 //A14 | |
1192 | data8 0xBF2B53BC56A3B793 //A15 | |
1193 | data8 0xBF18B12DC6F6B861 //A16 | |
1194 | data8 0xBF43EE6EB5215C2F //A17 | |
1195 | data8 0xBF5474787CDD455E //A18 | |
1196 | data8 0xBF642B503C9C060A //A19 | |
1197 | data8 0xBF6E07D1AA254AA3 //A20 | |
1198 | data8 0xBF71C785443AAEE8 //A21 | |
1199 | data8 0xBF6F67BF81B71052 //A22 | |
1200 | data8 0xBF63E4BCCF4FFABF //A23 | |
1201 | data8 0xBF50067F8C671D5A //A24 | |
1202 | data8 0xBF29C770D680A5AC //A25 | |
1203 | // Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 6.0 | |
1204 | data8 0xBFD6626BC9B31B54, 0xBC85AABE08680902 //A3 | |
1205 | data8 0x401326643C4479C9, 0x3CAA53C26F31E364 //A0 | |
1206 | data8 0x401B4C420A50AD7C, 0x3C8C76D55E57DD8D //A1 | |
1207 | data8 0x3FF735973273D5EC, 0x3C83A0B78E09188A //A2 | |
1208 | data8 0x81310136363AAB6D, 0x00003FFC //A4 | |
1209 | data8 0xDF1BD44C4075C0E6, 0x0000BFFA //A5 | |
1210 | data8 0xD58389FE38D8D664, 0x00003FF9 //A6 | |
1211 | data8 0xDA62C7221D5B5F87, 0x0000BFF8 //A7 | |
1212 | data8 0xE9F92CAD0263E157, 0x00003FF7 //A8 | |
1213 | data8 0x81ACCB8606C165FE, 0x0000BFF7 //A9 | |
1214 | data8 0x9382D8D263D1C2A3, 0x00003FF6 //A10 | |
1215 | data8 0xAB3CCBA4C853B12C, 0x0000BFF5 //A11 | |
1216 | data8 0xCA0818BBCCC59296, 0x00003FF4 //A12 | |
1217 | data8 0xF18912691CBB5BD0, 0x0000BFF3 //A13 | |
1218 | data8 0x3F323EF5D8330339 //A14 | |
1219 | data8 0xBF2641132EA571F7 //A15 | |
1220 | data8 0x3F1B5D9576175CA9 //A16 | |
1221 | data8 0xBF10F56A689C623D //A17 | |
1222 | data8 0x3F04CACA9141A18D //A18 | |
1223 | data8 0xBEFA307AC9B4E85D //A19 | |
1224 | data8 0x3EF4B625939FBE32 //A20 | |
1225 | data8 0xBECEE6AC1420F86F //A21 | |
1226 | data8 0xBE9A95AE2E485964 //A22 | |
1227 | data8 0xBF039EF47F8C09BB //A23 | |
1228 | data8 0xBF05345957F7B7A9 //A24 | |
1229 | data8 0xBEF85AE6385D4CCC //A25 | |
1230 | // Polynomial coefficients for the lgammal(x), 3.0 <= |x| < 4.0 | |
1231 | data8 0xBFCA4D55BEAB2D6F, 0xBC667B20FF46C6A8 //A3 | |
1232 | data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B398012 //A0 | |
1233 | data8 0x3FFD8773039049E7, 0x3C66CB9070238D77 //A1 | |
1234 | data8 0x3FE94699894C1F4C, 0x3C91873D8839B1CD //A2 | |
1235 | data8 0xA264558FB0906D7E, 0x00003FFB //A4 | |
1236 | data8 0x94D6C50FEB8AFD72, 0x0000BFFA //A5 | |
1237 | data8 0x9620656185B68F14, 0x00003FF9 //A6 | |
1238 | data8 0xA0D0983B34B7088A, 0x0000BFF8 //A7 | |
1239 | data8 0xB36AF863964AA440, 0x00003FF7 //A8 | |
1240 | data8 0xCE0DAAFB5497AFB8, 0x0000BFF6 //A9 | |
1241 | data8 0xF1C2EAFA79CC2864, 0x00003FF5 //A10 | |
1242 | data8 0x9028922A839572B8, 0x0000BFF5 //A11 | |
1243 | data8 0xAE1E62F870BA0278, 0x00003FF4 //A12 | |
1244 | data8 0xD4726F681E2ABA29, 0x0000BFF3 //A13 | |
1245 | data8 0x3F30559B9A02FADF //A14 | |
1246 | data8 0xBF243ADEB1266CAE //A15 | |
1247 | data8 0x3F19303B6F552603 //A16 | |
1248 | data8 0xBF0F768C288EC643 //A17 | |
1249 | data8 0x3F039D5356C21DE1 //A18 | |
1250 | data8 0xBEF81BCA8168E6BE //A19 | |
1251 | data8 0x3EEC74A53A06AD54 //A20 | |
1252 | data8 0xBEDED52D1A5DACDF //A21 | |
1253 | data8 0x3ECCB4C2C7087342 //A22 | |
1254 | data8 0xBEB4F1FAFDFF5C2F //A23 | |
1255 | data8 0x3E94C80B52D58904 //A24 | |
1256 | data8 0xBE64A328CBE92A27 //A25 | |
1257 | LOCAL_OBJECT_END(lgammal_03Q_6_data) | |
1258 | ||
1259 | LOCAL_OBJECT_START(lgammal_1pEps_data) | |
1260 | // Polynomial coefficients for the lgammal(x), 1 - 2^(-7) <= |x| < 1 + 2^(-7) | |
1261 | data8 0x93C467E37DB0C7A5, 0x00003FFE //A1 | |
1262 | data8 0xD28D3312983E9919, 0x00003FFE //A2 | |
1263 | data8 0xCD26AADF559A47E3, 0x00003FFD //A3 | |
1264 | data8 0x8A8991563EC22E81, 0x00003FFD //A4 | |
1265 | data8 0x3FCA8B9C168D52FE //A5 | |
1266 | data8 0x3FC5B40CB0696370 //A6 | |
1267 | data8 0x3FC270AC2229A65D //A7 | |
1268 | data8 0x3FC0110AF10FCBFC //A8 | |
1269 | // Polynomial coefficients for the log1p(x), - 2^(-7) <= |x| < 2^(-7) | |
1270 | data8 0x3FBC71C71C71C71C //P8 | |
1271 | data8 0xBFC0000000000000 //P7 | |
1272 | data8 0x3FC2492492492492 //P6 | |
1273 | data8 0xBFC5555555555555 //P5 | |
1274 | data8 0x3FC999999999999A //P4 | |
1275 | data8 0xBFD0000000000000 //P3 | |
1276 | data8 0x3FD5555555555555 //P2 | |
1277 | data8 0xBFE0000000000000 //P1 | |
1278 | // short version of "lnsin" polynomial | |
1279 | data8 0xD28D3312983E9918, 0x00003FFF //A2 | |
1280 | data8 0x8A8991563EC241B6, 0x00003FFE //A4 | |
1281 | data8 0xADA06588061830A5, 0x00003FFD //A6 | |
1282 | data8 0x80859B57C31CB746, 0x00003FFD //A8 | |
1283 | LOCAL_OBJECT_END(lgammal_1pEps_data) | |
1284 | ||
1285 | LOCAL_OBJECT_START(lgammal_neg2andHalf_data) | |
1286 | // Polynomial coefficients for the lgammal(x), -2.005859375 <= x < -2.5 | |
1287 | data8 0xBF927781D4BB093A, 0xBC511D86D85B7045 // A3, A0L | |
1288 | data8 0x3FF1A68793DEFC15, 0x3C9852AE2DA7DEEF // A1, A1L | |
1289 | data8 0x408555562D45FAFD, 0xBF972CDAFE5FEFAD // D0, D1 | |
1290 | data8 0xC18682331EF492A5, 0xC1845E3E0D29606B // C20, C21 | |
1291 | data8 0x4013141822E16979, 0x3CCF8718B6E75F6C // A2, A2L | |
1292 | data8 0xBFACCBF9F5ED0F15, 0xBBDD1AEB73297401 // A0, A3L | |
1293 | data8 0xCCCDB17423046445, 0x00004006 // E6 | |
1294 | data8 0x800514E230A3A452, 0x00004005 // E4 | |
1295 | data8 0xAAE9A48EC162E76F, 0x00004003 // E2 | |
1296 | data8 0x81D4F88B3F3EA0FC, 0x00004002 // E0 | |
1297 | data8 0x40CF3F3E35238DA0, 0xC0F8B340945F1A7E // D6, D7 | |
1298 | data8 0x40BF89EC0BD609C6, 0xC095897242AEFEE2 // D4, D5 | |
1299 | data8 0x40A2482FF01DBC5C, 0xC02095E275FDCF62 // D2, D3 | |
1300 | data8 0xC1641354F2312A6A, 0xC17B3657F85258E9 // C18, C19 | |
1301 | data8 0xC11F964E9ECBE2C9, 0xC146D7A90F70696C // C16, C17 | |
1302 | data8 0xE7AECDE6AF8EA816, 0x0000BFEF // E7 | |
1303 | data8 0xD711252FEBBE1091, 0x0000BFEB // E5 | |
1304 | data8 0xE648BD10F8C43391, 0x0000BFEF // E3 | |
1305 | data8 0x948A1E78AA00A98D, 0x0000BFF4 // E1 | |
1306 | LOCAL_OBJECT_END(lgammal_neg2andHalf_data) | |
1307 | ||
1308 | LOCAL_OBJECT_START(lgammal_near_neg_half_data) | |
1309 | // Polynomial coefficients for the lgammal(x), -0.5 < x < -0.40625 | |
1310 | data8 0xBFC1AE55B180726C, 0x3C8053CD734E6A1D // A3, A0L | |
1311 | data8 0x3FA2AED059BD608A, 0x3C0CD3D2CDBA17F4 // A1, A1L | |
1312 | data8 0x40855554DBCD1E1E, 0x3F96C51AC2BEE9E1 // D0, D1 | |
1313 | data8 0xC18682331EF4927D, 0x41845E3E0D295DFC // C20, C21 | |
1314 | data8 0x4011DE9E64DF22EF, 0x3CA692B70DAD6B7B // A2, A2L | |
1315 | data8 0x3FF43F89A3F0EDD6, 0xBC4955AED0FA087D // A0, A3L | |
1316 | data8 0xCCCD3F1DF4A2C1DD, 0x00004006 // E6 | |
1317 | data8 0x80028ADE33C7FCD9, 0x00004005 // E4 | |
1318 | data8 0xAACA474E485507EF, 0x00004003 // E2 | |
1319 | data8 0x80F07C206D6B0ECD, 0x00004002 // E0 | |
1320 | data8 0x40CF3F3E33E83056, 0x40F8B340944633D9 // D6, D7 | |
1321 | data8 0x40BF89EC059931F0, 0x409589723307AD20 // D4, D5 | |
1322 | data8 0x40A2482FD0054824, 0x402095CE7F19D011 // D2, D3 | |
1323 | data8 0xC1641354F2313614, 0x417B3657F8525354 // C18, C19 | |
1324 | data8 0xC11F964E9ECFD21C, 0x4146D7A90F701836 // C16, C17 | |
1325 | data8 0x86A9C01F0EA11E5A, 0x0000BFF5 // E7 | |
1326 | data8 0xBF6D8469142881C0, 0x0000BFF6 // E5 | |
1327 | data8 0x8D45D277BA8255F1, 0x0000BFF8 // E3 | |
1328 | data8 0xED2CEA9BA528BCC3, 0x0000BFF9 // E1 | |
1329 | LOCAL_OBJECT_END(lgammal_near_neg_half_data) | |
1330 | ||
1331 | //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
1332 | ////////////// POLYNOMIAL COEFFICIENTS FOR "NEAR ROOTS" RANGES ///////////// | |
1333 | ////////////// THIS PART OF TABLE SHOULD BE ADDRESSED REALLY RARE ///////////// | |
1334 | //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
1335 | LOCAL_OBJECT_START(lgammal_right_roots_polynomial_data) | |
1336 | // Polynomial coefficients for right root on [-3, -2] | |
6f65e668 | 1337 | // Lgammal is approximated by polynomial within [-.056244 ; .158208 ] range |
d5efd131 MF |
1338 | data8 0xBBBD5E9DCD11030B, 0xB867411D9FF87DD4 //A0 |
1339 | data8 0x3FF83FE966AF535E, 0x3CAA21235B8A769A //A1 | |
1340 | data8 0x40136EEBB002F55C, 0x3CC3959A6029838E //A2 | |
1341 | data8 0xB4A5302C53C2BEDD, 0x00003FFF //A3 | |
1342 | data8 0x8B8C6BE504F2DA1C, 0x00004002 //A4 | |
1343 | data8 0xB99CFF02593B4D98, 0x00004001 //A5 | |
1344 | data8 0x4038D32F682AA1CF //A6 | |
1345 | data8 0x403809F04EE6C5B5 //A7 | |
1346 | data8 0x40548EAA81634CEE //A8 | |
1347 | data8 0x4059297ADB6BC03D //A9 | |
1348 | data8 0x407286FB8EC5C9DA //A10 | |
1349 | data8 0x407A92E05B744CFB //A11 | |
1350 | data8 0x4091A9D4144258CD //A12 | |
1351 | data8 0x409C4D01D24F367E //A13 | |
1352 | data8 0x40B1871B9A426A83 //A14 | |
1353 | data8 0x40BE51C48BD9A583 //A15 | |
1354 | data8 0x40D2140D0C6153E7 //A16 | |
1355 | data8 0x40E0FB2C989CE4A3 //A17 | |
1356 | data8 0x40E52739AB005641 //A18 | |
1357 | data8 0x41161E3E6DDF503A //A19 | |
1358 | // Polynomial coefficients for right root on [-4, -3] | |
6f65e668 | 1359 | // Lgammal is approximated by polynomial within [-.172797 ; .171573 ] range |
d5efd131 MF |
1360 | data8 0x3C172712B248E42E, 0x38CB8D17801A5D67 //A0 |
1361 | data8 0x401F20A65F2FAC54, 0x3CCB9EA1817A824E //A1 | |
1362 | data8 0x4039D4D2977150EF, 0x3CDA42E149B6276A //A2 | |
1363 | data8 0xE089B8926AE2D9CB, 0x00004005 //A3 | |
1364 | data8 0x933901EBBB586C37, 0x00004008 //A4 | |
1365 | data8 0xCCD319BED1CFA1CD, 0x0000400A //A5 | |
1366 | data8 0x40D293C3F78D3C37 //A6 | |
1367 | data8 0x40FBB97AA0B6DD02 //A7 | |
1368 | data8 0x41251EA3345E5EB9 //A8 | |
1369 | data8 0x415057F65C92E7B0 //A9 | |
1370 | data8 0x41799C865241B505 //A10 | |
1371 | data8 0x41A445209EFE896B //A11 | |
1372 | data8 0x41D02D21880C953B //A12 | |
1373 | data8 0x41F9FFDE8C63E16D //A13 | |
1374 | data8 0x422504DC8302D2BE //A14 | |
1375 | data8 0x425111BF18C95414 //A15 | |
1376 | data8 0x427BCBE74A2B8EF7 //A16 | |
1377 | data8 0x42A7256F59B286F7 //A17 | |
1378 | data8 0x42D462D1586DE61F //A18 | |
1379 | data8 0x42FBB1228D6C5118 //A19 | |
1380 | // Polynomial coefficients for right root on [-5, -4] | |
6f65e668 | 1381 | // Lgammal is approximated by polynomial within [-.163171 ; .161988 ] range |
d5efd131 MF |
1382 | data8 0x3C5840FBAFDEE5BB, 0x38CAC0336E8C490A //A0 |
1383 | data8 0x403ACA5CF4921642, 0x3CCEDCDDA5491E56 //A1 | |
1384 | data8 0x40744415CD813F8E, 0x3CFBFEBC17E39146 //A2 | |
1385 | data8 0xAACD88D954E3E1BD, 0x0000400B //A3 | |
1386 | data8 0xCB68C710D75ED802, 0x0000400F //A4 | |
1387 | data8 0x8130F5AB997277AC, 0x00004014 //A5 | |
1388 | data8 0x41855E3DBF99EBA7 //A6 | |
1389 | data8 0x41CD14FE49C49FC2 //A7 | |
1390 | data8 0x421433DCE281F07D //A8 | |
1391 | data8 0x425C8399C7A92B6F //A9 | |
1392 | data8 0x42A45FBE67840F1A //A10 | |
1393 | data8 0x42ED68D75F9E6C98 //A11 | |
1394 | data8 0x433567291C27E5BE //A12 | |
1395 | data8 0x437F5ED7A9D9FD28 //A13 | |
1396 | data8 0x43C720A65C8AB711 //A14 | |
1397 | data8 0x441120A6C1D40B9B //A15 | |
1398 | data8 0x44596F561F2D1CBE //A16 | |
1399 | data8 0x44A3507DA81D5C01 //A17 | |
1400 | data8 0x44EF06A31E39EEDF //A18 | |
1401 | data8 0x45333774C99F523F //A19 | |
1402 | // Polynomial coefficients for right root on [-6, -5] | |
6f65e668 | 1403 | // Lgammal is approximated by polynomial within [-.156450 ; .156126 ] range |
d5efd131 MF |
1404 | data8 0x3C71B82D6B2B3304, 0x3917186E3C0DC231 //A0 |
1405 | data8 0x405ED72E0829AE02, 0x3C960C25157980EB //A1 | |
1406 | data8 0x40BCECC32EC22F9B, 0x3D5D8335A32F019C //A2 | |
1407 | data8 0x929EC2B1FB931F17, 0x00004012 //A3 | |
1408 | data8 0xD112EF96D37316DE, 0x00004018 //A4 | |
1409 | data8 0x9F00BB9BB13416AB, 0x0000401F //A5 | |
1410 | data8 0x425F7D8D5BDCB223 //A6 | |
1411 | data8 0x42C9A8D00C776CC6 //A7 | |
1412 | data8 0x433557FD8C481424 //A8 | |
1413 | data8 0x43A209221A953EF0 //A9 | |
1414 | data8 0x440EDC98D5618AB7 //A10 | |
1415 | data8 0x447AABD25E367378 //A11 | |
1416 | data8 0x44E73DE20CC3B288 //A12 | |
1417 | data8 0x455465257B4E0BD8 //A13 | |
1418 | data8 0x45C2011532085353 //A14 | |
1419 | data8 0x462FEE4CC191945B //A15 | |
1420 | data8 0x469C63AEEFEF0A7F //A16 | |
1421 | data8 0x4709D045390A3810 //A17 | |
1422 | data8 0x4778D360873C9F64 //A18 | |
1423 | data8 0x47E26965BE9A682A //A19 | |
1424 | // Polynomial coefficients for right root on [-7, -6] | |
6f65e668 | 1425 | // Lgammal is approximated by polynomial within [-.154582 ; .154521 ] range |
d5efd131 MF |
1426 | data8 0x3C75F103A1B00A48, 0x391C041C190C726D //A0 |
1427 | data8 0x40869DE49E3AF2AA, 0x3D1C17E1F813063B //A1 | |
1428 | data8 0x410FCE23484CFD10, 0x3DB6F38C2F11DAB9 //A2 | |
1429 | data8 0xEF281D1E1BE2055A, 0x00004019 //A3 | |
1430 | data8 0xFCE3DA92AC55DFF8, 0x00004022 //A4 | |
1431 | data8 0x8E9EA838A20BD58E, 0x0000402C //A5 | |
1432 | data8 0x4354F21E2FB9E0C9 //A6 | |
1433 | data8 0x43E9500994CD4F09 //A7 | |
1434 | data8 0x447F3A2C23C033DF //A8 | |
1435 | data8 0x45139152656606D8 //A9 | |
1436 | data8 0x45A8D45F8D3BF2E8 //A10 | |
1437 | data8 0x463FD32110E5BFE5 //A11 | |
1438 | data8 0x46D490B3BDBAE0BE //A12 | |
1439 | data8 0x476AC3CAD905DD23 //A13 | |
1440 | data8 0x48018558217AD473 //A14 | |
1441 | data8 0x48970AF371D30585 //A15 | |
1442 | data8 0x492E6273A8BEFFE3 //A16 | |
1443 | data8 0x49C47CC9AE3F1073 //A17 | |
1444 | data8 0x4A5D38E8C35EFF45 //A18 | |
1445 | data8 0x4AF0123E89694CD8 //A19 | |
1446 | // Polynomial coefficients for right root on [-8, -7] | |
6f65e668 | 1447 | // Lgammal is approximated by polynomial within [-.154217 ; .154208 ] range |
d5efd131 MF |
1448 | data8 0xBCD2507D818DDD68, 0xB97F6940EA2871A0 //A0 |
1449 | data8 0x40B3B407AA387BCB, 0x3D6320238F2C43D1 //A1 | |
1450 | data8 0x41683E85DAAFBAC7, 0x3E148D085958EA3A //A2 | |
1451 | data8 0x9F2A95AF1E10A548, 0x00004022 //A3 | |
1452 | data8 0x92F21522F482300E, 0x0000402E //A4 | |
1453 | data8 0x90B51AB03A1F244D, 0x0000403A //A5 | |
1454 | data8 0x44628E1C70EF534F //A6 | |
1455 | data8 0x452393E2BC32D244 //A7 | |
1456 | data8 0x45E5164141F4BA0B //A8 | |
1457 | data8 0x46A712B3A8AF5808 //A9 | |
1458 | data8 0x47698FD36CEDD0F2 //A10 | |
1459 | data8 0x482C9AE6BBAA3637 //A11 | |
1460 | data8 0x48F023821857C8E9 //A12 | |
1461 | data8 0x49B2569053FC106F //A13 | |
1462 | data8 0x4A74F646D5C1604B //A14 | |
1463 | data8 0x4B3811CF5ABA4934 //A15 | |
1464 | data8 0x4BFBB5DD6C84E233 //A16 | |
1465 | data8 0x4CC05021086F637B //A17 | |
1466 | data8 0x4D8450A345B0FB49 //A18 | |
1467 | data8 0x4E43825848865DB2 //A19 | |
1468 | // Polynomial coefficients for right root on [-9, -8] | |
6f65e668 | 1469 | // Lgammal is approximated by polynomial within [-.154160 ; .154158 ] range |
d5efd131 MF |
1470 | data8 0x3CDF4358564F2B46, 0x397969BEE6042F81 //A0 |
1471 | data8 0x40E3B088FED67721, 0x3D82787BA937EE85 //A1 | |
1472 | data8 0x41C83A3893550EF4, 0x3E542ED57E244DA8 //A2 | |
1473 | data8 0x9F003C6DC56E0B8E, 0x0000402B //A3 | |
1474 | data8 0x92BDF64A3213A699, 0x0000403A //A4 | |
1475 | data8 0x9074F503AAD417AF, 0x00004049 //A5 | |
1476 | data8 0x4582843E1313C8CD //A6 | |
1477 | data8 0x467387BD6A7826C1 //A7 | |
1478 | data8 0x4765074E788CF440 //A8 | |
1479 | data8 0x4857004DD9D1E09D //A9 | |
1480 | data8 0x4949792ED7530EAF //A10 | |
1481 | data8 0x4A3C7F089A292ED3 //A11 | |
1482 | data8 0x4B30125BF0AABB86 //A12 | |
1483 | data8 0x4C224175195E307E //A13 | |
1484 | data8 0x4D14DC4C8B32C08D //A14 | |
1485 | data8 0x4E07F1DB2786197E //A15 | |
1486 | data8 0x4EFB8EA1C336DACB //A16 | |
1487 | data8 0x4FF03797EACD0F23 //A17 | |
1488 | data8 0x50E4304A8E68A730 //A18 | |
1489 | data8 0x51D3618FB2EC9F93 //A19 | |
1490 | // Polynomial coefficients for right root on [-10, -9] | |
6f65e668 | 1491 | // Lgammal is approximated by polynomial within [-.154152 ; .154152 ] range |
d5efd131 MF |
1492 | data8 0x3D42F34DA97ECF0C, 0x39FD1256F345B0D0 //A0 |
1493 | data8 0x4116261203919787, 0x3DC12D44055588EB //A1 | |
1494 | data8 0x422EA8F32FB7FE99, 0x3ED849CE4E7B2D77 //A2 | |
1495 | data8 0xE25BAF73477A57B5, 0x00004034 //A3 | |
1496 | data8 0xEB021FD10060504A, 0x00004046 //A4 | |
1497 | data8 0x8220A208EE206C5F, 0x00004059 //A5 | |
1498 | data8 0x46B2C3903EC9DA14 //A6 | |
1499 | data8 0x47D64393744B9C67 //A7 | |
1500 | data8 0x48FAF79CCDC604DD //A8 | |
1501 | data8 0x4A20975DB8061EBA //A9 | |
1502 | data8 0x4B44AB9CBB38DB21 //A10 | |
1503 | data8 0x4C6A032F60094FE9 //A11 | |
1504 | data8 0x4D908103927634B4 //A12 | |
1505 | data8 0x4EB516CA21D30861 //A13 | |
1506 | data8 0x4FDB1BF12C58D318 //A14 | |
1507 | data8 0x510180AAE094A553 //A15 | |
1508 | data8 0x5226A8F2A2D45D57 //A16 | |
1509 | data8 0x534E00B6B0C8B809 //A17 | |
1510 | data8 0x5475022FE21215B2 //A18 | |
1511 | data8 0x5596B02BF6C5E19B //A19 | |
1512 | // Polynomial coefficients for right root on [-11, -10] | |
6f65e668 | 1513 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1514 | data8 0x3D7AA9C2E2B1029C, 0x3A15FB37578544DB //A0 |
1515 | data8 0x414BAF825A0C91D4, 0x3DFB9DA2CE398747 //A1 | |
1516 | data8 0x4297F3EC8AE0AF03, 0x3F34208B55FB8781 //A2 | |
1517 | data8 0xDD0C97D3197F56DE, 0x0000403E //A3 | |
1518 | data8 0x8F6F3AF7A5499674, 0x00004054 //A4 | |
1519 | data8 0xC68DA1AF6D878EEB, 0x00004069 //A5 | |
1520 | data8 0x47F1E4E1E2197CE0 //A6 | |
1521 | data8 0x494A8A28E597C3EB //A7 | |
1522 | data8 0x4AA4175D0D35D705 //A8 | |
1523 | data8 0x4BFEE6F0AF69E814 //A9 | |
1524 | data8 0x4D580FE7B3DBB3C6 //A10 | |
1525 | data8 0x4EB2ECE60E4608AF //A11 | |
1526 | data8 0x500E04BE3E2B4F24 //A12 | |
1527 | data8 0x5167F9450F0FB8FD //A13 | |
1528 | data8 0x52C342BDE747603F //A14 | |
1529 | data8 0x541F1699D557268C //A15 | |
1530 | data8 0x557927C5F079864E //A16 | |
1531 | data8 0x56D4D10FEEDB030C //A17 | |
1532 | data8 0x5832385DF86AD28A //A18 | |
1533 | data8 0x598898914B4D6523 //A19 | |
1534 | // Polynomial coefficients for right root on [-12, -11] | |
6f65e668 | 1535 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1536 | data8 0xBD96F61647C58B03, 0xBA3ABB0C2A6C755B //A0 |
1537 | data8 0x418308A82714B70D, 0x3E1088FC6A104C39 //A1 | |
1538 | data8 0x4306A493DD613C39, 0x3FB2341ECBF85741 //A2 | |
1539 | data8 0x8FA8FE98339474AB, 0x00004049 //A3 | |
1540 | data8 0x802CCDF570BA7942, 0x00004062 //A4 | |
1541 | data8 0xF3F748AF11A32890, 0x0000407A //A5 | |
1542 | data8 0x493E3B567EF178CF //A6 | |
1543 | data8 0x4ACED38F651BA362 //A7 | |
1544 | data8 0x4C600B357337F946 //A8 | |
1545 | data8 0x4DF0F71A52B54CCF //A9 | |
1546 | data8 0x4F8229F3B9FA2C70 //A10 | |
1547 | data8 0x5113A4C4979B770E //A11 | |
1548 | data8 0x52A56BC367F298D5 //A12 | |
1549 | data8 0x543785CF31842DC0 //A13 | |
1550 | data8 0x55C9FC37E3E40896 //A14 | |
1551 | data8 0x575CD5D1BA556C82 //A15 | |
1552 | data8 0x58F00A7AD99A9E08 //A16 | |
1553 | data8 0x5A824088688B008D //A17 | |
1554 | data8 0x5C15F75EF7E08EBD //A18 | |
1555 | data8 0x5DA462EA902F0C90 //A19 | |
1556 | // Polynomial coefficients for right root on [-13, -12] | |
6f65e668 | 1557 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1558 | data8 0x3DC3191752ACFC9D, 0x3A26CB6629532DBF //A0 |
1559 | data8 0x41BC8CFC051191BD, 0x3E68A84DA4E62AF2 //A1 | |
1560 | data8 0x43797926294A0148, 0x400F345FF3723CFF //A2 | |
1561 | data8 0xF26D2AF700B82625, 0x00004053 //A3 | |
1562 | data8 0xA238B24A4B1F7B15, 0x00004070 //A4 | |
1563 | data8 0xE793B5C0A41A264F, 0x0000408C //A5 | |
1564 | data8 0x4A9585BDDACE863D //A6 | |
1565 | data8 0x4C6075953448088A //A7 | |
1566 | data8 0x4E29B2F38D1FC670 //A8 | |
1567 | data8 0x4FF4619B079C440F //A9 | |
1568 | data8 0x51C05DAE118D8AD9 //A10 | |
1569 | data8 0x538A8C7F87326AD4 //A11 | |
1570 | data8 0x5555B6937588DAB3 //A12 | |
1571 | data8 0x5721E1F8B6E6A7DB //A13 | |
1572 | data8 0x58EDA1D7A77DD6E5 //A14 | |
1573 | data8 0x5AB8A9616B7DC9ED //A15 | |
1574 | data8 0x5C84942AA209ED17 //A16 | |
1575 | data8 0x5E518FC34C6F54EF //A17 | |
1576 | data8 0x601FB3F17BCCD9A0 //A18 | |
1577 | data8 0x61E61128D512FE97 //A1 | |
1578 | // Polynomial coefficients for right root on [-14, -13] | |
6f65e668 | 1579 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1580 | data8 0xBE170D646421B3F5, 0xBAAD95F79FCB5097 //A0 |
1581 | data8 0x41F7328CBFCD9AC7, 0x3E743B8B1E8AEDB1 //A1 | |
1582 | data8 0x43F0D0FA2DBDA237, 0x40A0422D6A227B55 //A2 | |
1583 | data8 0x82082DF2D32686CC, 0x0000405F //A3 | |
1584 | data8 0x8D64EE9B42E68B43, 0x0000407F //A4 | |
1585 | data8 0xA3FFD82E08C5F1F1, 0x0000409F //A5 | |
1586 | data8 0x4BF8C49D99123454 //A6 | |
1587 | data8 0x4DFEC79DDF11342F //A7 | |
1588 | data8 0x50038615A892F6BD //A8 | |
1589 | data8 0x520929453DB32EF1 //A9 | |
1590 | data8 0x54106A7808189A7F //A10 | |
1591 | data8 0x5615A302D03C207B //A11 | |
1592 | data8 0x581CC175AA736F5E //A12 | |
1593 | data8 0x5A233E071147C017 //A13 | |
1594 | data8 0x5C29E81917243F22 //A14 | |
1595 | data8 0x5E3184B0B5AC4707 //A15 | |
1596 | data8 0x6037C11DE62D8388 //A16 | |
1597 | data8 0x6240787C4B1C9D6C //A17 | |
1598 | data8 0x6448289235E80977 //A18 | |
1599 | data8 0x664B5352C6C3449E //A19 | |
1600 | // Polynomial coefficients for right root on [-15, -14] | |
6f65e668 | 1601 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1602 | data8 0x3E562C2E34A9207D, 0x3ADC00DA3DFF7A83 //A0 |
1603 | data8 0x42344C3B2F0D90AB, 0x3EB8A2E979F24536 //A1 | |
1604 | data8 0x4469BFFF28B50D07, 0x41181E3D05C1C294 //A2 | |
1605 | data8 0xAE38F64DCB24D9F8, 0x0000406A //A3 | |
1606 | data8 0xA5C3F52C1B350702, 0x0000408E //A4 | |
1607 | data8 0xA83BC857BCD67A1B, 0x000040B2 //A5 | |
1608 | data8 0x4D663B4727B4D80A //A6 | |
1609 | data8 0x4FA82C965B0F7788 //A7 | |
1610 | data8 0x51EAD58C02908D95 //A8 | |
1611 | data8 0x542E427970E073D8 //A9 | |
1612 | data8 0x56714644C558A818 //A10 | |
1613 | data8 0x58B3EC2040C77BAE //A11 | |
1614 | data8 0x5AF72AE6A83D45B1 //A12 | |
1615 | data8 0x5D3B214F611F5D12 //A13 | |
1616 | data8 0x5F7FF5E49C54E92A //A14 | |
1617 | data8 0x61C2E917AB765FB2 //A15 | |
1618 | data8 0x64066FD70907B4C1 //A16 | |
1619 | data8 0x664B3998D60D0F9B //A17 | |
1620 | data8 0x689178710782FA8B //A18 | |
1621 | data8 0x6AD14A66C1C7BEC3 //A19 | |
1622 | // Polynomial coefficients for right root on [-16, -15] | |
6f65e668 | 1623 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1624 | data8 0xBE6D7E7192615BAE, 0xBB0137677D7CC719 //A0 |
1625 | data8 0x4273077763F6628C, 0x3F09250FB8FC8EC9 //A1 | |
1626 | data8 0x44E6A1BF095B1AB3, 0x4178D5A74F6CB3B3 //A2 | |
1627 | data8 0x8F8E0D5060FCC76E, 0x00004076 //A3 | |
1628 | data8 0x800CC1DCFF092A63, 0x0000409E //A4 | |
1629 | data8 0xF3AB0BA9D14CDA72, 0x000040C5 //A5 | |
1630 | data8 0x4EDE3000A2F6D54F //A6 | |
1631 | data8 0x515EC613B9C8E241 //A7 | |
1632 | data8 0x53E003309FEEEA96 //A8 | |
1633 | data8 0x5660ED908D7C9A90 //A9 | |
1634 | data8 0x58E21E9B517B1A50 //A10 | |
1635 | data8 0x5B639745E4374EE2 //A11 | |
1636 | data8 0x5DE55BB626B2075D //A12 | |
1637 | data8 0x606772B7506BA747 //A13 | |
1638 | data8 0x62E9E581AB2E057B //A14 | |
1639 | data8 0x656CBAD1CF85D396 //A15 | |
1640 | data8 0x67EFF4EBD7989872 //A16 | |
1641 | data8 0x6A722D2B19B7E2F9 //A17 | |
1642 | data8 0x6CF5DEB3073B0743 //A18 | |
1643 | data8 0x6F744AC11550B93A //A19 | |
1644 | // Polynomial coefficients for right root on [-17, -16] | |
6f65e668 | 1645 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1646 | data8 0xBEDCC6291188207E, 0xBB872E3FDD48F5B7 //A0 |
1647 | data8 0x42B3076EE7525EF9, 0x3F6687A5038CA81C //A1 | |
1648 | data8 0x4566A1AAD96EBCB5, 0x421F0FEDFBF548D2 //A2 | |
1649 | data8 0x8F8D4D3DE9850DBA, 0x00004082 //A3 | |
1650 | data8 0x800BDD6DA2CE1859, 0x000040AE //A4 | |
1651 | data8 0xF3A8EC4C9CDC1CE5, 0x000040D9 //A5 | |
1652 | data8 0x505E2FAFDB812628 //A6 | |
1653 | data8 0x531EC5B3A7508719 //A7 | |
1654 | data8 0x55E002F77E99B628 //A8 | |
1655 | data8 0x58A0ED4C9B4DAE54 //A9 | |
1656 | data8 0x5B621E4A8240F90C //A10 | |
1657 | data8 0x5E2396E5C8849814 //A11 | |
1658 | data8 0x60E55B43D8C5CE71 //A12 | |
1659 | data8 0x63A7722F5D45D01D //A13 | |
1660 | data8 0x6669E4E010DCE45A //A14 | |
1661 | data8 0x692CBA120D5E78F6 //A15 | |
1662 | data8 0x6BEFF4045350B22E //A16 | |
1663 | data8 0x6EB22C9807C21819 //A17 | |
1664 | data8 0x7175DE20D04617C4 //A18 | |
1665 | data8 0x74344AB87C6D655F //A19 | |
1666 | // Polynomial coefficients for right root on [-18, -17] | |
6f65e668 | 1667 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1668 | data8 0xBF28AEEE7B61D77C, 0xBBDBBB5FC57ABF79 //A0 |
1669 | data8 0x42F436F56B3B8A0C, 0x3FA43EE3C5C576E9 //A1 | |
1670 | data8 0x45E98A22535D115D, 0x42984678BE78CC48 //A2 | |
1671 | data8 0xAC176F3775E6FCFC, 0x0000408E //A3 | |
1672 | data8 0xA3114F53A9FEB922, 0x000040BE //A4 | |
1673 | data8 0xA4D168A8334ABF41, 0x000040EE //A5 | |
1674 | data8 0x51E5B0E7EC7182BB //A6 | |
1675 | data8 0x54E77D67B876EAB6 //A7 | |
1676 | data8 0x57E9F7C30C09C4B6 //A8 | |
1677 | data8 0x5AED29B0488614CA //A9 | |
1678 | data8 0x5DF09486F87E79F9 //A10 | |
1679 | data8 0x60F30B199979654E //A11 | |
1680 | data8 0x63F60E02C7DCCC5F //A12 | |
1681 | data8 0x66F9B8A00EB01684 //A13 | |
1682 | data8 0x69FE2D3ED0700044 //A14 | |
1683 | data8 0x6D01C8363C7DCC84 //A15 | |
1684 | data8 0x700502B29C2F06E3 //A16 | |
1685 | data8 0x730962B4500F4A61 //A17 | |
1686 | data8 0x76103C6ED099192A //A18 | |
1687 | data8 0x79100C7132CFD6E3 //A19 | |
1688 | // Polynomial coefficients for right root on [-19, -18] | |
6f65e668 | 1689 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1690 | data8 0x3F3C19A53328A0C3, 0x3BE04ADC3FBE1458 //A0 |
1691 | data8 0x4336C16C16C16C19, 0x3FE58CE3AC4A7C28 //A1 | |
1692 | data8 0x46702E85C0898B70, 0x432C922E412CEC6E //A2 | |
1693 | data8 0xF57B99A1C034335D, 0x0000409A //A3 | |
1694 | data8 0x82EC9634223DF909, 0x000040CF //A4 | |
1695 | data8 0x94F66D7557E2EA60, 0x00004103 //A5 | |
1696 | data8 0x5376118B79AE34D0 //A6 | |
1697 | data8 0x56BAE7106D52E548 //A7 | |
1698 | data8 0x5A00BD48CC8E25AB //A8 | |
1699 | data8 0x5D4529722821B493 //A9 | |
1700 | data8 0x608B1654AF31BBC1 //A10 | |
1701 | data8 0x63D182CC98AEA859 //A11 | |
1702 | data8 0x6716D43D5EEB05E8 //A12 | |
1703 | data8 0x6A5DF884FC172E1C //A13 | |
1704 | data8 0x6DA3CA7EBB97976B //A14 | |
1705 | data8 0x70EA416D0BE6D2EF //A15 | |
1706 | data8 0x743176C31EBB65F2 //A16 | |
1707 | data8 0x7777C401A8715CF9 //A17 | |
1708 | data8 0x7AC1110C6D350440 //A18 | |
1709 | data8 0x7E02D0971CF84865 //A19 | |
1710 | // Polynomial coefficients for right root on [-20, -19] | |
6f65e668 | 1711 | // Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range |
d5efd131 MF |
1712 | data8 0xBFAB767F9BE21803, 0xBC5ACEF5BB1BD8B5 //A0 |
1713 | data8 0x4379999999999999, 0x4029241C7F5914C8 //A1 | |
1714 | data8 0x46F47AE147AE147A, 0x43AC2979B64B9D7E //A2 | |
1715 | data8 0xAEC33E1F67152993, 0x000040A7 //A3 | |
1716 | data8 0xD1B71758E219616F, 0x000040DF //A4 | |
1717 | data8 0x8637BD05AF6CF468, 0x00004118 //A5 | |
1718 | data8 0x55065E9F80F293DE //A6 | |
1719 | data8 0x588EADA78C44EE66 //A7 | |
1720 | data8 0x5C15798EE22DEF09 //A8 | |
1721 | data8 0x5F9E8ABFD644FA63 //A9 | |
1722 | data8 0x6325FD7FE29BD7CD //A10 | |
1723 | data8 0x66AFFC5C57E1F802 //A11 | |
1724 | data8 0x6A3774CD7D5C0181 //A12 | |
1725 | data8 0x6DC152724DE2A6FE //A13 | |
1726 | data8 0x7149BB138EB3D0C2 //A14 | |
1727 | data8 0x74D32FF8A70896C2 //A15 | |
1728 | data8 0x785D3749F9C72BD7 //A16 | |
1729 | data8 0x7BE5CCF65EBC4E40 //A17 | |
1730 | data8 0x7F641A891B5FC652 //A18 | |
1731 | data8 0x7FEFFFFFFFFFFFFF //A19 | |
1732 | LOCAL_OBJECT_END(lgammal_right_roots_polynomial_data) | |
1733 | ||
1734 | LOCAL_OBJECT_START(lgammal_left_roots_polynomial_data) | |
1735 | // Polynomial coefficients for left root on [-3, -2] | |
6f65e668 | 1736 | // Lgammal is approximated by polynomial within [.084641 ; -.059553 ] range |
d5efd131 MF |
1737 | data8 0xBC0844590979B82E, 0xB8BC7CE8CE2ECC3B //A0 |
1738 | data8 0xBFFEA12DA904B18C, 0xBC91A6B2BAD5EF6E //A1 | |
1739 | data8 0x4023267F3C265A51, 0x3CD7055481D03AED //A2 | |
1740 | data8 0xA0C2D618645F8E00, 0x0000C003 //A3 | |
1741 | data8 0xFA8256664F8CD2BE, 0x00004004 //A4 | |
1742 | data8 0xC2C422C103F57158, 0x0000C006 //A5 | |
1743 | data8 0x4084373F7CC70AF5 //A6 | |
1744 | data8 0xC0A12239BDD6BB95 //A7 | |
1745 | data8 0x40BDBA65E2709397 //A8 | |
1746 | data8 0xC0DA2D2504DFB085 //A9 | |
1747 | data8 0x40F758173CA5BF3C //A10 | |
1748 | data8 0xC11506C65C267E72 //A11 | |
1749 | data8 0x413318EE3A6B05FC //A12 | |
1750 | data8 0xC1517767F247DA98 //A13 | |
1751 | data8 0x41701237B4754D73 //A14 | |
1752 | data8 0xC18DB8A03BC5C3D8 //A15 | |
1753 | data8 0x41AB80953AC14A07 //A16 | |
1754 | data8 0xC1C9B7B76638D0A4 //A17 | |
1755 | data8 0x41EA727E3033E2D9 //A18 | |
1756 | data8 0xC20812C297729142 //A19 | |
1757 | // | |
1758 | // Polynomial coefficients for left root on [-4, -3] | |
6f65e668 | 1759 | // Lgammal is approximated by polynomial within [.147147 ; -.145158 ] range |
d5efd131 MF |
1760 | data8 0xBC3130AE5C4F54DB, 0xB8ED23294C13398A //A0 |
1761 | data8 0xC034B99D966C5646, 0xBCE2E5FE3BC3DBB9 //A1 | |
1762 | data8 0x406F76DEAE0436BD, 0x3D14974DDEC057BD //A2 | |
1763 | data8 0xE929ACEA5979BE96, 0x0000C00A //A3 | |
1764 | data8 0xF47C14F8A0D52771, 0x0000400E //A4 | |
1765 | data8 0x88B7BC036937481C, 0x0000C013 //A5 | |
1766 | data8 0x4173E8F3AB9FC266 //A6 | |
1767 | data8 0xC1B7DBBE062FB11B //A7 | |
1768 | data8 0x41FD2F76DE7A47A7 //A8 | |
1769 | data8 0xC242225FE53B124D //A9 | |
1770 | data8 0x4286D12AE2FBFA30 //A10 | |
1771 | data8 0xC2CCFFC267A3C4C0 //A11 | |
1772 | data8 0x431294E10008E014 //A12 | |
1773 | data8 0xC357FAC8C9A2DF6A //A13 | |
1774 | data8 0x439F2190AB9FAE01 //A14 | |
1775 | data8 0xC3E44C1D8E8C67C3 //A15 | |
1776 | data8 0x442A8901105D5A38 //A16 | |
1777 | data8 0xC471C4421E908C3A //A17 | |
1778 | data8 0x44B92CD4D59D6D17 //A18 | |
1779 | data8 0xC4FB3A078B5247FA //A19 | |
1780 | // Polynomial coefficients for left root on [-5, -4] | |
6f65e668 | 1781 | // Lgammal is approximated by polynomial within [.155671 ; -.155300 ] range |
d5efd131 MF |
1782 | data8 0xBC57BF3C6E8A94C1, 0xB902FB666934AC9E //A0 |
1783 | data8 0xC05D224A3EF9E41F, 0xBCF6F5713913E440 //A1 | |
1784 | data8 0x40BB533C678A3955, 0x3D688E53E3C72538 //A2 | |
1785 | data8 0x869FBFF732E99B84, 0x0000C012 //A3 | |
1786 | data8 0xBA9537AD61392DEC, 0x00004018 //A4 | |
1787 | data8 0x89EAE8B1DEA06B05, 0x0000C01F //A5 | |
1788 | data8 0x425A8C5C53458D3C //A6 | |
1789 | data8 0xC2C5068B3ED6509B //A7 | |
1790 | data8 0x4330FFA575E99B4E //A8 | |
1791 | data8 0xC39BEC12DDDF7669 //A9 | |
1792 | data8 0x44073825725F74F9 //A10 | |
1793 | data8 0xC47380EBCA299047 //A11 | |
1794 | data8 0x44E084DD9B666437 //A12 | |
1795 | data8 0xC54C2DA6BF787ACF //A13 | |
1796 | data8 0x45B82D65C8D6FA42 //A14 | |
1797 | data8 0xC624D62113FE950A //A15 | |
1798 | data8 0x469200CC19B45016 //A16 | |
1799 | data8 0xC6FFDDC6DD938E2E //A17 | |
1800 | data8 0x476DD7C07184B9F9 //A18 | |
1801 | data8 0xC7D554A30085C052 //A19 | |
1802 | // Polynomial coefficients for left root on [-6, -5] | |
6f65e668 | 1803 | // Lgammal is approximated by polynomial within [.157425 ; -.157360 ] range |
d5efd131 MF |
1804 | data8 0x3C9E20A87C8B79F1, 0x39488BE34B2427DB //A0 |
1805 | data8 0xC08661F6A43A5E12, 0xBD3D912526D759CC //A1 | |
1806 | data8 0x410F79DCB794F270, 0x3DB9BEE7CD3C1BF5 //A2 | |
1807 | data8 0xEB7404450D0005DB, 0x0000C019 //A3 | |
1808 | data8 0xF7AE9846DFE4D4AB, 0x00004022 //A4 | |
1809 | data8 0x8AF535855A95B6DA, 0x0000C02C //A5 | |
1810 | data8 0x43544D54E9FE240E //A6 | |
1811 | data8 0xC3E8684E40CE6CFC //A7 | |
1812 | data8 0x447DF44C1D803454 //A8 | |
1813 | data8 0xC512AC305439B2BA //A9 | |
1814 | data8 0x45A79226AF79211A //A10 | |
1815 | data8 0xC63E0DFF7244893A //A11 | |
1816 | data8 0x46D35216C3A83AF3 //A12 | |
1817 | data8 0xC76903BE0C390E28 //A13 | |
1818 | data8 0x48004A4DECFA4FD5 //A14 | |
1819 | data8 0xC8954FBD243DB8BE //A15 | |
1820 | data8 0x492BF3A31EB18DDA //A16 | |
1821 | data8 0xC9C2C6A864521F3A //A17 | |
1822 | data8 0x4A5AB127C62E8DA1 //A18 | |
1823 | data8 0xCAECF60EF3183C57 //A19 | |
1824 | // Polynomial coefficients for left root on [-7, -6] | |
6f65e668 | 1825 | // Lgammal is approximated by polynomial within [.157749 ; -.157739 ] range |
d5efd131 MF |
1826 | data8 0x3CC9B9E8B8D551D6, 0x3961813C8E1E10DB //A0 |
1827 | data8 0xC0B3ABF7A5CEA91F, 0xBD55638D4BCB4CC4 //A1 | |
1828 | data8 0x4168349A25504236, 0x3E0287ECE50CCF76 //A2 | |
1829 | data8 0x9EC8ED6E4C219E67, 0x0000C022 //A3 | |
1830 | data8 0x9279EB1B799A3FF3, 0x0000402E //A4 | |
1831 | data8 0x90213EF8D9A5DBCF, 0x0000C03A //A5 | |
1832 | data8 0x4462775E857FB71C //A6 | |
1833 | data8 0xC52377E70B45FDBF //A7 | |
1834 | data8 0x45E4F3D28EDA8C28 //A8 | |
1835 | data8 0xC6A6E85571BD2D0B //A9 | |
1836 | data8 0x47695BB17E74DF74 //A10 | |
1837 | data8 0xC82C5AC0ED6A662F //A11 | |
1838 | data8 0x48EFF8159441C2E3 //A12 | |
1839 | data8 0xC9B22602C1B68AE5 //A13 | |
1840 | data8 0x4A74BA8CE7B34100 //A14 | |
1841 | data8 0xCB37C7E208482E4B //A15 | |
1842 | data8 0x4BFB5A1D57352265 //A16 | |
1843 | data8 0xCCC01CB3021212FF //A17 | |
1844 | data8 0x4D841613AC3431D1 //A18 | |
1845 | data8 0xCE431C9E9EE43AD9 //A19 | |
1846 | // Polynomial coefficients for left root on [-8, -7] | |
6f65e668 | 1847 | // Lgammal is approximated by polynomial within [.157799 ; -.157798 ] range |
d5efd131 MF |
1848 | data8 0xBCF9C7A33AD9478C, 0xB995B0470F11E5ED //A0 |
1849 | data8 0xC0E3AF76FE4C2F8B, 0xBD8DBCD503250511 //A1 | |
1850 | data8 0x41C838E76CAAF0D5, 0x3E5D79F5E2E069C3 //A2 | |
1851 | data8 0x9EF345992B262CE0, 0x0000C02B //A3 | |
1852 | data8 0x92AE0292985FD559, 0x0000403A //A4 | |
1853 | data8 0x90615420C08F7D8C, 0x0000C049 //A5 | |
1854 | data8 0x45828139342CEEB7 //A6 | |
1855 | data8 0xC67384066C31E2D3 //A7 | |
1856 | data8 0x476502BC4DAC2C35 //A8 | |
1857 | data8 0xC856FAADFF22ADC6 //A9 | |
1858 | data8 0x49497243255AB3CE //A10 | |
1859 | data8 0xCA3C768489520F6B //A11 | |
1860 | data8 0x4B300D1EA47AF838 //A12 | |
1861 | data8 0xCC223B0508AC620E //A13 | |
1862 | data8 0x4D14D46583338CD8 //A14 | |
1863 | data8 0xCE07E7A87AA068E4 //A15 | |
1864 | data8 0x4EFB811AD2F8BEAB //A16 | |
1865 | data8 0xCFF0351B51508523 //A17 | |
1866 | data8 0x50E4364CCBF53100 //A18 | |
1867 | data8 0xD1D33CFD0BF96FA6 //A19 | |
1868 | // Polynomial coefficients for left root on [-9, -8] | |
6f65e668 | 1869 | // Lgammal is approximated by polynomial within [.157806 ; -.157806 ] range |
d5efd131 MF |
1870 | data8 0x3D333E4438B1B9D4, 0x39E7B956B83964C1 //A0 |
1871 | data8 0xC11625EDFC63DCD8, 0xBDCF39625709EFAC //A1 | |
1872 | data8 0x422EA8C150480F16, 0x3EC16ED908AB7EDD //A2 | |
1873 | data8 0xE2598725E2E11646, 0x0000C034 //A3 | |
1874 | data8 0xEAFF2346DE3EBC98, 0x00004046 //A4 | |
1875 | data8 0x821E90DE12A0F05F, 0x0000C059 //A5 | |
1876 | data8 0x46B2C334AE5366FE //A6 | |
1877 | data8 0xC7D64314B43191B6 //A7 | |
1878 | data8 0x48FAF6ED5899E01B //A8 | |
1879 | data8 0xCA2096E4472AF37D //A9 | |
1880 | data8 0x4B44AAF49FB7E4C8 //A10 | |
1881 | data8 0xCC6A02469F2BD920 //A11 | |
1882 | data8 0x4D9080626D2EFC07 //A12 | |
1883 | data8 0xCEB515EDCF0695F7 //A13 | |
1884 | data8 0x4FDB1AC69BF36960 //A14 | |
1885 | data8 0xD1017F8274339270 //A15 | |
1886 | data8 0x5226A684961BAE2F //A16 | |
1887 | data8 0xD34E085C088404A5 //A17 | |
1888 | data8 0x547511892FF8960E //A18 | |
1889 | data8 0xD5968FA3B1ED67A9 //A19 | |
1890 | // Polynomial coefficients for left root on [-10, -9] | |
6f65e668 | 1891 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
1892 | data8 0xBD355818A2B42BA2, 0xB9B7320B6A0D61EA //A0 |
1893 | data8 0xC14BAF7DA5F3770E, 0xBDE64AF9A868F719 //A1 | |
1894 | data8 0x4297F3E8791F9CD3, 0x3F2A553E59B4835E //A2 | |
1895 | data8 0xDD0C5F7E551BD13C, 0x0000C03E //A3 | |
1896 | data8 0x8F6F0A3B2EB08BBB, 0x00004054 //A4 | |
1897 | data8 0xC68D4D5AD230BA08, 0x0000C069 //A5 | |
1898 | data8 0x47F1E4D8C35D1A3E //A6 | |
1899 | data8 0xC94A8A191DB0A466 //A7 | |
1900 | data8 0x4AA4174F65FE6AE8 //A8 | |
1901 | data8 0xCBFEE6D90F94E9DD //A9 | |
1902 | data8 0x4D580FD3438BE16C //A10 | |
1903 | data8 0xCEB2ECD456D50224 //A11 | |
1904 | data8 0x500E049F7FE64546 //A12 | |
1905 | data8 0xD167F92D9600F378 //A13 | |
1906 | data8 0x52C342AE2B43261A //A14 | |
1907 | data8 0xD41F15DEEDA4B67E //A15 | |
1908 | data8 0x55792638748AFB7D //A16 | |
1909 | data8 0xD6D4D760074F6E6B //A17 | |
1910 | data8 0x5832469D58ED3FA9 //A18 | |
1911 | data8 0xD988769F3DC76642 //A19 | |
1912 | // Polynomial coefficients for left root on [-11, -10] | |
6f65e668 | 1913 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
1914 | data8 0xBDA050601F39778A, 0xBA0D4D1CE53E8241 //A0 |
1915 | data8 0xC18308A7D8EA4039, 0xBE370C379D3EAD41 //A1 | |
1916 | data8 0x4306A49380644E6C, 0x3FBBB143C0E7B5C8 //A2 | |
1917 | data8 0x8FA8FB233E4AA6D2, 0x0000C049 //A3 | |
1918 | data8 0x802CC9D8AEAC207D, 0x00004062 //A4 | |
1919 | data8 0xF3F73EE651A37A13, 0x0000C07A //A5 | |
1920 | data8 0x493E3B550A7B9568 //A6 | |
1921 | data8 0xCACED38DAA060929 //A7 | |
1922 | data8 0x4C600B346BAB3BC6 //A8 | |
1923 | data8 0xCDF0F719193E3D26 //A9 | |
1924 | data8 0x4F8229F24528B151 //A10 | |
1925 | data8 0xD113A4C2D32FBBE2 //A11 | |
1926 | data8 0x52A56BC13DC4474D //A12 | |
1927 | data8 0xD43785CFAF5E3CE3 //A13 | |
1928 | data8 0x55C9FC3EA5941202 //A14 | |
1929 | data8 0xD75CD545A3341AF5 //A15 | |
1930 | data8 0x58F009911F77C282 //A16 | |
1931 | data8 0xDA8246294D210BEC //A17 | |
1932 | data8 0x5C1608AAC32C3A8E //A18 | |
1933 | data8 0xDDA446E570A397D5 //A19 | |
1934 | // Polynomial coefficients for left root on [-12, -11] | |
6f65e668 | 1935 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
1936 | data8 0x3DEACBB3081C502E, 0x3A8AA6F01DEDF745 //A0 |
1937 | data8 0xC1BC8CFBFB0A9912, 0xBE6556B6504A2AE6 //A1 | |
1938 | data8 0x43797926206941D7, 0x40289A9644C2A216 //A2 | |
1939 | data8 0xF26D2A78446D0839, 0x0000C053 //A3 | |
1940 | data8 0xA238B1D937FFED38, 0x00004070 //A4 | |
1941 | data8 0xE793B4F6DE470538, 0x0000C08C //A5 | |
1942 | data8 0x4A9585BDC44DC45D //A6 | |
1943 | data8 0xCC60759520342C47 //A7 | |
1944 | data8 0x4E29B2F3694C0404 //A8 | |
1945 | data8 0xCFF4619AE7B6BBAB //A9 | |
1946 | data8 0x51C05DADF52B89E8 //A10 | |
1947 | data8 0xD38A8C7F48819A4A //A11 | |
1948 | data8 0x5555B6932D687860 //A12 | |
1949 | data8 0xD721E1FACB6C1B5B //A13 | |
1950 | data8 0x58EDA1E2677C8F91 //A14 | |
1951 | data8 0xDAB8A8EC523C1F71 //A15 | |
1952 | data8 0x5C84930133F30411 //A16 | |
1953 | data8 0xDE51952FDFD1EC49 //A17 | |
1954 | data8 0x601FCCEC1BBD25F1 //A18 | |
1955 | data8 0xE1E5F2D76B610920 //A19 | |
1956 | // Polynomial coefficients for left root on [-13, -12] | |
6f65e668 | 1957 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
1958 | data8 0xBE01612F373268ED, 0xBA97B7A18CDF103B //A0 |
1959 | data8 0xC1F7328CBF7A4FAC, 0xBE89A25A6952F481 //A1 | |
1960 | data8 0x43F0D0FA2DBDA237, 0x40A0422EC1CE6084 //A2 | |
1961 | data8 0x82082DF2D32686C5, 0x0000C05F //A3 | |
1962 | data8 0x8D64EE9B42E68B36, 0x0000407F //A4 | |
1963 | data8 0xA3FFD82E08C630C9, 0x0000C09F //A5 | |
1964 | data8 0x4BF8C49D99123466 //A6 | |
1965 | data8 0xCDFEC79DDF1119ED //A7 | |
1966 | data8 0x50038615A892D242 //A8 | |
1967 | data8 0xD20929453DC8B537 //A9 | |
1968 | data8 0x54106A78083BA1EE //A10 | |
1969 | data8 0xD615A302C69E27B2 //A11 | |
1970 | data8 0x581CC175870FF16F //A12 | |
1971 | data8 0xDA233E0979E12B74 //A13 | |
1972 | data8 0x5C29E822BC568C80 //A14 | |
1973 | data8 0xDE31845DB5340FBC //A15 | |
1974 | data8 0x6037BFC6D498D5F9 //A16 | |
1975 | data8 0xE2407D92CD613E82 //A17 | |
1976 | data8 0x64483B9B62367EB7 //A18 | |
1977 | data8 0xE64B2DC830E8A799 //A1 | |
1978 | // Polynomial coefficients for left root on [-14, -13] | |
6f65e668 | 1979 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
1980 | data8 0x3E563D0B930B371F, 0x3AE779957E14F012 //A0 |
1981 | data8 0xC2344C3B2F083767, 0xBEC0B7769AA3DD66 //A1 | |
1982 | data8 0x4469BFFF28B50D07, 0x41181E3F13ED2401 //A2 | |
1983 | data8 0xAE38F64DCB24D9EE, 0x0000C06A //A3 | |
1984 | data8 0xA5C3F52C1B3506F2, 0x0000408E //A4 | |
1985 | data8 0xA83BC857BCD6BA92, 0x0000C0B2 //A5 | |
1986 | data8 0x4D663B4727B4D81A //A6 | |
1987 | data8 0xCFA82C965B0F62E9 //A7 | |
1988 | data8 0x51EAD58C02905B71 //A8 | |
1989 | data8 0xD42E427970FA56AD //A9 | |
1990 | data8 0x56714644C57D8476 //A10 | |
1991 | data8 0xD8B3EC2037EC95F2 //A11 | |
1992 | data8 0x5AF72AE68BBA5B3D //A12 | |
1993 | data8 0xDD3B2152C67AA6B7 //A13 | |
1994 | data8 0x5F7FF5F082861B8B //A14 | |
1995 | data8 0xE1C2E8BE125A5B7A //A15 | |
1996 | data8 0x64066E92FE9EBE7D //A16 | |
1997 | data8 0xE64B4201CDF9F138 //A17 | |
1998 | data8 0x689186351E58AA88 //A18 | |
1999 | data8 0xEAD132A585DFC60A //A19 | |
2000 | // Polynomial coefficients for left root on [-15, -14] | |
6f65e668 | 2001 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
2002 | data8 0xBE6D7DDE12700AC1, 0xBB1E025BF1667FB5 //A0 |
2003 | data8 0xC273077763F60AD5, 0xBF2A1698184C7A9A //A1 | |
2004 | data8 0x44E6A1BF095B1AB3, 0x4178D5AE8A4A2874 //A2 | |
2005 | data8 0x8F8E0D5060FCC767, 0x0000C076 //A3 | |
2006 | data8 0x800CC1DCFF092A57, 0x0000409E //A4 | |
2007 | data8 0xF3AB0BA9D14D37D1, 0x0000C0C5 //A5 | |
2008 | data8 0x4EDE3000A2F6D565 //A6 | |
2009 | data8 0xD15EC613B9C8C800 //A7 | |
2010 | data8 0x53E003309FEECCAA //A8 | |
2011 | data8 0xD660ED908D8B15C4 //A9 | |
2012 | data8 0x58E21E9B51A1C4AE //A10 | |
2013 | data8 0xDB639745DB82210D //A11 | |
2014 | data8 0x5DE55BB60C68FCF6 //A12 | |
2015 | data8 0xE06772BA3FCA23C6 //A13 | |
2016 | data8 0x62E9E58B4F702C31 //A14 | |
2017 | data8 0xE56CBA49B071ABE2 //A15 | |
2018 | data8 0x67EFF31E4F2BA36A //A16 | |
2019 | data8 0xEA7232C8804F32C3 //A17 | |
2020 | data8 0x6CF5EFEE929A0928 //A18 | |
2021 | data8 0xEF742EE03EC3E8FF //A19 | |
2022 | // Polynomial coefficients for left root on [-16, -15] | |
6f65e668 | 2023 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
2024 | data8 0xBEDCC628FEAC7A1B, 0xBB80582C8BEBB198 //A0 |
2025 | data8 0xC2B3076EE752595E, 0xBF5388F55AFAE53E //A1 | |
2026 | data8 0x4566A1AAD96EBCB5, 0x421F0FEFE2444293 //A2 | |
2027 | data8 0x8F8D4D3DE9850DB2, 0x0000C082 //A3 | |
2028 | data8 0x800BDD6DA2CE184C, 0x000040AE //A4 | |
2029 | data8 0xF3A8EC4C9CDC7A43, 0x0000C0D9 //A5 | |
2030 | data8 0x505E2FAFDB81263F //A6 | |
2031 | data8 0xD31EC5B3A7506CD9 //A7 | |
2032 | data8 0x55E002F77E999810 //A8 | |
2033 | data8 0xD8A0ED4C9B5C2900 //A9 | |
2034 | data8 0x5B621E4A8267C401 //A10 | |
2035 | data8 0xDE2396E5BFCFDA7A //A11 | |
2036 | data8 0x60E55B43BE6F9A79 //A12 | |
2037 | data8 0xE3A772324C7405FA //A13 | |
2038 | data8 0x6669E4E9B7E57A2D //A14 | |
2039 | data8 0xE92CB989F8A8FB37 //A15 | |
2040 | data8 0x6BEFF2368849A36E //A16 | |
2041 | data8 0xEEB23234FE191D55 //A17 | |
2042 | data8 0x7175EF5D1080B105 //A18 | |
2043 | data8 0xF4342ED7B1B7BE31 //A19 | |
2044 | // Polynomial coefficients for left root on [-17, -16] | |
6f65e668 | 2045 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
2046 | data8 0xBF28AEEE7B58C790, 0xBBC4448DE371FA0A //A0 |
2047 | data8 0xC2F436F56B3B89B1, 0xBF636755245AC63A //A1 | |
2048 | data8 0x45E98A22535D115D, 0x4298467DA93DB784 //A2 | |
2049 | data8 0xAC176F3775E6FCF2, 0x0000C08E //A3 | |
2050 | data8 0xA3114F53A9FEB908, 0x000040BE //A4 | |
2051 | data8 0xA4D168A8334AFE5A, 0x0000C0EE //A5 | |
2052 | data8 0x51E5B0E7EC7182CF //A6 | |
2053 | data8 0xD4E77D67B876D6B4 //A7 | |
2054 | data8 0x57E9F7C30C098C83 //A8 | |
2055 | data8 0xDAED29B0489EF7A7 //A9 | |
2056 | data8 0x5DF09486F8A524B8 //A10 | |
2057 | data8 0xE0F30B19910A2393 //A11 | |
2058 | data8 0x63F60E02AB3109F4 //A12 | |
2059 | data8 0xE6F9B8A3431854D5 //A13 | |
2060 | data8 0x69FE2D4A6D94218E //A14 | |
2061 | data8 0xED01C7E272A73560 //A15 | |
2062 | data8 0x7005017D82B186B6 //A16 | |
2063 | data8 0xF3096A81A69BD8AE //A17 | |
2064 | data8 0x76104951BAD67D5C //A18 | |
2065 | data8 0xF90FECC99786FD5B //A19 | |
2066 | // Polynomial coefficients for left root on [-18, -17] | |
6f65e668 | 2067 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
2068 | data8 0x3F3C19A53328E26A, 0x3BE238D7BA036B3B //A0 |
2069 | data8 0xC336C16C16C16C13, 0xBFEACE245DEC56F3 //A1 | |
2070 | data8 0x46702E85C0898B70, 0x432C922B64FD1DA4 //A2 | |
2071 | data8 0xF57B99A1C0343350, 0x0000C09A //A3 | |
2072 | data8 0x82EC9634223DF90D, 0x000040CF //A4 | |
2073 | data8 0x94F66D7557E3237D, 0x0000C103 //A5 | |
2074 | data8 0x5376118B79AE34D6 //A6 | |
2075 | data8 0xD6BAE7106D52CE49 //A7 | |
2076 | data8 0x5A00BD48CC8E11AB //A8 | |
2077 | data8 0xDD4529722833E2DF //A9 | |
2078 | data8 0x608B1654AF5F46AF //A10 | |
2079 | data8 0xE3D182CC90D8723F //A11 | |
2080 | data8 0x6716D43D46706AA0 //A12 | |
2081 | data8 0xEA5DF888C5B428D3 //A13 | |
2082 | data8 0x6DA3CA85888931A6 //A14 | |
2083 | data8 0xF0EA40EF2AC7E070 //A15 | |
2084 | data8 0x743175D1A251AFCD //A16 | |
2085 | data8 0xF777CB6E2B550D73 //A17 | |
2086 | data8 0x7AC11E468A134A51 //A18 | |
2087 | data8 0xFE02B6BDD0FC40AA //A19 | |
2088 | // Polynomial coefficients for left root on [-19, -18] | |
6f65e668 | 2089 | // Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range |
d5efd131 MF |
2090 | data8 0xBFAB767F9BE217FC, 0xBC4A5541CE0D8D0D //A0 |
2091 | data8 0xC379999999999999, 0xC01A84981B490BE8 //A1 | |
2092 | data8 0x46F47AE147AE147A, 0x43AC2987BBC466EB //A2 | |
2093 | data8 0xAEC33E1F67152987, 0x0000C0A7 //A3 | |
2094 | data8 0xD1B71758E2196153, 0x000040DF //A4 | |
2095 | data8 0x8637BD05AF6D420E, 0x0000C118 //A5 | |
2096 | data8 0x55065E9F80F293B2 //A6 | |
2097 | data8 0xD88EADA78C44BFA7 //A7 | |
2098 | data8 0x5C15798EE22EC6CD //A8 | |
2099 | data8 0xDF9E8ABFD67895CF //A9 | |
2100 | data8 0x6325FD7FE13B0DE0 //A10 | |
2101 | data8 0xE6AFFC5C3DE70858 //A11 | |
2102 | data8 0x6A3774CE81C70D43 //A12 | |
2103 | data8 0xEDC1527412D8129F //A13 | |
2104 | data8 0x7149BABCDA8B7A72 //A14 | |
2105 | data8 0xF4D330AD49071BB5 //A15 | |
2106 | data8 0x785D4046F4C5F1FD //A16 | |
2107 | data8 0xFBE59BFEDBA73FAF //A17 | |
2108 | data8 0x7F64BEF2B2EC8DA1 //A18 | |
2109 | data8 0xFFEFFFFFFFFFFFFF //A19 | |
2110 | LOCAL_OBJECT_END(lgammal_left_roots_polynomial_data) | |
2111 | ||
2112 | ||
2113 | //============================================================== | |
2114 | // Code | |
2115 | //============================================================== | |
2116 | ||
2117 | .section .text | |
2118 | GLOBAL_LIBM_ENTRY(__libm_lgammal) | |
2119 | { .mfi | |
2120 | getf.exp rSignExpX = f8 | |
2121 | // Test x for NaTVal, NaN, +/-0, +/-INF, denormals | |
2122 | fclass.m p6,p0 = f8,0x1EF | |
2123 | addl r17Ones = 0x1FFFF, r0 // exponent mask | |
2124 | } | |
2125 | { .mfi | |
2126 | addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp | |
2127 | fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR) | |
2128 | adds rDelta = 0x3FC, r0 | |
2129 | } | |
2130 | ;; | |
2131 | { .mfi | |
2132 | getf.sig rSignifX = f8 | |
2133 | fcmp.lt.s1 p15, p14 = f8, f0 | |
2134 | shl rDelta = rDelta, 20 // single precision 1.5 | |
2135 | } | |
2136 | { .mfi | |
2137 | ld8 GR_ad_z_1 = [GR_ad_z_1]// get pointer to Constants_Z_1 | |
2138 | fma.s1 fTwo = f1, f1, f1 // 2.0 | |
2139 | addl rExp8 = 0x10002, r0 // exponent of 8.0 | |
2140 | } | |
2141 | ;; | |
2142 | { .mfi | |
2143 | alloc rPFS_SAVED = ar.pfs, 0, 34, 4, 0 // get some registers | |
2144 | fmerge.s fAbsX = f1, f8 // |x| | |
2145 | and rExpX = rSignExpX, r17Ones // mask sign bit | |
2146 | } | |
2147 | { .mib | |
2148 | addl rExpHalf = 0xFFFE, r0 // exponent of 0.5 | |
2149 | addl rExp2 = 0x10000, r0 // exponent of 2.0 | |
2150 | // branch out if x is NaTVal, NaN, +/-0, +/-INF, or denormalized number | |
2151 | (p6) br.cond.spnt lgammal_spec | |
2152 | } | |
2153 | ;; | |
2154 | _deno_back_to_main_path: | |
2155 | { .mfi | |
2156 | // Point to Constants_G_H_h1 | |
2157 | add rTbl1Addr = 0x040, GR_ad_z_1 | |
2158 | frcpa.s1 fRcpX, p0 = f1, f8 // initial approximation of 1/x | |
2159 | extr.u GR_Index1 = rSignifX, 59, 4 | |
2160 | } | |
2161 | { .mib | |
2162 | (p14) cmp.ge.unc p8, p0 = rExpX, rExp8 // p8 = 1 if x >= 8.0 | |
2163 | adds rZ625 = 0x3F2, r0 | |
2164 | (p8) br.cond.spnt lgammal_big_positive // branch out if x >= 8.0 | |
2165 | } | |
2166 | ;; | |
2167 | { .mfi | |
2168 | shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1 | |
2169 | fmerge.se fSignifX = f1, f8 // sifnificand of x | |
2170 | // Get high 15 bits of significand | |
2171 | extr.u GR_X_0 = rSignifX, 49, 15 | |
2172 | } | |
2173 | { .mib | |
2174 | cmp.lt.unc p9, p0 = rExpX, rExpHalf // p9 = 1 if |x| < 0.5 | |
2175 | // set p11 if 2 <= x < 4 | |
2176 | (p14) cmp.eq.unc p11, p0 = rExpX, rExp2 | |
2177 | (p9) br.cond.spnt lgammal_0_half // branch out if |x| < 0.5 | |
2178 | } | |
2179 | ;; | |
2180 | { .mfi | |
2181 | ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 | |
2182 | fms.s1 fA5L = f1, f1, f8 // for 0.75 <= x < 1.3125 path | |
2183 | shl rZ625 = rZ625, 20 // sinfle precision 0.625 | |
2184 | } | |
2185 | { .mib | |
2186 | setf.s FR_MHalf = rDelta | |
2187 | // set p10 if x >= 4.0 | |
2188 | (p14) cmp.gt.unc p10, p0 = rExpX, rExp2 | |
2189 | // branch to special path for 4.0 <= x < 8 | |
2190 | (p10) br.cond.spnt lgammal_4_8 | |
2191 | } | |
2192 | ;; | |
2193 | { .mfi | |
2194 | // for 1.3125 <= x < 1.5625 path | |
2195 | addl rPolDataPtr= @ltoff(lgammal_loc_min_data),gp | |
2196 | // argument of polynomial approximation for 1.5625 <= x < 2.25 | |
2197 | fms.s1 fB4 = f8, f1, fTwo | |
2198 | cmp.eq p12, p0 = rExpX, rExpHalf | |
2199 | } | |
2200 | { .mib | |
2201 | addl rExpOne = 0xFFFF, r0 // exponent of 1.0 | |
2202 | // set p10 if significand of x >= 1.125 | |
2203 | (p11) cmp.le p11, p0 = 2, GR_Index1 | |
2204 | (p11) br.cond.spnt lgammal_2Q_4 | |
2205 | } | |
2206 | ;; | |
2207 | { .mfi | |
2208 | // point to xMin for 1.3125 <= x < 1.5625 path | |
2209 | ld8 rPolDataPtr = [rPolDataPtr] | |
2210 | fcvt.xf fFltIntX = fXint // RTN(x) | |
2211 | (p14) cmp.eq.unc p13, p7 = rExpX, rExpOne // p13 set if 1.0 <= x < 2.0 | |
2212 | } | |
2213 | { .mib | |
2214 | setf.s FR_FracX = rZ625 | |
2215 | // set p12 if |x| < 0.75 | |
2216 | (p12) cmp.gt.unc p12, p0 = 8, GR_Index1 | |
2217 | // branch out to special path for |x| < 0.75 | |
2218 | (p12) br.cond.spnt lgammal_half_3Q | |
2219 | } | |
2220 | ;; | |
2221 | .pred.rel "mutex", p7, p13 | |
2222 | { .mfi | |
2223 | getf.sig rXRnd = fXint // integer part of the input value | |
2224 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration | |
2225 | // Get bits 30-15 of X_0 * Z_1 | |
2226 | pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 | |
2227 | } | |
2228 | { .mib | |
2229 | (p7) cmp.eq p6, p0 = rExpX, rExp2 // p6 set if 2.0 <= x < 2.25 | |
2230 | (p13) cmp.le p6, p0 = 9, GR_Index1 | |
2231 | // branch to special path 1.5625 <= x < 2.25 | |
2232 | (p6) br.cond.spnt lgammal_13Q_2Q | |
2233 | } | |
2234 | ;; | |
2235 | // | |
2236 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
2237 | // | |
2238 | { .mfi | |
2239 | shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr // Point to G_1 | |
2240 | fma.s1 fSix = fTwo, fTwo, fTwo // 6.0 | |
2241 | add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q | |
2242 | } | |
2243 | { .mib | |
2244 | add rTmpPtr3 = -0x50, GR_ad_z_1 | |
2245 | (p13) cmp.gt p7, p0 = 5, GR_Index1 | |
2246 | // branch to special path 0.75 <= x < 1.3125 | |
2247 | (p7) br.cond.spnt lgammal_03Q_1Q | |
2248 | } | |
2249 | ;; | |
2250 | { .mfi | |
2251 | add rTmpPtr = 8, GR_ad_tbl_1 | |
2252 | fma.s1 fRoot = f8, f1, f1 // x + 1 | |
2253 | // Absolute value of int arg. Will be used as index in table with roots | |
2254 | sub rXRnd = r0, rXRnd | |
2255 | } | |
2256 | { .mib | |
2257 | ldfe fA5L = [rPolDataPtr], 16 // xMin | |
2258 | addl rNegSingularity = 0x3003E, r0 | |
2259 | (p14) br.cond.spnt lgammal_loc_min | |
2260 | } | |
2261 | ;; | |
2262 | { .mfi | |
2263 | ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1 | |
2264 | nop.f 0 | |
2265 | add rZ2Addr = 0x140, GR_ad_z_1 // Point to Constants_Z_2 | |
2266 | } | |
2267 | { .mib | |
2268 | ldfd FR_h = [rTmpPtr] // Load h_1 | |
2269 | // If arg is less or equal to -2^63 | |
2270 | cmp.geu.unc p8,p0 = rSignExpX, rNegSingularity | |
2271 | // Singularity for x < -2^63 since all such arguments are integers | |
2272 | // branch to special code which deals with singularity | |
2273 | (p8) br.cond.spnt lgammal_singularity | |
2274 | } | |
2275 | ;; | |
2276 | { .mfi | |
2277 | ldfe FR_log2_hi = [GR_ad_q], 32 // Load log2_hi | |
2278 | nop.f 0 | |
2279 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
2280 | } | |
2281 | { .mfi | |
2282 | ldfe FR_log2_lo = [rTmpPtr3], 32 // Load log2_lo | |
2283 | fms.s1 fDx = f8, f1, fFltIntX // x - RTN(x) | |
2284 | // index in table with roots and bounds | |
2285 | adds rXint = -2, rXRnd | |
2286 | } | |
2287 | ;; | |
2288 | { .mfi | |
2289 | ldfe FR_Q4 = [GR_ad_q], 32 // Load Q4 | |
2290 | nop.f 0 | |
2291 | // set p12 if x may be close to negative root: -19.5 < x < -2.0 | |
2292 | cmp.gtu p12, p0 = 18, rXint | |
2293 | } | |
2294 | { .mfi | |
2295 | shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 | |
2296 | fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration | |
2297 | // Point to Constants_G_H_h2 | |
2298 | add rTbl2Addr = 0x180, GR_ad_z_1 | |
2299 | } | |
2300 | ;; | |
2301 | { .mfi | |
2302 | shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 | |
2303 | // set p9 if x is integer and negative | |
2304 | fcmp.eq.s1 p9, p0 = f8,fFltIntX | |
2305 | // Point to Constants_G_H_h3 | |
2306 | add rTbl3Addr = 0x280, GR_ad_z_1 | |
2307 | } | |
2308 | { .mfi | |
2309 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
2310 | nop.f 0 | |
2311 | sub GR_N = rExpX, rExpHalf, 1 | |
2312 | } | |
2313 | ;; | |
2314 | { .mfi | |
2315 | ldfe FR_Q3 = [rTmpPtr3], 32 // Load Q3 | |
2316 | nop.f 0 | |
2317 | // Point to lnsin polynomial coefficients | |
2318 | adds rLnSinDataPtr = 864, rTbl3Addr | |
2319 | } | |
2320 | { .mfi | |
2321 | ldfe FR_Q2 = [GR_ad_q],32 // Load Q2 | |
2322 | nop.f 0 | |
2323 | add rTmpPtr = 8, GR_ad_tbl_2 | |
2324 | } | |
2325 | ;; | |
2326 | { .mfi | |
2327 | ldfe FR_Q1 = [rTmpPtr3] // Load Q1 | |
2328 | fcmp.lt.s1 p0, p15 = fAbsX, fSix // p15 is set when x < -6.0 | |
2329 | // point to table with roots and bounds | |
2330 | adds rRootsBndAddr = -1296, GR_ad_z_1 | |
2331 | } | |
2332 | { .mfb | |
2333 | // Put integer N into rightmost significand | |
2334 | setf.sig fFloatN = GR_N | |
2335 | fma.s1 fThirteen = fSix, fTwo, f1 // 13.0 | |
2336 | // Singularity if -2^63 < x < 0 and x is integer | |
2337 | // branch to special code which deals with singularity | |
2338 | (p9) br.cond.spnt lgammal_singularity | |
2339 | } | |
2340 | ;; | |
2341 | { .mfi | |
2342 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2 | |
2343 | // y = |x|/2^(exponent(x)) - 1.5 | |
2344 | fms.s1 FR_FracX = fSignifX, f1, FR_MHalf | |
2345 | // Get bits 30-15 of X_1 * Z_2 | |
2346 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
2347 | } | |
2348 | { .mfi | |
2349 | ldfd FR_h2 = [rTmpPtr] // Load h_2 | |
2350 | fma.s1 fDxSqr = fDx, fDx, f0 // deltaX^2 | |
2351 | adds rTmpPtr3 = 128, rLnSinDataPtr | |
2352 | } | |
2353 | ;; | |
2354 | // | |
2355 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
2356 | // | |
2357 | { .mfi | |
2358 | getf.exp rRoot = fRoot // sign and biased exponent of (x + 1) | |
2359 | nop.f 0 | |
2360 | // set p6 if -4 < x <= -2 | |
2361 | cmp.eq p6, p0 = rExpX, rExp2 | |
2362 | } | |
2363 | { .mfi | |
2364 | ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16 | |
2365 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration | |
2366 | sub rIndexPol = rExpX, rExpHalf // index of polynom | |
2367 | } | |
2368 | ;; | |
2369 | { .mfi | |
2370 | ldfe fLnSin4 = [rLnSinDataPtr], 96 | |
2371 | // p10 is set if x is potential "right" root | |
2372 | // p11 set for possible "left" root | |
2373 | fcmp.lt.s1 p10, p11 = fDx, f0 | |
2374 | shl rIndexPol = rIndexPol, 6 // (i*16)*4 | |
2375 | } | |
2376 | { .mfi | |
2377 | ldfpd fLnSin18, fLnSin20 = [rTmpPtr3], 16 | |
2378 | nop.f 0 | |
2379 | mov rExp2tom7 = 0x0fff8 // Exponent of 2^-7 | |
2380 | } | |
2381 | ;; | |
2382 | { .mfi | |
2383 | getf.sig rSignifDx = fDx // Get significand of RTN(x) | |
2384 | nop.f 0 | |
2385 | // set p6 if -4 < x <= -3.0 | |
2386 | (p6) cmp.le.unc p6, p0 = 0x8, GR_Index1 | |
2387 | } | |
2388 | { .mfi | |
2389 | ldfpd fLnSin22, fLnSin24 = [rTmpPtr3], 16 | |
2390 | nop.f 0 | |
2391 | // mask sign bit in the exponent of (x + 1) | |
2392 | and rRoot = rRoot, r17Ones | |
2393 | } | |
2394 | ;; | |
2395 | { .mfi | |
2396 | ldfe fLnSin16 = [rLnSinDataPtr], -80 | |
2397 | nop.f 0 | |
2398 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
2399 | } | |
2400 | { .mfi | |
2401 | ldfpd fLnSin26, fLnSin28 = [rTmpPtr3], 16 | |
2402 | nop.f 0 | |
2403 | and rXRnd = 1, rXRnd | |
2404 | } | |
2405 | ;; | |
2406 | { .mfi | |
2407 | shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 | |
2408 | fms.s1 fDxSqrL = fDx, fDx, fDxSqr // low part of deltaX^2 | |
2409 | // potential "left" root | |
2410 | (p11) adds rRootsBndAddr = 560, rRootsBndAddr | |
2411 | } | |
2412 | { .mib | |
2413 | ldfpd fLnSin30, fLnSin32 = [rTmpPtr3], 16 | |
2414 | // set p7 if |x+1| < 2^-7 | |
2415 | cmp.lt p7, p0 = rRoot, rExp2tom7 | |
2416 | // branch to special path for |x+1| < 2^-7 | |
2417 | (p7) br.cond.spnt _closeToNegOne | |
2418 | } | |
2419 | ;; | |
2420 | { .mfi | |
2421 | ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3 | |
2422 | fcmp.lt.s1 p14, p0 = fAbsX, fThirteen // set p14 if x > -13.0 | |
2423 | // base address of polynomial on range [-6.0, -0.75] | |
2424 | adds rPolDataPtr = 3440, rTbl3Addr | |
2425 | } | |
2426 | { .mfi | |
2427 | // (i*16)*4 + (i*16)*8 - offsett of polynomial on range [-6.0, -0.75] | |
2428 | shladd rTmpPtr = rIndexPol, 2, rIndexPol | |
2429 | fma.s1 fXSqr = FR_FracX, FR_FracX, f0 // y^2 | |
2430 | // point to left "near root" bound | |
2431 | (p12) shladd rRootsBndAddr = rXint, 4, rRootsBndAddr | |
2432 | } | |
2433 | ;; | |
2434 | { .mfi | |
2435 | ldfpd fLnSin34, fLnSin36 = [rTmpPtr3], 16 | |
2436 | fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration | |
2437 | // add special offsett if -4 < x <= -3.0 | |
2438 | (p6) adds rPolDataPtr = 640, rPolDataPtr | |
2439 | } | |
2440 | { .mfi | |
2441 | // point to right "near root" bound | |
2442 | adds rTmpPtr2 = 8, rRootsBndAddr | |
2443 | fnma.s1 fMOne = f1, f1, f0 // -1.0 | |
2444 | // Point to Bernulli numbers | |
2445 | adds rBernulliPtr = 544, rTbl3Addr | |
2446 | } | |
2447 | ;; | |
2448 | { .mfi | |
2449 | // left bound of "near root" range | |
2450 | (p12) ld8 rLeftBound = [rRootsBndAddr] | |
2451 | fmerge.se fNormDx = f1, fDx // significand of DeltaX | |
2452 | // base address + offsett for polynomial coeff. on range [-6.0, -0.75] | |
2453 | add rPolDataPtr = rPolDataPtr, rTmpPtr | |
2454 | } | |
2455 | { .mfi | |
2456 | // right bound of "near root" range | |
2457 | (p12) ld8 rRightBound = [rTmpPtr2] | |
2458 | fcvt.xf fFloatN = fFloatN | |
2459 | // special "Bernulli" numbers for Stirling's formula for -13 < x < -6 | |
2460 | (p14) adds rBernulliPtr = 160, rBernulliPtr | |
2461 | } | |
2462 | ;; | |
2463 | { .mfi | |
2464 | ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 | |
2465 | fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 | |
2466 | adds rTmpPtr3 = -160, rTmpPtr3 | |
2467 | } | |
2468 | { .mfb | |
2469 | adds rTmpPtr = 80, rPolDataPtr | |
2470 | fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 | |
2471 | // p15 is set if -2^63 < x < 6.0 and x is not an integer | |
2472 | // branch to path with implementation using Stirling's formula for neg. x | |
2473 | (p15) br.cond.spnt _negStirling | |
2474 | } | |
2475 | ;; | |
2476 | { .mfi | |
2477 | ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3 | |
2478 | fma.s1 fDelX4 = fDxSqr, fDxSqr, f0 // deltaX^4 | |
2479 | // Get high 4 bits of signif | |
2480 | extr.u rIndex1Dx = rSignifDx, 59, 4 | |
2481 | } | |
2482 | { .mfi | |
2483 | ldfe fA5 = [rTmpPtr], -16 // A5 | |
2484 | fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 | |
2485 | adds rLnSinTmpPtr = 16, rLnSinDataPtr | |
2486 | } | |
2487 | ;; | |
2488 | { .mfi | |
2489 | ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 | |
2490 | fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin18 | |
2491 | // Get high 15 bits of significand | |
2492 | extr.u rX0Dx = rSignifDx, 49, 15 | |
2493 | } | |
2494 | { .mfi | |
2495 | ldfe fA4 = [rTmpPtr], 192 // A4 | |
2496 | fms.s1 fXSqrL = FR_FracX, FR_FracX, fXSqr // low part of y^2 | |
2497 | shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1 | |
2498 | } | |
2499 | ;; | |
2500 | { .mfi | |
2501 | ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 | |
2502 | fma.s1 fX4 = fXSqr, fXSqr, f0 // y^4 | |
2503 | adds rTmpPtr2 = 32, rTmpPtr | |
2504 | } | |
2505 | { .mfi | |
2506 | ldfpd fA18, fA19 = [rTmpPtr], 16 // A18, A19 | |
2507 | fma.s1 fLnSin24 = fLnSin24, fDxSqr, fLnSin22 | |
2508 | nop.i 0 | |
2509 | } | |
2510 | ;; | |
2511 | { .mfi | |
2512 | ldfe fLnSin6 = [rLnSinDataPtr], 32 | |
2513 | fma.s1 fLnSin28 = fLnSin28, fDxSqr, fLnSin26 | |
2514 | nop.i 0 | |
2515 | } | |
2516 | { .mfi | |
2517 | ldfe fLnSin8 = [rLnSinTmpPtr], 32 | |
2518 | nop.f 0 | |
2519 | nop.i 0 | |
2520 | } | |
2521 | ;; | |
2522 | { .mfi | |
2523 | ldfpd fA20, fA21 = [rTmpPtr], 16 // A20, A21 | |
2524 | fma.s1 fLnSin32 = fLnSin32, fDxSqr, fLnSin30 | |
2525 | nop.i 0 | |
2526 | } | |
2527 | { .mfi | |
2528 | ldfpd fA22, fA23 = [rTmpPtr2], 16 // A22, A23 | |
2529 | fma.s1 fB20 = f1, f1, FR_MHalf // 2.5 | |
2530 | (p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound | |
2531 | } | |
2532 | ;; | |
2533 | { .mfi | |
2534 | ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 | |
2535 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
2536 | // set p6 if x falls in "near root" range | |
2537 | (p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound | |
2538 | } | |
2539 | { .mfb | |
2540 | adds rTmpPtr3 = -64, rTmpPtr | |
2541 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
2542 | // branch to special path if x falls in "near root" range | |
2543 | (p6) br.cond.spnt _negRoots | |
2544 | } | |
2545 | ;; | |
2546 | { .mfi | |
2547 | ldfpd fA24, fA25 = [rTmpPtr2], 16 // A24, A25 | |
2548 | fma.s1 fLnSin36 = fLnSin36, fDxSqr, fLnSin34 | |
2549 | (p11) cmp.eq.unc p7, p0 = 1,rXint // p7 set if -3.0 < x < -2.5 | |
2550 | } | |
2551 | { .mfi | |
2552 | adds rTmpPtr = -48, rTmpPtr | |
2553 | fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin16 | |
2554 | addl rDelta = 0x5338, r0 // significand of -2.605859375 | |
2555 | } | |
2556 | ;; | |
2557 | { .mfi | |
2558 | getf.exp GR_N = fDx // Get N = exponent of DeltaX | |
2559 | fma.s1 fX6 = fX4, fXSqr, f0 // y^6 | |
2560 | // p7 set if -2.605859375 <= x < -2.5 | |
2561 | (p7) cmp.gt.unc p7, p0 = rDelta, GR_X_0 | |
2562 | } | |
2563 | { .mfb | |
2564 | ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1 | |
2565 | fma.s1 fDelX8 = fDelX4, fDelX4, f0 // deltaX^8 | |
2566 | // branch to special path for -2.605859375 <= x < -2.5 | |
2567 | (p7) br.cond.spnt _neg2andHalf | |
2568 | } | |
2569 | ;; | |
2570 | { .mfi | |
2571 | ldfpd fA14, fA15 = [rTmpPtr3], 16 // A14, A15 | |
2572 | fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 | |
2573 | adds rTmpPtr2 = 128 , rPolDataPtr | |
2574 | } | |
2575 | { .mfi | |
2576 | ldfpd fA16, fA17 = [rTmpPtr], 16 // A16, A17 | |
2577 | fma.s1 fLnSin28 = fLnSin28, fDelX4, fLnSin24 | |
2578 | adds rPolDataPtr = 144 , rPolDataPtr | |
2579 | } | |
2580 | ;; | |
2581 | { .mfi | |
2582 | ldfe fLnSin10 = [rLnSinDataPtr], 32 | |
2583 | fma.s1 fRes1H = fA3, FR_FracX, f0 // (A3*y)hi | |
2584 | and GR_N = GR_N, r17Ones // mask sign bit | |
2585 | } | |
2586 | { .mfi | |
2587 | ldfe fLnSin12 = [rLnSinTmpPtr] | |
2588 | fma.s1 fDelX6 = fDxSqr, fDelX4, f0 // DeltaX^6 | |
2589 | shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1 | |
2590 | } | |
2591 | ;; | |
2592 | { .mfi | |
2593 | ldfe fA13 = [rPolDataPtr], -32 // A13 | |
2594 | fma.s1 fA4 = fA5, FR_FracX, fA4 // A5*y + A4 | |
2595 | // Get bits 30-15 of X_0 * Z_1 | |
2596 | pmpyshr2.u GR_X_1 = rX0Dx, GR_Z_1, 15 | |
2597 | } | |
2598 | { .mfi | |
2599 | ldfe fA12 = [rTmpPtr2], -32 // A12 | |
2600 | fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 | |
2601 | sub GR_N = GR_N, rExpHalf, 1 // unbisaed exponent of DeltaX | |
2602 | } | |
2603 | ;; | |
2604 | // | |
2605 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
2606 | // | |
2607 | .pred.rel "mutex",p10,p11 | |
2608 | { .mfi | |
2609 | ldfe fA11 = [rPolDataPtr], -32 // A11 | |
2610 | // High part of log(|x|) = Y_hi = N * log2_hi + H | |
2611 | fma.s1 fResH = fFloatN, FR_log2_hi, FR_H | |
2612 | (p10) cmp.eq p8, p9 = rXRnd, r0 | |
2613 | } | |
2614 | { .mfi | |
2615 | ldfe fA10 = [rTmpPtr2], -32 // A10 | |
2616 | fma.s1 fRes6H = fA1, FR_FracX, f0 // (A1*y)hi | |
2617 | (p11) cmp.eq p9, p8 = rXRnd, r0 | |
2618 | } | |
2619 | ;; | |
2620 | { .mfi | |
2621 | ldfe fA9 = [rPolDataPtr], -32 // A9 | |
2622 | fma.s1 fB14 = fLnSin6, fDxSqr, f0 // (LnSin6*deltaX^2)hi | |
2623 | cmp.eq p6, p7 = 4, rSgnGamSize | |
2624 | } | |
2625 | { .mfi | |
2626 | ldfe fA8 = [rTmpPtr2], -32 // A8 | |
2627 | fma.s1 fA18 = fA19, FR_FracX, fA18 | |
2628 | nop.i 0 | |
2629 | } | |
2630 | ;; | |
2631 | { .mfi | |
2632 | ldfe fA7 = [rPolDataPtr] // A7 | |
2633 | fma.s1 fA23 = fA23, FR_FracX, fA22 | |
2634 | nop.i 0 | |
2635 | } | |
2636 | { .mfi | |
2637 | ldfe fA6 = [rTmpPtr2] // A6 | |
2638 | fma.s1 fA21 = fA21, FR_FracX, fA20 | |
2639 | nop.i 0 | |
2640 | } | |
2641 | ;; | |
2642 | { .mfi | |
2643 | ldfe fLnSin14 = [rLnSinDataPtr] | |
2644 | fms.s1 fRes1L = fA3, FR_FracX, fRes1H // delta((A3*y)hi) | |
2645 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
2646 | } | |
2647 | { .mfi | |
2648 | setf.sig fFloatNDx = GR_N | |
2649 | fadd.s1 fPol = fRes1H, fA2 // (A3*y + A2)hi | |
2650 | nop.i 0 | |
2651 | } | |
2652 | ;; | |
2653 | { .mfi | |
2654 | ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1 | |
2655 | fma.s1 fRes2H = fA4, fXSqr, f0 // ((A5 + A4*y)*y^2)hi | |
2656 | nop.i 0 | |
2657 | } | |
2658 | { .mfi | |
2659 | shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 | |
2660 | fma.s1 fA25 = fA25, FR_FracX, fA24 | |
2661 | shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 | |
2662 | } | |
2663 | ;; | |
2664 | .pred.rel "mutex",p8,p9 | |
2665 | { .mfi | |
2666 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
2667 | fms.s1 fRes6L = fA1, FR_FracX, fRes6H // delta((A1*y)hi) | |
2668 | // sign of GAMMA(x) is negative | |
2669 | (p8) adds rSgnGam = -1, r0 | |
2670 | } | |
2671 | { .mfi | |
2672 | adds rTmpPtr = 8, GR_ad_tbl_2 | |
2673 | fadd.s1 fRes3H = fRes6H, fA0 // (A1*y + A0)hi | |
2674 | // sign of GAMMA(x) is positive | |
2675 | (p9) adds rSgnGam = 1, r0 | |
2676 | } | |
2677 | ;; | |
2678 | { .mfi | |
2679 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2 | |
2680 | // (LnSin6*deltaX^2 + LnSin4)hi | |
2681 | fadd.s1 fLnSinH = fB14, fLnSin4 | |
2682 | nop.i 0 | |
2683 | } | |
2684 | { .mfi | |
2685 | ldfd FR_h2 = [rTmpPtr] // Load h_2 | |
2686 | fms.s1 fB16 = fLnSin6, fDxSqr, fB14 // delta(LnSin6*deltaX^2) | |
2687 | nop.i 0 | |
2688 | } | |
2689 | ;; | |
2690 | { .mfi | |
2691 | ldfd fhDelX = [GR_ad_tbl_1] // Load h_1 | |
2692 | fma.s1 fA21 = fA21, fXSqr, fA18 | |
2693 | nop.i 0 | |
2694 | } | |
2695 | { .mfi | |
2696 | nop.m 0 | |
2697 | fma.s1 fLnSin36 = fLnSin36, fDelX4, fLnSin32 | |
2698 | nop.i 0 | |
2699 | } | |
2700 | ;; | |
2701 | { .mfi | |
2702 | nop.m 0 | |
2703 | fma.s1 fRes1L = fA3L, FR_FracX, fRes1L // (A3*y)lo | |
2704 | // Get bits 30-15 of X_1 * Z_ | |
2705 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
2706 | } | |
2707 | { .mfi | |
2708 | nop.m 0 | |
2709 | fsub.s1 fPolL = fA2, fPol | |
2710 | nop.i 0 | |
2711 | } | |
2712 | ;; | |
2713 | // | |
2714 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
2715 | // | |
2716 | { .mfi | |
2717 | nop.m 0 | |
2718 | // delta(((A5 + A4*y)*y^2)hi) | |
2719 | fms.s1 fRes2L = fA4, fXSqr, fRes2H | |
2720 | nop.i 0 | |
2721 | } | |
2722 | { .mfi | |
2723 | nop.m 0 | |
2724 | // (((A5 + A4*y)*y^2) + A3*y + A2)hi | |
2725 | fadd.s1 fRes4H = fRes2H, fPol | |
2726 | nop.i 0 | |
2727 | } | |
2728 | ;; | |
2729 | { .mfi | |
2730 | // store signgam if size of variable is 4 bytes | |
2731 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
2732 | fma.s1 fRes6L = fA1L, FR_FracX, fRes6L // (A1*y)lo | |
2733 | nop.i 0 | |
2734 | } | |
2735 | { .mfi | |
2736 | // store signgam if size of variable is 8 bytes | |
2737 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
2738 | fsub.s1 fRes3L = fA0, fRes3H | |
2739 | nop.i 0 | |
2740 | } | |
2741 | ;; | |
2742 | { .mfi | |
2743 | nop.m 0 | |
2744 | fsub.s1 fLnSinL = fLnSin4, fLnSinH | |
2745 | nop.i 0 | |
2746 | } | |
2747 | { .mfi | |
2748 | nop.m 0 | |
2749 | // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)hi | |
2750 | fma.s1 fB18 = fLnSinH, fDxSqr, f0 | |
2751 | nop.i 0 | |
2752 | } | |
2753 | ;; | |
2754 | { .mfi | |
2755 | adds rTmpPtr = 8, rTbl3Addr | |
2756 | fma.s1 fB16 = fLnSin6, fDxSqrL, fB16 // (LnSin6*deltaX^2)lo | |
2757 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
2758 | } | |
2759 | { .mfi | |
2760 | nop.m 0 | |
2761 | fma.s1 fA25 = fA25, fXSqr, fA23 | |
2762 | nop.i 0 | |
2763 | } | |
2764 | ;; | |
2765 | { .mfi | |
2766 | shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 | |
2767 | fadd.s1 fPolL = fPolL, fRes1H | |
2768 | nop.i 0 | |
2769 | } | |
2770 | { .mfi | |
2771 | shladd rTmpPtr = GR_Index3, 4, rTmpPtr // Point to G_3 | |
2772 | fadd.s1 fRes1L = fRes1L, fA2L // (A3*y)lo + A2lo | |
2773 | nop.i 0 | |
2774 | } | |
2775 | ;; | |
2776 | { .mfi | |
2777 | ldfps FR_G3, FR_H3 = [GR_ad_tbl_3] // Load G_3, H_3 | |
2778 | fma.s1 fRes2L = fA4, fXSqrL, fRes2L // ((A5 + A4*y)*y^2)lo | |
2779 | nop.i 0 | |
2780 | } | |
2781 | { .mfi | |
2782 | ldfd FR_h3 = [rTmpPtr] // Load h_3 | |
2783 | fsub.s1 fRes4L = fPol, fRes4H | |
2784 | nop.i 0 | |
2785 | } | |
2786 | ;; | |
2787 | { .mfi | |
2788 | nop.m 0 | |
2789 | // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)hi | |
2790 | fma.s1 fRes7H = fRes4H, fXSqr, f0 | |
2791 | nop.i 0 | |
2792 | } | |
2793 | { .mfi | |
2794 | nop.m 0 | |
2795 | fma.s1 fA15 = fA15, FR_FracX, fA14 | |
2796 | nop.i 0 | |
2797 | } | |
2798 | ;; | |
2799 | { .mfi | |
2800 | nop.m 0 | |
2801 | fadd.s1 fRes3L = fRes3L, fRes6H | |
2802 | nop.i 0 | |
2803 | } | |
2804 | { .mfi | |
2805 | nop.m 0 | |
2806 | fadd.s1 fRes6L = fRes6L, fA0L // (A1*y)lo + A0lo | |
2807 | nop.i 0 | |
2808 | } | |
2809 | ;; | |
2810 | { .mfi | |
2811 | nop.m 0 | |
2812 | fadd.s1 fLnSinL = fLnSinL, fB14 | |
2813 | ||
2814 | nop.i 0 | |
2815 | } | |
2816 | { .mfi | |
2817 | nop.m 0 | |
2818 | // delta((LnSin6*deltaX^2 + LnSin4)*deltaX^2) | |
2819 | fms.s1 fB20 = fLnSinH, fDxSqr, fB18 | |
2820 | nop.i 0 | |
2821 | } | |
2822 | ;; | |
2823 | { .mfi | |
2824 | nop.m 0 | |
2825 | fadd.s1 fPolL = fPolL, fRes1L // (A3*y + A2)lo | |
2826 | ||
2827 | nop.i 0 | |
2828 | } | |
2829 | { .mfi | |
2830 | nop.m 0 | |
2831 | // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)hi | |
2832 | fadd.s1 fLnSin6 = fB18, fLnSin2 | |
2833 | nop.i 0 | |
2834 | } | |
2835 | ;; | |
2836 | { .mfi | |
2837 | nop.m 0 | |
2838 | fadd.s1 fRes4L = fRes4L, fRes2H | |
2839 | nop.i 0 | |
2840 | } | |
2841 | { .mfi | |
2842 | nop.m 0 | |
2843 | fma.s1 fA17 = fA17, FR_FracX, fA16 | |
2844 | nop.i 0 | |
2845 | } | |
2846 | ;; | |
2847 | { .mfi | |
2848 | nop.m 0 | |
2849 | // delta(((((A5 + A4*y)*y^2) + A3*y + A2)*y^2) | |
2850 | fms.s1 fRes7L = fRes4H, fXSqr, fRes7H | |
2851 | nop.i 0 | |
2852 | } | |
2853 | { .mfi | |
2854 | nop.m 0 | |
2855 | fadd.s1 fPol = fRes7H, fRes3H | |
2856 | nop.i 0 | |
2857 | } | |
2858 | ;; | |
2859 | { .mfi | |
2860 | nop.m 0 | |
2861 | fadd.s1 fRes3L = fRes3L, fRes6L // (A1*y + A0)lo | |
2862 | nop.i 0 | |
2863 | } | |
2864 | { .mfi | |
2865 | nop.m 0 | |
2866 | fma.s1 fA25 = fA25, fX4, fA21 | |
2867 | nop.i 0 | |
2868 | } | |
2869 | ;; | |
2870 | { .mfi | |
2871 | nop.m 0 | |
2872 | // (LnSin6*deltaX^2 + LnSin4)lo | |
2873 | fadd.s1 fLnSinL = fLnSinL, fB16 | |
2874 | nop.i 0 | |
2875 | } | |
2876 | { .mfi | |
2877 | nop.m 0 | |
2878 | fma.s1 fB20 = fLnSinH, fDxSqrL, fB20 | |
2879 | nop.i 0 | |
2880 | } | |
2881 | ;; | |
2882 | { .mfi | |
2883 | nop.m 0 | |
2884 | fsub.s1 fLnSin4 = fLnSin2, fLnSin6 | |
2885 | nop.i 0 | |
2886 | } | |
2887 | { .mfi | |
2888 | nop.m 0 | |
2889 | // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)hi | |
2890 | fma.s1 fLnSinH = fLnSin6, fDxSqr, f0 | |
2891 | nop.i 0 | |
2892 | } | |
2893 | ;; | |
2894 | { .mfi | |
2895 | nop.m 0 | |
2896 | // ((A5 + A4*y)*y^2)lo + (A3*y + A2)lo | |
2897 | fadd.s1 fRes2L = fRes2L, fPolL | |
2898 | nop.i 0 | |
2899 | } | |
2900 | { .mfi | |
2901 | nop.m 0 | |
2902 | fma.s1 fA17 = fA17, fXSqr, fA15 | |
2903 | nop.i 0 | |
2904 | } | |
2905 | ;; | |
2906 | { .mfi | |
2907 | nop.m 0 | |
2908 | // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo | |
2909 | fma.s1 fRes7L = fRes4H, fXSqrL, fRes7L | |
2910 | nop.i 0 | |
2911 | } | |
2912 | { .mfi | |
2913 | nop.m 0 | |
2914 | fsub.s1 fPolL = fRes3H, fPol | |
2915 | nop.i 0 | |
2916 | } | |
2917 | ;; | |
2918 | { .mfi | |
2919 | nop.m 0 | |
2920 | fma.s1 fA13 = fA13, FR_FracX, fA12 | |
2921 | nop.i 0 | |
2922 | } | |
2923 | { .mfi | |
2924 | nop.m 0 | |
2925 | fma.s1 fA11 = fA11, FR_FracX, fA10 | |
2926 | nop.i 0 | |
2927 | } | |
2928 | ;; | |
2929 | { .mfi | |
2930 | nop.m 0 | |
2931 | // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo | |
2932 | fma.s1 fB20 = fLnSinL, fDxSqr, fB20 | |
2933 | nop.i 0 | |
2934 | } | |
2935 | { .mfi | |
2936 | nop.m 0 | |
2937 | fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 | |
2938 | nop.i 0 | |
2939 | } | |
2940 | ;; | |
2941 | { .mfi | |
2942 | nop.m 0 | |
2943 | fadd.s1 fLnSin4 = fLnSin4, fB18 | |
2944 | nop.i 0 | |
2945 | } | |
2946 | { .mfi | |
2947 | nop.m 0 | |
2948 | fms.s1 fLnSinL = fLnSin6, fDxSqr, fLnSinH | |
2949 | nop.i 0 | |
2950 | } | |
2951 | ;; | |
2952 | { .mfi | |
2953 | nop.m 0 | |
2954 | // (((A5 + A4*y)*y^2) + A3*y + A2)lo | |
2955 | fadd.s1 fRes4L = fRes4L, fRes2L | |
2956 | nop.i 0 | |
2957 | } | |
2958 | { .mfi | |
2959 | nop.m 0 | |
2960 | fadd.s1 fhDelX = fhDelX, FR_h2 // h = h_1 + h_2 | |
2961 | nop.i 0 | |
2962 | } | |
2963 | ;; | |
2964 | { .mfi | |
2965 | nop.m 0 | |
2966 | fadd.s1 fRes7L = fRes7L, fRes3L | |
2967 | nop.i 0 | |
2968 | } | |
2969 | { .mfi | |
2970 | nop.m 0 | |
2971 | fadd.s1 fPolL = fPolL, fRes7H | |
2972 | nop.i 0 | |
2973 | } | |
2974 | ;; | |
2975 | { .mfi | |
2976 | nop.m 0 | |
2977 | fcvt.xf fFloatNDx = fFloatNDx | |
2978 | nop.i 0 | |
2979 | } | |
2980 | { .mfi | |
2981 | nop.m 0 | |
2982 | fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 | |
2983 | nop.i 0 | |
2984 | } | |
2985 | ;; | |
2986 | { .mfi | |
2987 | nop.m 0 | |
2988 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
2989 | nop.i 0 | |
2990 | } | |
2991 | { .mfi | |
2992 | nop.m 0 | |
2993 | // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo + (LnSin2)lo | |
2994 | fadd.s1 fLnSin2L = fLnSin2L, fB20 | |
2995 | nop.i 0 | |
2996 | } | |
2997 | ;; | |
2998 | { .mfi | |
2999 | nop.m 0 | |
3000 | fma.s1 fA25 = fA25, fX4, fA17 | |
3001 | nop.i 0 | |
3002 | } | |
3003 | { .mfi | |
3004 | nop.m 0 | |
3005 | fma.s1 fA13 = fA13, fXSqr, fA11 | |
3006 | nop.i 0 | |
3007 | } | |
3008 | ;; | |
3009 | { .mfi | |
3010 | nop.m 0 | |
3011 | fma.s1 fA9 = fA9, FR_FracX, fA8 | |
3012 | nop.i 0 | |
3013 | } | |
3014 | { .mfi | |
3015 | nop.m 0 | |
3016 | fma.s1 fA7 = fA7, FR_FracX, fA6 | |
3017 | nop.i 0 | |
3018 | } | |
3019 | ;; | |
3020 | { .mfi | |
3021 | nop.m 0 | |
3022 | fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin28 | |
3023 | nop.i 0 | |
3024 | } | |
3025 | { .mfi | |
3026 | nop.m 0 | |
3027 | fma.s1 fLnSin14 = fLnSin14, fDxSqr, fLnSin12 | |
3028 | nop.i 0 | |
3029 | } | |
3030 | ;; | |
3031 | { .mfi | |
3032 | nop.m 0 | |
3033 | fma.s1 fLnSin10 = fLnSin10, fDxSqr, fLnSin8 | |
3034 | nop.i 0 | |
3035 | } | |
3036 | { .mfi | |
3037 | nop.m 0 | |
3038 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
3039 | nop.i 0 | |
3040 | } | |
3041 | ;; | |
3042 | { .mfi | |
3043 | nop.m 0 | |
3044 | fms.s1 fRDx = FR_G, fNormDx, f1 // r = G * S_hi - 1 | |
3045 | nop.i 0 | |
3046 | } | |
3047 | { .mfi | |
3048 | nop.m 0 | |
3049 | // poly_lo = r * Q4 + Q3 | |
3050 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
3051 | nop.i 0 | |
3052 | } | |
3053 | ;; | |
3054 | { .mfi | |
3055 | nop.m 0 | |
3056 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
3057 | nop.i 0 | |
3058 | } | |
3059 | { .mfi | |
3060 | nop.m 0 | |
3061 | // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo + (A1*y + A0)lo | |
3062 | fma.s1 fRes7L = fRes4L, fXSqr, fRes7L | |
3063 | nop.i 0 | |
3064 | } | |
3065 | ;; | |
3066 | { .mfi | |
3067 | nop.m 0 | |
3068 | fma.s1 fA25 = fA25, fX4, fA13 | |
3069 | nop.i 0 | |
3070 | } | |
3071 | { .mfi | |
3072 | nop.m 0 | |
3073 | fma.s1 fA9 = fA9, fXSqr, fA7 | |
3074 | nop.i 0 | |
3075 | } | |
3076 | ;; | |
3077 | { .mfi | |
3078 | nop.m 0 | |
3079 | // h = N * log2_lo + h | |
3080 | fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h | |
3081 | nop.i 0 | |
3082 | } | |
3083 | { .mfi | |
3084 | nop.m 0 | |
3085 | fadd.s1 fhDelX = fhDelX, FR_h3 // h = (h_1 + h_2) + h_3 | |
3086 | nop.i 0 | |
3087 | } | |
3088 | ;; | |
3089 | { .mfi | |
3090 | nop.m 0 | |
3091 | fma.s1 fLnSin36 = fLnSin36, fDelX6, fLnSin20 | |
3092 | nop.i 0 | |
3093 | } | |
3094 | { .mfi | |
3095 | nop.m 0 | |
3096 | fma.s1 fLnSin14 = fLnSin14, fDelX4, fLnSin10 | |
3097 | nop.i 0 | |
3098 | } | |
3099 | ;; | |
3100 | { .mfi | |
3101 | nop.m 0 | |
3102 | // poly_lo = r * Q4 + Q3 | |
3103 | fma.s1 fPolyLoDx = fRDx, FR_Q4, FR_Q3 | |
3104 | nop.i 0 | |
3105 | } | |
3106 | { .mfi | |
3107 | nop.m 0 | |
3108 | fmpy.s1 fRDxSq = fRDx, fRDx // rsq = r * r | |
3109 | nop.i 0 | |
3110 | } | |
3111 | ;; | |
3112 | { .mfi | |
3113 | nop.m 0 | |
3114 | // Y_hi = N * log2_hi + H | |
3115 | fma.s1 fResLnDxH = fFloatNDx, FR_log2_hi, FR_H | |
3116 | nop.i 0 | |
3117 | } | |
3118 | { .mfi | |
3119 | nop.m 0 | |
3120 | fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
3121 | nop.i 0 | |
3122 | } | |
3123 | ;; | |
3124 | { .mfi | |
3125 | nop.m 0 | |
3126 | fma.s1 fA9 = fA25, fX4, fA9 | |
3127 | nop.i 0 | |
3128 | } | |
3129 | { .mfi | |
3130 | nop.m 0 | |
3131 | fadd.s1 fPolL = fPolL, fRes7L | |
3132 | nop.i 0 | |
3133 | } | |
3134 | ;; | |
3135 | { .mfi | |
3136 | nop.m 0 | |
3137 | fadd.s1 fLnSin4 = fLnSin4, fLnSin2L | |
3138 | nop.i 0 | |
3139 | } | |
3140 | { .mfi | |
3141 | nop.m 0 | |
3142 | // h = N * log2_lo + h | |
3143 | fma.s1 fhDelX = fFloatNDx, FR_log2_lo, fhDelX | |
3144 | nop.i 0 | |
3145 | } | |
3146 | ;; | |
3147 | { .mfi | |
3148 | nop.m 0 | |
3149 | fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin14 | |
3150 | nop.i 0 | |
3151 | } | |
3152 | { .mfi | |
3153 | nop.m 0 | |
3154 | // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)lo | |
3155 | fma.s1 fLnSinL = fLnSin6, fDxSqrL, fLnSinL | |
3156 | nop.i 0 | |
3157 | } | |
3158 | ;; | |
3159 | { .mfi | |
3160 | nop.m 0 | |
3161 | // poly_lo = poly_lo * r + Q2 | |
3162 | fma.s1 fPolyLoDx = fPolyLoDx, fRDx, FR_Q2 | |
3163 | nop.i 0 | |
3164 | } | |
3165 | { .mfi | |
3166 | nop.m 0 | |
3167 | fma.s1 fRDxCub = fRDxSq, fRDx, f0 // rcub = r^3 | |
3168 | nop.i 0 | |
3169 | } | |
3170 | ;; | |
3171 | { .mfi | |
3172 | nop.m 0 | |
3173 | famax.s0 fRes5H = fPol, fResH | |
3174 | nop.i 0 | |
3175 | } | |
3176 | { .mfi | |
3177 | nop.m 0 | |
3178 | // High part of (lgammal(|x|) + log(|x|)) | |
3179 | fadd.s1 fRes1H = fPol, fResH | |
3180 | nop.i 0 | |
3181 | } | |
3182 | ;; | |
3183 | { .mfi | |
3184 | nop.m 0 | |
3185 | // poly_lo = poly_lo * r + Q2 | |
3186 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
3187 | nop.i 0 | |
3188 | } | |
3189 | { .mfi | |
3190 | nop.m 0 | |
3191 | fma.s1 fPolL = fA9, fX6, fPolL // P25lo | |
3192 | nop.i 0 | |
3193 | } | |
3194 | ;; | |
3195 | ||
3196 | { .mfi | |
3197 | nop.m 0 | |
3198 | famin.s0 fRes5L = fPol, fResH | |
3199 | nop.i 0 | |
3200 | } | |
3201 | { .mfi | |
3202 | nop.m 0 | |
3203 | // High part of -(LnSin + log(|DeltaX|)) | |
3204 | fnma.s1 fRes2H = fResLnDxH, f1, fLnSinH | |
3205 | nop.i 0 | |
3206 | } | |
3207 | ;; | |
3208 | ||
3209 | { .mfi | |
3210 | nop.m 0 | |
3211 | // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)lo | |
3212 | fma.s1 fLnSinL = fLnSin4, fDxSqr, fLnSinL | |
3213 | nop.i 0 | |
3214 | } | |
3215 | { .mfi | |
3216 | nop.m 0 | |
3217 | fma.s1 fLnSin36 = fLnSin36, fDelX6, f0 | |
3218 | nop.i 0 | |
3219 | } | |
3220 | ;; | |
3221 | { .mfi | |
3222 | nop.m 0 | |
3223 | // poly_hi = Q1 * rsq + r | |
3224 | fma.s1 fPolyHiDx = FR_Q1, fRDxSq, fRDx | |
3225 | nop.i 0 | |
3226 | } | |
3227 | { .mfi | |
3228 | nop.m 0 | |
3229 | // poly_lo = poly_lo*r^3 + h | |
3230 | fma.s1 fPolyLoDx = fPolyLoDx, fRDxCub, fhDelX | |
3231 | nop.i 0 | |
3232 | } | |
3233 | ;; | |
3234 | { .mfi | |
3235 | nop.m 0 | |
3236 | fsub.s1 fRes1L = fRes5H, fRes1H | |
3237 | nop.i 0 | |
3238 | } | |
3239 | { .mfi | |
3240 | nop.m 0 | |
3241 | // -(lgammal(|x|) + log(|x|))hi | |
3242 | fnma.s1 fRes1H = fRes1H, f1, f0 | |
3243 | ||
3244 | nop.i 0 | |
3245 | } | |
3246 | ;; | |
3247 | { .mfi | |
3248 | nop.m 0 | |
3249 | // poly_hi = Q1 * rsq + r | |
3250 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
3251 | nop.i 0 | |
3252 | } | |
3253 | { .mfi | |
3254 | nop.m 0 | |
3255 | // poly_lo = poly_lo*r^3 + h | |
3256 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h | |
3257 | nop.i 0 | |
3258 | } | |
3259 | ;; | |
3260 | { .mfi | |
3261 | nop.m 0 | |
3262 | fms.s1 fRes2L = fResLnDxH, fMOne, fRes2H | |
3263 | nop.i 0 | |
3264 | } | |
3265 | ;; | |
3266 | { .mfi | |
3267 | nop.m 0 | |
3268 | fma.s1 fLnSinL = fLnSin36, fDxSqr, fLnSinL | |
3269 | nop.i 0 | |
3270 | } | |
3271 | { .mfi | |
3272 | nop.m 0 | |
3273 | // Y_lo = poly_hi + poly_lo | |
3274 | fadd.s1 fResLnDxL = fPolyHiDx, fPolyLoDx | |
3275 | nop.i 0 | |
3276 | } | |
3277 | ;; | |
3278 | { .mfi | |
3279 | nop.m 0 | |
3280 | fadd.s1 fRes1L = fRes1L, fRes5L | |
3281 | nop.i 0 | |
3282 | } | |
3283 | { .mfi | |
3284 | nop.m 0 | |
3285 | // high part of the final result | |
3286 | fadd.s1 fYH = fRes2H, fRes1H | |
3287 | nop.i 0 | |
3288 | } | |
3289 | ;; | |
3290 | { .mfi | |
3291 | nop.m 0 | |
3292 | // Y_lo = poly_hi + poly_lo | |
3293 | fadd.s1 fResL = FR_poly_hi, FR_poly_lo | |
3294 | nop.i 0 | |
3295 | } | |
3296 | ;; | |
3297 | { .mfi | |
3298 | nop.m 0 | |
3299 | famax.s0 fRes4H = fRes2H, fRes1H | |
3300 | nop.i 0 | |
3301 | } | |
3302 | ;; | |
3303 | { .mfi | |
3304 | nop.m 0 | |
3305 | famin.s0 fRes4L = fRes2H, fRes1H | |
3306 | nop.i 0 | |
3307 | } | |
3308 | ;; | |
3309 | { .mfi | |
3310 | nop.m 0 | |
3311 | // (LnSin)lo + (log(|DeltaX|))lo | |
3312 | fsub.s1 fLnSinL = fLnSinL, fResLnDxL | |
3313 | nop.i 0 | |
3314 | } | |
3315 | { .mfi | |
3316 | nop.m 0 | |
3317 | fadd.s1 fRes2L = fRes2L, fLnSinH | |
3318 | nop.i 0 | |
3319 | } | |
3320 | ;; | |
3321 | { .mfi | |
3322 | nop.m 0 | |
3323 | //(lgammal(|x|))lo + (log(|x|))lo | |
3324 | fadd.s1 fPolL = fResL, fPolL | |
3325 | nop.i 0 | |
3326 | } | |
3327 | ;; | |
3328 | { .mfi | |
3329 | nop.m 0 | |
3330 | fsub.s1 fYL = fRes4H, fYH | |
3331 | nop.i 0 | |
3332 | } | |
3333 | ;; | |
3334 | { .mfi | |
3335 | nop.m 0 | |
3336 | // Low part of -(LnSin + log(|DeltaX|)) | |
3337 | fadd.s1 fRes2L = fRes2L, fLnSinL | |
3338 | nop.i 0 | |
3339 | } | |
3340 | { .mfi | |
3341 | nop.m 0 | |
3342 | // High part of (lgammal(|x|) + log(|x|)) | |
3343 | fadd.s1 fRes1L = fRes1L, fPolL | |
3344 | nop.i 0 | |
3345 | } | |
3346 | ;; | |
3347 | { .mfi | |
3348 | nop.m 0 | |
3349 | fadd.s1 fYL = fYL, fRes4L | |
3350 | nop.i 0 | |
3351 | } | |
3352 | { .mfi | |
3353 | nop.m 0 | |
3354 | fsub.s1 fRes2L = fRes2L, fRes1L | |
3355 | nop.i 0 | |
3356 | } | |
3357 | ;; | |
3358 | { .mfi | |
3359 | nop.m 0 | |
3360 | // low part of the final result | |
3361 | fadd.s1 fYL = fYL, fRes2L | |
3362 | nop.i 0 | |
3363 | } | |
3364 | ;; | |
3365 | { .mfb | |
3366 | nop.m 0 | |
3367 | // final result for -6.0 < x <= -0.75, non-integer, "far" from roots | |
3368 | fma.s0 f8 = fYH, f1, fYL | |
3369 | // exit here for -6.0 < x <= -0.75, non-integer, "far" from roots | |
3370 | br.ret.sptk b0 | |
3371 | } | |
3372 | ;; | |
3373 | ||
3374 | // here if |x+1| < 2^(-7) | |
3375 | .align 32 | |
3376 | _closeToNegOne: | |
3377 | { .mfi | |
3378 | getf.exp GR_N = fDx // Get N = exponent of x | |
3379 | fmerge.se fAbsX = f1, fDx // Form |deltaX| | |
3380 | // Get high 4 bits of significand of deltaX | |
3381 | extr.u rIndex1Dx = rSignifDx, 59, 4 | |
3382 | } | |
3383 | { .mfi | |
3384 | addl rPolDataPtr= @ltoff(lgammal_1pEps_data),gp | |
3385 | fma.s1 fA0L = fDxSqr, fDxSqr, f0 // deltaX^4 | |
3386 | // sign of GAMMA is positive if p10 is set to 1 | |
3387 | (p10) adds rSgnGam = 1, r0 | |
3388 | } | |
3389 | ;; | |
3390 | { .mfi | |
3391 | shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1 | |
3392 | fnma.s1 fResL = fDx, f1, f0 // -(x+1) | |
3393 | // Get high 15 bits of significand | |
3394 | extr.u GR_X_0 = rSignifDx, 49, 15 | |
3395 | } | |
3396 | { .mfi | |
3397 | ld8 rPolDataPtr = [rPolDataPtr] | |
3398 | nop.f 0 | |
3399 | shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1 | |
3400 | } | |
3401 | ;; | |
3402 | { .mfi | |
3403 | ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1 | |
3404 | nop.f 0 | |
3405 | and GR_N = GR_N, r17Ones // mask sign bit | |
3406 | } | |
3407 | { .mfi | |
3408 | adds rTmpPtr = 8, GR_ad_tbl_1 | |
3409 | nop.f 0 | |
3410 | cmp.eq p6, p7 = 4, rSgnGamSize | |
3411 | } | |
3412 | ;; | |
3413 | { .mfi | |
3414 | ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 | |
3415 | nop.f 0 | |
3416 | adds rTmpPtr2 = 96, rPolDataPtr | |
3417 | } | |
3418 | { .mfi | |
3419 | ldfd FR_h = [rTmpPtr] // Load h_1 | |
3420 | nop.f 0 | |
3421 | // unbiased exponent of deltaX | |
3422 | sub GR_N = GR_N, rExpHalf, 1 | |
3423 | } | |
3424 | ;; | |
3425 | { .mfi | |
3426 | adds rTmpPtr3 = 192, rPolDataPtr | |
3427 | nop.f 0 | |
3428 | // sign of GAMMA is negative if p11 is set to 1 | |
3429 | (p11) adds rSgnGam = -1, r0 | |
3430 | } | |
3431 | { .mfi | |
3432 | ldfe fA1 = [rPolDataPtr], 16 // A1 | |
3433 | nop.f 0 | |
3434 | nop.i 0 | |
3435 | } | |
3436 | ;; | |
3437 | {.mfi | |
3438 | ldfe fA2 = [rPolDataPtr], 16 // A2 | |
3439 | nop.f 0 | |
3440 | // Get bits 30-15 of X_0 * Z_1 | |
3441 | pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 | |
3442 | } | |
3443 | { .mfi | |
3444 | ldfpd fA20, fA19 = [rTmpPtr2], 16 // P8, P7 | |
3445 | nop.f 0 | |
3446 | nop.i 0 | |
3447 | } | |
3448 | ;; | |
3449 | // | |
3450 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
3451 | // | |
3452 | { .mfi | |
3453 | ldfe fA3 = [rPolDataPtr], 16 // A3 | |
3454 | nop.f 0 | |
3455 | nop.i 0 | |
3456 | } | |
3457 | { .mfi | |
3458 | ldfpd fA18, fA17 = [rTmpPtr2], 16 // P6, P5 | |
3459 | nop.f 0 | |
3460 | nop.i 0 | |
3461 | } | |
3462 | ;; | |
3463 | { .mfi | |
3464 | ldfe fA4 = [rPolDataPtr], 16 // A4 | |
3465 | nop.f 0 | |
3466 | nop.i 0 | |
3467 | } | |
3468 | { .mfi | |
3469 | ldfpd fA16, fA15 = [rTmpPtr2], 16 // P4, p3 | |
3470 | nop.f 0 | |
3471 | nop.i 0 | |
3472 | } | |
3473 | ;; | |
3474 | { .mfi | |
3475 | ldfpd fA5L, fA6 = [rPolDataPtr], 16 // A5, A6 | |
3476 | nop.f 0 | |
3477 | nop.i 0 | |
3478 | } | |
3479 | { .mfi | |
3480 | ldfpd fA14, fA13 = [rTmpPtr2], 16 // P2, P1 | |
3481 | nop.f 0 | |
3482 | nop.i 0 | |
3483 | } | |
3484 | ;; | |
3485 | { .mfi | |
3486 | ldfpd fA7, fA8 = [rPolDataPtr], 16 // A7, A8 | |
3487 | nop.f 0 | |
3488 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
3489 | } | |
3490 | { .mfi | |
3491 | ldfe fLnSin2 = [rTmpPtr2], 16 | |
3492 | nop.f 0 | |
3493 | nop.i 0 | |
3494 | } | |
3495 | ;; | |
3496 | { .mfi | |
3497 | shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 | |
3498 | nop.f 0 | |
3499 | shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 | |
3500 | } | |
3501 | { .mfi | |
3502 | ldfe fLnSin4 = [rTmpPtr2], 32 | |
3503 | nop.f 0 | |
3504 | nop.i 0 | |
3505 | } | |
3506 | ;; | |
3507 | { .mfi | |
3508 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
3509 | nop.f 0 | |
3510 | adds rTmpPtr = 8, GR_ad_tbl_2 | |
3511 | } | |
3512 | { .mfi | |
3513 | // Put integer N into rightmost significand | |
3514 | setf.sig fFloatN = GR_N | |
3515 | nop.f 0 | |
3516 | nop.i 0 | |
3517 | } | |
3518 | ;; | |
3519 | { .mfi | |
3520 | ldfe fLnSin6 = [rTmpPtr3] | |
3521 | nop.f 0 | |
3522 | nop.i 0 | |
3523 | } | |
3524 | { .mfi | |
3525 | ldfe fLnSin8 = [rTmpPtr2] | |
3526 | nop.f 0 | |
3527 | nop.i 0 | |
3528 | } | |
3529 | ;; | |
3530 | { .mfi | |
3531 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2 | |
3532 | nop.f 0 | |
3533 | nop.i 0 | |
3534 | } | |
3535 | { .mfi | |
3536 | ldfd FR_h2 = [rTmpPtr] // Load h_2 | |
3537 | nop.f 0 | |
3538 | nop.i 0 | |
3539 | } | |
3540 | ;; | |
3541 | { .mfi | |
3542 | // store signgam if size of variable is 4 bytes | |
3543 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
3544 | fma.s1 fResH = fA20, fResL, fA19 //polynomial for log(|x|) | |
3545 | // Get bits 30-15 of X_1 * Z_2 | |
3546 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
3547 | } | |
3548 | { .mfi | |
3549 | // store signgam if size of variable is 8 bytes | |
3550 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
3551 | fma.s1 fA2 = fA2, fDx, fA1 // polynomial for lgammal(|x|) | |
3552 | nop.i 0 | |
3553 | } | |
3554 | ;; | |
3555 | // | |
3556 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
3557 | // | |
3558 | { .mfi | |
3559 | nop.m 0 | |
3560 | fma.s1 fA18 = fA18, fResL, fA17 //polynomial for log(|x|) | |
3561 | nop.i 0 | |
3562 | } | |
3563 | ;; | |
3564 | { .mfi | |
3565 | nop.m 0 | |
3566 | fma.s1 fA16 = fA16, fResL, fA15 //polynomial for log(|x|) | |
3567 | nop.i 0 | |
3568 | } | |
3569 | { .mfi | |
3570 | nop.m 0 | |
3571 | fma.s1 fA4 = fA4, fDx, fA3 // polynomial for lgammal(|x|) | |
3572 | nop.i 0 | |
3573 | } | |
3574 | ;; | |
3575 | { .mfi | |
3576 | nop.m 0 | |
3577 | fma.s1 fA14 = fA14, fResL, fA13 //polynomial for log(|x|) | |
3578 | nop.i 0 | |
3579 | } | |
3580 | { .mfi | |
3581 | nop.m 0 | |
3582 | fma.s1 fA6 = fA6, fDx, fA5L // polynomial for lgammal(|x|) | |
3583 | nop.i 0 | |
3584 | } | |
3585 | ;; | |
3586 | { .mfi | |
3587 | nop.m 0 | |
3588 | fma.s1 fPol = fA8, fDx, fA7 // polynomial for lgammal(|x|) | |
3589 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
3590 | } | |
3591 | ;; | |
3592 | { .mfi | |
3593 | shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 | |
3594 | // loqw part of lnsin polynomial | |
3595 | fma.s1 fRes3L = fLnSin4, fDxSqr, fLnSin2 | |
3596 | nop.i 0 | |
3597 | } | |
3598 | ;; | |
3599 | { .mfi | |
3600 | ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3 | |
3601 | fcvt.xf fFloatN = fFloatN // N as FP number | |
3602 | nop.i 0 | |
3603 | } | |
3604 | { .mfi | |
3605 | nop.m 0 | |
3606 | fma.s1 fResH = fResH, fDxSqr, fA18 // High part of log(|x|) | |
3607 | nop.i 0 | |
3608 | } | |
3609 | ;; | |
3610 | { .mfi | |
3611 | ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 | |
3612 | fma.s1 fA4 = fA4, fDxSqr, fA2 // Low part of lgammal(|x|) | |
3613 | nop.i 0 | |
3614 | } | |
3615 | { .mfi | |
3616 | nop.m 0 | |
3617 | // high part of lnsin polynomial | |
3618 | fma.s1 fRes3H = fLnSin8, fDxSqr, fLnSin6 | |
3619 | nop.i 0 | |
3620 | } | |
3621 | ;; | |
3622 | { .mfi | |
3623 | nop.m 0 | |
3624 | fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 | |
3625 | nop.i 0 | |
3626 | } | |
3627 | { .mfi | |
3628 | nop.m 0 | |
3629 | fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 | |
3630 | nop.i 0 | |
3631 | } | |
3632 | ;; | |
3633 | { .mfi | |
3634 | nop.m 0 | |
3635 | fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 | |
3636 | nop.i 0 | |
3637 | } | |
3638 | { .mfi | |
3639 | nop.m 0 | |
3640 | fma.s1 fA16 = fA16, fDxSqr, fA14 // Low part of log(|x|) | |
3641 | nop.i 0 | |
3642 | } | |
3643 | ;; | |
3644 | { .mfi | |
3645 | nop.m 0 | |
3646 | fma.s1 fPol = fPol, fDxSqr, fA6 // High part of lgammal(|x|) | |
3647 | nop.i 0 | |
3648 | } | |
3649 | ;; | |
3650 | { .mfi | |
3651 | nop.m 0 | |
3652 | fma.s1 fResH = fResH, fA0L, fA16 // log(|x|)/deltaX^2 - deltaX | |
3653 | nop.i 0 | |
3654 | } | |
3655 | ;; | |
3656 | { .mfi | |
3657 | nop.m 0 | |
3658 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
3659 | nop.i 0 | |
3660 | } | |
3661 | { .mfi | |
3662 | nop.m 0 | |
3663 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
3664 | nop.i 0 | |
3665 | } | |
3666 | ;; | |
3667 | { .mfi | |
3668 | nop.m 0 | |
3669 | fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 | |
3670 | nop.i 0 | |
3671 | } | |
3672 | ;; | |
3673 | { .mfi | |
3674 | nop.m 0 | |
3675 | fma.s1 fResH = fResH, fDxSqr, fResL // log(|x|) | |
3676 | nop.i 0 | |
3677 | } | |
3678 | { .mfi | |
3679 | nop.m 0 | |
3680 | fma.s1 fPol = fPol, fA0L, fA4 // lgammal(|x|)/|x| | |
3681 | nop.i 0 | |
3682 | } | |
3683 | ;; | |
3684 | { .mfi | |
3685 | nop.m 0 | |
3686 | fms.s1 FR_r = FR_G, fAbsX, f1 // r = G * S_hi - 1 | |
3687 | nop.i 0 | |
3688 | } | |
3689 | { .mfi | |
3690 | nop.m 0 | |
3691 | // high part of log(deltaX)= Y_hi = N * log2_hi + H | |
3692 | fma.s1 fRes4H = fFloatN, FR_log2_hi, FR_H | |
3693 | nop.i 0 | |
3694 | } | |
3695 | ;; | |
3696 | { .mfi | |
3697 | nop.m 0 | |
3698 | // h = N * log2_lo + h | |
3699 | fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h | |
3700 | nop.i 0 | |
3701 | } | |
3702 | ;; | |
3703 | { .mfi | |
3704 | nop.m 0 | |
3705 | fma.s1 fResH = fPol, fDx, fResH // lgammal(|x|) + log(|x|) | |
3706 | nop.i 0 | |
3707 | } | |
3708 | { .mfi | |
3709 | nop.m 0 | |
3710 | // lnsin/deltaX^2 | |
3711 | fma.s1 fRes3H = fRes3H, fA0L, fRes3L | |
3712 | nop.i 0 | |
3713 | } | |
3714 | ;; | |
3715 | { .mfi | |
3716 | nop.m 0 | |
3717 | // poly_lo = r * Q4 + Q3 | |
3718 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
3719 | nop.i 0 | |
3720 | } | |
3721 | { .mfi | |
3722 | nop.m 0 | |
3723 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
3724 | nop.i 0 | |
3725 | } | |
3726 | ;; | |
3727 | { .mfi | |
3728 | nop.m 0 | |
3729 | // lnSin - log(|x|) - lgammal(|x|) | |
3730 | fms.s1 fResH = fRes3H, fDxSqr, fResH | |
3731 | nop.i 0 | |
3732 | } | |
3733 | ;; | |
3734 | ||
3735 | { .mfi | |
3736 | nop.m 0 | |
3737 | // poly_lo = poly_lo * r + Q2 | |
3738 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
3739 | nop.i 0 | |
3740 | } | |
3741 | { .mfi | |
3742 | nop.m 0 | |
3743 | fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
3744 | nop.i 0 | |
3745 | } | |
3746 | ;; | |
3747 | ||
3748 | { .mfi | |
3749 | nop.m 0 | |
3750 | // poly_hi = Q1 * rsq + r | |
3751 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
3752 | nop.i 0 | |
3753 | } | |
3754 | ;; | |
3755 | ||
3756 | { .mfi | |
3757 | nop.m 0 | |
3758 | // poly_lo = poly_lo*r^3 + h | |
3759 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h | |
3760 | nop.i 0 | |
3761 | } | |
3762 | ;; | |
3763 | ||
3764 | { .mfi | |
3765 | nop.m 0 | |
3766 | // low part of log(|deltaX|) = Y_lo = poly_hi + poly_lo | |
3767 | fadd.s1 fRes4L = FR_poly_hi, FR_poly_lo | |
3768 | nop.i 0 | |
3769 | } | |
3770 | ;; | |
3771 | { .mfi | |
3772 | nop.m 0 | |
3773 | fsub.s1 fResH = fResH, fRes4L | |
3774 | nop.i 0 | |
3775 | } | |
3776 | ;; | |
3777 | { .mfb | |
3778 | nop.m 0 | |
3779 | // final result for |x+1|< 2^(-7) path | |
3780 | fsub.s0 f8 = fResH, fRes4H | |
3781 | // exit for |x+1|< 2^(-7) path | |
3782 | br.ret.sptk b0 | |
3783 | } | |
3784 | ;; | |
3785 | ||
3786 | ||
3787 | // here if -2^63 < x < -6.0 and x is not an integer | |
3788 | // Also we are going to filter out cases when x falls in | |
3789 | // range which is "close enough" to negative root. Rhis case | |
3790 | // may occur only for -19.5 < x since other roots of lgamma are | |
3791 | // insignificant from double extended point of view (they are closer | |
3792 | // to RTN(x) than one ulp(x). | |
3793 | .align 32 | |
3794 | _negStirling: | |
3795 | { .mfi | |
3796 | ldfe fLnSin6 = [rLnSinDataPtr], 32 | |
3797 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration | |
3798 | // Get high 4 bits of significand of deltaX | |
3799 | extr.u rIndex1Dx = rSignifDx, 59, 4 | |
3800 | } | |
3801 | { .mfi | |
3802 | ldfe fLnSin8 = [rTmpPtr3], 32 | |
3803 | fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 | |
3804 | (p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound | |
3805 | } | |
3806 | ;; | |
3807 | { .mfi | |
3808 | ldfe fLnSin10 = [rLnSinDataPtr], 32 | |
3809 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
3810 | // Get high 15 bits of significand | |
3811 | extr.u GR_X_0 = rSignifDx, 49, 15 | |
3812 | } | |
3813 | { .mfi | |
3814 | shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1 | |
3815 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
3816 | // set p6 if x falls in "near root" range | |
3817 | (p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound | |
3818 | } | |
3819 | ;; | |
3820 | { .mfi | |
3821 | getf.exp GR_N = fDx // Get N = exponent of x | |
3822 | fma.s1 fDx4 = fDxSqr, fDxSqr, f0 // deltaX^4 | |
3823 | adds rTmpPtr = 96, rBernulliPtr | |
3824 | } | |
3825 | { .mfb | |
3826 | ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1 | |
3827 | fma.s1 fLnSin34 = fLnSin34, fDxSqr, fLnSin32 | |
3828 | // branch to special path if x falls in "near root" range | |
3829 | (p6) br.cond.spnt _negRoots | |
3830 | } | |
3831 | ;; | |
3832 | .pred.rel "mutex",p10,p11 | |
3833 | { .mfi | |
3834 | ldfe fLnSin12 = [rTmpPtr3] | |
3835 | fma.s1 fLnSin26 = fLnSin26, fDxSqr, fLnSin24 | |
3836 | (p10) cmp.eq p8, p9 = rXRnd, r0 | |
3837 | } | |
3838 | { .mfi | |
3839 | ldfe fLnSin14 = [rLnSinDataPtr] | |
3840 | fma.s1 fLnSin30 = fLnSin30, fDxSqr, fLnSin28 | |
3841 | (p11) cmp.eq p9, p8 = rXRnd, r0 | |
3842 | } | |
3843 | ;; | |
3844 | { .mfi | |
3845 | ldfpd fB2, fB2L = [rBernulliPtr], 16 | |
3846 | fma.s1 fLnSin18 = fLnSin18, fDxSqr, fLnSin16 | |
3847 | shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1 | |
3848 | ||
3849 | } | |
3850 | { .mfi | |
3851 | ldfe fB14 = [rTmpPtr], 16 | |
3852 | fma.s1 fLnSin22 = fLnSin22, fDxSqr, fLnSin20 | |
3853 | and GR_N = GR_N, r17Ones // mask sign bit | |
3854 | } | |
3855 | ;; | |
3856 | { .mfi | |
3857 | ldfe fB4 = [rBernulliPtr], 16 | |
3858 | fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration | |
3859 | // Get bits 30-15 of X_0 * Z_1 | |
3860 | pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 | |
3861 | } | |
3862 | { .mfi | |
3863 | ldfe fB16 = [rTmpPtr], 16 | |
3864 | fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 | |
3865 | adds rTmpPtr2 = 8, GR_ad_tbl_1 | |
3866 | } | |
3867 | ;; | |
3868 | // | |
3869 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
3870 | // | |
3871 | { .mfi | |
3872 | ldfe fB6 = [rBernulliPtr], 16 | |
3873 | fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 | |
3874 | adds rTmpPtr3 = -48, rTmpPtr | |
3875 | } | |
3876 | { .mfi | |
3877 | ldfe fB18 = [rTmpPtr], 16 | |
3878 | // High part of the log(|x|) = Y_hi = N * log2_hi + H | |
3879 | fma.s1 fResH = fFloatN, FR_log2_hi, FR_H | |
3880 | sub GR_N = GR_N, rExpHalf, 1 // unbiased exponent of deltaX | |
3881 | } | |
3882 | ;; | |
3883 | .pred.rel "mutex",p8,p9 | |
3884 | { .mfi | |
3885 | ldfe fB8 = [rBernulliPtr], 16 | |
3886 | fma.s1 fLnSin36 = fLnSin36, fDx4, fLnSin34 | |
3887 | // sign of GAMMA(x) is negative | |
3888 | (p8) adds rSgnGam = -1, r0 | |
3889 | } | |
3890 | { .mfi | |
3891 | ldfe fB20 = [rTmpPtr], -160 | |
3892 | fma.s1 fRes5H = fLnSin4, fDxSqr, f0 | |
3893 | // sign of GAMMA(x) is positive | |
3894 | (p9) adds rSgnGam = 1, r0 | |
3895 | ||
3896 | } | |
3897 | ;; | |
3898 | { .mfi | |
3899 | ldfe fB10 = [rBernulliPtr], 16 | |
3900 | fma.s1 fLnSin30 = fLnSin30, fDx4, fLnSin26 | |
3901 | (p14) adds rTmpPtr = -160, rTmpPtr | |
3902 | } | |
3903 | { .mfi | |
3904 | ldfe fB12 = [rTmpPtr3], 16 | |
3905 | fma.s1 fDx8 = fDx4, fDx4, f0 // deltaX^8 | |
3906 | cmp.eq p6, p7 = 4, rSgnGamSize | |
3907 | } | |
3908 | ;; | |
3909 | { .mfi | |
3910 | ldfps fGDx, fHDx = [GR_ad_tbl_1], 8 // Load G_1, H_1 | |
3911 | fma.s1 fDx6 = fDx4, fDxSqr, f0 // deltaX^6 | |
3912 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
3913 | } | |
3914 | { .mfi | |
3915 | ldfd fhDx = [rTmpPtr2] // Load h_1 | |
3916 | fma.s1 fLnSin22 = fLnSin22, fDx4, fLnSin18 | |
3917 | nop.i 0 | |
3918 | } | |
3919 | ;; | |
3920 | { .mfi | |
3921 | // Load two parts of C | |
3922 | ldfpd fRes1H, fRes1L = [rTmpPtr], 16 | |
3923 | fma.s1 fRcpX = fInvX, fInvX, f0 // (1/x)^2 | |
3924 | shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 | |
3925 | } | |
3926 | { .mfi | |
3927 | shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 | |
3928 | fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h// h = N * log2_lo + h | |
3929 | nop.i 0 | |
3930 | } | |
3931 | ;; | |
3932 | { .mfi | |
3933 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
3934 | fnma.s1 fInvXL = f8, fInvX, f1 // relative error of 1/x | |
3935 | nop.i 0 | |
3936 | } | |
3937 | { .mfi | |
3938 | adds rTmpPtr2 = 8, GR_ad_tbl_2 | |
3939 | fma.s1 fLnSin8 = fLnSin8, fDxSqr, fLnSin6 | |
3940 | nop.i 0 | |
3941 | } | |
3942 | ;; | |
3943 | { .mfi | |
3944 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2 | |
3945 | // poly_lo = r * Q4 + Q3 | |
3946 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
3947 | nop.i 0 | |
3948 | } | |
3949 | { .mfi | |
3950 | ldfd fh2Dx = [rTmpPtr2] // Load h_2 | |
3951 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
3952 | nop.i 0 | |
3953 | } | |
3954 | ;; | |
3955 | { .mfi | |
3956 | nop.m 0 | |
3957 | fma.s1 fA1L = fB2, fInvX, f0 // (B2*(1/x))hi | |
3958 | nop.i 0 | |
3959 | } | |
3960 | { .mfi | |
3961 | // Put integer N into rightmost significand | |
3962 | setf.sig fFloatNDx = GR_N | |
3963 | fms.s1 fRes4H = fResH, f1, f1 // ln(|x|)hi - 1 | |
3964 | nop.i 0 | |
3965 | } | |
3966 | ;; | |
3967 | { .mfi | |
3968 | nop.m 0 | |
3969 | fadd.s1 fRes2H = fRes5H, fLnSin2//(lnSin4*DeltaX^2 + lnSin2)hi | |
3970 | // Get bits 30-15 of X_1 * Z_2 | |
3971 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
3972 | } | |
3973 | { .mfi | |
3974 | nop.m 0 | |
3975 | fms.s1 fRes5L = fLnSin4, fDxSqr, fRes5H | |
3976 | nop.i 0 | |
3977 | } | |
3978 | ;; | |
3979 | // | |
3980 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
3981 | // | |
3982 | { .mfi | |
3983 | nop.m 0 | |
3984 | fma.s1 fInvX4 = fRcpX, fRcpX, f0 // (1/x)^4 | |
3985 | nop.i 0 | |
3986 | } | |
3987 | { .mfi | |
3988 | nop.m 0 | |
3989 | fma.s1 fB6 = fB6, fRcpX, fB4 | |
3990 | nop.i 0 | |
3991 | } | |
3992 | ;; | |
3993 | { .mfi | |
3994 | // store signgam if size of variable is 4 bytes | |
3995 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
3996 | fma.s1 fB18 = fB18, fRcpX, fB16 | |
3997 | nop.i 0 | |
3998 | } | |
3999 | { .mfi | |
4000 | // store signgam if size of variable is 8 bytes | |
4001 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
4002 | fma.s1 fInvXL = fInvXL, fInvX, f0 // low part of 1/x | |
4003 | nop.i 0 | |
4004 | } | |
4005 | ;; | |
4006 | { .mfi | |
4007 | nop.m 0 | |
4008 | // poly_lo = poly_lo * r + Q2 | |
4009 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
4010 | nop.i 0 | |
4011 | } | |
4012 | { .mfi | |
4013 | nop.m 0 | |
4014 | fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
4015 | nop.i 0 | |
4016 | } | |
4017 | ;; | |
4018 | { .mfi | |
4019 | nop.m 0 | |
4020 | fma.s1 fRes3H = fRes4H, f8, f0 // (-|x|*(ln(|x|)-1))hi | |
4021 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
4022 | } | |
4023 | { .mfi | |
4024 | nop.m 0 | |
4025 | // poly_hi = Q1 * rsq + r | |
4026 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
4027 | nop.i 0 | |
4028 | } | |
4029 | ;; | |
4030 | { .mfi | |
4031 | shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 | |
4032 | fms.s1 fA2L = fB2, fInvX, fA1L // delta(B2*(1/x)) | |
4033 | nop.i 0 | |
4034 | } | |
4035 | { .mfi | |
4036 | nop.m 0 | |
4037 | fnma.s1 fBrnH = fRes1H, f1, fA1L // (-C - S(1/x))hi | |
4038 | nop.i 0 | |
4039 | } | |
4040 | ;; | |
4041 | { .mfi | |
4042 | ldfps fG3Dx, fH3Dx = [GR_ad_tbl_3],8 // Load G_3, H_3 | |
4043 | fma.s1 fInvX8 = fInvX4, fInvX4, f0 // (1/x)^8 | |
4044 | nop.i 0 | |
4045 | } | |
4046 | { .mfi | |
4047 | nop.m 0 | |
4048 | fma.s1 fB10 = fB10, fRcpX, fB8 | |
4049 | nop.i 0 | |
4050 | } | |
4051 | ;; | |
4052 | ||
4053 | { .mfi | |
4054 | ldfd fh3Dx = [GR_ad_tbl_3] // Load h_3 | |
4055 | fma.s1 fB20 = fB20, fInvX4, fB18 | |
4056 | nop.i 0 | |
4057 | } | |
4058 | { .mfi | |
4059 | nop.m 0 | |
4060 | fma.s1 fB14 = fB14, fRcpX, fB12 | |
4061 | nop.i 0 | |
4062 | } | |
4063 | ;; | |
4064 | { .mfi | |
4065 | nop.m 0 | |
4066 | fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin30 | |
4067 | nop.i 0 | |
4068 | } | |
4069 | { .mfi | |
4070 | nop.m 0 | |
4071 | fma.s1 fLnSin12 = fLnSin12, fDxSqr, fLnSin10 | |
4072 | nop.i 0 | |
4073 | } | |
4074 | ;; | |
4075 | { .mfi | |
4076 | nop.m 0 | |
4077 | fsub.s1 fRes2L = fLnSin2, fRes2H | |
4078 | nop.i 0 | |
4079 | } | |
4080 | { .mfi | |
4081 | nop.m 0 | |
4082 | fma.s1 fPol = fRes2H, fDxSqr, f0 // high part of LnSin | |
4083 | nop.i 0 | |
4084 | } | |
4085 | ;; | |
4086 | { .mfi | |
4087 | nop.m 0 | |
4088 | fnma.s1 fResH = fResH, FR_MHalf, fResH // -0.5*ln(|x|)hi | |
4089 | nop.i 0 | |
4090 | } | |
4091 | { .mfi | |
4092 | nop.m 0 | |
4093 | fmpy.s1 fGDx = fGDx, FR_G2 // G = G_1 * G_2 | |
4094 | nop.i 0 | |
4095 | } | |
4096 | ;; | |
4097 | { .mfi | |
4098 | nop.m 0 | |
4099 | // poly_lo = poly_lo*r^3 + h | |
4100 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h | |
4101 | nop.i 0 | |
4102 | } | |
4103 | { .mfi | |
4104 | nop.m 0 | |
4105 | // B2lo*(1/x)hi+ delta(B2*(1/x)) | |
4106 | fma.s1 fA2L = fB2L, fInvX, fA2L | |
4107 | nop.i 0 | |
4108 | } | |
4109 | ;; | |
4110 | { .mfi | |
4111 | nop.m 0 | |
4112 | fma.s1 fB20 = fB20, fInvX4, fB14 | |
4113 | nop.i 0 | |
4114 | } | |
4115 | { .mfi | |
4116 | nop.m 0 | |
4117 | fma.s1 fB10 = fB10, fInvX4, fB6 | |
4118 | nop.i 0 | |
4119 | } | |
4120 | ;; | |
4121 | { .mfi | |
4122 | nop.m 0 | |
4123 | fcvt.xf fFloatNDx = fFloatNDx | |
4124 | nop.i 0 | |
4125 | } | |
4126 | { .mfi | |
4127 | nop.m 0 | |
4128 | fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin12 | |
4129 | nop.i 0 | |
4130 | } | |
4131 | ;; | |
4132 | { .mfi | |
4133 | nop.m 0 | |
4134 | fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin22 | |
4135 | nop.i 0 | |
4136 | } | |
4137 | { .mfi | |
4138 | nop.m 0 | |
4139 | fms.s1 fRes3L = fRes4H, f8, fRes3H // delta(-|x|*(ln(|x|)-1)) | |
4140 | nop.i 0 | |
4141 | } | |
4142 | ;; | |
4143 | { .mfi | |
4144 | nop.m 0 | |
4145 | fmpy.s1 fGDx = fGDx, fG3Dx // G = (G_1 * G_2) * G_3 | |
4146 | nop.i 0 | |
4147 | } | |
4148 | { .mfi | |
4149 | nop.m 0 | |
4150 | // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))hi | |
4151 | fadd.s1 fRes4H = fRes3H, fResH | |
4152 | nop.i 0 | |
4153 | } | |
4154 | ;; | |
4155 | { .mfi | |
4156 | nop.m 0 | |
4157 | fma.s1 fA2L = fInvXL, fB2, fA2L //(B2*(1/x))lo | |
4158 | nop.i 0 | |
4159 | } | |
4160 | { .mfi | |
4161 | nop.m 0 | |
4162 | // low part of log(|x|) = Y_lo = poly_hi + poly_lo | |
4163 | fadd.s1 fResL = FR_poly_hi, FR_poly_lo | |
4164 | nop.i 0 | |
4165 | } | |
4166 | ;; | |
4167 | { .mfi | |
4168 | nop.m 0 | |
4169 | fma.s1 fB20 = fB20, fInvX8, fB10 | |
4170 | nop.i 0 | |
4171 | } | |
4172 | { .mfi | |
4173 | nop.m 0 | |
4174 | fma.s1 fInvX3 = fInvX, fRcpX, f0 // (1/x)^3 | |
4175 | nop.i 0 | |
4176 | } | |
4177 | ;; | |
4178 | { .mfi | |
4179 | nop.m 0 | |
4180 | fadd.s1 fHDx = fHDx, FR_H2 // H = H_1 + H_2 | |
4181 | nop.i 0 | |
4182 | } | |
4183 | { .mfi | |
4184 | nop.m 0 | |
4185 | fadd.s1 fRes5L = fRes5L, fLnSin2L | |
4186 | nop.i 0 | |
4187 | } | |
4188 | ;; | |
4189 | { .mfi | |
4190 | nop.m 0 | |
4191 | fadd.s1 fRes2L = fRes2L, fRes5H | |
4192 | nop.i 0 | |
4193 | } | |
4194 | { .mfi | |
4195 | nop.m 0 | |
4196 | fadd.s1 fhDx = fhDx, fh2Dx // h = h_1 + h_2 | |
4197 | nop.i 0 | |
4198 | } | |
4199 | ;; | |
4200 | { .mfi | |
4201 | nop.m 0 | |
4202 | fms.s1 fBrnL = fRes1H, fMOne, fBrnH | |
4203 | nop.i 0 | |
4204 | } | |
4205 | { .mfi | |
4206 | nop.m 0 | |
4207 | fms.s1 FR_r = fGDx, fNormDx, f1 // r = G * S_hi - 1 | |
4208 | nop.i 0 | |
4209 | } | |
4210 | ;; | |
4211 | { .mfi | |
4212 | nop.m 0 | |
4213 | fma.s1 fRes3L = fResL, f8 , fRes3L // (-|x|*(ln(|x|)-1))lo | |
4214 | nop.i 0 | |
4215 | } | |
4216 | { .mfi | |
4217 | nop.m 0 | |
4218 | fsub.s1 fRes4L = fRes3H, fRes4H | |
4219 | nop.i 0 | |
4220 | } | |
4221 | ;; | |
4222 | { .mfi | |
4223 | nop.m 0 | |
4224 | // low part of "Bernulli" polynomial | |
4225 | fma.s1 fB20 = fB20, fInvX3, fA2L | |
4226 | nop.i 0 | |
4227 | } | |
4228 | { .mfi | |
4229 | nop.m 0 | |
4230 | fnma.s1 fResL = fResL, FR_MHalf, fResL // -0.5*ln(|x|)lo | |
4231 | nop.i 0 | |
4232 | } | |
4233 | ;; | |
4234 | { .mfi | |
4235 | nop.m 0 | |
4236 | fadd.s1 fHDx = fHDx, fH3Dx // H = (H_1 + H_2) + H_3 | |
4237 | nop.i 0 | |
4238 | } | |
4239 | { .mfi | |
4240 | nop.m 0 | |
4241 | fms.s1 fPolL = fRes2H, fDxSqr, fPol | |
4242 | nop.i 0 | |
4243 | } | |
4244 | ;; | |
4245 | { .mfi | |
4246 | nop.m 0 | |
4247 | fadd.s1 fhDx = fhDx, fh3Dx // h = (h_1 + h_2) + h_3 | |
4248 | nop.i 0 | |
4249 | } | |
4250 | { .mfi | |
4251 | nop.m 0 | |
4252 | // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))hi | |
4253 | fadd.s1 fB14 = fRes4H, fBrnH | |
4254 | nop.i 0 | |
4255 | } | |
4256 | ;; | |
4257 | { .mfi | |
4258 | nop.m 0 | |
4259 | // poly_lo = r * Q4 + Q3 | |
4260 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
4261 | nop.i 0 | |
4262 | } | |
4263 | { .mfi | |
4264 | nop.m 0 | |
4265 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
4266 | nop.i 0 | |
4267 | } | |
4268 | ;; | |
4269 | { .mfi | |
4270 | nop.m 0 | |
4271 | fadd.s1 fRes4L = fRes4L, fResH | |
4272 | nop.i 0 | |
4273 | } | |
4274 | { .mfi | |
4275 | nop.m 0 | |
4276 | fadd.s1 fBrnL = fBrnL, fA1L | |
4277 | nop.i 0 | |
4278 | } | |
4279 | ;; | |
4280 | { .mfi | |
4281 | nop.m 0 | |
4282 | // (-|x|*(ln(|x|)-1))lo + (-0.5ln(|x|))lo | |
4283 | fadd.s1 fRes3L = fRes3L, fResL | |
4284 | nop.i 0 | |
4285 | } | |
4286 | { .mfi | |
4287 | nop.m 0 | |
4288 | fnma.s1 fB20 = fRes1L, f1, fB20 // -Clo - S(1/x)lo | |
4289 | nop.i 0 | |
4290 | } | |
4291 | ;; | |
4292 | { .mfi | |
4293 | nop.m 0 | |
4294 | fadd.s1 fRes2L = fRes2L, fRes5L // (lnSin4*DeltaX^2 + lnSin2)lo | |
4295 | nop.i 0 | |
4296 | } | |
4297 | { .mfi | |
4298 | nop.m 0 | |
4299 | fma.s1 fPolL = fDxSqrL, fRes2H, fPolL | |
4300 | nop.i 0 | |
4301 | } | |
4302 | ;; | |
4303 | { .mfi | |
4304 | nop.m 0 | |
4305 | fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin8 | |
4306 | nop.i 0 | |
4307 | } | |
4308 | { .mfi | |
4309 | nop.m 0 | |
4310 | fma.s1 fLnSin36 = fLnSin36, fDx8, f0 | |
4311 | nop.i 0 | |
4312 | } | |
4313 | ;; | |
4314 | { .mfi | |
4315 | nop.m 0 | |
4316 | // poly_lo = poly_lo * r + Q2 | |
4317 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
4318 | nop.i 0 | |
4319 | } | |
4320 | { .mfi | |
4321 | nop.m 0 | |
4322 | fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
4323 | nop.i 0 | |
4324 | } | |
4325 | ;; | |
4326 | { .mfi | |
4327 | nop.m 0 | |
4328 | // poly_hi = Q1 * rsq + r | |
4329 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
4330 | nop.i 0 | |
4331 | } | |
4332 | { .mfi | |
4333 | nop.m 0 | |
4334 | fsub.s1 fB12 = fRes4H, fB14 | |
4335 | nop.i 0 | |
4336 | } | |
4337 | ;; | |
4338 | { .mfi | |
4339 | nop.m 0 | |
4340 | // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo | |
4341 | fadd.s1 fRes4L = fRes4L, fRes3L | |
4342 | nop.i 0 | |
4343 | } | |
4344 | { .mfi | |
4345 | nop.m 0 | |
4346 | fadd.s1 fBrnL = fBrnL, fB20 // (-C - S(1/x))lo | |
4347 | nop.i 0 | |
4348 | } | |
4349 | ;; | |
4350 | { .mfi | |
4351 | nop.m 0 | |
4352 | // high part of log(|DeltaX|) = Y_hi = N * log2_hi + H | |
4353 | fma.s1 fLnDeltaH = fFloatNDx, FR_log2_hi, fHDx | |
4354 | nop.i 0 | |
4355 | } | |
4356 | { .mfi | |
4357 | nop.m 0 | |
4358 | // h = N * log2_lo + h | |
4359 | fma.s1 fhDx = fFloatNDx, FR_log2_lo, fhDx | |
4360 | nop.i 0 | |
4361 | } | |
4362 | ;; | |
4363 | { .mfi | |
4364 | nop.m 0 | |
4365 | fma.s1 fPolL = fRes2L, fDxSqr, fPolL | |
4366 | nop.i 0 | |
4367 | } | |
4368 | { .mfi | |
4369 | nop.m 0 | |
4370 | fma.s1 fLnSin14 = fLnSin36, fDxSqr, fLnSin14 | |
4371 | nop.i 0 | |
4372 | } | |
4373 | ;; | |
4374 | { .mfi | |
4375 | nop.m 0 | |
4376 | // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo + (- C - S(1/x))lo | |
4377 | fadd.s1 fBrnL = fBrnL, fRes4L | |
4378 | nop.i 0 | |
4379 | } | |
4380 | { .mfi | |
4381 | nop.m 0 | |
4382 | fadd.s1 fB12 = fB12, fBrnH | |
4383 | nop.i 0 | |
4384 | } | |
4385 | ;; | |
4386 | { .mfi | |
4387 | nop.m 0 | |
4388 | // poly_lo = poly_lo*r^3 + h | |
4389 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, fhDx | |
4390 | nop.i 0 | |
4391 | } | |
4392 | { .mfi | |
4393 | nop.m 0 | |
4394 | fnma.s1 fRes1H = fLnDeltaH, f1, fPol//(-ln(|DeltaX|) + LnSin)hi | |
4395 | nop.i 0 | |
4396 | } | |
4397 | ;; | |
4398 | { .mfi | |
4399 | nop.m 0 | |
4400 | fma.s1 fPolL = fDxSqrL, fRes2L, fPolL | |
4401 | nop.i 0 | |
4402 | } | |
4403 | { .mfi | |
4404 | nop.m 0 | |
4405 | fma.s1 fLnSin36 = fLnSin14, fDx6, f0 | |
4406 | nop.i 0 | |
4407 | } | |
4408 | ;; | |
4409 | { .mfi | |
4410 | nop.m 0 | |
4411 | // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))lo | |
4412 | fadd.s1 fB12 = fB12, fBrnL | |
4413 | nop.i 0 | |
4414 | } | |
4415 | ;; | |
4416 | { .mfi | |
4417 | nop.m 0 | |
4418 | // low part of log(|DeltaX|) = Y_lo = poly_hi + poly_lo | |
4419 | fadd.s1 fLnDeltaL= FR_poly_hi, FR_poly_lo | |
4420 | nop.i 0 | |
4421 | } | |
4422 | { .mfi | |
4423 | nop.m 0 | |
4424 | fms.s1 fRes1L = fLnDeltaH, fMOne, fRes1H | |
4425 | nop.i 0 | |
4426 | } | |
4427 | ;; | |
4428 | { .mfi | |
4429 | nop.m 0 | |
4430 | fadd.s1 fPolL = fPolL, fLnSin36 | |
4431 | nop.i 0 | |
4432 | } | |
4433 | { .mfi | |
4434 | nop.m 0 | |
4435 | //(-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi + (-ln(|DeltaX|) + LnSin)hi | |
4436 | fadd.s1 f8 = fRes1H, fB14 | |
4437 | nop.i 0 | |
4438 | } | |
4439 | ;; | |
4440 | { .mfi | |
4441 | nop.m 0 | |
4442 | //max((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi, | |
4443 | // (-ln(|DeltaX|) + LnSin)hi) | |
4444 | famax.s1 fMaxNegStir = fRes1H, fB14 | |
4445 | nop.i 0 | |
4446 | } | |
4447 | { .mfi | |
4448 | nop.m 0 | |
4449 | //min((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi, | |
4450 | // (-ln(|DeltaX|) + LnSin)hi) | |
4451 | famin.s1 fMinNegStir = fRes1H, fB14 | |
4452 | nop.i 0 | |
4453 | } | |
4454 | ;; | |
4455 | { .mfi | |
4456 | nop.m 0 | |
4457 | fadd.s1 fRes1L = fRes1L, fPol | |
4458 | nop.i 0 | |
4459 | } | |
4460 | { .mfi | |
4461 | nop.m 0 | |
4462 | // (-ln(|DeltaX|))lo + (LnSin)lo | |
4463 | fnma.s1 fPolL = fLnDeltaL, f1, fPolL | |
4464 | nop.i 0 | |
4465 | } | |
4466 | ;; | |
4467 | { .mfi | |
4468 | nop.m 0 | |
4469 | fsub.s1 f9 = fMaxNegStir, f8 // delta1 | |
4470 | nop.i 0 | |
4471 | } | |
4472 | ;; | |
4473 | { .mfi | |
4474 | nop.m 0 | |
4475 | fadd.s1 fRes1L = fRes1L, fPolL // (-ln(|DeltaX|) + LnSin)lo | |
4476 | nop.i 0 | |
4477 | } | |
4478 | ;; | |
4479 | { .mfi | |
4480 | nop.m 0 | |
4481 | fadd.s1 f9 = f9, fMinNegStir | |
4482 | nop.i 0 | |
4483 | } | |
4484 | ;; | |
4485 | { .mfi | |
4486 | nop.m 0 | |
4487 | fadd.s1 fRes1L = fRes1L, fB12 | |
4488 | nop.i 0 | |
4489 | } | |
4490 | ;; | |
4491 | { .mfi | |
4492 | // low part of the result | |
4493 | fadd.s1 f9 = f9, fRes1L | |
4494 | nop.i 0 | |
4495 | } | |
4496 | ;; | |
4497 | { .mfb | |
4498 | nop.m 0 | |
4499 | // final result for -2^63 < x < -6.0 path | |
4500 | fma.s0 f8 = f8, f1, f9 | |
4501 | // exit here for -2^63 < x < -6.0 path | |
4502 | br.ret.sptk b0 | |
4503 | } | |
4504 | ;; | |
4505 | ||
4506 | // here if x falls in neighbourhood of any negative root | |
4507 | // "neighbourhood" typically means that |lgammal(x)| < 0.17 | |
4508 | // on the [-3.0,-2.0] range |lgammal(x)| has even less | |
4509 | // magnitude | |
4510 | // rXint contains index of the root | |
4511 | // p10 is set if root belongs to "right" ones | |
4512 | // p11 is set if root belongs to "left" ones | |
4513 | // lgammal(x) is approximated by polynomial of | |
4514 | // 19th degree from (x - root) argument | |
4515 | .align 32 | |
4516 | _negRoots: | |
4517 | { .mfi | |
4518 | addl rPolDataPtr= @ltoff(lgammal_right_roots_polynomial_data),gp | |
4519 | nop.f 0 | |
4520 | shl rTmpPtr2 = rXint, 7 // (i*16)*8 | |
4521 | } | |
4522 | { .mfi | |
4523 | adds rRootsAddr = -288, rRootsBndAddr | |
4524 | nop.f 0 | |
4525 | nop.i 0 | |
4526 | } | |
4527 | ;; | |
4528 | { .mfi | |
4529 | ldfe fRoot = [rRootsAddr] // FP representation of root | |
4530 | nop.f 0 | |
4531 | shl rTmpPtr = rXint, 6 // (i*16)*4 | |
4532 | } | |
4533 | { .mfi | |
4534 | (p11) adds rTmpPtr2 = 3536, rTmpPtr2 | |
4535 | nop.f 0 | |
4536 | nop.i 0 | |
4537 | } | |
4538 | ;; | |
4539 | { .mfi | |
4540 | ld8 rPolDataPtr = [rPolDataPtr] | |
4541 | nop.f 0 | |
4542 | shladd rTmpPtr = rXint, 4, rTmpPtr // (i*16) + (i*16)*4 | |
4543 | } | |
4544 | { .mfi | |
4545 | adds rTmpPtr3 = 32, rTmpPtr2 | |
4546 | nop.f 0 | |
4547 | nop.i 0 | |
4548 | } | |
4549 | ;; | |
4550 | .pred.rel "mutex",p10,p11 | |
4551 | { .mfi | |
4552 | add rTmpPtr3 = rTmpPtr, rTmpPtr3 | |
4553 | nop.f 0 | |
4554 | (p10) cmp.eq p8, p9 = rXRnd, r0 | |
4555 | } | |
4556 | { .mfi | |
4557 | // (i*16) + (i*16)*4 + (i*16)*8 | |
4558 | add rTmpPtr = rTmpPtr, rTmpPtr2 | |
4559 | nop.f 0 | |
4560 | (p11) cmp.eq p9, p8 = rXRnd, r0 | |
4561 | } | |
4562 | ;; | |
4563 | { .mfi | |
4564 | add rTmpPtr2 = rPolDataPtr, rTmpPtr3 | |
4565 | nop.f 0 | |
4566 | nop.i 0 | |
4567 | } | |
4568 | { .mfi | |
4569 | add rPolDataPtr = rPolDataPtr, rTmpPtr // begin + offsett | |
4570 | nop.f 0 | |
4571 | nop.i 0 | |
4572 | } | |
4573 | ;; | |
4574 | { .mfi | |
4575 | ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 | |
4576 | nop.f 0 | |
4577 | adds rTmpPtr = 112, rTmpPtr2 | |
4578 | } | |
4579 | { .mfi | |
4580 | ldfpd fA2, fA2L = [rTmpPtr2], 16 // A2 | |
4581 | nop.f 0 | |
4582 | cmp.eq p12, p13 = 4, rSgnGamSize | |
4583 | } | |
4584 | ;; | |
4585 | { .mfi | |
4586 | ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 | |
4587 | nop.f 0 | |
4588 | nop.i 0 | |
4589 | } | |
4590 | { .mfi | |
4591 | ldfe fA3 = [rTmpPtr2], 128 // A4 | |
4592 | nop.f 0 | |
4593 | nop.i 0 | |
4594 | } | |
4595 | ;; | |
4596 | { .mfi | |
4597 | ldfpd fA12, fA13 = [rTmpPtr], 16 // A12, A13 | |
4598 | nop.f 0 | |
4599 | adds rTmpPtr3 = 64, rPolDataPtr | |
4600 | } | |
4601 | { .mfi | |
4602 | ldfpd fA16, fA17 = [rTmpPtr2], 16 // A16, A17 | |
4603 | nop.f 0 | |
4604 | adds rPolDataPtr = 32, rPolDataPtr | |
4605 | } | |
4606 | ;; | |
4607 | .pred.rel "mutex",p8,p9 | |
4608 | { .mfi | |
4609 | ldfpd fA14, fA15 = [rTmpPtr], 16 // A14, A15 | |
4610 | nop.f 0 | |
4611 | // sign of GAMMA(x) is negative | |
4612 | (p8) adds rSgnGam = -1, r0 | |
4613 | } | |
4614 | { .mfi | |
4615 | ldfpd fA18, fA19 = [rTmpPtr2], 16 // A18, A19 | |
4616 | nop.f 0 | |
4617 | // sign of GAMMA(x) is positive | |
4618 | (p9) adds rSgnGam = 1, r0 | |
4619 | } | |
4620 | ;; | |
4621 | { .mfi | |
4622 | ldfe fA4 = [rPolDataPtr], 16 // A4 | |
4623 | nop.f 0 | |
4624 | nop.i 0 | |
4625 | } | |
4626 | { .mfi | |
4627 | ldfpd fA6, fA7 = [rTmpPtr3], 16 // A6, A7 | |
4628 | nop.f 0 | |
4629 | nop.i 0 | |
4630 | } | |
4631 | ;; | |
4632 | { .mfi | |
4633 | ldfe fA5 = [rPolDataPtr], 16 // A5 | |
4634 | // if x equals to (rounded) root exactly | |
4635 | fcmp.eq.s1 p6, p0 = f8, fRoot | |
4636 | nop.i 0 | |
4637 | } | |
4638 | { .mfi | |
4639 | ldfpd fA8, fA9 = [rTmpPtr3], 16 // A8, A9 | |
4640 | fms.s1 FR_FracX = f8, f1, fRoot | |
4641 | nop.i 0 | |
4642 | } | |
4643 | ;; | |
4644 | { .mfi | |
4645 | // store signgam if size of variable is 4 bytes | |
4646 | (p12) st4 [rSgnGamAddr] = rSgnGam | |
4647 | nop.f 0 | |
4648 | nop.i 0 | |
4649 | } | |
4650 | { .mfb | |
4651 | // store signgam if size of variable is 8 bytes | |
4652 | (p13) st8 [rSgnGamAddr] = rSgnGam | |
4653 | // answer if x equals to (rounded) root exactly | |
4654 | (p6) fadd.s0 f8 = fA0, fA0L | |
4655 | // exit if x equals to (rounded) root exactly | |
4656 | (p6) br.ret.spnt b0 | |
4657 | } | |
4658 | ;; | |
4659 | { .mmf | |
4660 | ldfpd fA10, fA11 = [rTmpPtr3], 16 // A10, A11 | |
4661 | nop.m 0 | |
4662 | nop.f 0 | |
4663 | } | |
4664 | ;; | |
4665 | { .mfi | |
4666 | nop.m 0 | |
4667 | fma.s1 fResH = fA2, FR_FracX, f0 // (A2*x)hi | |
4668 | nop.i 0 | |
4669 | } | |
4670 | { .mfi | |
4671 | nop.m 0 | |
4672 | fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2 | |
4673 | nop.i 0 | |
4674 | } | |
4675 | ;; | |
4676 | { .mfi | |
4677 | nop.m 0 | |
4678 | fma.s1 fA17 = fA17, FR_FracX, fA16 | |
4679 | nop.i 0 | |
4680 | } | |
4681 | {.mfi | |
4682 | nop.m 0 | |
4683 | fma.s1 fA13 = fA13, FR_FracX, fA12 | |
4684 | nop.i 0 | |
4685 | } | |
4686 | ;; | |
4687 | { .mfi | |
4688 | nop.m 0 | |
4689 | fma.s1 fA19 = fA19, FR_FracX, fA18 | |
4690 | nop.i 0 | |
4691 | } | |
4692 | {.mfi | |
4693 | nop.m 0 | |
4694 | fma.s1 fA15 = fA15, FR_FracX, fA14 | |
4695 | nop.i 0 | |
4696 | } | |
4697 | ;; | |
4698 | {.mfi | |
4699 | nop.m 0 | |
4700 | fma.s1 fPol = fA7, FR_FracX, fA6 | |
4701 | nop.i 0 | |
4702 | } | |
4703 | ;; | |
4704 | {.mfi | |
4705 | nop.m 0 | |
4706 | fma.s1 fA9 = fA9, FR_FracX, fA8 | |
4707 | nop.i 0 | |
4708 | } | |
4709 | ;; | |
4710 | { .mfi | |
4711 | nop.m 0 | |
4712 | fms.s1 fResL = fA2, FR_FracX, fResH // delta(A2*x) | |
4713 | nop.i 0 | |
4714 | } | |
4715 | {.mfi | |
4716 | nop.m 0 | |
4717 | fadd.s1 fRes1H = fResH, fA1 // (A2*x + A1)hi | |
4718 | nop.i 0 | |
4719 | } | |
4720 | ;; | |
4721 | { .mfi | |
4722 | nop.m 0 | |
4723 | fma.s1 fA11 = fA11, FR_FracX, fA10 | |
4724 | nop.i 0 | |
4725 | } | |
4726 | {.mfi | |
4727 | nop.m 0 | |
4728 | fma.s1 fA5L = fA4L, fA4L, f0 // x^4 | |
4729 | nop.i 0 | |
4730 | } | |
4731 | ;; | |
4732 | { .mfi | |
4733 | nop.m 0 | |
4734 | fma.s1 fA19 = fA19, fA4L, fA17 | |
4735 | nop.i 0 | |
4736 | } | |
4737 | {.mfi | |
4738 | nop.m 0 | |
4739 | fma.s1 fA15 = fA15, fA4L, fA13 | |
4740 | nop.i 0 | |
4741 | } | |
4742 | ;; | |
4743 | { .mfi | |
4744 | nop.m 0 | |
4745 | fma.s1 fPol = fPol, FR_FracX, fA5 | |
4746 | nop.i 0 | |
4747 | } | |
4748 | {.mfi | |
4749 | nop.m 0 | |
4750 | fma.s1 fA3L = fA4L, FR_FracX, f0 // x^3 | |
4751 | nop.i 0 | |
4752 | } | |
4753 | ;; | |
4754 | { .mfi | |
4755 | nop.m 0 | |
4756 | // delta(A2*x) + A2L*x = (A2*x)lo | |
4757 | fma.s1 fResL = fA2L, FR_FracX, fResL | |
4758 | nop.i 0 | |
4759 | } | |
4760 | {.mfi | |
4761 | nop.m 0 | |
4762 | fsub.s1 fRes1L = fA1, fRes1H | |
4763 | nop.i 0 | |
4764 | } | |
4765 | ;; | |
4766 | { .mfi | |
4767 | nop.m 0 | |
4768 | fma.s1 fA11 = fA11, fA4L, fA9 | |
4769 | nop.i 0 | |
4770 | } | |
4771 | {.mfi | |
4772 | nop.m 0 | |
4773 | fma.s1 fA19 = fA19, fA5L, fA15 | |
4774 | nop.i 0 | |
4775 | } | |
4776 | ;; | |
4777 | {.mfi | |
4778 | nop.m 0 | |
4779 | fma.s1 fPol = fPol, FR_FracX, fA4 | |
4780 | nop.i 0 | |
4781 | } | |
4782 | ;; | |
4783 | { .mfi | |
4784 | nop.m 0 | |
4785 | fadd.s1 fResL = fResL, fA1L // (A2*x)lo + A1 | |
4786 | nop.i 0 | |
4787 | } | |
4788 | {.mfi | |
4789 | nop.m 0 | |
4790 | fadd.s1 fRes1L = fRes1L, fResH | |
4791 | nop.i 0 | |
4792 | } | |
4793 | ;; | |
4794 | { .mfi | |
4795 | nop.m 0 | |
4796 | fma.s1 fRes2H = fRes1H, FR_FracX, f0 // ((A2*x + A1)*x)hi | |
4797 | nop.i 0 | |
4798 | } | |
4799 | ;; | |
4800 | {.mfi | |
4801 | nop.m 0 | |
4802 | fma.s1 fA19 = fA19, fA5L, fA11 | |
4803 | nop.i 0 | |
4804 | } | |
4805 | ;; | |
4806 | {.mfi | |
4807 | nop.m 0 | |
4808 | fma.s1 fPol = fPol, FR_FracX, fA3 | |
4809 | nop.i 0 | |
4810 | } | |
4811 | ;; | |
4812 | { .mfi | |
4813 | nop.m 0 | |
4814 | fadd.s1 fRes1L = fRes1L, fResL // (A2*x + A1)lo | |
4815 | nop.i 0 | |
4816 | } | |
4817 | ;; | |
4818 | { .mfi | |
4819 | nop.m 0 | |
4820 | // delta((A2*x + A1)*x) | |
4821 | fms.s1 fRes2L = fRes1H, FR_FracX, fRes2H | |
4822 | nop.i 0 | |
4823 | } | |
4824 | {.mfi | |
4825 | nop.m 0 | |
4826 | fadd.s1 fRes3H = fRes2H, fA0 // ((A2*x + A1)*x + A0)hi | |
4827 | nop.i 0 | |
4828 | } | |
4829 | ;; | |
4830 | { .mfi | |
4831 | nop.m 0 | |
4832 | fma.s1 fA19 = fA19, fA5L, f0 | |
4833 | nop.i 0 | |
4834 | } | |
4835 | ||
4836 | ;; | |
4837 | { .mfi | |
4838 | nop.m 0 | |
4839 | fma.s1 fRes2L = fRes1L, FR_FracX, fRes2L // ((A2*x + A1)*x)lo | |
4840 | nop.i 0 | |
4841 | } | |
4842 | {.mfi | |
4843 | nop.m 0 | |
4844 | fsub.s1 fRes3L = fRes2H, fRes3H | |
4845 | nop.i 0 | |
4846 | } | |
4847 | ;; | |
4848 | {.mfi | |
4849 | nop.m 0 | |
4850 | fma.s1 fPol = fA19, FR_FracX, fPol | |
4851 | nop.i 0 | |
4852 | } | |
4853 | ;; | |
4854 | { .mfi | |
4855 | nop.m 0 | |
4856 | fadd.s1 fRes3L = fRes3L, fA0 | |
4857 | nop.i 0 | |
4858 | } | |
4859 | {.mfi | |
4860 | nop.m 0 | |
4861 | fadd.s1 fRes2L = fRes2L, fA0L // ((A2*x + A1)*x)lo + A0L | |
4862 | nop.i 0 | |
4863 | } | |
4864 | ;; | |
4865 | { .mfi | |
4866 | nop.m 0 | |
4867 | fadd.s1 fRes3L = fRes3L, fRes2L // (((A2*x + A1)*x) + A0)lo | |
4868 | nop.i 0 | |
4869 | } | |
4870 | ;; | |
4871 | {.mfi | |
4872 | nop.m 0 | |
4873 | fma.s1 fRes3L = fPol, fA3L, fRes3L | |
4874 | nop.i 0 | |
4875 | } | |
4876 | ;; | |
4877 | { .mfb | |
4878 | nop.m 0 | |
4879 | // final result for arguments which are close to negative roots | |
4880 | fma.s0 f8 = fRes3H, f1, fRes3L | |
4881 | // exit here for arguments which are close to negative roots | |
4882 | br.ret.sptk b0 | |
4883 | } | |
4884 | ;; | |
4885 | ||
4886 | // here if |x| < 0.5 | |
4887 | .align 32 | |
4888 | lgammal_0_half: | |
4889 | { .mfi | |
4890 | ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 | |
4891 | fma.s1 fA4L = f8, f8, f0 // x^2 | |
4892 | addl rPolDataPtr = @ltoff(lgammal_0_Half_data), gp | |
4893 | } | |
4894 | { .mfi | |
4895 | shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1 | |
4896 | nop.f 0 | |
4897 | addl rLnSinDataPtr = @ltoff(lgammal_lnsin_data), gp | |
4898 | } | |
4899 | ;; | |
4900 | { .mfi | |
4901 | ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 | |
4902 | nop.f 0 | |
4903 | // Point to Constants_Z_2 | |
4904 | add GR_ad_z_2 = 0x140, GR_ad_z_1 | |
4905 | } | |
4906 | { .mfi | |
4907 | add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q | |
4908 | nop.f 0 | |
4909 | // Point to Constants_G_H_h2 | |
4910 | add GR_ad_tbl_2 = 0x180, GR_ad_z_1 | |
4911 | } | |
4912 | ;; | |
4913 | { .mfi | |
4914 | ld8 rPolDataPtr = [rPolDataPtr] | |
4915 | nop.f 0 | |
4916 | // Point to Constants_G_H_h3 | |
4917 | add GR_ad_tbl_3 = 0x280, GR_ad_z_1 | |
4918 | } | |
4919 | { .mfi | |
4920 | ldfd FR_h = [GR_ad_tbl_1] // Load h_1 | |
4921 | nop.f 0 | |
4922 | sub GR_N = rExpX, rExpHalf, 1 | |
4923 | } | |
4924 | ;; | |
4925 | { .mfi | |
4926 | ld8 rLnSinDataPtr = [rLnSinDataPtr] | |
4927 | nop.f 0 | |
4928 | // Get bits 30-15 of X_0 * Z_1 | |
4929 | pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 | |
4930 | } | |
4931 | { .mfi | |
4932 | ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi | |
4933 | nop.f 0 | |
4934 | sub GR_N = r0, GR_N | |
4935 | } | |
4936 | ;; | |
4937 | // | |
4938 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
4939 | // | |
4940 | { .mfi | |
4941 | ldfe FR_log2_lo = [GR_ad_q], 16 // Load log2_lo | |
4942 | nop.f 0 | |
4943 | add rTmpPtr2 = 320, rPolDataPtr | |
4944 | } | |
4945 | { .mfi | |
4946 | add rTmpPtr = 32, rPolDataPtr | |
4947 | nop.f 0 | |
4948 | // exponent of 0.25 | |
4949 | adds rExp2 = -1, rExpHalf | |
4950 | } | |
4951 | ;; | |
4952 | { .mfi | |
4953 | ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3 | |
4954 | fma.s1 fA5L = fA4L, fA4L, f0 // x^4 | |
4955 | nop.i 0 | |
4956 | } | |
4957 | { .mfi | |
4958 | ldfpd fA1, fA1L = [rTmpPtr], 16 // A1 | |
4959 | fms.s1 fB8 = f8, f8, fA4L // x^2 - <x^2> | |
4960 | // set p6 if -0.5 < x <= -0.25 | |
4961 | (p15) cmp.eq.unc p6, p0 = rExpX, rExp2 | |
4962 | } | |
4963 | ;; | |
4964 | { .mfi | |
4965 | ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 | |
4966 | nop.f 0 | |
4967 | // set p6 if -0.5 < x <= -0.40625 | |
4968 | (p6) cmp.le.unc p6, p0 = 10, GR_Index1 | |
4969 | } | |
4970 | { .mfi | |
4971 | ldfe fA21 = [rTmpPtr2], -16 // A21 | |
4972 | // Put integer N into rightmost significand | |
4973 | nop.f 0 | |
4974 | adds rTmpPtr = 240, rTmpPtr | |
4975 | } | |
4976 | ;; | |
4977 | { .mfi | |
4978 | setf.sig fFloatN = GR_N | |
4979 | nop.f 0 | |
4980 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
4981 | } | |
4982 | { .mfi | |
4983 | ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4 | |
4984 | nop.f 0 | |
4985 | adds rPolDataPtr = 304, rPolDataPtr | |
4986 | } | |
4987 | ;; | |
4988 | { .mfi | |
4989 | ldfe fA20 = [rTmpPtr2], -32 // A20 | |
4990 | nop.f 0 | |
4991 | shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2 | |
4992 | } | |
4993 | { .mfi | |
4994 | ldfe fA19 = [rTmpPtr], -32 // A19 | |
4995 | nop.f 0 | |
4996 | shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2 | |
4997 | } | |
4998 | ;; | |
4999 | { .mfi | |
5000 | ldfe fA17 = [rTmpPtr], -32 // A17 | |
5001 | nop.f 0 | |
5002 | adds rTmpPtr3 = 8, GR_ad_tbl_2 | |
5003 | } | |
5004 | { .mfb | |
5005 | ldfe fA18 = [rTmpPtr2], -32 // A18 | |
5006 | nop.f 0 | |
5007 | // branch to special path for -0.5 < x <= 0.40625 | |
5008 | (p6) br.cond.spnt lgammal_near_neg_half | |
5009 | } | |
5010 | ;; | |
5011 | { .mmf | |
5012 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
5013 | ldfe fA15 = [rTmpPtr], -32 // A15 | |
5014 | fma.s1 fB20 = fA5L, fA5L, f0 // x^8 | |
5015 | } | |
5016 | ;; | |
5017 | { .mmf | |
5018 | ldfe fA16 = [rTmpPtr2], -32 // A16 | |
5019 | ldfe fA13 = [rTmpPtr], -32 // A13 | |
5020 | fms.s1 fB16 = fA4L, fA4L, fA5L | |
5021 | } | |
5022 | ;; | |
5023 | { .mmf | |
5024 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2 | |
5025 | ldfd FR_h2 = [rTmpPtr3] // Load h_2 | |
5026 | fmerge.s fB10 = f8, fA5L // sign(x) * x^4 | |
5027 | } | |
5028 | ;; | |
5029 | { .mmi | |
5030 | ldfe fA14 = [rTmpPtr2], -32 // A14 | |
5031 | ldfe fA11 = [rTmpPtr], -32 // A11 | |
5032 | // Get bits 30-15 of X_1 * Z_2 | |
5033 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
5034 | } | |
5035 | ;; | |
5036 | // | |
5037 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
5038 | // | |
5039 | { .mfi | |
5040 | ldfe fA12 = [rTmpPtr2], -32 // A12 | |
5041 | fma.s1 fRes4H = fA3, fAbsX, f0 | |
5042 | adds rTmpPtr3 = 16, GR_ad_q | |
5043 | } | |
5044 | { .mfi | |
5045 | ldfe fA9 = [rTmpPtr], -32 // A9 | |
5046 | nop.f 0 | |
5047 | nop.i 0 | |
5048 | } | |
5049 | ;; | |
5050 | { .mmf | |
5051 | ldfe fA10 = [rTmpPtr2], -32 // A10 | |
5052 | ldfe fA7 = [rTmpPtr], -32 // A7 | |
5053 | fma.s1 fB18 = fB20, fB20, f0 // x^16 | |
5054 | } | |
5055 | ;; | |
5056 | { .mmf | |
5057 | ldfe fA8 = [rTmpPtr2], -32 // A8 | |
5058 | ldfe fA22 = [rPolDataPtr], 16 // A22 | |
5059 | fcvt.xf fFloatN = fFloatN | |
5060 | } | |
5061 | ;; | |
5062 | { .mfi | |
5063 | ldfe fA5 = [rTmpPtr], -32 // A5 | |
5064 | fma.s1 fA21 = fA21, fAbsX, fA20 // v16 | |
5065 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
5066 | } | |
5067 | { .mfi | |
5068 | ldfe fA6 = [rTmpPtr2], -32 // A6 | |
5069 | nop.f 0 | |
5070 | nop.i 0 | |
5071 | } | |
5072 | ;; | |
5073 | { .mmf | |
5074 | // Point to G_3 | |
5075 | shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 | |
5076 | ldfe fA4 = [rTmpPtr2], -32 // A4 | |
5077 | fma.s1 fA19 = fA19, fAbsX, fA18 // v13 | |
5078 | } | |
5079 | ;; | |
5080 | .pred.rel "mutex",p14,p15 | |
5081 | { .mfi | |
5082 | ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3 | |
5083 | fms.s1 fRes4L = fA3, fAbsX, fRes4H | |
5084 | (p14) adds rSgnGam = 1, r0 | |
5085 | } | |
5086 | { .mfi | |
5087 | cmp.eq p6, p7 = 4, rSgnGamSize | |
5088 | fadd.s1 fRes2H = fRes4H, fA2 | |
5089 | (p15) adds rSgnGam = -1, r0 | |
5090 | } | |
5091 | ;; | |
5092 | ||
5093 | { .mfi | |
5094 | ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 | |
5095 | fma.s1 fA17 = fA17, fAbsX, fA16 // v12 | |
5096 | nop.i 0 | |
5097 | } | |
5098 | ;; | |
5099 | { .mfi | |
5100 | ldfe FR_Q3 = [GR_ad_q], 32 // Load Q3 | |
5101 | fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 | |
5102 | nop.i 0 | |
5103 | } | |
5104 | { .mfi | |
5105 | ldfe FR_Q2 = [rTmpPtr3], 16 // Load Q2 | |
5106 | fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 | |
5107 | nop.i 0 | |
5108 | } | |
5109 | ;; | |
5110 | { .mfi | |
5111 | ldfe FR_Q1 = [GR_ad_q] // Load Q1 | |
5112 | fma.s1 fA15 = fA15, fAbsX, fA14 // v8 | |
5113 | nop.i 0 | |
5114 | } | |
5115 | { .mfi | |
5116 | adds rTmpPtr3 = 32, rLnSinDataPtr | |
5117 | fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 | |
5118 | nop.i 0 | |
5119 | } | |
5120 | ;; | |
5121 | { .mmf | |
5122 | ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16 | |
5123 | ldfe fLnSin6 = [rTmpPtr3], 32 | |
5124 | fma.s1 fA13 = fA13, fAbsX, fA12 // v7 | |
5125 | ||
5126 | } | |
5127 | ;; | |
5128 | { .mfi | |
5129 | ldfe fLnSin4 = [rLnSinDataPtr], 32 | |
5130 | fma.s1 fRes4L = fA3L, fAbsX, fRes4L | |
5131 | nop.i 0 | |
5132 | } | |
5133 | { .mfi | |
5134 | ldfe fLnSin10 = [rTmpPtr3], 32 | |
5135 | fsub.s1 fRes2L = fA2, fRes2H | |
5136 | nop.i 0 | |
5137 | } | |
5138 | ;; | |
5139 | { .mfi | |
5140 | ldfe fLnSin8 = [rLnSinDataPtr], 32 | |
5141 | fma.s1 fResH = fRes2H, fAbsX, f0 | |
5142 | nop.i 0 | |
5143 | } | |
5144 | { .mfi | |
5145 | ldfe fLnSin14 = [rTmpPtr3], 32 | |
5146 | fma.s1 fA22 = fA22, fA4L, fA21 // v15 | |
5147 | nop.i 0 | |
5148 | } | |
5149 | ;; | |
5150 | { .mfi | |
5151 | ldfe fLnSin12 = [rLnSinDataPtr], 32 | |
5152 | fma.s1 fA9 = fA9, fAbsX, fA8 // v4 | |
5153 | nop.i 0 | |
5154 | } | |
5155 | { .mfi | |
5156 | ldfd fLnSin18 = [rTmpPtr3], 16 | |
5157 | fma.s1 fA11 = fA11, fAbsX, fA10 // v5 | |
5158 | nop.i 0 | |
5159 | } | |
5160 | ;; | |
5161 | { .mfi | |
5162 | ldfe fLnSin16 = [rLnSinDataPtr], 24 | |
5163 | fma.s1 fA19 = fA19, fA4L, fA17 // v11 | |
5164 | nop.i 0 | |
5165 | } | |
5166 | { .mfi | |
5167 | ldfd fLnSin22 = [rTmpPtr3], 16 | |
5168 | fma.s1 fPolL = fA7, fAbsX, fA6 | |
5169 | nop.i 0 | |
5170 | } | |
5171 | ;; | |
5172 | { .mfi | |
5173 | ldfd fLnSin20 = [rLnSinDataPtr], 16 | |
5174 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
5175 | nop.i 0 | |
5176 | } | |
5177 | { .mfi | |
5178 | ldfd fLnSin26 = [rTmpPtr3], 16 | |
5179 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
5180 | nop.i 0 | |
5181 | } | |
5182 | ;; | |
5183 | { .mfi | |
5184 | ldfd fLnSin24 = [rLnSinDataPtr], 16 | |
5185 | fadd.s1 fRes2L = fRes2L, fRes4H | |
5186 | nop.i 0 | |
5187 | } | |
5188 | { .mfi | |
5189 | ldfd fLnSin30 = [rTmpPtr3], 16 | |
5190 | fadd.s1 fA2L = fA2L, fRes4L | |
5191 | nop.i 0 | |
5192 | } | |
5193 | ;; | |
5194 | { .mfi | |
5195 | ldfd fLnSin28 = [rLnSinDataPtr], 16 | |
5196 | fms.s1 fResL = fRes2H, fAbsX, fResH | |
5197 | nop.i 0 | |
5198 | } | |
5199 | { .mfi | |
5200 | ldfd fLnSin34 = [rTmpPtr3], 8 | |
5201 | fadd.s1 fRes2H = fResH, fA1 | |
5202 | nop.i 0 | |
5203 | } | |
5204 | ;; | |
5205 | { .mfi | |
5206 | ldfd fLnSin32 = [rLnSinDataPtr] | |
5207 | fma.s1 fA11 = fA11, fA4L, fA9 // v3 | |
5208 | nop.i 0 | |
5209 | } | |
5210 | { .mfi | |
5211 | ldfd fLnSin36 = [rTmpPtr3] | |
5212 | fma.s1 fA15 = fA15, fA4L, fA13 // v6 | |
5213 | nop.i 0 | |
5214 | } | |
5215 | ;; | |
5216 | ||
5217 | { .mfi | |
5218 | // store signgam if size of variable is 4 bytes | |
5219 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
5220 | fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 | |
5221 | nop.i 0 | |
5222 | } | |
5223 | { .mfi | |
5224 | // store signgam if size of variable is 8 bytes | |
5225 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
5226 | fma.s1 fA5 = fA5, fAbsX, fA4 | |
5227 | nop.i 0 | |
5228 | } | |
5229 | ;; | |
5230 | { .mfi | |
5231 | nop.m 0 | |
5232 | fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 | |
5233 | nop.i 0 | |
5234 | } | |
5235 | { .mfi | |
5236 | nop.m 0 | |
5237 | // High part of the log(|x|): Y_hi = N * log2_hi + H | |
5238 | fms.s1 FR_log2_hi = fFloatN, FR_log2_hi, FR_H | |
5239 | nop.i 0 | |
5240 | } | |
5241 | ;; | |
5242 | { .mfi | |
5243 | nop.m 0 | |
5244 | fadd.s1 fA3L = fRes2L, fA2L | |
5245 | nop.i 0 | |
5246 | } | |
5247 | { .mfi | |
5248 | nop.m 0 | |
5249 | fma.s1 fA22 = fA22, fA5L, fA19 | |
5250 | nop.i 0 | |
5251 | } | |
5252 | ;; | |
5253 | { .mfi | |
5254 | nop.m 0 | |
5255 | fsub.s1 fRes2L = fA1, fRes2H | |
5256 | nop.i 0 | |
5257 | } | |
5258 | { .mfi | |
5259 | nop.m 0 | |
5260 | fma.s1 fRes3H = fRes2H, f8, f0 | |
5261 | nop.i 0 | |
5262 | } | |
5263 | ;; | |
5264 | { .mfi | |
5265 | nop.m 0 | |
5266 | fma.s1 fA15 = fA15, fA5L, fA11 // v2 | |
5267 | nop.i 0 | |
5268 | } | |
5269 | { .mfi | |
5270 | nop.m 0 | |
5271 | fma.s1 fLnSin18 = fLnSin18, fA4L, fLnSin16 | |
5272 | nop.i 0 | |
5273 | } | |
5274 | ;; | |
5275 | { .mfi | |
5276 | nop.m 0 | |
5277 | // h = N * log2_lo + h | |
5278 | fms.s1 FR_h = fFloatN, FR_log2_lo, FR_h | |
5279 | nop.i 0 | |
5280 | } | |
5281 | { .mfi | |
5282 | nop.m 0 | |
5283 | fma.s1 fPolL = fPolL, fA4L, fA5 | |
5284 | nop.i 0 | |
5285 | } | |
5286 | ;; | |
5287 | { .mfi | |
5288 | nop.m 0 | |
5289 | // poly_lo = r * Q4 + Q3 | |
5290 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
5291 | nop.i 0 | |
5292 | } | |
5293 | { .mfi | |
5294 | nop.m 0 | |
5295 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
5296 | nop.i 0 | |
5297 | } | |
5298 | ;; | |
5299 | { .mfi | |
5300 | nop.m 0 | |
5301 | fma.s1 fResL = fA3L, fAbsX, fResL | |
5302 | nop.i 0 | |
5303 | } | |
5304 | { .mfi | |
5305 | nop.m 0 | |
5306 | fma.s1 fLnSin30 = fLnSin30, fA4L, fLnSin28 | |
5307 | nop.i 0 | |
5308 | } | |
5309 | ;; | |
5310 | { .mfi | |
5311 | nop.m 0 | |
5312 | fadd.s1 fRes2L = fRes2L, fResH | |
5313 | nop.i 0 | |
5314 | } | |
5315 | { .mfi | |
5316 | nop.m 0 | |
5317 | fms.s1 fRes3L = fRes2H, f8, fRes3H | |
5318 | nop.i 0 | |
5319 | } | |
5320 | ;; | |
5321 | { .mfi | |
5322 | nop.m 0 | |
5323 | fadd.s1 fRes1H = fRes3H, FR_log2_hi | |
5324 | nop.i 0 | |
5325 | } | |
5326 | { .mfi | |
5327 | nop.m 0 | |
5328 | fma.s1 fPol = fB20, fA22, fA15 | |
5329 | nop.i 0 | |
5330 | } | |
5331 | ;; | |
5332 | { .mfi | |
5333 | nop.m 0 | |
5334 | fma.s1 fLnSin34 = fLnSin34, fA4L, fLnSin32 | |
5335 | nop.i 0 | |
5336 | } | |
5337 | { .mfi | |
5338 | nop.m 0 | |
5339 | fma.s1 fLnSin14 = fLnSin14, fA4L, fLnSin12 | |
5340 | nop.i 0 | |
5341 | } | |
5342 | ;; | |
5343 | ||
5344 | { .mfi | |
5345 | nop.m 0 | |
5346 | // poly_lo = poly_lo * r + Q2 | |
5347 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
5348 | nop.i 0 | |
5349 | } | |
5350 | { .mfi | |
5351 | nop.m 0 | |
5352 | fnma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
5353 | nop.i 0 | |
5354 | } | |
5355 | ;; | |
5356 | { .mfi | |
5357 | nop.m 0 | |
5358 | // poly_hi = Q1 * rsq + r | |
5359 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
5360 | nop.i 0 | |
5361 | } | |
5362 | { .mfi | |
5363 | nop.m 0 | |
5364 | fadd.s1 fA1L = fA1L, fResL | |
5365 | nop.i 0 | |
5366 | } | |
5367 | ;; | |
5368 | ||
5369 | { .mfi | |
5370 | nop.m 0 | |
5371 | fma.s1 fLnSin22 = fLnSin22, fA4L, fLnSin20 | |
5372 | nop.i 0 | |
5373 | } | |
5374 | { .mfi | |
5375 | nop.m 0 | |
5376 | fma.s1 fLnSin26 = fLnSin26, fA4L, fLnSin24 | |
5377 | nop.i 0 | |
5378 | } | |
5379 | ;; | |
5380 | ||
5381 | { .mfi | |
5382 | nop.m 0 | |
5383 | fsub.s1 fRes1L = FR_log2_hi, fRes1H | |
5384 | nop.i 0 | |
5385 | } | |
5386 | { .mfi | |
5387 | nop.m 0 | |
5388 | fma.s1 fPol = fPol, fA5L, fPolL | |
5389 | nop.i 0 | |
5390 | } | |
5391 | ;; | |
5392 | { .mfi | |
5393 | nop.m 0 | |
5394 | fma.s1 fLnSin34 = fLnSin36, fA5L, fLnSin34 | |
5395 | nop.i 0 | |
5396 | } | |
5397 | { .mfi | |
5398 | nop.m 0 | |
5399 | fma.s1 fLnSin18 = fLnSin18, fA5L, fLnSin14 | |
5400 | nop.i 0 | |
5401 | } | |
5402 | ;; | |
5403 | { .mfi | |
5404 | nop.m 0 | |
5405 | fma.s1 fLnSin6 = fLnSin6, fA4L, fLnSin4 | |
5406 | nop.i 0 | |
5407 | } | |
5408 | { .mfi | |
5409 | nop.m 0 | |
5410 | fma.s1 fLnSin10 = fLnSin10, fA4L, fLnSin8 | |
5411 | nop.i 0 | |
5412 | } | |
5413 | ;; | |
5414 | { .mfi | |
5415 | nop.m 0 | |
5416 | // poly_hi = Q1 * rsq + r | |
5417 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
5418 | nop.i 0 | |
5419 | } | |
5420 | { .mfi | |
5421 | nop.m 0 | |
5422 | fadd.s1 fRes2L = fRes2L, fA1L | |
5423 | nop.i 0 | |
5424 | } | |
5425 | ;; | |
5426 | { .mfi | |
5427 | nop.m 0 | |
5428 | // poly_lo = poly_lo*r^3 + h | |
5429 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h | |
5430 | nop.i 0 | |
5431 | } | |
5432 | { .mfi | |
5433 | nop.m 0 | |
5434 | fma.s1 fB2 = fLnSin2, fA4L, f0 | |
5435 | nop.i 0 | |
5436 | } | |
5437 | ;; | |
5438 | { .mfi | |
5439 | nop.m 0 | |
5440 | fadd.s1 fRes1L = fRes1L, fRes3H | |
5441 | nop.i 0 | |
5442 | } | |
5443 | { .mfi | |
5444 | nop.m 0 | |
5445 | fma.s1 fPol = fPol, fB10, f0 | |
5446 | nop.i 0 | |
5447 | } | |
5448 | ;; | |
5449 | { .mfi | |
5450 | nop.m 0 | |
5451 | fma.s1 fLnSin26 = fLnSin26, fA5L, fLnSin22 | |
5452 | nop.i 0 | |
5453 | } | |
5454 | { .mfi | |
5455 | nop.m 0 | |
5456 | fma.s1 fLnSin34 = fLnSin34, fA5L, fLnSin30 | |
5457 | nop.i 0 | |
5458 | } | |
5459 | ;; | |
5460 | { .mfi | |
5461 | nop.m 0 | |
5462 | fma.s1 fLnSin10 = fLnSin10, fA5L, fLnSin6 | |
5463 | nop.i 0 | |
5464 | } | |
5465 | { .mfi | |
5466 | nop.m 0 | |
5467 | fma.s1 fLnSin2L = fLnSin2L, fA4L, f0 | |
5468 | nop.i 0 | |
5469 | } | |
5470 | ;; | |
5471 | ||
5472 | { .mfi | |
5473 | nop.m 0 | |
5474 | fma.s1 fRes3L = fRes2L, f8, fRes3L | |
5475 | nop.i 0 | |
5476 | } | |
5477 | ;; | |
5478 | { .mfi | |
5479 | nop.m 0 | |
5480 | // Y_lo = poly_hi + poly_lo | |
5481 | fsub.s1 FR_log2_lo = FR_poly_lo, FR_poly_hi | |
5482 | nop.i 0 | |
5483 | } | |
5484 | { .mfi | |
5485 | nop.m 0 | |
5486 | fms.s1 fB4 = fLnSin2, fA4L, fB2 | |
5487 | nop.i 0 | |
5488 | } | |
5489 | ;; | |
5490 | { .mfi | |
5491 | nop.m 0 | |
5492 | fadd.s1 fRes2H = fRes1H, fPol | |
5493 | nop.i 0 | |
5494 | } | |
5495 | ;; | |
5496 | { .mfi | |
5497 | nop.m 0 | |
5498 | fma.s1 fLnSin34 = fLnSin34, fB20, fLnSin26 | |
5499 | nop.i 0 | |
5500 | } | |
5501 | ;; | |
5502 | { .mfi | |
5503 | nop.m 0 | |
5504 | fma.s1 fLnSin18 = fLnSin18, fB20, fLnSin10 | |
5505 | nop.i 0 | |
5506 | } | |
5507 | { .mfi | |
5508 | nop.m 0 | |
5509 | fma.s1 fLnSin2L = fB8, fLnSin2, fLnSin2L | |
5510 | nop.i 0 | |
5511 | } | |
5512 | ;; | |
5513 | ||
5514 | { .mfi | |
5515 | nop.m 0 | |
5516 | fadd.s1 FR_log2_lo = FR_log2_lo, fRes3L | |
5517 | nop.i 0 | |
5518 | } | |
5519 | ;; | |
5520 | { .mfi | |
5521 | nop.m 0 | |
5522 | fsub.s1 fRes2L = fRes1H, fRes2H | |
5523 | nop.i 0 | |
5524 | } | |
5525 | ;; | |
5526 | { .mfi | |
5527 | nop.m 0 | |
5528 | fma.s1 fB6 = fLnSin34, fB18, fLnSin18 | |
5529 | nop.i 0 | |
5530 | } | |
5531 | { .mfi | |
5532 | nop.m 0 | |
5533 | fadd.s1 fB4 = fLnSin2L, fB4 | |
5534 | nop.i 0 | |
5535 | } | |
5536 | ;; | |
5537 | ||
5538 | { .mfi | |
5539 | nop.m 0 | |
5540 | fadd.s1 fRes1L = fRes1L, FR_log2_lo | |
5541 | nop.i 0 | |
5542 | } | |
5543 | ;; | |
5544 | { .mfi | |
5545 | nop.m 0 | |
5546 | fadd.s1 fRes2L = fRes2L, fPol | |
5547 | nop.i 0 | |
5548 | } | |
5549 | ;; | |
5550 | { .mfi | |
5551 | nop.m 0 | |
5552 | fma.s1 fB12 = fB6, fA5L, f0 | |
5553 | nop.i 0 | |
5554 | } | |
5555 | ;; | |
5556 | { .mfi | |
5557 | nop.m 0 | |
5558 | fadd.s1 fRes2L = fRes2L, fRes1L | |
5559 | nop.i 0 | |
5560 | } | |
5561 | ;; | |
5562 | ||
5563 | { .mfi | |
5564 | nop.m 0 | |
5565 | fms.s1 fB14 = fB6, fA5L, fB12 | |
5566 | nop.i 0 | |
5567 | } | |
5568 | { .mfb | |
5569 | nop.m 0 | |
5570 | fadd.s1 fLnSin30 = fB2, fB12 | |
5571 | // branch out if x is negative | |
5572 | (p15) br.cond.spnt _O_Half_neg | |
5573 | } | |
5574 | ;; | |
5575 | { .mfb | |
5576 | nop.m 0 | |
5577 | // sign(x)*Pol(|x|) - log(|x|) | |
5578 | fma.s0 f8 = fRes2H, f1, fRes2L | |
5579 | // it's an answer already for positive x | |
5580 | // exit if 0 < x < 0.5 | |
5581 | br.ret.sptk b0 | |
5582 | } | |
5583 | ;; | |
5584 | ||
5585 | // here if x is negative and |x| < 0.5 | |
5586 | .align 32 | |
5587 | _O_Half_neg: | |
5588 | { .mfi | |
5589 | nop.m 0 | |
5590 | fma.s1 fB14 = fB16, fB6, fB14 | |
5591 | nop.i 0 | |
5592 | } | |
5593 | { .mfi | |
5594 | nop.m 0 | |
5595 | fsub.s1 fLnSin16 = fB2, fLnSin30 | |
5596 | nop.i 0 | |
5597 | } | |
5598 | ;; | |
5599 | { .mfi | |
5600 | nop.m 0 | |
5601 | fadd.s1 fResH = fLnSin30, fRes2H | |
5602 | nop.i 0 | |
5603 | } | |
5604 | ;; | |
5605 | { .mfi | |
5606 | nop.m 0 | |
5607 | fadd.s1 fLnSin16 = fLnSin16, fB12 | |
5608 | nop.i 0 | |
5609 | } | |
5610 | { .mfi | |
5611 | nop.m 0 | |
5612 | fadd.s1 fB4 = fB14, fB4 | |
5613 | nop.i 0 | |
5614 | } | |
5615 | ;; | |
5616 | { .mfi | |
5617 | nop.m 0 | |
5618 | fadd.s1 fLnSin16 = fB4, fLnSin16 | |
5619 | nop.i 0 | |
5620 | } | |
5621 | { .mfi | |
5622 | nop.m 0 | |
5623 | fsub.s1 fResL = fRes2H, fResH | |
5624 | nop.i 0 | |
5625 | } | |
5626 | ;; | |
5627 | { .mfi | |
5628 | nop.m 0 | |
5629 | fadd.s1 fResL = fResL, fLnSin30 | |
5630 | nop.i 0 | |
5631 | } | |
5632 | { .mfi | |
5633 | nop.m 0 | |
5634 | fadd.s1 fLnSin16 = fLnSin16, fRes2L | |
5635 | nop.i 0 | |
5636 | } | |
5637 | ;; | |
5638 | { .mfi | |
5639 | nop.m 0 | |
5640 | fadd.s1 fResL = fResL, fLnSin16 | |
5641 | nop.i 0 | |
5642 | } | |
5643 | ;; | |
5644 | { .mfb | |
5645 | nop.m 0 | |
5646 | // final result for -0.5 < x < 0 | |
5647 | fma.s0 f8 = fResH, f1, fResL | |
5648 | // exit for -0.5 < x < 0 | |
5649 | br.ret.sptk b0 | |
5650 | } | |
5651 | ;; | |
5652 | ||
5653 | // here if x >= 8.0 | |
5654 | // there are two computational paths: | |
5655 | // 1) For x >10.0 Stirling's formula is used | |
5656 | // 2) Polynomial approximation for 8.0 <= x <= 10.0 | |
5657 | .align 32 | |
5658 | lgammal_big_positive: | |
5659 | { .mfi | |
5660 | addl rPolDataPtr = @ltoff(lgammal_data), gp | |
5661 | fmerge.se fSignifX = f1, f8 | |
5662 | // Get high 15 bits of significand | |
5663 | extr.u GR_X_0 = rSignifX, 49, 15 | |
5664 | } | |
5665 | {.mfi | |
5666 | shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1 | |
5667 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration | |
5668 | adds rSignif1andQ = 0x5, r0 | |
5669 | } | |
5670 | ;; | |
5671 | {.mfi | |
5672 | ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 | |
5673 | nop.f 0 | |
5674 | shl rSignif1andQ = rSignif1andQ, 61 // significand of 1.25 | |
5675 | } | |
5676 | { .mfi | |
5677 | cmp.eq p8, p0 = rExpX, rExp8 // p8 = 1 if 8.0 <= x < 16 | |
5678 | nop.f 0 | |
5679 | adds rSgnGam = 1, r0 // gamma is positive at this range | |
5680 | } | |
5681 | ;; | |
5682 | { .mfi | |
5683 | shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1 | |
5684 | nop.f 0 | |
5685 | add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q | |
5686 | } | |
5687 | { .mlx | |
5688 | ld8 rPolDataPtr = [rPolDataPtr] | |
5689 | movl rDelta = 0x3FF2000000000000 | |
5690 | } | |
5691 | ;; | |
5692 | { .mfi | |
5693 | ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 | |
5694 | nop.f 0 | |
5695 | add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2 | |
5696 | } | |
5697 | { .mfi | |
5698 | // Point to Constants_G_H_h2 | |
5699 | add GR_ad_tbl_2 = 0x180, GR_ad_z_1 | |
5700 | nop.f 0 | |
5701 | // p8 = 1 if 8.0 <= x <= 10.0 | |
5702 | (p8) cmp.leu.unc p8, p0 = rSignifX, rSignif1andQ | |
5703 | } | |
5704 | ;; | |
5705 | { .mfi | |
5706 | ldfd FR_h = [GR_ad_tbl_1] // Load h_1 | |
5707 | nop.f 0 | |
5708 | // Get bits 30-15 of X_0 * Z_1 | |
5709 | pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 | |
5710 | } | |
5711 | { .mfb | |
5712 | (p8) setf.d FR_MHalf = rDelta | |
5713 | nop.f 0 | |
5714 | (p8) br.cond.spnt lgammal_8_10 // branch out if 8.0 <= x <= 10.0 | |
5715 | } | |
5716 | ;; | |
5717 | // | |
5718 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
5719 | // | |
5720 | { .mfi | |
5721 | ldfe fA1 = [rPolDataPtr], 16 // Load overflow threshold | |
5722 | fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration | |
5723 | // Point to Constants_G_H_h3 | |
5724 | add GR_ad_tbl_3 = 0x280, GR_ad_z_1 | |
5725 | } | |
5726 | { .mlx | |
5727 | nop.m 0 | |
5728 | movl rDelta = 0xBFE0000000000000 // -0.5 in DP | |
5729 | } | |
5730 | ;; | |
5731 | { .mfi | |
5732 | ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi | |
5733 | nop.f 0 | |
5734 | sub GR_N = rExpX, rExpHalf, 1 // unbiased exponent of x | |
5735 | } | |
5736 | ;; | |
5737 | { .mfi | |
5738 | ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo | |
5739 | nop.f 0 | |
5740 | nop.i 0 | |
5741 | } | |
5742 | { .mfi | |
5743 | setf.d FR_MHalf = rDelta | |
5744 | nop.f 0 | |
5745 | nop.i 0 | |
5746 | } | |
5747 | ;; | |
5748 | { .mfi | |
5749 | // Put integer N into rightmost significand | |
5750 | setf.sig fFloatN = GR_N | |
5751 | nop.f 0 | |
5752 | extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 | |
5753 | } | |
5754 | { .mfi | |
5755 | ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4 | |
5756 | nop.f 0 | |
5757 | nop.i 0 | |
5758 | } | |
5759 | ;; | |
5760 | { .mfi | |
5761 | shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2 | |
5762 | nop.f 0 | |
5763 | shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2 | |
5764 | } | |
5765 | { .mfi | |
5766 | ldfe FR_Q3 = [GR_ad_q], 16 // Load Q3 | |
5767 | nop.f 0 | |
5768 | nop.i 0 | |
5769 | } | |
5770 | ;; | |
5771 | { .mfi | |
5772 | ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 | |
5773 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration | |
5774 | nop.i 0 | |
5775 | } | |
5776 | ;; | |
5777 | { .mfi | |
5778 | ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2 | |
5779 | nop.f 0 | |
5780 | nop.i 0 | |
5781 | } | |
5782 | ;; | |
5783 | { .mfi | |
5784 | ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2 | |
5785 | nop.f 0 | |
5786 | nop.i 0 | |
5787 | } | |
5788 | ;; | |
5789 | { .mfi | |
5790 | ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 | |
5791 | nop.f 0 | |
5792 | // Get bits 30-15 of X_1 * Z_2 | |
5793 | pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 | |
5794 | } | |
5795 | ;; | |
5796 | // | |
5797 | // For performance, don't use result of pmpyshr2.u for 4 cycles. | |
5798 | // | |
5799 | { .mfi | |
5800 | ldfe FR_Q1 = [GR_ad_q] // Load Q1 | |
5801 | fcmp.gt.s1 p7,p0 = f8, fA1 // check if x > overflow threshold | |
5802 | nop.i 0 | |
5803 | } | |
5804 | ;; | |
5805 | {.mfi | |
5806 | ldfpd fA0, fA0L = [rPolDataPtr], 16 // Load two parts of C | |
5807 | fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration | |
5808 | nop.i 0 | |
5809 | } | |
5810 | ;; | |
5811 | { .mfb | |
5812 | ldfpd fB2, fA1 = [rPolDataPtr], 16 | |
5813 | nop.f 0 | |
5814 | (p7) br.cond.spnt lgammal_overflow // branch if x > overflow threshold | |
5815 | } | |
5816 | ;; | |
5817 | {.mfi | |
5818 | ldfe fB4 = [rPolDataPtr], 16 | |
5819 | fcvt.xf fFloatN = fFloatN | |
5820 | extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 | |
5821 | } | |
5822 | ;; | |
5823 | { .mfi | |
5824 | shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3// Point to G_3 | |
5825 | nop.f 0 | |
5826 | nop.i 0 | |
5827 | } | |
5828 | { .mfi | |
5829 | ldfe fB6 = [rPolDataPtr], 16 | |
5830 | nop.f 0 | |
5831 | nop.i 0 | |
5832 | } | |
5833 | ;; | |
5834 | { .mfi | |
5835 | ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3 | |
5836 | nop.f 0 | |
5837 | nop.i 0 | |
5838 | } | |
5839 | ;; | |
5840 | { .mfi | |
5841 | ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 | |
5842 | fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 | |
5843 | nop.i 0 | |
5844 | } | |
5845 | { .mfi | |
5846 | nop.m 0 | |
5847 | fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 | |
5848 | nop.i 0 | |
5849 | } | |
5850 | ;; | |
5851 | ||
5852 | { .mfi | |
5853 | ldfe fB8 = [rPolDataPtr], 16 | |
5854 | fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 | |
5855 | nop.i 0 | |
5856 | } | |
5857 | { .mfi | |
5858 | nop.m 0 | |
5859 | fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration | |
5860 | nop.i 0 | |
5861 | } | |
5862 | ;; | |
5863 | { .mfi | |
5864 | ldfe fB10 = [rPolDataPtr], 16 | |
5865 | nop.f 0 | |
5866 | cmp.eq p6, p7 = 4, rSgnGamSize | |
5867 | } | |
5868 | ;; | |
5869 | { .mfi | |
5870 | ldfe fB12 = [rPolDataPtr], 16 | |
5871 | nop.f 0 | |
5872 | nop.i 0 | |
5873 | } | |
5874 | ;; | |
5875 | { .mfi | |
5876 | ldfe fB14 = [rPolDataPtr], 16 | |
5877 | nop.f 0 | |
5878 | nop.i 0 | |
5879 | } | |
5880 | ;; | |
5881 | ||
5882 | { .mfi | |
5883 | ldfe fB16 = [rPolDataPtr], 16 | |
5884 | // get double extended coefficients from two doubles | |
5885 | // two doubles are needed in Stitling's formula for negative x | |
5886 | fadd.s1 fB2 = fB2, fA1 | |
5887 | nop.i 0 | |
5888 | } | |
5889 | ;; | |
5890 | { .mfi | |
5891 | ldfe fB18 = [rPolDataPtr], 16 | |
5892 | fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration | |
5893 | nop.i 0 | |
5894 | } | |
5895 | ;; | |
5896 | { .mfi | |
5897 | ldfe fB20 = [rPolDataPtr], 16 | |
5898 | nop.f 0 | |
5899 | nop.i 0 | |
5900 | } | |
5901 | ;; | |
5902 | { .mfi | |
5903 | // store signgam if size of variable is 4 bytes | |
5904 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
5905 | fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 | |
5906 | nop.i 0 | |
5907 | } | |
5908 | { .mfi | |
5909 | // store signgam if size of variable is 8 bytes | |
5910 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
5911 | fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 | |
5912 | nop.i 0 | |
5913 | } | |
5914 | ;; | |
5915 | { .mfi | |
5916 | nop.m 0 | |
5917 | fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 | |
5918 | nop.i 0 | |
5919 | } | |
5920 | ;; | |
5921 | { .mfi | |
5922 | nop.m 0 | |
5923 | fma.s1 fRcpX = fInvX, fInvX, f0 // 1/x^2 | |
5924 | nop.i 0 | |
5925 | } | |
5926 | { .mfi | |
5927 | nop.m 0 | |
5928 | fma.s1 fA0L = fB2, fInvX, fA0L | |
5929 | nop.i 0 | |
5930 | } | |
5931 | ;; | |
5932 | { .mfi | |
5933 | nop.m 0 | |
5934 | fms.s1 FR_r = fSignifX, FR_G, f1 // r = G * S_hi - 1 | |
5935 | nop.i 0 | |
5936 | } | |
5937 | { .mfi | |
5938 | nop.m 0 | |
5939 | // High part of the log(x): Y_hi = N * log2_hi + H | |
5940 | fma.s1 fRes2H = fFloatN, FR_log2_hi, FR_H | |
5941 | nop.i 0 | |
5942 | } | |
5943 | ;; | |
5944 | ||
5945 | { .mfi | |
5946 | nop.m 0 | |
5947 | // h = N * log2_lo + h | |
5948 | fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h | |
5949 | nop.i 0 | |
5950 | } | |
5951 | { .mfi | |
5952 | nop.m 0 | |
5953 | // High part of the log(x): Y_hi = N * log2_hi + H | |
5954 | fma.s1 fRes1H = fFloatN, FR_log2_hi, FR_H | |
5955 | nop.i 0 | |
5956 | } | |
5957 | ;; | |
5958 | {.mfi | |
5959 | nop.m 0 | |
5960 | fma.s1 fPol = fB18, fRcpX, fB16 // v9 | |
5961 | nop.i 0 | |
5962 | } | |
5963 | { .mfi | |
5964 | nop.m 0 | |
5965 | fma.s1 fA2L = fRcpX, fRcpX, f0 // v10 | |
5966 | nop.i 0 | |
5967 | } | |
5968 | ;; | |
5969 | {.mfi | |
5970 | nop.m 0 | |
5971 | fma.s1 fA3 = fB6, fRcpX, fB4 // v3 | |
5972 | nop.i 0 | |
5973 | } | |
5974 | { .mfi | |
5975 | nop.m 0 | |
5976 | fma.s1 fA4 = fB10, fRcpX, fB8 // v4 | |
5977 | nop.i 0 | |
5978 | } | |
5979 | ;; | |
5980 | { .mfi | |
5981 | nop.m 0 | |
5982 | fms.s1 fRes2H =fRes2H, f1, f1 // log_Hi(x) -1 | |
5983 | nop.i 0 | |
5984 | } | |
5985 | { .mfi | |
5986 | nop.m 0 | |
5987 | // poly_lo = r * Q4 + Q3 | |
5988 | fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 | |
5989 | nop.i 0 | |
5990 | } | |
5991 | ;; | |
5992 | { .mfi | |
5993 | nop.m 0 | |
5994 | fma.s1 fRes1H = fRes1H, FR_MHalf, f0 // -0.5*log_Hi(x) | |
5995 | nop.i 0 | |
5996 | } | |
5997 | { .mfi | |
5998 | nop.m 0 | |
5999 | fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r | |
6000 | nop.i 0 | |
6001 | } | |
6002 | ;; | |
6003 | { .mfi | |
6004 | nop.m 0 | |
6005 | fma.s1 fA7 = fB14, fRcpX, fB12 // v7 | |
6006 | nop.i 0 | |
6007 | } | |
6008 | { .mfi | |
6009 | nop.m 0 | |
6010 | fma.s1 fA8 = fA2L, fB20, fPol // v8 | |
6011 | nop.i 0 | |
6012 | } | |
6013 | ;; | |
6014 | { .mfi | |
6015 | nop.m 0 | |
6016 | fma.s1 fA2 = fA4, fA2L, fA3 // v2 | |
6017 | nop.i 0 | |
6018 | } | |
6019 | { .mfi | |
6020 | nop.m 0 | |
6021 | fma.s1 fA4L = fA2L, fA2L, f0 // v5 | |
6022 | nop.i 0 | |
6023 | } | |
6024 | ;; | |
6025 | { .mfi | |
6026 | nop.m 0 | |
6027 | fma.s1 fResH = fRes2H, f8, f0 // (x*(ln(x)-1))hi | |
6028 | nop.i 0 | |
6029 | } | |
6030 | { .mfi | |
6031 | nop.m 0 | |
6032 | // poly_lo = poly_lo * r + Q2 | |
6033 | fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 | |
6034 | nop.i 0 | |
6035 | } | |
6036 | ;; | |
6037 | { .mfi | |
6038 | nop.m 0 | |
6039 | fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 | |
6040 | nop.i 0 | |
6041 | } | |
6042 | { .mfi | |
6043 | nop.m 0 | |
6044 | // poly_hi = Q1 * rsq + r | |
6045 | fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r | |
6046 | nop.i 0 | |
6047 | } | |
6048 | ;; | |
6049 | { .mfi | |
6050 | nop.m 0 | |
6051 | fma.s1 fA11 = fRcpX, fInvX, f0 // 1/x^3 | |
6052 | nop.i 0 | |
6053 | } | |
6054 | { .mfi | |
6055 | nop.m 0 | |
6056 | fma.s1 fA6 = fA8, fA2L, fA7 // v6 | |
6057 | nop.i 0 | |
6058 | } | |
6059 | ;; | |
6060 | { .mfi | |
6061 | nop.m 0 | |
6062 | fms.s1 fResL = fRes2H, f8, fResH // d(x*(ln(x)-1)) | |
6063 | nop.i 0 | |
6064 | } | |
6065 | { .mfi | |
6066 | nop.m 0 | |
6067 | fadd.s1 fRes3H = fResH, fRes1H // (x*(ln(x)-1) -0.5ln(x))hi | |
6068 | nop.i 0 | |
6069 | } | |
6070 | ;; | |
6071 | { .mfi | |
6072 | nop.m 0 | |
6073 | // poly_lo = poly_lo*r^3 + h | |
6074 | fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h | |
6075 | nop.i 0 | |
6076 | } | |
6077 | ;; | |
6078 | { .mfi | |
6079 | nop.m 0 | |
6080 | fma.s1 fPol = fA4L, fA6, fA2 // v1 | |
6081 | nop.i 0 | |
6082 | } | |
6083 | { .mfi | |
6084 | nop.m 0 | |
6085 | // raise inexact exception | |
6086 | fma.s0 FR_log2_lo = FR_log2_lo, FR_log2_lo, f0 | |
6087 | nop.i 0 | |
6088 | } | |
6089 | ;; | |
6090 | { .mfi | |
6091 | nop.m 0 | |
6092 | fadd.s1 fRes4H = fRes3H, fA0 // (x*(ln(x)-1) -0.5ln(x))hi + Chi | |
6093 | nop.i 0 | |
6094 | } | |
6095 | { .mfi | |
6096 | nop.m 0 | |
6097 | fsub.s1 fRes3L = fResH, fRes3H | |
6098 | nop.i 0 | |
6099 | } | |
6100 | ;; | |
6101 | { .mfi | |
6102 | nop.m 0 | |
6103 | // Y_lo = poly_hi + poly_lo | |
6104 | fadd.s1 fRes2L = FR_poly_hi, FR_poly_lo | |
6105 | nop.i 0 | |
6106 | } | |
6107 | ;; | |
6108 | ||
6109 | { .mfi | |
6110 | nop.m 0 | |
6111 | fma.s1 fA0L = fPol, fA11, fA0L // S(1/x) + Clo | |
6112 | nop.i 0 | |
6113 | } | |
6114 | ;; | |
6115 | { .mfi | |
6116 | nop.m 0 | |
6117 | fadd.s1 fRes3L = fRes3L, fRes1H | |
6118 | nop.i 0 | |
6119 | } | |
6120 | { .mfi | |
6121 | nop.m 0 | |
6122 | fsub.s1 fRes4L = fRes3H, fRes4H | |
6123 | nop.i 0 | |
6124 | } | |
6125 | ;; | |
6126 | { .mfi | |
6127 | nop.m 0 | |
6128 | fma.s1 fResL = fRes2L, f8 , fResL // lo part of x*(ln(x)-1) | |
6129 | nop.i 0 | |
6130 | } | |
6131 | ;; | |
6132 | { .mfi | |
6133 | nop.m 0 | |
6134 | // Clo + S(1/x) - 0.5*logLo(x) | |
6135 | fma.s1 fA0L = fRes2L, FR_MHalf, fA0L | |
6136 | nop.i 0 | |
6137 | } | |
6138 | ;; | |
6139 | { .mfi | |
6140 | nop.m 0 | |
6141 | fadd.s1 fRes4L = fRes4L, fA0 | |
6142 | nop.i 0 | |
6143 | } | |
6144 | ;; | |
6145 | { .mfi | |
6146 | nop.m 0 | |
6147 | // Clo + S(1/x) - 0.5*logLo(x) + (x*(ln(x)-1))lo | |
6148 | fadd.s1 fA0L = fA0L, fResL | |
6149 | nop.i 0 | |
6150 | } | |
6151 | ;; | |
6152 | { .mfi | |
6153 | nop.m 0 | |
6154 | fadd.s1 fRes4L = fRes4L, fRes3L | |
6155 | nop.i 0 | |
6156 | } | |
6157 | ;; | |
6158 | { .mfi | |
6159 | nop.m 0 | |
6160 | fadd.s1 fRes4L = fRes4L, fA0L | |
6161 | nop.i 0 | |
6162 | } | |
6163 | ;; | |
6164 | { .mfb | |
6165 | nop.m 0 | |
6166 | fma.s0 f8 = fRes4H, f1, fRes4L | |
6167 | // exit for x > 10.0 | |
6168 | br.ret.sptk b0 | |
6169 | } | |
6170 | ;; | |
6171 | // here if 8.0 <= x <= 10.0 | |
6172 | // Result = P15(y), where y = x/8.0 - 1.5 | |
6173 | .align 32 | |
6174 | lgammal_8_10: | |
6175 | { .mfi | |
6176 | addl rPolDataPtr = @ltoff(lgammal_8_10_data), gp | |
6177 | fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // y = x/8.0 - 1.5 | |
6178 | cmp.eq p6, p7 = 4, rSgnGamSize | |
6179 | } | |
6180 | ;; | |
6181 | { .mfi | |
6182 | ld8 rLnSinDataPtr = [rPolDataPtr] | |
6183 | nop.f 0 | |
6184 | nop.i 0 | |
6185 | } | |
6186 | { .mfi | |
6187 | ld8 rPolDataPtr = [rPolDataPtr] | |
6188 | nop.f 0 | |
6189 | nop.i 0 | |
6190 | } | |
6191 | ;; | |
6192 | { .mfi | |
6193 | adds rZ1offsett = 32, rLnSinDataPtr | |
6194 | nop.f 0 | |
6195 | nop.i 0 | |
6196 | } | |
6197 | { .mfi | |
6198 | adds rLnSinDataPtr = 48, rLnSinDataPtr | |
6199 | nop.f 0 | |
6200 | nop.i 0 | |
6201 | } | |
6202 | ;; | |
6203 | { .mfi | |
6204 | ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 | |
6205 | nop.f 0 | |
6206 | nop.i 0 | |
6207 | } | |
6208 | { .mfi | |
6209 | ldfe fA2 = [rZ1offsett], 32 // A5 | |
6210 | nop.f 0 | |
6211 | nop.i 0 | |
6212 | } | |
6213 | ;; | |
6214 | { .mfi | |
6215 | ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 | |
6216 | fma.s1 FR_rsq = FR_FracX, FR_FracX, f0 // y^2 | |
6217 | nop.i 0 | |
6218 | } | |
6219 | { .mfi | |
6220 | ldfe fA3 = [rLnSinDataPtr],32 // A5 | |
6221 | nop.f 0 | |
6222 | nop.i 0 | |
6223 | } | |
6224 | ;; | |
6225 | { .mmf | |
6226 | ldfe fA4 = [rZ1offsett], 32 // A4 | |
6227 | ldfe fA5 = [rLnSinDataPtr], 32 // A5 | |
6228 | nop.f 0 | |
6229 | } | |
6230 | ;; | |
6231 | { .mmf | |
6232 | ldfe fA6 = [rZ1offsett], 32 // A6 | |
6233 | ldfe fA7 = [rLnSinDataPtr], 32 // A7 | |
6234 | nop.f 0 | |
6235 | } | |
6236 | ;; | |
6237 | { .mmf | |
6238 | ldfe fA8 = [rZ1offsett], 32 // A8 | |
6239 | ldfe fA9 = [rLnSinDataPtr], 32 // A9 | |
6240 | nop.f 0 | |
6241 | } | |
6242 | ;; | |
6243 | { .mmf | |
6244 | ldfe fA10 = [rZ1offsett], 32 // A10 | |
6245 | ldfe fA11 = [rLnSinDataPtr], 32 // A11 | |
6246 | nop.f 0 | |
6247 | } | |
6248 | ;; | |
6249 | { .mmf | |
6250 | ldfe fA12 = [rZ1offsett], 32 // A12 | |
6251 | ldfe fA13 = [rLnSinDataPtr], 32 // A13 | |
6252 | fma.s1 FR_Q4 = FR_rsq, FR_rsq, f0 // y^4 | |
6253 | } | |
6254 | ;; | |
6255 | { .mmf | |
6256 | ldfe fA14 = [rZ1offsett], 32 // A14 | |
6257 | ldfe fA15 = [rLnSinDataPtr], 32 // A15 | |
6258 | nop.f 0 | |
6259 | } | |
6260 | ;; | |
6261 | { .mfi | |
6262 | nop.m 0 | |
6263 | fma.s1 fRes1H = FR_FracX, fA1, f0 | |
6264 | nop.i 0 | |
6265 | } | |
6266 | ;; | |
6267 | { .mfi | |
6268 | nop.m 0 | |
6269 | fma.s1 fA3 = fA3, FR_FracX, fA2 // v4 | |
6270 | nop.i 0 | |
6271 | } | |
6272 | ;; | |
6273 | { .mfi | |
6274 | nop.m 0 | |
6275 | fma.s1 fA5 = fA5, FR_FracX, fA4 // v5 | |
6276 | nop.i 0 | |
6277 | } | |
6278 | ;; | |
6279 | { .mfi | |
6280 | // store sign of GAMMA(x) if size of variable is 4 bytes | |
6281 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
6282 | fma.s1 fA3L = FR_Q4, FR_Q4, f0 // v9 = y^8 | |
6283 | nop.i 0 | |
6284 | } | |
6285 | { .mfi | |
6286 | // store sign of GAMMA(x) if size of variable is 8 bytes | |
6287 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
6288 | fma.s1 fA7 = fA7, FR_FracX, fA6 // v7 | |
6289 | nop.i 0 | |
6290 | } | |
6291 | ;; | |
6292 | { .mfi | |
6293 | nop.m 0 | |
6294 | fma.s1 fA9 = fA9, FR_FracX, fA8 // v8 | |
6295 | nop.i 0 | |
6296 | } | |
6297 | ;; | |
6298 | { .mfi | |
6299 | nop.m 0 | |
6300 | fms.s1 fRes1L = FR_FracX, fA1, fRes1H | |
6301 | nop.i 0 | |
6302 | } | |
6303 | { .mfi | |
6304 | nop.m 0 | |
6305 | fma.s1 fA11 = fA11, FR_FracX, fA10 // v12 | |
6306 | nop.i 0 | |
6307 | } | |
6308 | ;; | |
6309 | { .mfi | |
6310 | nop.m 0 | |
6311 | fma.s1 fA13 = fA13, FR_FracX, fA12 // v13 | |
6312 | nop.i 0 | |
6313 | } | |
6314 | { .mfi | |
6315 | nop.m 0 | |
6316 | fma.s1 fRes2H = fRes1H, f1, fA0 | |
6317 | nop.i 0 | |
6318 | } | |
6319 | ;; | |
6320 | { .mfi | |
6321 | nop.m 0 | |
6322 | fma.s1 fA15 = fA15, FR_FracX, fA14 // v16 | |
6323 | nop.i 0 | |
6324 | } | |
6325 | { .mfi | |
6326 | nop.m 0 | |
6327 | fma.s1 fA5 = fA5, FR_rsq, fA3 // v3 | |
6328 | nop.i 0 | |
6329 | } | |
6330 | ;; | |
6331 | { .mfi | |
6332 | nop.m 0 | |
6333 | fma.s1 fA9 = fA9, FR_rsq, fA7 // v6 | |
6334 | nop.i 0 | |
6335 | } | |
6336 | ;; | |
6337 | { .mfi | |
6338 | nop.m 0 | |
6339 | fma.s1 fRes1L = FR_FracX, fA1L, fRes1L | |
6340 | nop.i 0 | |
6341 | } | |
6342 | ;; | |
6343 | { .mfi | |
6344 | nop.m 0 | |
6345 | fms.s1 fRes2L = fA0, f1, fRes2H | |
6346 | nop.i 0 | |
6347 | } | |
6348 | { .mfi | |
6349 | nop.m 0 | |
6350 | fma.s1 fA13 = fA13, FR_rsq, fA11 // v11 | |
6351 | nop.i 0 | |
6352 | } | |
6353 | ;; | |
6354 | { .mfi | |
6355 | nop.m 0 | |
6356 | fma.s1 fA9 = fA9, FR_Q4, fA5 // v2 | |
6357 | nop.i 0 | |
6358 | } | |
6359 | ;; | |
6360 | { .mfi | |
6361 | nop.m 0 | |
6362 | fma.s1 fRes1L = fRes1L, f1, fA0L | |
6363 | nop.i 0 | |
6364 | } | |
6365 | ;; | |
6366 | { .mfi | |
6367 | nop.m 0 | |
6368 | fma.s1 fRes2L = fRes2L, f1, fRes1H | |
6369 | nop.i 0 | |
6370 | } | |
6371 | { .mfi | |
6372 | nop.m 0 | |
6373 | fma.s1 fA15 = fA15, FR_Q4, fA13 // v10 | |
6374 | nop.i 0 | |
6375 | } | |
6376 | ;; | |
6377 | { .mfi | |
6378 | nop.m 0 | |
6379 | fma.s1 fRes2L = fRes1L, f1, fRes2L | |
6380 | nop.i 0 | |
6381 | } | |
6382 | { .mfi | |
6383 | nop.m 0 | |
6384 | fma.s1 fPol = fA3L, fA15, fA9 | |
6385 | nop.i 0 | |
6386 | } | |
6387 | ;; | |
6388 | { .mfi | |
6389 | nop.m 0 | |
6390 | fma.s1 f8 = FR_rsq , fPol, fRes2H | |
6391 | nop.i 0 | |
6392 | } | |
6393 | { .mfi | |
6394 | nop.m 0 | |
6395 | fma.s1 fPol = fPol, FR_rsq, f0 | |
6396 | nop.i 0 | |
6397 | } | |
6398 | ;; | |
6399 | { .mfi | |
6400 | nop.m 0 | |
6401 | fms.s1 fRes1L = fRes2H, f1, f8 | |
6402 | nop.i 0 | |
6403 | } | |
6404 | ;; | |
6405 | { .mfi | |
6406 | nop.m 0 | |
6407 | fma.s1 fRes1L = fRes1L, f1, fPol | |
6408 | nop.i 0 | |
6409 | } | |
6410 | ;; | |
6411 | {.mfi | |
6412 | nop.m 0 | |
6413 | fma.s1 fRes1L = fRes1L, f1, fRes2L | |
6414 | nop.i 0 | |
6415 | } | |
6416 | ;; | |
6417 | { .mfb | |
6418 | nop.m 0 | |
6419 | fma.s0 f8 = f8, f1, fRes1L | |
6420 | // exit for 8.0 <= x <= 10.0 | |
6421 | br.ret.sptk b0 | |
6422 | } | |
6423 | ;; | |
6424 | ||
6425 | // here if 4.0 <=x < 8.0 | |
6426 | .align 32 | |
6427 | lgammal_4_8: | |
6428 | { .mfi | |
6429 | addl rPolDataPtr= @ltoff(lgammal_4_8_data),gp | |
6430 | fms.s1 FR_FracX = fSignifX, f1, FR_MHalf | |
6431 | adds rSgnGam = 1, r0 | |
6432 | } | |
6433 | ;; | |
6434 | { .mfi | |
6435 | ld8 rPolDataPtr = [rPolDataPtr] | |
6436 | nop.f 0 | |
6437 | nop.i 0 | |
6438 | } | |
6439 | ;; | |
6440 | ||
6441 | { .mfb | |
6442 | adds rTmpPtr = 160, rPolDataPtr | |
6443 | nop.f 0 | |
6444 | // branch to special path which computes polynomial of 25th degree | |
6445 | br.sptk lgamma_polynom25 | |
6446 | } | |
6447 | ;; | |
6448 | ||
6449 | // here if 2.25 <=x < 4.0 | |
6450 | .align 32 | |
6451 | lgammal_2Q_4: | |
6452 | { .mfi | |
6453 | addl rPolDataPtr= @ltoff(lgammal_2Q_4_data),gp | |
6454 | fms.s1 FR_FracX = fSignifX, f1, FR_MHalf | |
6455 | adds rSgnGam = 1, r0 | |
6456 | } | |
6457 | ;; | |
6458 | { .mfi | |
6459 | ld8 rPolDataPtr = [rPolDataPtr] | |
6460 | nop.f 0 | |
6461 | nop.i 0 | |
6462 | } | |
6463 | ;; | |
6464 | ||
6465 | { .mfb | |
6466 | adds rTmpPtr = 160, rPolDataPtr | |
6467 | nop.f 0 | |
6468 | // branch to special path which computes polynomial of 25th degree | |
6469 | br.sptk lgamma_polynom25 | |
6470 | } | |
6471 | ;; | |
6472 | ||
6473 | // here if 0.5 <= |x| < 0.75 | |
6474 | .align 32 | |
6475 | lgammal_half_3Q: | |
6476 | .pred.rel "mutex", p14, p15 | |
6477 | { .mfi | |
6478 | (p14) addl rPolDataPtr= @ltoff(lgammal_half_3Q_data),gp | |
6479 | // FR_FracX = x - 0.625 for positive x | |
6480 | (p14) fms.s1 FR_FracX = f8, f1, FR_FracX | |
6481 | (p14) adds rSgnGam = 1, r0 | |
6482 | } | |
6483 | { .mfi | |
6484 | (p15) addl rPolDataPtr= @ltoff(lgammal_half_3Q_neg_data),gp | |
6485 | // FR_FracX = x + 0.625 for negative x | |
6486 | (p15) fma.s1 FR_FracX = f8, f1, FR_FracX | |
6487 | (p15) adds rSgnGam = -1, r0 | |
6488 | } | |
6489 | ;; | |
6490 | { .mfi | |
6491 | ld8 rPolDataPtr = [rPolDataPtr] | |
6492 | nop.f 0 | |
6493 | nop.i 0 | |
6494 | } | |
6495 | ;; | |
6496 | { .mfb | |
6497 | adds rTmpPtr = 160, rPolDataPtr | |
6498 | nop.f 0 | |
6499 | // branch to special path which computes polynomial of 25th degree | |
6500 | br.sptk lgamma_polynom25 | |
6501 | } | |
6502 | ;; | |
6503 | // here if 1.3125 <= x < 1.5625 | |
6504 | .align 32 | |
6505 | lgammal_loc_min: | |
6506 | { .mfi | |
6507 | adds rSgnGam = 1, r0 | |
6508 | nop.f 0 | |
6509 | nop.i 0 | |
6510 | } | |
6511 | { .mfb | |
6512 | adds rTmpPtr = 160, rPolDataPtr | |
6513 | fms.s1 FR_FracX = f8, f1, fA5L | |
6514 | br.sptk lgamma_polynom25 | |
6515 | } | |
6516 | ;; | |
6517 | // here if -2.605859375 <= x < -2.5 | |
6518 | // special polynomial approximation used since neither "near root" | |
6519 | // approximation nor reflection formula give satisfactory accuracy on | |
6520 | // this range | |
6521 | .align 32 | |
6522 | _neg2andHalf: | |
6523 | { .mfi | |
6524 | addl rPolDataPtr= @ltoff(lgammal_neg2andHalf_data),gp | |
6525 | fma.s1 FR_FracX = fB20, f1, f8 // 2.5 + x | |
6526 | adds rSgnGam = -1, r0 | |
6527 | } | |
6528 | ;; | |
6529 | {.mfi | |
6530 | ld8 rPolDataPtr = [rPolDataPtr] | |
6531 | nop.f 0 | |
6532 | nop.i 0 | |
6533 | } | |
6534 | ;; | |
6535 | { .mfb | |
6536 | adds rTmpPtr = 160, rPolDataPtr | |
6537 | nop.f 0 | |
6538 | // branch to special path which computes polynomial of 25th degree | |
6539 | br.sptk lgamma_polynom25 | |
6540 | } | |
6541 | ;; | |
6542 | ||
6543 | // here if -0.5 < x <= -0.40625 | |
6544 | .align 32 | |
6545 | lgammal_near_neg_half: | |
6546 | { .mmf | |
6547 | addl rPolDataPtr= @ltoff(lgammal_near_neg_half_data),gp | |
6548 | setf.exp FR_FracX = rExpHalf | |
6549 | nop.f 0 | |
6550 | } | |
6551 | ;; | |
6552 | { .mfi | |
6553 | ld8 rPolDataPtr = [rPolDataPtr] | |
6554 | nop.f 0 | |
6555 | adds rSgnGam = -1, r0 | |
6556 | } | |
6557 | ;; | |
6558 | { .mfb | |
6559 | adds rTmpPtr = 160, rPolDataPtr | |
6560 | fma.s1 FR_FracX = FR_FracX, f1, f8 | |
6561 | // branch to special path which computes polynomial of 25th degree | |
6562 | br.sptk lgamma_polynom25 | |
6563 | } | |
6564 | ;; | |
6565 | ||
6566 | // here if there an answer is P25(x) | |
6567 | // rPolDataPtr, rTmpPtr point to coefficients | |
6568 | // x is in FR_FracX register | |
6569 | .align 32 | |
6570 | lgamma_polynom25: | |
6571 | { .mfi | |
6572 | ldfpd fA3, fA0L = [rPolDataPtr], 16 // A3 | |
6573 | nop.f 0 | |
6574 | cmp.eq p6, p7 = 4, rSgnGamSize | |
6575 | } | |
6576 | { .mfi | |
6577 | ldfpd fA18, fA19 = [rTmpPtr], 16 // D7, D6 | |
6578 | nop.f 0 | |
6579 | nop.i 0 | |
6580 | } | |
6581 | ;; | |
6582 | { .mfi | |
6583 | ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 | |
6584 | nop.f 0 | |
6585 | nop.i 0 | |
6586 | } | |
6587 | { .mfi | |
6588 | ldfpd fA16, fA17 = [rTmpPtr], 16 // D4, D5 | |
6589 | nop.f 0 | |
6590 | } | |
6591 | ;; | |
6592 | { .mfi | |
6593 | ldfpd fA12, fA13 = [rPolDataPtr], 16 // D0, D1 | |
6594 | nop.f 0 | |
6595 | nop.i 0 | |
6596 | } | |
6597 | { .mfi | |
6598 | ldfpd fA14, fA15 = [rTmpPtr], 16 // D2, D3 | |
6599 | nop.f 0 | |
6600 | nop.i 0 | |
6601 | } | |
6602 | ;; | |
6603 | { .mfi | |
6604 | ldfpd fA24, fA25 = [rPolDataPtr], 16 // C21, C20 | |
6605 | nop.f 0 | |
6606 | nop.i 0 | |
6607 | } | |
6608 | { .mfi | |
6609 | ldfpd fA22, fA23 = [rTmpPtr], 16 // C19, C18 | |
6610 | nop.f 0 | |
6611 | nop.i 0 | |
6612 | } | |
6613 | ;; | |
6614 | { .mfi | |
6615 | ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 | |
6616 | fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2 | |
6617 | nop.i 0 | |
6618 | } | |
6619 | { .mfi | |
6620 | ldfpd fA20, fA21 = [rTmpPtr], 16 // C17, C16 | |
6621 | nop.f 0 | |
6622 | nop.i 0 | |
6623 | } | |
6624 | ;; | |
6625 | { .mfi | |
6626 | ldfe fA11 = [rTmpPtr], 16 // E7 | |
6627 | nop.f 0 | |
6628 | nop.i 0 | |
6629 | } | |
6630 | { .mfi | |
6631 | ldfpd fA0, fA3L = [rPolDataPtr], 16 // A0 | |
6632 | nop.f 0 | |
6633 | nop.i 0 | |
6634 | };; | |
6635 | { .mfi | |
6636 | ldfe fA10 = [rPolDataPtr], 16 // E6 | |
6637 | nop.f 0 | |
6638 | nop.i 0 | |
6639 | } | |
6640 | { .mfi | |
6641 | ldfe fA9 = [rTmpPtr], 16 // E5 | |
6642 | nop.f 0 | |
6643 | nop.i 0 | |
6644 | } | |
6645 | ;; | |
6646 | { .mmf | |
6647 | ldfe fA8 = [rPolDataPtr], 16 // E4 | |
6648 | ldfe fA7 = [rTmpPtr], 16 // E3 | |
6649 | nop.f 0 | |
6650 | } | |
6651 | ;; | |
6652 | { .mmf | |
6653 | ldfe fA6 = [rPolDataPtr], 16 // E2 | |
6654 | ldfe fA5 = [rTmpPtr], 16 // E1 | |
6655 | nop.f 0 | |
6656 | } | |
6657 | ;; | |
6658 | { .mfi | |
6659 | ldfe fA4 = [rPolDataPtr], 16 // E0 | |
6660 | fma.s1 fA5L = fA4L, fA4L, f0 // x^4 | |
6661 | nop.i 0 | |
6662 | } | |
6663 | { .mfi | |
6664 | nop.m 0 | |
6665 | fms.s1 fB2 = FR_FracX, FR_FracX, fA4L // x^2 - <x^2> | |
6666 | nop.i 0 | |
6667 | } | |
6668 | ;; | |
6669 | { .mfi | |
6670 | // store signgam if size of variable is 4 bytes | |
6671 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
6672 | fma.s1 fRes4H = fA3, FR_FracX, f0 // (A3*x)hi | |
6673 | nop.i 0 | |
6674 | } | |
6675 | { .mfi | |
6676 | // store signgam if size of variable is 8 bytes | |
6677 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
6678 | fma.s1 fA19 = fA19, FR_FracX, fA18 // D7*x + D6 | |
6679 | nop.i 0 | |
6680 | } | |
6681 | ;; | |
6682 | { .mfi | |
6683 | nop.m 0 | |
6684 | fma.s1 fResH = fA1, FR_FracX, f0 // (A1*x)hi | |
6685 | nop.i 0 | |
6686 | } | |
6687 | { .mfi | |
6688 | nop.m 0 | |
6689 | fma.s1 fB6 = fA1L, FR_FracX, fA0L // A1L*x + A0L | |
6690 | nop.i 0 | |
6691 | } | |
6692 | ;; | |
6693 | { .mfi | |
6694 | nop.m 0 | |
6695 | fma.s1 fA17 = fA17, FR_FracX, fA16 // D5*x + D4 | |
6696 | nop.i 0 | |
6697 | } | |
6698 | { .mfi | |
6699 | nop.m 0 | |
6700 | fma.s1 fA15 = fA15, FR_FracX, fA14 // D3*x + D2 | |
6701 | nop.i 0 | |
6702 | } | |
6703 | ;; | |
6704 | { .mfi | |
6705 | nop.m 0 | |
6706 | fma.s1 fA25 = fA25, FR_FracX, fA24 // C21*x + C20 | |
6707 | nop.i 0 | |
6708 | } | |
6709 | { .mfi | |
6710 | nop.m 0 | |
6711 | fma.s1 fA13 = fA13, FR_FracX, fA12 // D1*x + D0 | |
6712 | nop.i 0 | |
6713 | } | |
6714 | ;; | |
6715 | { .mfi | |
6716 | nop.m 0 | |
6717 | fma.s1 fA23 = fA23, FR_FracX, fA22 // C19*x + C18 | |
6718 | nop.i 0 | |
6719 | } | |
6720 | { .mfi | |
6721 | nop.m 0 | |
6722 | fma.s1 fA21 = fA21, FR_FracX, fA20 // C17*x + C16 | |
6723 | nop.i 0 | |
6724 | } | |
6725 | ;; | |
6726 | { .mfi | |
6727 | nop.m 0 | |
6728 | fms.s1 fRes4L = fA3, FR_FracX, fRes4H // delta((A3*x)hi) | |
6729 | nop.i 0 | |
6730 | } | |
6731 | { .mfi | |
6732 | nop.m 0 | |
6733 | fadd.s1 fRes2H = fRes4H, fA2 // (A3*x + A2)hi | |
6734 | nop.i 0 | |
6735 | } | |
6736 | ;; | |
6737 | { .mfi | |
6738 | nop.m 0 | |
6739 | fms.s1 fResL = fA1, FR_FracX, fResH // d(A1*x) | |
6740 | nop.i 0 | |
6741 | } | |
6742 | { .mfi | |
6743 | nop.m 0 | |
6744 | fadd.s1 fRes1H = fResH, fA0 // (A1*x + A0)hi | |
6745 | nop.i 0 | |
6746 | } | |
6747 | ;; | |
6748 | { .mfi | |
6749 | nop.m 0 | |
6750 | fma.s1 fA19 = fA19, fA4L, fA17 // Dhi | |
6751 | nop.i 0 | |
6752 | } | |
6753 | { .mfi | |
6754 | nop.m 0 | |
6755 | fma.s1 fA11 = fA11, FR_FracX, fA10 // E7*x + E6 | |
6756 | nop.i 0 | |
6757 | } | |
6758 | ;; | |
6759 | { .mfi | |
6760 | nop.m 0 | |
6761 | // Doing this to raise inexact flag | |
6762 | fma.s0 fA10 = fA0, fA0, f0 | |
6763 | nop.i 0 | |
6764 | } | |
6765 | ;; | |
6766 | { .mfi | |
6767 | nop.m 0 | |
6768 | fma.s1 fA15 = fA15, fA4L, fA13 // Dlo | |
6769 | nop.i 0 | |
6770 | } | |
6771 | { .mfi | |
6772 | nop.m 0 | |
6773 | // (C21*x + C20)*x^2 + C19*x + C18 | |
6774 | fma.s1 fA25 = fA25, fA4L, fA23 | |
6775 | nop.i 0 | |
6776 | } | |
6777 | ;; | |
6778 | { .mfi | |
6779 | nop.m 0 | |
6780 | fma.s1 fA9 = fA9, FR_FracX, fA8 // E5*x + E4 | |
6781 | nop.i 0 | |
6782 | } | |
6783 | { .mfi | |
6784 | nop.m 0 | |
6785 | fma.s1 fA7 = fA7, FR_FracX, fA6 // E3*x + E2 | |
6786 | nop.i 0 | |
6787 | } | |
6788 | ;; | |
6789 | { .mfi | |
6790 | nop.m 0 | |
6791 | fma.s1 fRes4L = fA3L, FR_FracX, fRes4L // (A3*x)lo | |
6792 | nop.i 0 | |
6793 | } | |
6794 | { .mfi | |
6795 | nop.m 0 | |
6796 | fsub.s1 fRes2L = fA2, fRes2H | |
6797 | nop.i 0 | |
6798 | } | |
6799 | ;; | |
6800 | { .mfi | |
6801 | nop.m 0 | |
6802 | fadd.s1 fResL = fResL, fB6 // (A1L*x + A0L) + d(A1*x) | |
6803 | nop.i 0 | |
6804 | } | |
6805 | { .mfi | |
6806 | nop.m 0 | |
6807 | fsub.s1 fRes1L = fA0, fRes1H | |
6808 | nop.i 0 | |
6809 | } | |
6810 | ;; | |
6811 | { .mfi | |
6812 | nop.m 0 | |
6813 | fma.s1 fA5 = fA5, FR_FracX, fA4 // E1*x + E0 | |
6814 | nop.i 0 | |
6815 | } | |
6816 | { .mfi | |
6817 | nop.m 0 | |
6818 | fma.s1 fB8 = fA5L, fA5L, f0 // x^8 | |
6819 | nop.i 0 | |
6820 | } | |
6821 | ;; | |
6822 | { .mfi | |
6823 | nop.m 0 | |
6824 | // ((C21*x + C20)*x^2 + C19*x + C18)*x^2 + C17*x + C16 | |
6825 | fma.s1 fA25 = fA25, fA4L, fA21 | |
6826 | nop.i 0 | |
6827 | } | |
6828 | { .mfi | |
6829 | nop.m 0 | |
6830 | fma.s1 fA19 = fA19, fA5L, fA15 // D | |
6831 | nop.i 0 | |
6832 | } | |
6833 | ;; | |
6834 | { .mfi | |
6835 | nop.m 0 | |
6836 | fma.s1 fA11 = fA11, fA4L, fA9 // Ehi | |
6837 | nop.i 0 | |
6838 | } | |
6839 | ;; | |
6840 | { .mfi | |
6841 | nop.m 0 | |
6842 | fadd.s1 fRes2L = fRes2L, fRes4H | |
6843 | nop.i 0 | |
6844 | } | |
6845 | { .mfi | |
6846 | nop.m 0 | |
6847 | fadd.s1 fRes4L = fRes4L, fA2L // (A3*x)lo + A2L | |
6848 | nop.i 0 | |
6849 | } | |
6850 | ;; | |
6851 | { .mfi | |
6852 | nop.m 0 | |
6853 | fma.s1 fRes3H = fRes2H, fA4L, f0 // ((A3*x + A2)*x^2)hi | |
6854 | nop.i 0 | |
6855 | } | |
6856 | { .mfi | |
6857 | nop.m 0 | |
6858 | fadd.s1 fRes1L = fRes1L, fResH | |
6859 | nop.i 0 | |
6860 | } | |
6861 | ;; | |
6862 | { .mfi | |
6863 | nop.m 0 | |
6864 | fma.s1 fRes3L = fRes2H, fB2, f0 // (A3*x + A2)hi*d(x^2) | |
6865 | nop.i 0 | |
6866 | } | |
6867 | { .mfi | |
6868 | nop.m 0 | |
6869 | fma.s1 fA7 = fA7, fA4L, fA5 // Elo | |
6870 | nop.i 0 | |
6871 | } | |
6872 | ;; | |
6873 | { .mfi | |
6874 | nop.m 0 | |
6875 | fma.s1 fA25 = fA25, fB8, fA19 // C*x^8 + D | |
6876 | nop.i 0 | |
6877 | } | |
6878 | ;; | |
6879 | { .mfi | |
6880 | nop.m 0 | |
6881 | fadd.s1 fRes2L = fRes2L, fRes4L // (A3*x + A2)lo | |
6882 | nop.i 0 | |
6883 | } | |
6884 | ;; | |
6885 | { .mfi | |
6886 | nop.m 0 | |
6887 | fms.s1 fB4 = fRes2H, fA4L, fRes3H // d((A3*x + A2)*x^2)) | |
6888 | nop.i 0 | |
6889 | } | |
6890 | { .mfi | |
6891 | nop.m 0 | |
6892 | fadd.s1 fRes1L = fRes1L, fResL // (A1*x + A0)lo | |
6893 | nop.i 0 | |
6894 | } | |
6895 | ;; | |
6896 | { .mfi | |
6897 | nop.m 0 | |
6898 | fadd.s1 fB20 = fRes3H, fRes1H // Phi | |
6899 | nop.i 0 | |
6900 | } | |
6901 | { .mfi | |
6902 | nop.m 0 | |
6903 | fma.s1 fA11 = fA11, fA5L, fA7 // E | |
6904 | nop.i 0 | |
6905 | } | |
6906 | ;; | |
6907 | { .mfi | |
6908 | nop.m 0 | |
6909 | // ( (A3*x + A2)lo*<x^2> + (A3*x + A2)hi*d(x^2)) | |
6910 | fma.s1 fRes3L = fRes2L, fA4L, fRes3L | |
6911 | nop.i 0 | |
6912 | } | |
6913 | ;; | |
6914 | { .mfi | |
6915 | nop.m 0 | |
6916 | // d((A3*x + A2)*x^2)) + (A1*x + A0)lo | |
6917 | fadd.s1 fRes1L = fRes1L, fB4 | |
6918 | nop.i 0 | |
6919 | } | |
6920 | ;; | |
6921 | { .mfi | |
6922 | nop.m 0 | |
6923 | fsub.s1 fB18 = fRes1H, fB20 | |
6924 | nop.i 0 | |
6925 | } | |
6926 | { .mfi | |
6927 | nop.m 0 | |
6928 | fma.s1 fPol = fA25, fB8, fA11 | |
6929 | nop.i 0 | |
6930 | } | |
6931 | ;; | |
6932 | { .mfi | |
6933 | nop.m 0 | |
6934 | fadd.s1 fRes1L = fRes1L, fRes3L | |
6935 | nop.i 0 | |
6936 | } | |
6937 | ;; | |
6938 | { .mfi | |
6939 | nop.m 0 | |
6940 | fadd.s1 fB18 = fB18, fRes3H | |
6941 | nop.i 0 | |
6942 | } | |
6943 | { .mfi | |
6944 | nop.m 0 | |
6945 | fma.s1 fRes4H = fPol, fA5L, fB20 | |
6946 | nop.i 0 | |
6947 | } | |
6948 | ;; | |
6949 | { .mfi | |
6950 | nop.m 0 | |
6951 | fma.s1 fPolL = fPol, fA5L, f0 | |
6952 | nop.i 0 | |
6953 | } | |
6954 | ;; | |
6955 | { .mfi | |
6956 | nop.m 0 | |
6957 | fadd.s1 fB18 = fB18, fRes1L // Plo | |
6958 | nop.i 0 | |
6959 | } | |
6960 | { .mfi | |
6961 | nop.m 0 | |
6962 | fsub.s1 fRes4L = fB20, fRes4H | |
6963 | nop.i 0 | |
6964 | } | |
6965 | ;; | |
6966 | { .mfi | |
6967 | nop.m 0 | |
6968 | fadd.s1 fB18 = fB18, fPolL | |
6969 | nop.i 0 | |
6970 | } | |
6971 | ;; | |
6972 | { .mfi | |
6973 | nop.m 0 | |
6974 | fadd.s1 fRes4L = fRes4L, fB18 | |
6975 | nop.i 0 | |
6976 | } | |
6977 | ;; | |
6978 | { .mfb | |
6979 | nop.m 0 | |
6980 | fma.s0 f8 = fRes4H, f1, fRes4L | |
6981 | // P25(x) computed, exit here | |
6982 | br.ret.sptk b0 | |
6983 | } | |
6984 | ;; | |
6985 | ||
6986 | ||
6987 | // here if 0.75 <= x < 1.3125 | |
6988 | .align 32 | |
6989 | lgammal_03Q_1Q: | |
6990 | { .mfi | |
6991 | addl rPolDataPtr= @ltoff(lgammal_03Q_1Q_data),gp | |
6992 | fma.s1 FR_FracX = fA5L, f1, f0 // x | |
6993 | adds rSgnGam = 1, r0 | |
6994 | } | |
6995 | { .mfi | |
6996 | nop.m 0 | |
6997 | fma.s1 fB4 = fA5L, fA5L, f0 // x^2 | |
6998 | nop.i 0 | |
6999 | } | |
7000 | ;; | |
7001 | { .mfi | |
7002 | ld8 rPolDataPtr = [rPolDataPtr] | |
7003 | nop.f 0 | |
7004 | nop.i 0 | |
7005 | } | |
7006 | ;; | |
7007 | { .mfb | |
7008 | adds rTmpPtr = 144, rPolDataPtr | |
7009 | nop.f 0 | |
7010 | br.sptk lgamma_polynom24x | |
7011 | } | |
7012 | ;; | |
7013 | ||
7014 | // here if 1.5625 <= x < 2.25 | |
7015 | .align 32 | |
7016 | lgammal_13Q_2Q: | |
7017 | { .mfi | |
7018 | addl rPolDataPtr= @ltoff(lgammal_13Q_2Q_data),gp | |
7019 | fma.s1 FR_FracX = fB4, f1, f0 // x | |
7020 | adds rSgnGam = 1, r0 | |
7021 | } | |
7022 | { .mfi | |
7023 | nop.m 0 | |
7024 | fma.s1 fB4 = fB4, fB4, f0 // x^2 | |
7025 | nop.i 0 | |
7026 | } | |
7027 | ;; | |
7028 | { .mfi | |
7029 | ld8 rPolDataPtr = [rPolDataPtr] | |
7030 | nop.f 0 | |
7031 | nop.i 0 | |
7032 | } | |
7033 | ;; | |
7034 | { .mfb | |
7035 | adds rTmpPtr = 144, rPolDataPtr | |
7036 | nop.f 0 | |
7037 | br.sptk lgamma_polynom24x | |
7038 | } | |
7039 | ;; | |
7040 | ||
7041 | // here if result is Pol24(x) | |
7042 | // x is in FR_FracX, | |
7043 | // rPolDataPtr, rTmpPtr point to coefficients | |
7044 | .align 32 | |
7045 | lgamma_polynom24x: | |
7046 | { .mfi | |
7047 | ldfpd fA4, fA2L = [rPolDataPtr], 16 | |
7048 | nop.f 0 | |
7049 | cmp.eq p6, p7 = 4, rSgnGamSize | |
7050 | } | |
7051 | { .mfi | |
7052 | ldfpd fA23, fA24 = [rTmpPtr], 16 // C18, C19 | |
7053 | nop.f 0 | |
7054 | nop.i 0 | |
7055 | } | |
7056 | ;; | |
7057 | { .mfi | |
7058 | ldfpd fA3, fA1L = [rPolDataPtr], 16 | |
7059 | fma.s1 fA5L = fB4, fB4, f0 // x^4 | |
7060 | nop.i 0 | |
7061 | } | |
7062 | { .mfi | |
7063 | ldfpd fA19, fA20 = [rTmpPtr], 16 // D6, D7 | |
7064 | fms.s1 fB2 = FR_FracX, FR_FracX, fB4 // x^2 - <x^2> | |
7065 | nop.i 0 | |
7066 | } | |
7067 | ;; | |
7068 | { .mmf | |
7069 | ldfpd fA15, fA16 = [rPolDataPtr], 16 // D2, D3 | |
7070 | ldfpd fA17, fA18 = [rTmpPtr], 16 // D4, D5 | |
7071 | nop.f 0 | |
7072 | } | |
7073 | ;; | |
7074 | { .mmf | |
7075 | ldfpd fA13, fA14 = [rPolDataPtr], 16 // D0, D1 | |
7076 | ldfpd fA12, fA21 = [rTmpPtr], 16 // E7, C16 | |
7077 | nop.f 0 | |
7078 | } | |
7079 | ;; | |
7080 | { .mfi | |
7081 | ldfe fA11 = [rPolDataPtr], 16 // E6 | |
7082 | nop.f 0 | |
7083 | nop.i 0 | |
7084 | } | |
7085 | { .mfi | |
7086 | ldfe fA10 = [rTmpPtr], 16 // E5 | |
7087 | nop.f 0 | |
7088 | nop.i 0 | |
7089 | } | |
7090 | ;; | |
7091 | { .mfi | |
7092 | ldfpd fA2, fA4L = [rPolDataPtr], 16 | |
7093 | nop.f 0 | |
7094 | nop.i 0 | |
7095 | } | |
7096 | { .mfi | |
7097 | ldfpd fA1, fA3L = [rTmpPtr], 16 | |
7098 | nop.f 0 | |
7099 | nop.i 0 | |
7100 | } | |
7101 | ;; | |
7102 | { .mfi | |
7103 | ldfpd fA22, fA25 = [rPolDataPtr], 16 // C17, C20 | |
7104 | fma.s1 fA0 = fA5L, fA5L, f0 // x^8 | |
7105 | nop.i 0 | |
7106 | } | |
7107 | { .mfi | |
7108 | nop.m 0 | |
7109 | fma.s1 fA0L = fA5L, FR_FracX, f0 // x^5 | |
7110 | nop.i 0 | |
7111 | } | |
7112 | ;; | |
7113 | { .mmf | |
7114 | ldfe fA9 = [rPolDataPtr], 16 // E4 | |
7115 | ldfe fA8 = [rTmpPtr], 16 // E3 | |
7116 | nop.f 0 | |
7117 | } | |
7118 | ;; | |
7119 | { .mmf | |
7120 | ldfe fA7 = [rPolDataPtr], 16 // E2 | |
7121 | ldfe fA6 = [rTmpPtr], 16 // E1 | |
7122 | nop.f 0 | |
7123 | } | |
7124 | ;; | |
7125 | { .mfi | |
7126 | ldfe fA5 = [rTmpPtr], 16 // E0 | |
7127 | fma.s1 fRes4H = fA4, fB4, f0 // A4*<x^2> | |
7128 | nop.i 0 | |
7129 | } | |
7130 | { .mfi | |
7131 | nop.m 0 | |
7132 | fma.s1 fPol = fA24, FR_FracX, fA23 // C19*x + C18 | |
7133 | nop.i 0 | |
7134 | } | |
7135 | ;; | |
7136 | { .mfi | |
7137 | // store signgam if size of variable is 4 bytes | |
7138 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
7139 | fma.s1 fRes1H = fA3, fB4, f0 // A3*<x^2> | |
7140 | nop.i 0 | |
7141 | } | |
7142 | { .mfi | |
7143 | // store signgam if size of variable is 8 bytes | |
7144 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
7145 | fma.s1 fA1L = fA3, fB2,fA1L // A3*d(x^2) + A1L | |
7146 | nop.i 0 | |
7147 | } | |
7148 | ;; | |
7149 | { .mfi | |
7150 | nop.m 0 | |
7151 | fma.s1 fA20 = fA20, FR_FracX, fA19 // D7*x + D6 | |
7152 | nop.i 0 | |
7153 | } | |
7154 | { .mfi | |
7155 | nop.m 0 | |
7156 | fma.s1 fA18 = fA18, FR_FracX, fA17 // D5*x + D4 | |
7157 | nop.i 0 | |
7158 | } | |
7159 | ;; | |
7160 | { .mfi | |
7161 | nop.m 0 | |
7162 | fma.s1 fA16 = fA16, FR_FracX, fA15 // D3*x + D2 | |
7163 | nop.i 0 | |
7164 | } | |
7165 | { .mfi | |
7166 | nop.m 0 | |
7167 | fma.s1 fA14 = fA14, FR_FracX, fA13 // D1*x + D0 | |
7168 | nop.i 0 | |
7169 | } | |
7170 | ;; | |
7171 | { .mfi | |
7172 | nop.m 0 | |
7173 | fma.s1 fA2L = fA4, fB2,fA2L // A4*d(x^2) + A2L | |
7174 | nop.i 0 | |
7175 | } | |
7176 | { .mfi | |
7177 | nop.m 0 | |
7178 | fma.s1 fA12 = fA12, FR_FracX, fA11 // E7*x + E6 | |
7179 | nop.i 0 | |
7180 | } | |
7181 | ;; | |
7182 | { .mfi | |
7183 | nop.m 0 | |
7184 | fms.s1 fRes2L = fA4, fB4, fRes4H // delta(A4*<x^2>) | |
7185 | nop.i 0 | |
7186 | } | |
7187 | { .mfi | |
7188 | nop.m 0 | |
7189 | fadd.s1 fRes2H = fRes4H, fA2 // A4*<x^2> + A2 | |
7190 | nop.i 0 | |
7191 | } | |
7192 | ;; | |
7193 | { .mfi | |
7194 | nop.m 0 | |
7195 | fms.s1 fRes3L = fA3, fB4, fRes1H // delta(A3*<x^2>) | |
7196 | nop.i 0 | |
7197 | } | |
7198 | { .mfi | |
7199 | nop.m 0 | |
7200 | fadd.s1 fRes3H = fRes1H, fA1 // A3*<x^2> + A1 | |
7201 | nop.i 0 | |
7202 | } | |
7203 | ;; | |
7204 | { .mfi | |
7205 | nop.m 0 | |
7206 | fma.s1 fA20 = fA20, fB4, fA18 // (D7*x + D6)*x^2 + D5*x + D4 | |
7207 | nop.i 0 | |
7208 | } | |
7209 | { .mfi | |
7210 | nop.m 0 | |
7211 | fma.s1 fA22 = fA22, FR_FracX, fA21 // C17*x + C16 | |
7212 | nop.i 0 | |
7213 | } | |
7214 | ;; | |
7215 | { .mfi | |
7216 | nop.m 0 | |
7217 | fma.s1 fA16 = fA16, fB4, fA14 // (D3*x + D2)*x^2 + D1*x + D0 | |
7218 | nop.i 0 | |
7219 | } | |
7220 | { .mfi | |
7221 | nop.m 0 | |
7222 | fma.s1 fPol = fA25, fB4, fPol // C20*x^2 + C19*x + C18 | |
7223 | nop.i 0 | |
7224 | } | |
7225 | ;; | |
7226 | { .mfi | |
7227 | nop.m 0 | |
7228 | fma.s1 fA2L = fA4L, fB4, fA2L // A4L*<x^2> + A4*d(x^2) + A2L | |
7229 | nop.i 0 | |
7230 | } | |
7231 | { .mfi | |
7232 | nop.m 0 | |
7233 | fma.s1 fA1L = fA3L, fB4, fA1L // A3L*<x^2> + A3*d(x^2) + A1L | |
7234 | nop.i 0 | |
7235 | } | |
7236 | ;; | |
7237 | { .mfi | |
7238 | nop.m 0 | |
7239 | fsub.s1 fRes4L = fA2, fRes2H // d1 | |
7240 | nop.i 0 | |
7241 | } | |
7242 | { .mfi | |
7243 | nop.m 0 | |
7244 | fma.s1 fResH = fRes2H, fB4, f0 // (A4*<x^2> + A2)*x^2 | |
7245 | nop.i 0 | |
7246 | } | |
7247 | ;; | |
7248 | { .mfi | |
7249 | nop.m 0 | |
7250 | fsub.s1 fRes1L = fA1, fRes3H // d1 | |
7251 | nop.i 0 | |
7252 | } | |
7253 | { .mfi | |
7254 | nop.m 0 | |
7255 | fma.s1 fB6 = fRes3H, FR_FracX, f0 // (A3*<x^2> + A1)*x | |
7256 | nop.i 0 | |
7257 | } | |
7258 | ;; | |
7259 | { .mfi | |
7260 | nop.m 0 | |
7261 | fma.s1 fA10 = fA10, FR_FracX, fA9 // E5*x + E4 | |
7262 | nop.i 0 | |
7263 | } | |
7264 | { .mfi | |
7265 | nop.m 0 | |
7266 | fma.s1 fA8 = fA8, FR_FracX, fA7 // E3*x + E2 | |
7267 | nop.i 0 | |
7268 | } | |
7269 | ;; | |
7270 | { .mfi | |
7271 | nop.m 0 | |
7272 | // (C20*x^2 + C19*x + C18)*x^2 + C17*x + C16 | |
7273 | fma.s1 fPol = fPol, fB4, fA22 | |
7274 | nop.i 0 | |
7275 | } | |
7276 | { .mfi | |
7277 | nop.m 0 | |
7278 | fma.s1 fA6 = fA6, FR_FracX, fA5 // E1*x + E0 | |
7279 | nop.i 0 | |
7280 | } | |
7281 | ;; | |
7282 | { .mfi | |
7283 | nop.m 0 | |
7284 | // A4L*<x^2> + A4*d(x^2) + A2L + delta(A4*<x^2>) | |
7285 | fadd.s1 fRes2L = fA2L, fRes2L | |
7286 | nop.i 0 | |
7287 | } | |
7288 | { .mfi | |
7289 | nop.m 0 | |
7290 | // A3L*<x^2> + A3*d(x^2) + A1L + delta(A3*<x^2>) | |
7291 | fadd.s1 fRes3L = fA1L, fRes3L | |
7292 | nop.i 0 | |
7293 | } | |
7294 | ;; | |
7295 | { .mfi | |
7296 | nop.m 0 | |
7297 | fadd.s1 fRes4L = fRes4L, fRes4H // d2 | |
7298 | nop.i 0 | |
7299 | } | |
7300 | { .mfi | |
7301 | nop.m 0 | |
7302 | fms.s1 fResL = fRes2H, fB4, fResH // d(A4*<x^2> + A2)*x^2) | |
7303 | nop.i 0 | |
7304 | } | |
7305 | ;; | |
7306 | { .mfi | |
7307 | nop.m 0 | |
7308 | fadd.s1 fRes1L = fRes1L, fRes1H // d2 | |
7309 | nop.i 0 | |
7310 | } | |
7311 | { .mfi | |
7312 | nop.m 0 | |
7313 | fms.s1 fB8 = fRes3H, FR_FracX, fB6 // d((A3*<x^2> + A1)*x) | |
7314 | nop.i 0 | |
7315 | } | |
7316 | ;; | |
7317 | { .mfi | |
7318 | nop.m 0 | |
7319 | fadd.s1 fB10 = fResH, fB6 // (A4*x^4 + .. + A1*x)hi | |
7320 | nop.i 0 | |
7321 | } | |
7322 | { .mfi | |
7323 | nop.m 0 | |
7324 | fma.s1 fA12 = fA12, fB4, fA10 // Ehi | |
7325 | nop.i 0 | |
7326 | } | |
7327 | ;; | |
7328 | { .mfi | |
7329 | nop.m 0 | |
7330 | // ((D7*x + D6)*x^2 + D5*x + D4)*x^4 + (D3*x + D2)*x^2 + D1*x + D0 | |
7331 | fma.s1 fA20 = fA20, fA5L, fA16 | |
7332 | nop.i 0 | |
7333 | } | |
7334 | { .mfi | |
7335 | nop.m 0 | |
7336 | fma.s1 fA8 = fA8, fB4, fA6 // Elo | |
7337 | nop.i 0 | |
7338 | } | |
7339 | ;; | |
7340 | { .mfi | |
7341 | nop.m 0 | |
7342 | fadd.s1 fRes2L = fRes2L, fRes4L // (A4*<x^2> + A2)lo | |
7343 | nop.i 0 | |
7344 | } | |
7345 | { .mfi | |
7346 | nop.m 0 | |
7347 | // d(A4*<x^2> + A2)*x^2) + A4*<x^2> + A2)*d(x^2) | |
7348 | fma.s1 fResL = fRes2H, fB2, fResL | |
7349 | nop.i 0 | |
7350 | } | |
7351 | ;; | |
7352 | { .mfi | |
7353 | nop.m 0 | |
7354 | fadd.s1 fRes3L = fRes3L, fRes1L // (A4*<x^2> + A2)lo | |
7355 | nop.i 0 | |
7356 | } | |
7357 | ;; | |
7358 | { .mfi | |
7359 | nop.m 0 | |
7360 | fsub.s1 fB12 = fB6, fB10 | |
7361 | nop.i 0 | |
7362 | } | |
7363 | ;; | |
7364 | { .mfi | |
7365 | nop.m 0 | |
7366 | fma.s1 fPol = fPol, fA0, fA20 // PolC*x^8 + PolD | |
7367 | nop.i 0 | |
7368 | } | |
7369 | { .mfi | |
7370 | nop.m 0 | |
7371 | fma.s1 fPolL = fA12, fA5L, fA8 // E | |
7372 | nop.i 0 | |
7373 | } | |
7374 | ;; | |
7375 | { .mfi | |
7376 | nop.m 0 | |
7377 | fma.s1 fResL = fB4, fRes2L, fResL // ((A4*<x^2> + A2)*x^2)lo | |
7378 | nop.i 0 | |
7379 | } | |
7380 | ;; | |
7381 | { .mfi | |
7382 | nop.m 0 | |
7383 | fma.s1 fRes3L = fRes3L, FR_FracX, fB8 // ((A3*<x^2> + A1)*x)lo | |
7384 | nop.i 0 | |
7385 | } | |
7386 | ;; | |
7387 | { .mfi | |
7388 | nop.m 0 | |
7389 | fadd.s1 fB12 = fB12, fResH | |
7390 | nop.i 0 | |
7391 | } | |
7392 | ;; | |
7393 | { .mfi | |
7394 | nop.m 0 | |
7395 | fma.s1 fPol = fPol, fA0, fPolL | |
7396 | nop.i 0 | |
7397 | } | |
7398 | ;; | |
7399 | { .mfi | |
7400 | nop.m 0 | |
7401 | fadd.s1 fRes3L = fRes3L, fResL | |
7402 | nop.i 0 | |
7403 | } | |
7404 | ;; | |
7405 | { .mfi | |
7406 | nop.m 0 | |
7407 | fma.s1 fRes2H = fPol, fA0L, fB10 | |
7408 | nop.i 0 | |
7409 | } | |
7410 | ;; | |
7411 | { .mfi | |
7412 | nop.m 0 | |
7413 | fadd.s1 fRes3L = fB12, fRes3L | |
7414 | nop.i 0 | |
7415 | } | |
7416 | ;; | |
7417 | { .mfi | |
7418 | nop.m 0 | |
7419 | fsub.s1 fRes4L = fB10, fRes2H | |
7420 | nop.i 0 | |
7421 | } | |
7422 | ;; | |
7423 | { .mfi | |
7424 | nop.m 0 | |
7425 | fma.s1 fRes4L = fPol, fA0L, fRes4L | |
7426 | nop.i 0 | |
7427 | } | |
7428 | ;; | |
7429 | { .mfi | |
7430 | nop.m 0 | |
7431 | fadd.s1 fRes4L = fRes4L, fRes3L | |
7432 | nop.i 0 | |
7433 | } | |
7434 | ;; | |
7435 | { .mfb | |
7436 | nop.m 0 | |
7437 | // final result for all paths for which the result is Pol24(x) | |
7438 | fma.s0 f8 = fRes2H, f1, fRes4L | |
7439 | // here is the exit for all paths for which the result is Pol24(x) | |
7440 | br.ret.sptk b0 | |
7441 | } | |
7442 | ;; | |
7443 | ||
7444 | ||
7445 | // here if x is natval, nan, +/-inf, +/-0, or denormal | |
7446 | .align 32 | |
7447 | lgammal_spec: | |
7448 | { .mfi | |
7449 | nop.m 0 | |
7450 | fclass.m p9, p0 = f8, 0xB // +/-denormals | |
7451 | nop.i 0 | |
7452 | };; | |
7453 | { .mfi | |
7454 | nop.m 0 | |
7455 | fclass.m p6, p0 = f8, 0x1E1 // Test x for natval, nan, +inf | |
7456 | nop.i 0 | |
7457 | };; | |
7458 | { .mfb | |
7459 | nop.m 0 | |
7460 | fclass.m p7, p0 = f8, 0x7 // +/-0 | |
7461 | (p9) br.cond.sptk lgammal_denormal_input | |
7462 | };; | |
7463 | { .mfb | |
7464 | nop.m 0 | |
7465 | nop.f 0 | |
7466 | // branch out if x is natval, nan, +inf | |
7467 | (p6) br.cond.spnt lgammal_nan_pinf | |
7468 | };; | |
7469 | { .mfb | |
7470 | nop.m 0 | |
7471 | nop.f 0 | |
7472 | (p7) br.cond.spnt lgammal_singularity | |
7473 | };; | |
7474 | // if we are still here then x = -inf | |
7475 | { .mfi | |
7476 | cmp.eq p6, p7 = 4, rSgnGamSize | |
7477 | nop.f 0 | |
7478 | adds rSgnGam = 1, r0 | |
7479 | };; | |
7480 | { .mfi | |
7481 | // store signgam if size of variable is 4 bytes | |
7482 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
7483 | nop.f 0 | |
7484 | nop.i 0 | |
7485 | } | |
7486 | { .mfb | |
7487 | // store signgam if size of variable is 8 bytes | |
7488 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
7489 | fma.s0 f8 = f8,f8,f0 // return +inf, no call to error support | |
7490 | br.ret.spnt b0 | |
7491 | };; | |
7492 | ||
7493 | // here if x is NaN, NatVal or +INF | |
7494 | .align 32 | |
7495 | lgammal_nan_pinf: | |
7496 | { .mfi | |
7497 | cmp.eq p6, p7 = 4, rSgnGamSize | |
7498 | nop.f 0 | |
7499 | adds rSgnGam = 1, r0 | |
7500 | } | |
7501 | ;; | |
7502 | { .mfi | |
7503 | // store signgam if size of variable is 4 bytes | |
7504 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
7505 | fma.s0 f8 = f8,f1,f8 // return x+x if x is natval, nan, +inf | |
7506 | nop.i 0 | |
7507 | } | |
7508 | { .mfb | |
7509 | // store signgam if size of variable is 8 bytes | |
7510 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
7511 | nop.f 0 | |
7512 | br.ret.sptk b0 | |
7513 | } | |
7514 | ;; | |
7515 | ||
7516 | // here if x denormal or unnormal | |
7517 | .align 32 | |
7518 | lgammal_denormal_input: | |
7519 | { .mfi | |
7520 | nop.m 0 | |
7521 | fma.s0 fResH = f1, f1, f8 // raise denormal exception | |
7522 | nop.i 0 | |
7523 | } | |
7524 | { .mfi | |
7525 | nop.m 0 | |
7526 | fnorm.s1 f8 = f8 // normalize input value | |
7527 | nop.i 0 | |
7528 | } | |
7529 | ;; | |
7530 | { .mfi | |
7531 | getf.sig rSignifX = f8 | |
7532 | fmerge.se fSignifX = f1, f8 | |
7533 | nop.i 0 | |
7534 | } | |
7535 | { .mfi | |
7536 | getf.exp rSignExpX = f8 | |
7537 | fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR) | |
7538 | nop.i 0 | |
7539 | } | |
7540 | ;; | |
7541 | { .mfi | |
7542 | getf.exp rSignExpX = f8 | |
7543 | fcmp.lt.s1 p15, p14 = f8, f0 | |
7544 | nop.i 0 | |
7545 | } | |
7546 | ;; | |
7547 | { .mfb | |
7548 | and rExpX = rSignExpX, r17Ones | |
7549 | fmerge.s fAbsX = f1, f8 // |x| | |
7550 | br.cond.sptk _deno_back_to_main_path | |
7551 | } | |
7552 | ;; | |
7553 | ||
7554 | ||
7555 | // here if overflow (x > overflow_bound) | |
7556 | .align 32 | |
7557 | lgammal_overflow: | |
7558 | { .mfi | |
7559 | addl r8 = 0x1FFFE, r0 | |
7560 | nop.f 0 | |
7561 | cmp.eq p6, p7 = 4, rSgnGamSize | |
7562 | } | |
7563 | { .mfi | |
7564 | adds rSgnGam = 1, r0 | |
7565 | nop.f 0 | |
7566 | nop.i 0 | |
7567 | } | |
7568 | ;; | |
7569 | { .mfi | |
7570 | setf.exp f9 = r8 | |
7571 | fmerge.s FR_X = f8,f8 | |
7572 | mov GR_Parameter_TAG = 102 // overflow | |
7573 | };; | |
7574 | { .mfi | |
7575 | // store signgam if size of variable is 4 bytes | |
7576 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
7577 | nop.f 0 | |
7578 | nop.i 0 | |
7579 | } | |
7580 | { .mfb | |
7581 | // store signgam if size of variable is 8 bytes | |
7582 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
7583 | fma.s0 FR_RESULT = f9,f9,f0 // Set I,O and +INF result | |
7584 | br.cond.sptk __libm_error_region | |
7585 | };; | |
7586 | ||
7587 | // here if x is negative integer or +/-0 (SINGULARITY) | |
7588 | .align 32 | |
7589 | lgammal_singularity: | |
7590 | { .mfi | |
7591 | adds rSgnGam = 1, r0 | |
7592 | fclass.m p8,p0 = f8,0x6 // is x -0? | |
7593 | mov GR_Parameter_TAG = 103 // negative | |
7594 | } | |
7595 | { .mfi | |
7596 | cmp.eq p6, p7 = 4, rSgnGamSize | |
7597 | fma.s1 FR_X = f0,f0,f8 | |
7598 | nop.i 0 | |
7599 | };; | |
7600 | { .mfi | |
7601 | (p8) sub rSgnGam = r0, rSgnGam | |
7602 | nop.f 0 | |
7603 | nop.i 0 | |
7604 | } | |
7605 | { .mfi | |
7606 | nop.m 0 | |
7607 | nop.f 0 | |
7608 | nop.i 0 | |
7609 | };; | |
7610 | { .mfi | |
7611 | // store signgam if size of variable is 4 bytes | |
7612 | (p6) st4 [rSgnGamAddr] = rSgnGam | |
7613 | nop.f 0 | |
7614 | nop.i 0 | |
7615 | } | |
7616 | { .mfb | |
7617 | // store signgam if size of variable is 8 bytes | |
7618 | (p7) st8 [rSgnGamAddr] = rSgnGam | |
7619 | frcpa.s0 FR_RESULT, p0 = f1, f0 | |
7620 | br.cond.sptk __libm_error_region | |
7621 | };; | |
7622 | ||
7623 | GLOBAL_LIBM_END(__libm_lgammal) | |
7624 | ||
7625 | ||
7626 | ||
7627 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
7628 | .prologue | |
7629 | { .mfi | |
7630 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
7631 | nop.f 0 | |
7632 | .save ar.pfs,GR_SAVE_PFS | |
7633 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
7634 | } | |
7635 | { .mfi | |
7636 | .fframe 64 | |
7637 | add sp=-64,sp // Create new stack | |
7638 | nop.f 0 | |
7639 | mov GR_SAVE_GP=gp // Save gp | |
7640 | };; | |
7641 | { .mmi | |
7642 | stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack | |
7643 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
7644 | .save b0, GR_SAVE_B0 | |
7645 | mov GR_SAVE_B0=b0 // Save b0 | |
7646 | };; | |
7647 | .body | |
7648 | { .mib | |
7649 | stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack | |
7650 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
7651 | nop.b 0 // Parameter 3 address | |
7652 | } | |
7653 | { .mib | |
7654 | stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack | |
7655 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
7656 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
7657 | };; | |
7658 | { .mmi | |
7659 | add GR_Parameter_RESULT = 48,sp | |
7660 | nop.m 999 | |
7661 | nop.i 999 | |
7662 | };; | |
7663 | { .mmi | |
7664 | ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack | |
7665 | .restore sp | |
7666 | add sp = 64,sp // Restore stack pointer | |
7667 | mov b0 = GR_SAVE_B0 // Restore return address | |
7668 | };; | |
7669 | { .mib | |
7670 | mov gp = GR_SAVE_GP // Restore gp | |
7671 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
7672 | br.ret.sptk b0 // Return | |
7673 | };; | |
7674 | ||
7675 | LOCAL_LIBM_END(__libm_error_region#) | |
7676 | ||
7677 | .type __libm_error_support#,@function | |
7678 | .global __libm_error_support# |