]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/w_tgammaf_compat.S
Move wrappers to libm-compat-calls-auto
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / w_tgammaf_compat.S
CommitLineData
d5efd131
MF
1.file "tgammaf.s"
2
3
4// Copyright (c) 2001 - 2005, Intel Corporation
5// All rights reserved.
6//
7// Contributed 2001 by the Intel Numerics Group, Intel Corporation
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
23
0347518d
MF
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
d5efd131 26// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0347518d 27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
d5efd131
MF
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
0347518d
MF
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
d5efd131 32// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
0347518d
MF
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
d5efd131 36// Intel Corporation is the author of this code,and requests that all
0347518d 37// problem reports or change requests be submitted to it directly at
d5efd131
MF
38// http://www.intel.com/software/products/opensource/libraries/num.htm.
39//
40//*********************************************************************
41//
0347518d 42// History:
d5efd131
MF
43// 11/30/01 Initial version
44// 05/20/02 Cleaned up namespace and sf0 syntax
45// 02/10/03 Reordered header: .section, .global, .proc, .align
46// 04/04/03 Changed error codes for overflow and negative integers
47// 04/10/03 Changed code for overflow near zero handling
48// 12/16/03 Fixed parameter passing to/from error handling routine
49// 03/31/05 Reformatted delimiters between data tables
50//
51//*********************************************************************
52//
53//*********************************************************************
54//
55// Function: tgammaf(x) computes the principle value of the GAMMA
56// function of x.
57//
58//*********************************************************************
59//
60// Resources Used:
61//
62// Floating-Point Registers: f8-f15
63// f33-f75
64//
65// General Purpose Registers:
66// r8-r11
67// r14-r29
68// r32-r36
69// r37-r40 (Used to pass arguments to error handling routine)
70//
71// Predicate Registers: p6-p15
72//
73//*********************************************************************
74//
75// IEEE Special Conditions:
76//
77// tgammaf(+inf) = +inf
0347518d
MF
78// tgammaf(-inf) = QNaN
79// tgammaf(+/-0) = +/-inf
d5efd131
MF
80// tgammaf(x<0, x - integer) = QNaN
81// tgammaf(SNaN) = QNaN
82// tgammaf(QNaN) = QNaN
83//
84//*********************************************************************
85//
86// Overview
87//
88// The method consists of three cases.
0347518d 89//
d5efd131
MF
90// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular;
91// else if 0 < x < 2 use case tgamma_from_0_to_2;
92// else if -(i+1) < x < -i, i = 0...43 use case tgamma_negatives;
93//
94// Case 2 <= x < OVERFLOW_BOUNDARY
95// -------------------------------
96// Here we use algorithm based on the recursive formula
97// GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
98// [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and
99// approximate GAMMA(x) by polynomial of 22th degree on each
100// [8*n; 8*n+1], recursive formula is used to expand GAMMA(x)
101// to [8*n; 8*n+1]. In other words we need to find n, i and r
102// such that x = 8 * n + i + r where n and i are integer numbers
103// and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) =
104// = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
105// = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~
106// ~ (x-1)*(x-2)*...*(x-i)*P12n(r).
107//
108// Step 1: Reduction
109// -----------------
110// N = [x] with truncate
111// r = x - N, note 0 <= r < 1
112//
113// n = N & ~0xF - index of table that contains coefficient of
0347518d 114// polynomial approximation
d5efd131 115// i = N & 0xF - is used in recursive formula
0347518d 116//
d5efd131
MF
117//
118// Step 2: Approximation
119// ---------------------
120// We use factorized minimax approximation polynomials
121// P12n(r) = A12*(r^2+C01(n)*r+C00(n))*
122// *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n))
123//
124// Step 3: Recursion
125// -----------------
126// In case when i > 0 we need to multiply P12n(r) by product
127// R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
0347518d 128// we can calculate R as follow:
d5efd131
MF
129// R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
130// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
131// *(i-1) if i is odd. In both cases we need to calculate
132// R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) =
133// = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) =
134// = (RA+2*RB)*(RA+6*(1-RB))*...*(RA+2*(2*j-1)*(j-1+RB))
135// where j = 1..[i/2], RA = x^2-x, RB = 1-x.
136//
137// Step 4: Reconstruction
138// ----------------------
139// Reconstruction is just simple multiplication i.e.
140// GAMMA(x) = P12n(r)*R(i,x)
141//
142// Case 0 < x < 2
143// --------------
144// To calculate GAMMA(x) on this interval we do following
145// if 1.0 <= x < 1.25 than GAMMA(x) = P7(x-1)
146// if 1.25 <= x < 1.5 than GAMMA(x) = P7(x-x_min) where
147// x_min is point of local minimum on [1; 2] interval.
148// if 1.5 <= x < 1.75 than GAMMA(x) = P7(x-1.5)
149// if 1.75 <= x < 2.0 than GAMMA(x) = P7(x-1.5)
0347518d 150// and
d5efd131
MF
151// if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x
152//
153// Case -(i+1) < x < -i, i = 0...43
154// ----------------------------------
155// Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
156// so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
157// GAMMA(x) is described above.
158//
159// Step 1: Reduction
160// -----------------
0347518d
MF
161// Note that period of sin(PI*x) is 2 and range reduction for
162// sin(PI*x) is like to range reduction for GAMMA(x)
d5efd131
MF
163// i.e rs = x - round(x) and |rs| <= 0.5.
164//
165// Step 2: Approximation
166// ---------------------
0347518d 167// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
d5efd131
MF
168// = (-1)^n*sin(PI*rs)/PI Taylor series is used.
169// sin(PI*rs)/PI ~ S17(rs).
170//
171// Step 3: Division
172// ----------------
173// To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa
174// instruction with following Newton-Raphson interations.
0347518d 175//
d5efd131
MF
176//
177//*********************************************************************
178
179GR_ad_Data = r8
180GR_TAG = r8
181GR_SignExp = r9
182GR_Sig = r10
183GR_ArgNz = r10
184GR_RqDeg = r11
185
186GR_NanBound = r14
187GR_ExpOf025 = r15
188GR_ExpOf05 = r16
189GR_ad_Co = r17
190GR_ad_Ce = r18
191GR_TblOffs = r19
192GR_Arg = r20
193GR_Exp2Ind = r21
194GR_TblOffsMask = r21
195GR_Offs = r22
196GR_OvfNzBound = r23
197GR_ZeroResBound = r24
198GR_ad_SinO = r25
199GR_ad_SinE = r26
200GR_Correction = r27
201GR_Tbl12Offs = r28
202GR_NzBound = r28
203GR_ExpOf1 = r29
204GR_fpsr = r29
205
206GR_SAVE_B0 = r33
207GR_SAVE_PFS = r34
208GR_SAVE_GP = r35
209GR_SAVE_SP = r36
210
211GR_Parameter_X = r37
212GR_Parameter_Y = r38
213GR_Parameter_RESULT = r39
214GR_Parameter_TAG = r40
215
216
217FR_X = f10
218FR_Y = f1
219FR_RESULT = f8
220
0347518d 221FR_iXt = f11
d5efd131
MF
222FR_Xt = f12
223FR_r = f13
224FR_r2 = f14
225FR_r4 = f15
226
227FR_C01 = f33
228FR_A7 = f33
229FR_C11 = f34
230FR_A6 = f34
231FR_C21 = f35
232FR_A5 = f35
233FR_C31 = f36
234FR_A4 = f36
235FR_C41 = f37
236FR_A3 = f37
237FR_C51 = f38
238FR_A2 = f38
239
240FR_C00 = f39
241FR_A1 = f39
242FR_C10 = f40
243FR_A0 = f40
244FR_C20 = f41
245FR_C30 = f42
246FR_C40 = f43
247FR_C50 = f44
248FR_An = f45
249FR_OvfBound = f46
250FR_InvAn = f47
251
252FR_Multplr = f48
253FR_NormX = f49
254FR_X2mX = f50
255FR_1mX = f51
256FR_Rq0 = f51
257FR_Rq1 = f52
258FR_Rq2 = f53
259FR_Rq3 = f54
260
261FR_Rcp0 = f55
262FR_Rcp1 = f56
263FR_Rcp2 = f57
264
265FR_InvNormX1 = f58
266FR_InvNormX2 = f59
267
268FR_rs = f60
269FR_rs2 = f61
270
271FR_LocalMin = f62
272FR_10 = f63
273
274FR_05 = f64
275
276FR_S32 = f65
277FR_S31 = f66
278FR_S01 = f67
279FR_S11 = f68
280FR_S21 = f69
281FR_S00 = f70
282FR_S10 = f71
283FR_S20 = f72
284
285FR_GAMMA = f73
286FR_2 = f74
287FR_6 = f75
288
289
290
291
292// Data tables
293//==============================================================
294RODATA
295.align 16
296LOCAL_OBJECT_START(tgammaf_data)
297data8 0x3FDD8B618D5AF8FE // local minimum (0.461632144968362356785)
298data8 0x4024000000000000 // 10.0
299data8 0x3E90FC992FF39E13 // S32
300data8 0xBEC144B2760626E2 // S31
301//
302//[2; 8)
303data8 0x4009EFD1BA0CB3B4 // C01
304data8 0x3FFFB35378FF4822 // C11
305data8 0xC01032270413B896 // C41
306data8 0xC01F171A4C0D6827 // C51
307data8 0x40148F8E197396AC // C20
308data8 0x401C601959F1249C // C30
309data8 0x3EE21AD881741977 // An
310data8 0x4041852200000000 // overflow boundary (35.04010009765625)
311data8 0x3FD9CE68F695B198 // C21
312data8 0xBFF8C30AC900DA03 // C31
313data8 0x400E17D2F0535C02 // C00
314data8 0x4010689240F7FAC8 // C10
315data8 0x402563147DDCCF8D // C40
316data8 0x4033406D0480A21C // C50
317//
318//[8; 16)
319data8 0x4006222BAE0B793B // C01
320data8 0x4002452733473EDA // C11
321data8 0xC0010EF3326FDDB3 // C41
322data8 0xC01492B817F99C0F // C51
323data8 0x40099C905A249B75 // C20
324data8 0x4012B972AE0E533D // C30
325data8 0x3FE6F6DB91D0D4CC // An
326data8 0x4041852200000000 // overflow boundary
327data8 0x3FF545828F7B73C5 // C21
328data8 0xBFBBD210578764DF // C31
329data8 0x4000542098F53CFC // C00
330data8 0x40032C1309AD6C81 // C10
331data8 0x401D7331E19BD2E1 // C40
332data8 0x402A06807295EF57 // C50
333//
334//[16; 24)
335data8 0x4000131002867596 // C01
336data8 0x3FFAA362D5D1B6F2 // C11
337data8 0xBFFCB6985697DB6D // C41
338data8 0xC0115BEE3BFC3B3B // C51
339data8 0x3FFE62FF83456F73 // C20
340data8 0x4007E33478A114C4 // C30
341data8 0x41E9B2B73795ED57 // An
342data8 0x4041852200000000 // overflow boundary
343data8 0x3FEEB1F345BC2769 // C21
344data8 0xBFC3BBE6E7F3316F // C31
345data8 0x3FF14E07DA5E9983 // C00
346data8 0x3FF53B76BF81E2C0 // C10
347data8 0x4014051E0269A3DC // C40
348data8 0x40229D4227468EDB // C50
349//
350//[24; 32)
351data8 0x3FFAF7BD498384DE // C01
352data8 0x3FF62AD8B4D1C3D2 // C11
353data8 0xBFFABCADCD004C32 // C41
354data8 0xC00FADE97C097EC9 // C51
355data8 0x3FF6DA9ED737707E // C20
356data8 0x4002A29E9E0C782C // C30
357data8 0x44329D5B5167C6C3 // An
358data8 0x4041852200000000 // overflow boundary
359data8 0x3FE8943CBBB4B727 // C21
360data8 0xBFCB39D466E11756 // C31
361data8 0x3FE879AF3243D8C1 // C00
362data8 0x3FEEC7DEBB14CE1E // C10
363data8 0x401017B79BA80BCB // C40
364data8 0x401E941DC3C4DE80 // C50
365//
366//[32; 40)
367data8 0x3FF7ECB3A0E8FE5C // C01
368data8 0x3FF3815A8516316B // C11
369data8 0xBFF9ABD8FCC000C3 // C41
370data8 0xC00DD89969A4195B // C51
371data8 0x3FF2E43139CBF563 // C20
372data8 0x3FFF96DC3474A606 // C30
373data8 0x46AFF4CA9B0DDDF0 // An
374data8 0x4041852200000000 // overflow boundary
375data8 0x3FE4CE76DA1B5783 // C21
376data8 0xBFD0524DB460BC4E // C31
377data8 0x3FE35852DF14E200 // C00
378data8 0x3FE8C7610359F642 // C10
379data8 0x400BCF750EC16173 // C40
380data8 0x401AC14E02EA701C // C50
381//
382//[40; 48)
383data8 0x3FF5DCE4D8193097 // C01
384data8 0x3FF1B0D8C4974FFA // C11
385data8 0xBFF8FB450194CAEA // C41
386data8 0xC00C9658E030A6C4 // C51
387data8 0x3FF068851118AB46 // C20
388data8 0x3FFBF7C7BB46BF7D // C30
389data8 0x3FF0000000000000 // An
390data8 0x4041852200000000 // overflow boundary
391data8 0x3FE231DEB11D847A // C21
392data8 0xBFD251ECAFD7E935 // C31
393data8 0x3FE0368AE288F6BF // C00
394data8 0x3FE513AE4215A70C // C10
395data8 0x4008F960F7141B8B // C40
396data8 0x40183BA08134397B // C50
397//
398//[1.0; 1.25)
399data8 0xBFD9909648921868 // A7
400data8 0x3FE96FFEEEA8520F // A6
401data8 0xBFED0800D93449B8 // A3
402data8 0x3FEFA648D144911C // A2
403data8 0xBFEE3720F7720B4D // A5
404data8 0x3FEF4857A010CA3B // A4
405data8 0xBFE2788CCD545AA4 // A1
406data8 0x3FEFFFFFFFE9209E // A0
407//
408//[1.25; 1.5)
409data8 0xBFB421236426936C // A7
410data8 0x3FAF237514F36691 // A6
411data8 0xBFC0BADE710A10B9 // A3
412data8 0x3FDB6C5465BBEF1F // A2
413data8 0xBFB7E7F83A546EBE // A5
414data8 0x3FC496A01A545163 // A4
415data8 0xBDEE86A39D8452EB // A1
416data8 0x3FEC56DC82A39AA2 // A0
417//
418//[1.5; 1.75)
419data8 0xBF94730B51795867 // A7
420data8 0x3FBF4203E3816C7B // A6
421data8 0xBFE85B427DBD23E4 // A3
422data8 0x3FEE65557AB26771 // A2
423data8 0xBFD59D31BE3AB42A // A5
424data8 0x3FE3C90CC8F09147 // A4
425data8 0xBFE245971DF735B8 // A1
426data8 0x3FEFFC613AE7FBC8 // A0
427//
428//[1.75; 2.0)
429data8 0xBF7746A85137617E // A7
430data8 0x3FA96E37D09735F3 // A6
431data8 0xBFE3C24AC40AC0BB // A3
432data8 0x3FEC56A80A977CA5 // A2
433data8 0xBFC6F0E707560916 // A5
434data8 0x3FDB262D949175BE // A4
435data8 0xBFE1C1AEDFB25495 // A1
436data8 0x3FEFEE1E644B2022 // A0
437//
438// sin(pi*x)/pi
439data8 0xC026FB0D377656CC // S01
440data8 0x3FFFB15F95A22324 // S11
441data8 0x406CE58F4A41C6E7 // S10
442data8 0x404453786302C61E // S20
443data8 0xC023D59A47DBFCD3 // S21
444data8 0x405541D7ABECEFCA // S00
445//
446// 1/An for [40; 48)
447data8 0xCAA7576DE621FCD5, 0x3F68
448LOCAL_OBJECT_END(tgammaf_data)
449
450//==============================================================
451// Code
452//==============================================================
453
454.section .text
455GLOBAL_LIBM_ENTRY(tgammaf)
456{ .mfi
457 getf.exp GR_SignExp = f8
458 fma.s1 FR_NormX = f8,f1,f0
459 addl GR_ad_Data = @ltoff(tgammaf_data), gp
460}
461{ .mfi
462 mov GR_ExpOf05 = 0xFFFE
463 fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
464 mov GR_Offs = 0 // 2 <= x < 8
465};;
466{ .mfi
467 getf.d GR_Arg = f8
468 fcmp.lt.s1 p14,p15 = f8,f0
469 mov GR_Tbl12Offs = 0
470}
471{ .mfi
472 setf.exp FR_05 = GR_ExpOf05
473 fma.s1 FR_2 = f1,f1,f1 // 2
474 mov GR_Correction = 0
475};;
476{ .mfi
477 ld8 GR_ad_Data = [GR_ad_Data]
478 fclass.m p10,p0 = f8,0x1E7 // is x NaTVal, NaN, +/-0 or +/-INF?
479 tbit.z p12,p13 = GR_SignExp,16 // p13 if |x| >= 2
480}
481{ .mfi
482 mov GR_ExpOf1 = 0xFFFF
483 fcvt.fx.s1 FR_rs = f8 // round(x)
484 and GR_Exp2Ind = 7,GR_SignExp
485};;
486.pred.rel "mutex",p14,p15
487{ .mfi
488(p15) cmp.eq.unc p11,p0 = GR_ExpOf1,GR_SignExp // p11 if 1 <= x < 2
489(p14) fma.s1 FR_1mX = f1,f1,f8 // 1 - |x|
490 mov GR_Sig = 0 // if |x| < 2
491}
492{ .mfi
493(p13) cmp.eq.unc p7,p0 = 2,GR_Exp2Ind
494(p15) fms.s1 FR_1mX = f1,f1,f8 // 1 - |x|
495(p13) cmp.eq.unc p8,p0 = 3,GR_Exp2Ind
496};;
497.pred.rel "mutex",p7,p8
498{ .mfi
499(p7) mov GR_Offs = 0x7 // 8 <= |x| < 16
500 nop.f 0
501(p8) tbit.z.unc p0,p6 = GR_Arg,51
502}
503{ .mib
504(p13) cmp.lt.unc p9,p0 = 3,GR_Exp2Ind
505(p8) mov GR_Offs = 0xE // 16 <= |x| < 32
506 // jump if x is NaTVal, NaN, +/-0 or +/-INF?
507(p10) br.cond.spnt tgammaf_spec_args
508};;
509.pred.rel "mutex",p14,p15
510.pred.rel "mutex",p6,p9
511{ .mfi
512(p9) mov GR_Offs = 0x1C // 32 <= |x|
513(p14) fma.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
514(p9) tbit.z.unc p0,p8 = GR_Arg,50
515}
516{ .mfi
517 ldfpd FR_LocalMin,FR_10 = [GR_ad_Data],16
518(p15) fms.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
519(p6) add GR_Offs = 0x7,GR_Offs // 24 <= x < 32
520};;
521.pred.rel "mutex",p8,p12
522{ .mfi
523 add GR_ad_Ce = 0x50,GR_ad_Data
524(p15) fcmp.lt.unc.s1 p10,p0 = f8,f1 // p10 if 0 <= x < 1
525 mov GR_OvfNzBound = 2
526}
527{ .mib
528 ldfpd FR_S32,FR_S31 = [GR_ad_Data],16
529(p8) add GR_Offs = 0x7,GR_Offs // 40 <= |x|
530 // jump if 1 <= x < 2
531(p11) br.cond.spnt tgammaf_from_1_to_2
532};;
533{ .mfi
534 shladd GR_ad_Ce = GR_Offs,4,GR_ad_Ce
535 fcvt.xf FR_Xt = FR_iXt // [x]
536(p13) cmp.eq.unc p7,p0 = r0,GR_Offs // p7 if 2 <= |x| < 8
537}
538{ .mfi
539 shladd GR_ad_Co = GR_Offs,4,GR_ad_Data
540 fma.s1 FR_6 = FR_2,FR_2,FR_2
541 mov GR_ExpOf05 = 0x7FC
542};;
543{ .mfi
544(p13) getf.sig GR_Sig = FR_iXt // if |x| >= 2
545 frcpa.s1 FR_Rcp0,p0 = f1,FR_NormX
546(p10) shr GR_Arg = GR_Arg,51
547}
548{ .mib
549 ldfpd FR_C01,FR_C11 = [GR_ad_Co],16
550(p7) mov GR_Correction = 2
551 // jump if 0 < x < 1
552(p10) br.cond.spnt tgammaf_from_0_to_1
553};;
554{ .mfi
555 ldfpd FR_C21,FR_C31 = [GR_ad_Ce],16
556 fma.s1 FR_Rq2 = f1,f1,FR_1mX // 2 - |x|
557(p14) sub GR_Correction = r0,GR_Correction
558}
559{ .mfi
560 ldfpd FR_C41,FR_C51 = [GR_ad_Co],16
561(p14) fcvt.xf FR_rs = FR_rs
562(p14) add GR_ad_SinO = 0x3A0,GR_ad_Data
563};;
564.pred.rel "mutex",p14,p15
565{ .mfi
566 ldfpd FR_C00,FR_C10 = [GR_ad_Ce],16
567 nop.f 0
568(p14) sub GR_Sig = GR_Correction,GR_Sig
569}
570{ .mfi
571 ldfpd FR_C20,FR_C30 = [GR_ad_Co],16
572 fma.s1 FR_Rq1 = FR_1mX,FR_2,FR_X2mX // (x-1)*(x-2)
573(p15) sub GR_Sig = GR_Sig,GR_Correction
574};;
575{ .mfi
576(p14) ldfpd FR_S01,FR_S11 = [GR_ad_SinO],16
577 fma.s1 FR_Rq3 = FR_2,f1,FR_1mX // 3 - |x|
578 and GR_RqDeg = 0x6,GR_Sig
579}
580{ .mfi
581 ldfpd FR_C40,FR_C50 = [GR_ad_Ce],16
582(p14) fma.d.s0 FR_X = f0,f0,f8 // set deno flag
583 mov GR_NanBound = 0x30016 // -2^23
584};;
585.pred.rel "mutex",p14,p15
586{ .mfi
587(p14) add GR_ad_SinE = 0x3C0,GR_ad_Data
588(p15) fms.s1 FR_r = FR_NormX,f1,FR_Xt // r = x - [x]
589 cmp.eq p8,p0 = 2,GR_RqDeg
590}
591{ .mfi
592 ldfpd FR_An,FR_OvfBound = [GR_ad_Co]
593(p14) fms.s1 FR_r = FR_Xt,f1,FR_NormX // r = |x - [x]|
594 cmp.eq p9,p0 = 4,GR_RqDeg
595};;
596.pred.rel "mutex",p8,p9
597{ .mfi
598(p14) ldfpd FR_S21,FR_S00 = [GR_ad_SinE],16
599(p8) fma.s1 FR_Rq0 = FR_2,f1,FR_1mX // (3-x)
600 tbit.z p0,p6 = GR_Sig,0
601}
602{ .mfi
603(p14) ldfpd FR_S10,FR_S20 = [GR_ad_SinO],16
604(p9) fma.s1 FR_Rq0 = FR_2,FR_2,FR_1mX // (5-x)
605 cmp.eq p10,p0 = 6,GR_RqDeg
606};;
607{ .mfi
608(p14) getf.s GR_Arg = f8
609(p14) fcmp.eq.unc.s1 p13,p0 = FR_NormX,FR_Xt
610(p14) mov GR_ZeroResBound = 0xC22C // -43
611}
612{ .mfi
613(p14) ldfe FR_InvAn = [GR_ad_SinE]
614(p10) fma.s1 FR_Rq0 = FR_6,f1,FR_1mX // (7-x)
615 cmp.eq p7,p0 = r0,GR_RqDeg
616};;
617{ .mfi
618(p14) cmp.ge.unc p11,p0 = GR_SignExp,GR_NanBound
619 fma.s1 FR_Rq2 = FR_Rq2,FR_6,FR_X2mX // (x-3)*(x-4)
620(p14) shl GR_ZeroResBound = GR_ZeroResBound,16
621}
622{ .mfb
623(p14) mov GR_OvfNzBound = 0x802
624(p14) fms.s1 FR_rs = FR_rs,f1,FR_NormX // rs = round(x) - x
625 // jump if x < -2^23 i.e. x is negative integer
626(p11) br.cond.spnt tgammaf_singularity
627};;
628{ .mfi
629 nop.m 0
630(p7) fma.s1 FR_Rq1 = f0,f0,f1
631(p14) shl GR_OvfNzBound = GR_OvfNzBound,20
632}
633{ .mfb
634 nop.m 0
635 fma.s1 FR_Rq3 = FR_Rq3,FR_10,FR_X2mX // (x-5)*(x-6)
636 // jump if x is negative integer such that -2^23 < x < 0
637(p13) br.cond.spnt tgammaf_singularity
638};;
639{ .mfi
640 nop.m 0
641 fma.s1 FR_C01 = FR_C01,f1,FR_r
642(p14) mov GR_ExpOf05 = 0xFFFE
643}
644{ .mfi
645(p14) cmp.eq.unc p7,p0 = GR_Arg,GR_OvfNzBound
646 fma.s1 FR_C11 = FR_C11,f1,FR_r
647(p14) cmp.ltu.unc p11,p0 = GR_Arg,GR_OvfNzBound
648};;
649{ .mfi
650 nop.m 0
651 fma.s1 FR_C21 = FR_C21,f1,FR_r
652(p14) cmp.ltu.unc p9,p0 = GR_ZeroResBound,GR_Arg
653}
654{ .mfb
655 nop.m 0
656 fma.s1 FR_C31 = FR_C31,f1,FR_r
657 // jump if argument is close to 0 negative
658(p11) br.cond.spnt tgammaf_overflow
659};;
660{ .mfi
661 nop.m 0
662 fma.s1 FR_C41 = FR_C41,f1,FR_r
663 nop.i 0
664}
665{ .mfb
666 nop.m 0
667 fma.s1 FR_C51 = FR_C51,f1,FR_r
668 // jump if x is negative noninteger such that -2^23 < x < -43
669(p9) br.cond.spnt tgammaf_underflow
670};;
671{ .mfi
672 nop.m 0
673(p14) fma.s1 FR_rs2 = FR_rs,FR_rs,f0
0347518d 674 nop.i 0
d5efd131
MF
675}
676{ .mfb
677 nop.m 0
678(p14) fma.s1 FR_S01 = FR_rs,FR_rs,FR_S01
679 // jump if argument is 0x80200000
680(p7) br.cond.spnt tgammaf_overflow_near0_bound
681};;
682{ .mfi
0347518d 683 nop.m 0
d5efd131 684(p6) fnma.s1 FR_Rq1 = FR_Rq1,FR_Rq0,f0
0347518d 685 nop.i 0
d5efd131
MF
686}
687{ .mfi
0347518d 688 nop.m 0
d5efd131
MF
689(p10) fma.s1 FR_Rq2 = FR_Rq2,FR_Rq3,f0
690 and GR_Sig = 0x7,GR_Sig
691};;
692{ .mfi
693 nop.m 0
694 fma.s1 FR_C01 = FR_C01,FR_r,FR_C00
695 nop.i 0
696}
697{ .mfi
698 nop.m 0
699 fma.s1 FR_C11 = FR_C11,FR_r,FR_C10
700 cmp.eq p6,p7 = r0,GR_Sig // p6 if |x| from one of base intervals
701};;
702{ .mfi
703 nop.m 0
704 fma.s1 FR_C21 = FR_C21,FR_r,FR_C20
705 nop.i 0
706}
707{ .mfi
708 nop.m 0
709 fma.s1 FR_C31 = FR_C31,FR_r,FR_C30
710(p7) cmp.lt.unc p9,p0 = 2,GR_RqDeg
711};;
712{ .mfi
713 nop.m 0
714(p14) fma.s1 FR_S11 = FR_rs,FR_rs,FR_S11
715 nop.i 0
716}
717{ .mfi
718 nop.m 0
719(p14) fma.s1 FR_S21 = FR_rs,FR_rs,FR_S21
720 nop.i 0
721};;
722{ .mfi
723 nop.m 0
724 fma.s1 FR_C41 = FR_C41,FR_r,FR_C40
725 nop.i 0
726}
727{ .mfi
728 nop.m 0
729(p14) fma.s1 FR_S32 = FR_rs2,FR_S32,FR_S31
730 nop.i 0
731};;
732{ .mfi
0347518d 733 nop.m 0
d5efd131
MF
734(p9) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0
735 nop.i 0
736}
737{ .mfi
738 nop.m 0
739 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50
0347518d 740 nop.i 0
d5efd131
MF
741};;
742{ .mfi
743(p14) getf.exp GR_SignExp = FR_rs
744 fma.s1 FR_C01 = FR_C01,FR_C11,f0
0347518d 745 nop.i 0
d5efd131
MF
746}
747{ .mfi
748 nop.m 0
749(p14) fma.s1 FR_S01 = FR_S01,FR_rs2,FR_S00
0347518d 750 nop.i 0
d5efd131
MF
751};;
752{ .mfi
753 nop.m 0
754 fma.s1 FR_C21 = FR_C21,FR_C31,f0
755 nop.i 0
756}
757{ .mfi
758 nop.m 0
759 // NR-iteration
760(p14) fnma.s1 FR_InvNormX1 = FR_Rcp0,FR_NormX,f1
761 nop.i 0
762};;
763{ .mfi
764 nop.m 0
765(p14) fma.s1 FR_S11 = FR_S11,FR_rs2,FR_S10
0347518d 766(p14) tbit.z.unc p11,p12 = GR_SignExp,17
d5efd131
MF
767}
768{ .mfi
769 nop.m 0
770(p14) fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S20
771 nop.i 0
772};;
773{ .mfi
774 nop.m 0
775(p15) fcmp.lt.unc.s1 p0,p13 = FR_NormX,FR_OvfBound
776 nop.i 0
777}
778{ .mfi
779 nop.m 0
780(p14) fma.s1 FR_S32 = FR_rs2,FR_S32,f0
781 nop.i 0
782};;
783{ .mfi
784 nop.m 0
785 fma.s1 FR_C41 = FR_C41,FR_C51,f0
786 nop.i 0
787}
788{ .mfi
789 nop.m 0
790(p7) fma.s1 FR_An = FR_Rq1,FR_An,f0
0347518d 791 nop.i 0
d5efd131
MF
792};;
793{ .mfb
794 nop.m 0
795 nop.f 0
796 // jump if x > 35.04010009765625
797(p13) br.cond.spnt tgammaf_overflow
798};;
799{ .mfi
800 nop.m 0
801 // NR-iteration
802(p14) fma.s1 FR_InvNormX1 = FR_Rcp0,FR_InvNormX1,FR_Rcp0
803 nop.i 0
804};;
805{ .mfi
806 nop.m 0
807(p14) fma.s1 FR_S01 = FR_S01,FR_S11,f0
808 nop.i 0
809};;
810{ .mfi
811 nop.m 0
812(p14) fma.s1 FR_S21 = FR_S21,FR_S32,f0
813 nop.i 0
814};;
815{ .mfi
816(p14) getf.exp GR_SignExp = FR_NormX
817 fma.s1 FR_C01 = FR_C01,FR_C21,f0
818 nop.i 0
819}
820{ .mfi
821 nop.m 0
822 fma.s1 FR_C41 = FR_C41,FR_An,f0
823(p14) mov GR_ExpOf1 = 0x2FFFF
824};;
825{ .mfi
826 nop.m 0
827 // NR-iteration
828(p14) fnma.s1 FR_InvNormX2 = FR_InvNormX1,FR_NormX,f1
829 nop.i 0
830};;
831.pred.rel "mutex",p11,p12
832{ .mfi
833 nop.m 0
834(p12) fnma.s1 FR_S01 = FR_S01,FR_S21,f0
835 nop.i 0
836}
837{ .mfi
838 nop.m 0
839(p11) fma.s1 FR_S01 = FR_S01,FR_S21,f0
840 nop.i 0
841};;
842
843{ .mfi
0347518d 844 nop.m 0
d5efd131
MF
845(p14) fma.s1 FR_GAMMA = FR_C01,FR_C41,f0
846(p14) tbit.z.unc p6,p7 = GR_Sig,0
847}
848{ .mfb
849 nop.m 0
850(p15) fma.s.s0 f8 = FR_C01,FR_C41,f0
851(p15) br.ret.spnt b0 // exit for positives
852};;
853.pred.rel "mutex",p11,p12
854{ .mfi
855 nop.m 0
856(p12) fms.s1 FR_S01 = FR_rs,FR_S01,FR_rs
857 nop.i 0
858}
859{ .mfi
860 nop.m 0
861(p11) fma.s1 FR_S01 = FR_rs,FR_S01,FR_rs
862 nop.i 0
863};;
864{ .mfi
865 nop.m 0
866 // NR-iteration
867 fma.s1 FR_InvNormX2 = FR_InvNormX1,FR_InvNormX2,FR_InvNormX1
868 cmp.eq p10,p0 = 0x23,GR_Offs
869};;
870.pred.rel "mutex",p6,p7
871{ .mfi
872 nop.m 0
873(p6) fma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0
874 cmp.gtu p8,p0 = GR_SignExp,GR_ExpOf1
875}
876{ .mfi
877 nop.m 0
878(p7) fnma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0
879 cmp.eq p9,p0 = GR_SignExp,GR_ExpOf1
880};;
881{ .mfi
882 nop.m 0
883 // NR-iteration
884 fnma.s1 FR_InvNormX1 = FR_InvNormX2,FR_NormX,f1
885 nop.i 0
886}
887{ .mfi
888 nop.m 0
889(p10) fma.s1 FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0
890 nop.i 0
891};;
892{ .mfi
893 nop.m 0
894 frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA
895 nop.i 0
896};;
897{ .mfi
898 nop.m 0
899 fms.s1 FR_Multplr = FR_NormX,f1,f1 // x - 1
900 nop.i 0
901};;
902{ .mfi
903 nop.m 0
904 // NR-iteration
905 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1
906 nop.i 0
907};;
908.pred.rel "mutex",p8,p9
909{ .mfi
910 nop.m 0
911 // 1/x or 1/(An*x)
912(p8) fma.s1 FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2
913 nop.i 0
914}
915{ .mfi
916 nop.m 0
917(p9) fma.s1 FR_Multplr = f1,f1,f0
918 nop.i 0
919};;
920{ .mfi
921 nop.m 0
922 // NR-iteration
923 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
924 nop.i 0
925};;
926{ .mfi
927 nop.m 0
928 // NR-iteration
929 fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1
930 nop.i 0
931}
932{ .mfi
933 nop.m 0
934 // NR-iteration
935 fma.s1 FR_Rcp1 = FR_Rcp1,FR_Multplr,f0
936 nop.i 0
937};;
938{ .mfb
939 nop.m 0
940 fma.s.s0 f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1
941 br.ret.sptk b0
942};;
943
944// here if 0 < x < 1
945//--------------------------------------------------------------------
946.align 32
947tgammaf_from_0_to_1:
948{ .mfi
949 cmp.lt p7,p0 = GR_Arg,GR_ExpOf05
950 // NR-iteration
951 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1
952 cmp.eq p8,p0 = GR_Arg,GR_ExpOf05
953}
954{ .mfi
955 cmp.gt p9,p0 = GR_Arg,GR_ExpOf05
956 fma.s1 FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
0347518d 957 mov GR_ExpOf025 = 0x7FA
d5efd131
MF
958};;
959{ .mfi
960 getf.s GR_ArgNz = f8
961 fma.d.s0 FR_X = f0,f0,f8 // set deno flag
962 shl GR_OvfNzBound = GR_OvfNzBound,20
963}
964{ .mfi
965(p8) mov GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75
966 nop.f 0
967(p7) cmp.ge.unc p6,p0 = GR_Arg,GR_ExpOf025
968};;
969.pred.rel "mutex",p6,p9
970{ .mfi
971(p9) mov GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1
972 nop.f 0
973(p6) mov GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
974}
975{ .mfi
0347518d 976 add GR_ad_Ce = 0x2C0,GR_ad_Data
d5efd131
MF
977 nop.f 0
978 add GR_ad_Co = 0x2A0,GR_ad_Data
979};;
980{ .mfi
981 add GR_ad_Co = GR_ad_Co,GR_Tbl12Offs
982 nop.f 0
983 cmp.lt p12,p0 = GR_ArgNz,GR_OvfNzBound
984}
985{ .mib
986 add GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs
987 cmp.eq p7,p0 = GR_ArgNz,GR_OvfNzBound
988 // jump if argument is 0x00200000
989(p7) br.cond.spnt tgammaf_overflow_near0_bound
990};;
991{ .mmb
992 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16
993 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16
994 // jump if argument is close to 0 positive
0347518d 995(p12) br.cond.spnt tgammaf_overflow
d5efd131
MF
996};;
997{ .mfi
998 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16
999 // NR-iteration
1000 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
1001 nop.i 0
1002}
1003{ .mfb
1004 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16
1005 nop.f 0
0347518d 1006 br.cond.sptk tgamma_from_0_to_2
d5efd131
MF
1007};;
1008
1009// here if 1 < x < 2
1010//--------------------------------------------------------------------
1011.align 32
1012tgammaf_from_1_to_2:
1013{ .mfi
1014 add GR_ad_Co = 0x2A0,GR_ad_Data
1015 fms.s1 FR_r = f0,f0,FR_1mX
1016 shr GR_TblOffs = GR_Arg,47
1017}
1018{ .mfi
1019 add GR_ad_Ce = 0x2C0,GR_ad_Data
1020 nop.f 0
1021 mov GR_TblOffsMask = 0x18
1022};;
1023{ .mfi
1024 nop.m 0
1025 nop.f 0
0347518d 1026 and GR_TblOffs = GR_TblOffs,GR_TblOffsMask
d5efd131
MF
1027};;
1028{ .mfi
1029 shladd GR_ad_Co = GR_TblOffs,3,GR_ad_Co
1030 nop.f 0
1031 nop.i 0
1032}
1033{ .mfi
1034 shladd GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce
1035 nop.f 0
1036 cmp.eq p6,p7 = 8,GR_TblOffs
1037};;
1038{ .mmi
1039 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16
1040 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16
1041 nop.i 0
1042};;
1043{ .mmi
1044 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16
1045 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16
1046 nop.i 0
1047};;
1048
1049.align 32
1050tgamma_from_0_to_2:
1051{ .mfi
1052 nop.m 0
1053(p6) fms.s1 FR_r = FR_r,f1,FR_LocalMin
1054 nop.i 0
1055};;
1056{ .mfi
1057 nop.m 0
1058 // NR-iteration
1059(p10) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1
1060 nop.i 0
1061};;
1062{ .mfi
1063 nop.m 0
1064 fms.s1 FR_r2 = FR_r,FR_r,f0
1065 nop.i 0
1066};;
1067{ .mfi
1068 nop.m 0
1069 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6
1070 nop.i 0
1071}
1072{ .mfi
1073 nop.m 0
1074 fma.s1 FR_A5 = FR_A5,FR_r,FR_A4
1075 nop.i 0
1076};;
1077{ .mfi
1078 nop.m 0
1079 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2
1080 nop.i 0
1081}
1082{ .mfi
1083 nop.m 0
1084 fma.s1 FR_A1 = FR_A1,FR_r,FR_A0
1085 nop.i 0
1086};;
1087{ .mfi
1088 nop.m 0
1089 // NR-iteration
1090(p10) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1091 nop.i 0
1092};;
1093{ .mfi
1094 nop.m 0
1095 fma.s1 FR_A7 = FR_A7,FR_r2,FR_A5
1096 nop.i 0
1097}
1098{ .mfi
1099 nop.m 0
1100 fma.s1 FR_r4 = FR_r2,FR_r2,f0
1101 nop.i 0
1102};;
1103{ .mfi
1104 nop.m 0
1105 fma.s1 FR_A3 = FR_A3,FR_r2,FR_A1
1106 nop.i 0
1107};;
1108{ .mfi
0347518d 1109 nop.m 0
d5efd131
MF
1110(p10) fma.s1 FR_GAMMA = FR_A7,FR_r4,FR_A3
1111 nop.i 0
1112}
1113{ .mfi
0347518d 1114 nop.m 0
d5efd131
MF
1115(p11) fma.s.s0 f8 = FR_A7,FR_r4,FR_A3
1116 nop.i 0
1117};;
1118{ .mfb
0347518d 1119 nop.m 0
d5efd131
MF
1120(p10) fma.s.s0 f8 = FR_GAMMA,FR_Rcp2,f0
1121 br.ret.sptk b0
1122};;
1123
1124
1125// overflow
1126//--------------------------------------------------------------------
1127.align 32
1128tgammaf_overflow_near0_bound:
1129.pred.rel "mutex",p14,p15
1130{ .mfi
1131 mov GR_fpsr = ar.fpsr
1132 nop.f 0
1133(p15) mov r8 = 0x7f8
1134}
1135{ .mfi
1136 nop.m 0
1137 nop.f 0
1138(p14) mov r8 = 0xff8
1139};;
1140{ .mfi
1141 nop.m 0
1142 nop.f 0
0347518d 1143 shl r8 = r8,20
d5efd131
MF
1144};;
1145{ .mfi
1146 sub r8 = r8,r0,1
1147 nop.f 0
1148 extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode
1149};;
1150.pred.rel "mutex",p14,p15
1151{ .mfi
1152 // set p8 to 0 in case of overflow and to 1 otherwise
0347518d 1153 // for negative arg:
d5efd131
MF
1154 // no overflow if rounding mode either Z or +Inf, i.e.
1155 // GR_fpsr > 1
1156(p14) cmp.lt p8,p0 = 1,GR_fpsr
1157 nop.f 0
0347518d 1158 // for positive arg:
d5efd131
MF
1159 // no overflow if rounding mode either Z or -Inf, i.e.
1160 // (GR_fpsr & 1) == 0
1161(p15) tbit.z p0,p8 = GR_fpsr,0
1162};;
1163{ .mib
1164(p8) setf.s f8 = r8 // set result to 0x7f7fffff without
1165 // OVERFLOW flag raising
1166 nop.i 0
1167(p8) br.ret.sptk b0
1168};;
1169
1170.align 32
1171tgammaf_overflow:
1172{ .mfi
1173 nop.m 0
1174 nop.f 0
1175 mov r8 = 0x1FFFE
1176};;
1177{ .mfi
1178 setf.exp f9 = r8
1179 fmerge.s FR_X = f8,f8
1180 nop.i 0
1181};;
1182.pred.rel "mutex",p14,p15
1183{ .mfi
1184 nop.m 0
1185(p14) fnma.s.s0 f8 = f9,f9,f0 // set I,O and -INF result
1186 mov GR_TAG = 261 // overflow
1187}
1188{ .mfb
0347518d 1189 nop.m 0
d5efd131
MF
1190(p15) fma.s.s0 f8 = f9,f9,f0 // set I,O and +INF result
1191 br.cond.sptk tgammaf_libm_err
1192};;
1193
1194// x is negative integer or +/-0
1195//--------------------------------------------------------------------
1196.align 32
1197tgammaf_singularity:
1198{ .mfi
1199 nop.m 0
1200 fmerge.s FR_X = f8,f8
1201 mov GR_TAG = 262 // negative
1202}
1203{ .mfb
1204 nop.m 0
1205 frcpa.s0 f8,p0 = f0,f0
1206 br.cond.sptk tgammaf_libm_err
1207};;
1208// x is negative noninteger with big absolute value
1209//--------------------------------------------------------------------
1210.align 32
1211tgammaf_underflow:
1212{ .mfi
1213 mov r8 = 0x00001
1214 nop.f 0
1215 tbit.z p6,p7 = GR_Sig,0
1216};;
1217{ .mfi
1218 setf.exp f9 = r8
1219 nop.f 0
1220 nop.i 0
1221};;
1222.pred.rel "mutex",p6,p7
1223{ .mfi
1224 nop.m 0
1225(p6) fms.s.s0 f8 = f9,f9,f9
1226 nop.i 0
1227}
1228{ .mfb
1229 nop.m 0
1230(p7) fma.s.s0 f8 = f9,f9,f9
1231 br.ret.sptk b0
1232};;
1233
1234// x for natval, nan, +/-inf or +/-0
1235//--------------------------------------------------------------------
1236.align 32
1237tgammaf_spec_args:
1238{ .mfi
1239 nop.m 0
1240 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
1241 nop.i 0
1242};;
1243{ .mfi
1244 nop.m 0
1245 fclass.m p7,p8 = f8,0x7 // +/-0
1246 nop.i 0
1247};;
1248{ .mfi
1249 nop.m 0
1250 fmerge.s FR_X = f8,f8
1251 nop.i 0
1252}
1253{ .mfb
1254 nop.m 0
1255(p6) fma.s.s0 f8 = f8,f1,f8
1256(p6) br.ret.spnt b0
1257};;
1258.pred.rel "mutex",p7,p8
1259{ .mfi
1260(p7) mov GR_TAG = 262 // negative
1261(p7) frcpa.s0 f8,p0 = f1,f8
0347518d 1262 nop.i 0
d5efd131
MF
1263}
1264{ .mib
1265 nop.m 0
1266 nop.i 0
1267(p8) br.cond.spnt tgammaf_singularity
1268};;
1269
1270.align 32
1271tgammaf_libm_err:
1272{ .mfi
1273 alloc r32 = ar.pfs,1,4,4,0
1274 nop.f 0
1275 mov GR_Parameter_TAG = GR_TAG
1276};;
1277
1278GLOBAL_LIBM_END(tgammaf)
1279
1280LOCAL_LIBM_ENTRY(__libm_error_region)
1281.prologue
1282{ .mfi
1283 add GR_Parameter_Y=-32,sp // Parameter 2 value
1284 nop.f 0
1285.save ar.pfs,GR_SAVE_PFS
0347518d 1286 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
d5efd131
MF
1287}
1288{ .mfi
0347518d 1289.fframe 64
d5efd131
MF
1290 add sp=-64,sp // Create new stack
1291 nop.f 0
1292 mov GR_SAVE_GP=gp // Save gp
1293};;
1294{ .mmi
1295 stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
1296 add GR_Parameter_X = 16,sp // Parameter 1 address
0347518d
MF
1297.save b0, GR_SAVE_B0
1298 mov GR_SAVE_B0=b0 // Save b0
d5efd131
MF
1299};;
1300.body
1301{ .mib
0347518d
MF
1302 stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
1303 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
1304 nop.b 0
d5efd131
MF
1305}
1306{ .mib
1307 stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
0347518d 1308 add GR_Parameter_Y = -16,GR_Parameter_Y
d5efd131
MF
1309 br.call.sptk b0=__libm_error_support# // Call error handling function
1310};;
1311{ .mmi
1312 nop.m 0
1313 nop.m 0
1314 add GR_Parameter_RESULT = 48,sp
1315};;
1316{ .mmi
1317 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
1318.restore sp
1319 add sp = 64,sp // Restore stack pointer
1320 mov b0 = GR_SAVE_B0 // Restore return address
1321};;
1322{ .mib
0347518d 1323 mov gp = GR_SAVE_GP // Restore gp
d5efd131
MF
1324 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
1325 br.ret.sptk b0 // Return
0347518d 1326};;
d5efd131
MF
1327
1328LOCAL_LIBM_END(__libm_error_region)
1329.type __libm_error_support#,@function
1330.global __libm_error_support#