]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/w_tgamma.S
ia64: strip trailing whitespace
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / w_tgamma.S
CommitLineData
d5efd131
MF
1.file "tgamma.s"
2
3
4// Copyright (c) 2001 - 2005, Intel Corporation
5// All rights reserved.
6//
7// Contributed 2001 by the Intel Numerics Group, Intel Corporation
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
23
0347518d
MF
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
d5efd131 26// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0347518d 27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
d5efd131
MF
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
29// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
0347518d
MF
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
d5efd131 32// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
0347518d
MF
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
d5efd131 36// Intel Corporation is the author of this code,and requests that all
0347518d 37// problem reports or change requests be submitted to it directly at
d5efd131
MF
38// http://www.intel.com/software/products/opensource/libraries/num.htm.
39//
40//*********************************************************************
41//
0347518d 42// History:
d5efd131
MF
43// 10/12/01 Initial version
44// 05/20/02 Cleaned up namespace and sf0 syntax
45// 02/10/03 Reordered header: .section, .global, .proc, .align
46// 04/04/03 Changed error codes for overflow and negative integers
47// 04/10/03 Changed code for overflow near zero handling
48// 03/31/05 Reformatted delimiters between data tables
49//
50//*********************************************************************
51//
52//*********************************************************************
53//
54// Function: tgamma(x) computes the principal value of the GAMMA
55// function of x.
56//
57//*********************************************************************
58//
59// Resources Used:
60//
61// Floating-Point Registers: f8-f15
62// f33-f87
63//
64// General Purpose Registers:
65// r8-r11
66// r14-r28
67// r32-r36
68// r37-r40 (Used to pass arguments to error handling routine)
69//
70// Predicate Registers: p6-p15
71//
72//*********************************************************************
73//
74// IEEE Special Conditions:
75//
76// tgamma(+inf) = +inf
0347518d
MF
77// tgamma(-inf) = QNaN
78// tgamma(+/-0) = +/-inf
d5efd131
MF
79// tgamma(x<0, x - integer) = QNaN
80// tgamma(SNaN) = QNaN
81// tgamma(QNaN) = QNaN
82//
83//*********************************************************************
84//
85// Overview
86//
87// The method consists of three cases.
0347518d 88//
d5efd131
MF
89// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular;
90// else if 0 < x < 2 use case tgamma_from_0_to_2;
91// else if -(i+1) < x < -i, i = 0...184 use case tgamma_negatives;
92//
93// Case 2 <= x < OVERFLOW_BOUNDARY
94// -------------------------------
95// Here we use algorithm based on the recursive formula
96// GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
97// [2; OVERFLOW_BOUNDARY] into intervals [16*n; 16*(n+1)] and
98// approximate GAMMA(x) by polynomial of 22nd degree on each
99// [16*n; 16*n+1], recursive formula is used to expand GAMMA(x)
100// to [16*n; 16*(n+1)]. In other words we need to find n, i and r
101// such that x = 16 * n + i + r where n and i are integer numbers
102// and r is fractional part of x. So GAMMA(x) = GAMMA(16*n+i+r) =
103// = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
104// = (x-1)*(x-2)*...*(x-i)*GAMMA(16*n+r) ~
105// ~ (x-1)*(x-2)*...*(x-i)*P22n(r).
106//
107// Step 1: Reduction
108// -----------------
109// N = [x] with truncate
110// r = x - N, note 0 <= r < 1
111//
112// n = N & ~0xF - index of table that contains coefficient of
0347518d 113// polynomial approximation
d5efd131 114// i = N & 0xF - is used in recursive formula
0347518d 115//
d5efd131
MF
116//
117// Step 2: Approximation
118// ---------------------
119// We use factorized minimax approximation polynomials
120// P22n(r) = A22*(r^2+C01(n)*r+C00(n))*
121// *(r^2+C11(n)*r+C10(n))*...*(r^2+CA1(n)*r+CA0(n))
122//
123// Step 3: Recursion
124// -----------------
125// In case when i > 0 we need to multiply P22n(r) by product
126// R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
0347518d 127// we can calculate R as follows:
d5efd131
MF
128// R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
129// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
130// *(x-i) if i is odd. In both cases we need to calculate
131// R2(i) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2-(4*j-1)*x+2*j*(2*j-1)) =
132// = (x^2-3*x+2)*(x^2-7*x+12)*...*((x^2+x)+2*j*(2*(j-1)+(1-2*x))) =
133// = (RA+2*(0+RB))*(RA+4*(2+RB))*...*(RA+2*j*(2*(j-1)+RB))
134// where j = 1..[i/2], RA = x^2+x, RB = 1-2*x.
135//
136// Step 4: Reconstruction
137// ----------------------
138// Reconstruction is just simple multiplication i.e.
139// GAMMA(x) = P22n(r)*R(i)
140//
141// Case 0 < x < 2
142// --------------
143// To calculate GAMMA(x) on this interval we do following
144// if 1 <= x < 1.25 then GAMMA(x) = P15(x-1)
145// if 1.25 <= x < 1.5 then GAMMA(x) = P15(x-x_min) where
146// x_min is point of local minimum on [1; 2] interval.
147// if 1.5 <= x < 2.0 then GAMMA(x) = P15(x-1.5)
0347518d 148// and
d5efd131
MF
149// if 0 < x < 1 then GAMMA(x) = GAMMA(x+1)/x
150//
151// Case -(i+1) < x < -i, i = 0...184
152// ----------------------------------
153// Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
154// so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
155// GAMMA(x) is described above.
156//
157// Step 1: Reduction
158// -----------------
0347518d
MF
159// Note that period of sin(PI*x) is 2 and range reduction for
160// sin(PI*x) is similar to the range reduction for GAMMA(x)
d5efd131
MF
161// i.e r = x - [x] with exception of cases
162// when r > 0.5 (in such cases r = 1 - (x - [x])).
163//
164// Step 2: Approximation
165// ---------------------
0347518d 166// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI =
d5efd131
MF
167// = (-1)^n*sin(PI*r)/PI Taylor series is used.
168// sin(PI*r)/PI ~ S21(r).
169//
170// Step 3: Division
171// ----------------
172// To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction
173// with following Newton-Raphson iterations.
0347518d 174//
d5efd131
MF
175//
176//*********************************************************************
177
178GR_Sig = r8
179GR_TAG = r8
180GR_ad_Data = r9
181GR_SigRqLin = r10
182GR_iSig = r11
183GR_ExpOf1 = r11
184GR_ExpOf8 = r11
185
186
187GR_Sig2 = r14
188GR_Addr_Mask1 = r15
189GR_Sign_Exp = r16
190GR_Tbl_Offs = r17
191GR_Addr_Mask2 = r18
192GR_ad_Co = r19
193GR_Bit2 = r19
194GR_ad_Ce = r20
195GR_ad_Co7 = r21
196GR_NzOvfBound = r21
197GR_ad_Ce7 = r22
198GR_Tbl_Ind = r23
199GR_Tbl_16xInd = r24
200GR_ExpOf025 = r24
201GR_ExpOf05 = r25
202GR_0x30033 = r26
203GR_10 = r26
204GR_12 = r27
205GR_185 = r27
206GR_14 = r28
207GR_2 = r28
208GR_fpsr = r28
209
210GR_SAVE_B0 = r33
211GR_SAVE_PFS = r34
212GR_SAVE_GP = r35
213GR_SAVE_SP = r36
214
215GR_Parameter_X = r37
216GR_Parameter_Y = r38
217GR_Parameter_RESULT = r39
218GR_Parameter_TAG = r40
219
220
221
222FR_X = f10
223FR_Y = f1 // tgamma is single argument function
224FR_RESULT = f8
225
226FR_AbsX = f9
227FR_NormX = f9
228FR_r02 = f11
229FR_AbsXp1 = f12
230FR_X2pX = f13
231FR_1m2X = f14
232FR_Rq1 = f14
233FR_Xt = f15
234
235FR_r = f33
236FR_OvfBound = f34
237FR_Xmin = f35
238FR_2 = f36
239FR_Rcp1 = f36
240FR_Rcp3 = f36
241FR_4 = f37
242FR_5 = f38
243FR_6 = f39
244FR_8 = f40
245FR_10 = f41
246FR_12 = f42
247FR_14 = f43
248FR_GAMMA = f43
249FR_05 = f44
250
251FR_Rq2 = f45
252FR_Rq3 = f46
253FR_Rq4 = f47
254FR_Rq5 = f48
255FR_Rq6 = f49
256FR_Rq7 = f50
257FR_RqLin = f51
258
259FR_InvAn = f52
260
261FR_C01 = f53
262FR_A15 = f53
263FR_C11 = f54
264FR_A14 = f54
265FR_C21 = f55
266FR_A13 = f55
267FR_C31 = f56
268FR_A12 = f56
269FR_C41 = f57
270FR_A11 = f57
271FR_C51 = f58
272FR_A10 = f58
273FR_C61 = f59
274FR_A9 = f59
275FR_C71 = f60
276FR_A8 = f60
277FR_C81 = f61
278FR_A7 = f61
279FR_C91 = f62
280FR_A6 = f62
281FR_CA1 = f63
282FR_A5 = f63
283FR_C00 = f64
284FR_A4 = f64
285FR_rs2 = f64
286FR_C10 = f65
287FR_A3 = f65
288FR_rs3 = f65
289FR_C20 = f66
290FR_A2 = f66
291FR_rs4 = f66
292FR_C30 = f67
293FR_A1 = f67
294FR_rs7 = f67
295FR_C40 = f68
296FR_A0 = f68
297FR_rs8 = f68
298FR_C50 = f69
299FR_r2 = f69
300FR_C60 = f70
301FR_r3 = f70
302FR_C70 = f71
303FR_r4 = f71
304FR_C80 = f72
305FR_r7 = f72
306FR_C90 = f73
307FR_r8 = f73
308FR_CA0 = f74
309FR_An = f75
310
311FR_S21 = f76
312FR_S19 = f77
313FR_Rcp0 = f77
314FR_Rcp2 = f77
315FR_S17 = f78
316FR_S15 = f79
317FR_S13 = f80
318FR_S11 = f81
319FR_S9 = f82
320FR_S7 = f83
321FR_S5 = f84
322FR_S3 = f85
323
324FR_iXt = f86
325FR_rs = f87
326
327
328// Data tables
329//==============================================================
330RODATA
331.align 16
332
333LOCAL_OBJECT_START(tgamma_data)
334data8 0x406573FAE561F648 // overflow boundary (171.624376956302739927196)
335data8 0x3FDD8B618D5AF8FE // point of local minimum (0.461632144968362356785)
336//
337//[2; 3]
338data8 0xEF0E85C9AE40ABE2,0x00004000 // C01
339data8 0xCA2049DDB4096DD8,0x00004000 // C11
340data8 0x99A203B4DC2D1A8C,0x00004000 // C21
341data8 0xBF5D9D9C0C295570,0x00003FFF // C31
342data8 0xE8DD037DEB833BAB,0x00003FFD // C41
343data8 0xB6AE39A2A36AA03A,0x0000BFFE // C51
344data8 0x804960DC2850277B,0x0000C000 // C61
345data8 0xD9F3973841C09F80,0x0000C000 // C71
346data8 0x9C198A676F8A2239,0x0000C001 // C81
347data8 0xC98B7DAE02BE3226,0x0000C001 // C91
348data8 0xE9CAF31AC69301BA,0x0000C001 // CA1
349data8 0xFBBDD58608A0D172,0x00004000 // C00
350data8 0xFDD0316D1E078301,0x00004000 // C10
351data8 0x8630B760468C15E4,0x00004001 // C20
352data8 0x93EDE20E47D9152E,0x00004001 // C30
353data8 0xA86F3A38C77D6B19,0x00004001 // C40
354//[16; 17]
355data8 0xF87F757F365EE813,0x00004000 // C01
356data8 0xECA84FBA92759DA4,0x00004000 // C11
357data8 0xD4E0A55E07A8E913,0x00004000 // C21
358data8 0xB0EB45E94C8A5F7B,0x00004000 // C31
359data8 0x8050D6B4F7C8617D,0x00004000 // C41
360data8 0x8471B111AA691E5A,0x00003FFF // C51
361data8 0xADAF462AF96585C9,0x0000BFFC // C61
362data8 0xD327C7A587A8C32B,0x0000BFFF // C71
363data8 0xDEF5192B4CF5E0F1,0x0000C000 // C81
364data8 0xBADD64BB205AEF02,0x0000C001 // C91
365data8 0x9330A24AA67D6860,0x0000C002 // CA1
366data8 0xF57EEAF36D8C47BE,0x00004000 // C00
367data8 0x807092E12A251B38,0x00004001 // C10
368data8 0x8C458F80DEE7ED1C,0x00004001 // C20
369data8 0x9F30C731DC77F1A6,0x00004001 // C30
370data8 0xBAC4E7E099C3A373,0x00004001 // C40
371//[32; 33]
372data8 0xC3059A415F142DEF,0x00004000 // C01
373data8 0xB9C1DAC24664587A,0x00004000 // C11
374data8 0xA7101D910992FFB2,0x00004000 // C21
375data8 0x8A9522B8E4AA0AB4,0x00004000 // C31
376data8 0xC76A271E4BA95DCC,0x00003FFF // C41
377data8 0xC5D6DE2A38DB7FF2,0x00003FFE // C51
378data8 0xDBA42086997818B2,0x0000BFFC // C61
379data8 0xB8EDDB1424C1C996,0x0000BFFF // C71
380data8 0xBF7372FB45524B5D,0x0000C000 // C81
381data8 0xA03DDE759131580A,0x0000C001 // C91
382data8 0xFDA6FC4022C1FFE3,0x0000C001 // CA1
383data8 0x9759ABF797B2533D,0x00004000 // C00
384data8 0x9FA160C6CF18CEC5,0x00004000 // C10
385data8 0xB0EFF1E3530E0FCD,0x00004000 // C20
386data8 0xCCD60D5C470165D1,0x00004000 // C30
387data8 0xF5E53F6307B0B1C1,0x00004000 // C40
388//[48; 49]
389data8 0xAABE577FBCE37F5E,0x00004000 // C01
390data8 0xA274CAEEB5DF7172,0x00004000 // C11
391data8 0x91B90B6646C1B924,0x00004000 // C21
392data8 0xF06718519CA256D9,0x00003FFF // C31
393data8 0xAA9EE181C0E30263,0x00003FFF // C41
394data8 0xA07BDB5325CB28D2,0x00003FFE // C51
395data8 0x86C8B873204F9219,0x0000BFFD // C61
396data8 0xB0192C5D3E4787D6,0x0000BFFF // C71
397data8 0xB1E0A6263D4C19EF,0x0000C000 // C81
398data8 0x93BA32A118EAC9AE,0x0000C001 // C91
399data8 0xE942A39CD9BEE887,0x0000C001 // CA1
400data8 0xE838B0957B0D3D0D,0x00003FFF // C00
401data8 0xF60E0F00074FCF34,0x00003FFF // C10
402data8 0x89869936AE00C2A5,0x00004000 // C20
403data8 0xA0FE4E8AA611207F,0x00004000 // C30
404data8 0xC3B1229CFF1DDAFE,0x00004000 // C40
405//[64; 65]
406data8 0x9C00DDF75CDC6183,0x00004000 // C01
407data8 0x9446AE9C0F6A833E,0x00004000 // C11
408data8 0x84ABC5083310B774,0x00004000 // C21
409data8 0xD9BA3A0977B1ED83,0x00003FFF // C31
410data8 0x989B18C99411D300,0x00003FFF // C41
411data8 0x886E66402318CE6F,0x00003FFE // C51
412data8 0x99028C2468F18F38,0x0000BFFD // C61
413data8 0xAB72D17DCD40CCE1,0x0000BFFF // C71
414data8 0xA9D9AC9BE42C2EF9,0x0000C000 // C81
415data8 0x8C11D983AA177AD2,0x0000C001 // C91
416data8 0xDC779E981C1F0F06,0x0000C001 // CA1
417data8 0xC1FD4AC85965E8D6,0x00003FFF // C00
418data8 0xCE3D2D909D389EC2,0x00003FFF // C10
419data8 0xE7F79980AD06F5D8,0x00003FFF // C20
420data8 0x88DD9F73C8680B5D,0x00004000 // C30
421data8 0xA7D6CB2CB2D46F9D,0x00004000 // C40
422//[80; 81]
423data8 0x91C7FF4E993430D0,0x00004000 // C01
424data8 0x8A6E7AB83E45A7E9,0x00004000 // C11
425data8 0xF72D6382E427BEA9,0x00003FFF // C21
426data8 0xC9E2E4F9B3B23ED6,0x00003FFF // C31
427data8 0x8BEFEF56AE05D775,0x00003FFF // C41
428data8 0xEE9666AB6A185560,0x00003FFD // C51
429data8 0xA6AFAF5CEFAEE04D,0x0000BFFD // C61
430data8 0xA877EAFEF1F9C880,0x0000BFFF // C71
431data8 0xA45BD433048ECA15,0x0000C000 // C81
432data8 0x86BD1636B774CC2E,0x0000C001 // C91
433data8 0xD3721BE006E10823,0x0000C001 // CA1
434data8 0xA97EFABA91854208,0x00003FFF // C00
435data8 0xB4AF0AEBB3F97737,0x00003FFF // C10
436data8 0xCC38241936851B0B,0x00003FFF // C20
437data8 0xF282A6261006EA84,0x00003FFF // C30
438data8 0x95B8E9DB1BD45BAF,0x00004000 // C40
439//[96; 97]
440data8 0x8A1FA3171B35A106,0x00004000 // C01
441data8 0x830D5B8843890F21,0x00004000 // C11
442data8 0xE98B0F1616677A23,0x00003FFF // C21
443data8 0xBDF8347F5F67D4EC,0x00003FFF // C31
444data8 0x825F15DE34EC055D,0x00003FFF // C41
445data8 0xD4846186B8AAC7BE,0x00003FFD // C51
446data8 0xB161093AB14919B1,0x0000BFFD // C61
447data8 0xA65758EEA4800EF4,0x0000BFFF // C71
448data8 0xA046B67536FA329C,0x0000C000 // C81
449data8 0x82BBEC1BCB9E9068,0x0000C001 // C91
450data8 0xCC9DE2B23BA91B0B,0x0000C001 // CA1
451data8 0x983B16148AF77F94,0x00003FFF // C00
452data8 0xA2A4D8EE90FEE5DD,0x00003FFF // C10
453data8 0xB89446FA37FF481C,0x00003FFF // C20
454data8 0xDC5572648485FB01,0x00003FFF // C30
455data8 0x88CD5D7DB976129A,0x00004000 // C40
456//[112; 113]
457data8 0x8417098FD62AC5E3,0x00004000 // C01
458data8 0xFA7896486B779CBB,0x00003FFF // C11
459data8 0xDEC98B14AF5EEBD1,0x00003FFF // C21
460data8 0xB48E153C6BF0B5A3,0x00003FFF // C31
461data8 0xF597B038BC957582,0x00003FFE // C41
462data8 0xBFC6F0884A415694,0x00003FFD // C51
463data8 0xBA075A1392BDB5E5,0x0000BFFD // C61
464data8 0xA4B79E01B44C7DB4,0x0000BFFF // C71
465data8 0x9D12FA7711BFAB0F,0x0000C000 // C81
466data8 0xFF24C47C8E108AB4,0x0000C000 // C91
467data8 0xC7325EC86562606A,0x0000C001 // CA1
468data8 0x8B47DCD9E1610938,0x00003FFF // C00
469data8 0x9518B111B70F88B8,0x00003FFF // C10
470data8 0xA9CC197206F68682,0x00003FFF // C20
471data8 0xCB98294CC0D7A6A6,0x00003FFF // C30
472data8 0xFE09493EA9165181,0x00003FFF // C40
473//[128; 129]
474data8 0xFE53D03442270D90,0x00003FFF // C01
475data8 0xF0F857BAEC1993E4,0x00003FFF // C11
476data8 0xD5FF6D70DBBC2FD3,0x00003FFF // C21
477data8 0xACDAA5F4988B1074,0x00003FFF // C31
478data8 0xE92E069F8AD75B54,0x00003FFE // C41
479data8 0xAEBB64645BD94234,0x00003FFD // C51
480data8 0xC13746249F39B43C,0x0000BFFD // C61
481data8 0xA36B74F5B6297A1F,0x0000BFFF // C71
482data8 0x9A77860DF180F6E5,0x0000C000 // C81
483data8 0xF9F8457D84410A0C,0x0000C000 // C91
484data8 0xC2BF44C649EB8597,0x0000C001 // CA1
485data8 0x81225E7489BCDC0E,0x00003FFF // C00
486data8 0x8A788A09CE0EED11,0x00003FFF // C10
487data8 0x9E2E6F86D1B1D89C,0x00003FFF // C20
488data8 0xBE6866B21CF6CCB5,0x00003FFF // C30
489data8 0xEE94426EC1486AAE,0x00003FFF // C40
490//[144; 145]
491data8 0xF6113E09732A6497,0x00003FFF // C01
492data8 0xE900D45931B04FC8,0x00003FFF // C11
493data8 0xCE9FD58F745EBA5D,0x00003FFF // C21
494data8 0xA663A9636C864C86,0x00003FFF // C31
495data8 0xDEBF5315896CE629,0x00003FFE // C41
496data8 0xA05FEA415EBD7737,0x00003FFD // C51
497data8 0xC750F112BD9C4031,0x0000BFFD // C61
498data8 0xA2593A35C51C6F6C,0x0000BFFF // C71
499data8 0x9848E1DA7FB40C8C,0x0000C000 // C81
500data8 0xF59FEE87A5759A4B,0x0000C000 // C91
501data8 0xBF00203909E45A1D,0x0000C001 // CA1
502data8 0xF1D8E157200127E5,0x00003FFE // C00
503data8 0x81DD5397CB08D487,0x00003FFF // C10
504data8 0x94C1DC271A8B766F,0x00003FFF // C20
505data8 0xB3AFAF9B5D6EDDCF,0x00003FFF // C30
506data8 0xE1FB4C57CA81BE1E,0x00003FFF // C40
507//[160; 161]
508data8 0xEEFFE5122AC72FFD,0x00003FFF // C01
509data8 0xE22F70BB52AD54B3,0x00003FFF // C11
510data8 0xC84FF021FE993EEA,0x00003FFF // C21
511data8 0xA0DA2208EB5B2752,0x00003FFF // C31
512data8 0xD5CDD2FCF8AD2DF5,0x00003FFE // C41
513data8 0x940BEC6DCD811A59,0x00003FFD // C51
514data8 0xCC954EF4FD4EBB81,0x0000BFFD // C61
515data8 0xA1712E29A8C04554,0x0000BFFF // C71
516data8 0x966B55DFB243521A,0x0000C000 // C81
517data8 0xF1E6A2B9CEDD0C4C,0x0000C000 // C91
518data8 0xBBC87BCC031012DB,0x0000C001 // CA1
519data8 0xE43974E6D2818583,0x00003FFE // C00
520data8 0xF5702A516B64C5B7,0x00003FFE // C10
521data8 0x8CEBCB1B32E19471,0x00003FFF // C20
522data8 0xAAC10F05BB77E0AF,0x00003FFF // C30
523data8 0xD776EFCAB205CC58,0x00003FFF // C40
524//[176; 177]
525data8 0xE8DA614119811E5D,0x00003FFF // C01
526data8 0xDC415E0288B223D8,0x00003FFF // C11
527data8 0xC2D2243E44EC970E,0x00003FFF // C21
528data8 0x9C086664B5307BEA,0x00003FFF // C31
529data8 0xCE03D7A08B461156,0x00003FFE // C41
530data8 0x894BE3BAAAB66ADC,0x00003FFD // C51
531data8 0xD131EDD71A702D4D,0x0000BFFD // C61
532data8 0xA0A907CDDBE10898,0x0000BFFF // C71
533data8 0x94CC3CD9C765C808,0x0000C000 // C81
534data8 0xEEA85F237815FC0D,0x0000C000 // C91
535data8 0xB8FA04B023E43F91,0x0000C001 // CA1
536data8 0xD8B2C7D9FCBD7EF9,0x00003FFE // C00
537data8 0xE9566E93AAE7E38F,0x00003FFE // C10
538data8 0x8646E78AABEF0255,0x00003FFF // C20
539data8 0xA32AEDB62E304345,0x00003FFF // C30
540data8 0xCE83E40280EE7DF0,0x00003FFF // C40
541//
542//[2; 3]
543data8 0xC44FB47E90584083,0x00004001 // C50
544data8 0xE863EE77E1C45981,0x00004001 // C60
545data8 0x8AC15BE238B9D70E,0x00004002 // C70
546data8 0xA5D94B6592350EF4,0x00004002 // C80
547data8 0xC379DB3E20A148B3,0x00004002 // C90
548data8 0xDACA49B73974F6C9,0x00004002 // CA0
549data8 0x810E496A1AFEC895,0x00003FE1 // An
550//[16; 17]
551data8 0xE17C0357AAF3F817,0x00004001 // C50
552data8 0x8BA8804750FBFBFE,0x00004002 // C60
553data8 0xB18EAB3CB64BEBEE,0x00004002 // C70
554data8 0xE90AB7015AF1C28F,0x00004002 // C80
555data8 0xA0AB97CE9E259196,0x00004003 // C90
556data8 0xF5E0E0A000C2D720,0x00004003 // CA0
557data8 0xD97F0F87EC791954,0x00004005 // An
558//[32; 33]
559data8 0x980C293F3696040D,0x00004001 // C50
560data8 0xC0DBFFBB948A9A4E,0x00004001 // C60
561data8 0xFAB54625E9A588A2,0x00004001 // C70
562data8 0xA7E08176D6050FBF,0x00004002 // C80
563data8 0xEBAAEC4952270A9F,0x00004002 // C90
564data8 0xB7479CDAD20550FE,0x00004003 // CA0
565data8 0xAACD45931C3FF634,0x00004054 // An
566//[48; 49]
567data8 0xF5180F0000419AD5,0x00004000 // C50
568data8 0x9D507D07BFBB2273,0x00004001 // C60
569data8 0xCEB53F7A13A383E3,0x00004001 // C70
570data8 0x8BAFEF9E0A49128F,0x00004002 // C80
571data8 0xC58EF912D39E228C,0x00004002 // C90
572data8 0x9A88118422BA208E,0x00004003 // CA0
573data8 0xBD6C0E2477EC12CB,0x000040AC // An
574//[64; 65]
575data8 0xD410AC48BF7748DA,0x00004000 // C50
576data8 0x89399B90AFEBD931,0x00004001 // C60
577data8 0xB596DF8F77EB8560,0x00004001 // C70
578data8 0xF6D9445A047FB4A6,0x00004001 // C80
579data8 0xAF52F0DD65221357,0x00004002 // C90
580data8 0x8989B45BFC881989,0x00004003 // CA0
581data8 0xB7FCAE86E6E10D5A,0x0000410B // An
582//[80; 81]
583data8 0xBE759740E3B5AA84,0x00004000 // C50
584data8 0xF8037B1B07D27609,0x00004000 // C60
585data8 0xA4F6F6C7F0977D4F,0x00004001 // C70
586data8 0xE131960233BF02C4,0x00004001 // C80
587data8 0xA06DF43D3922BBE2,0x00004002 // C90
588data8 0xFC266AB27255A360,0x00004002 // CA0
589data8 0xD9F4B012EDAFEF2F,0x0000416F // An
590//[96; 97]
591data8 0xAEFC84CDA8E1EAA6,0x00004000 // C50
592data8 0xE5009110DB5F3C8A,0x00004000 // C60
593data8 0x98F5F48738E7B232,0x00004001 // C70
594data8 0xD17EE64E21FFDC6B,0x00004001 // C80
595data8 0x9596F7A7E36145CC,0x00004002 // C90
596data8 0xEB64DBE50E125CAF,0x00004002 // CA0
597data8 0xA090530D79E32D2E,0x000041D8 // An
598//[112; 113]
599data8 0xA33AEA22A16B2655,0x00004000 // C50
600data8 0xD682B93BD7D7945C,0x00004000 // C60
601data8 0x8FC854C6E6E30CC3,0x00004001 // C70
602data8 0xC5754D828AFFDC7A,0x00004001 // C80
603data8 0x8D41216B397139C2,0x00004002 // C90
604data8 0xDE78D746848116E5,0x00004002 // CA0
605data8 0xB8A297A2DC0630DB,0x00004244 // An
606//[128; 129]
607data8 0x99EB00F11D95E292,0x00004000 // C50
608data8 0xCB005CB911EB779A,0x00004000 // C60
609data8 0x8879AA2FDFF3A37A,0x00004001 // C70
610data8 0xBBDA538AD40CAC2C,0x00004001 // C80
611data8 0x8696D849D311B9DE,0x00004002 // C90
612data8 0xD41E1C041481199F,0x00004002 // CA0
613data8 0xEBA1A43D34EE61EE,0x000042B3 // An
614//[144; 145]
615data8 0x924F822578AA9F3D,0x00004000 // C50
616data8 0xC193FAF9D3B36960,0x00004000 // C60
617data8 0x827AE3A6B68ED0CA,0x00004001 // C70
618data8 0xB3F52A27EED23F0B,0x00004001 // C80
619data8 0x811A079FB3C94D79,0x00004002 // C90
620data8 0xCB94415470B6F8D2,0x00004002 // CA0
621data8 0x80A0260DCB3EC9AC,0x00004326 // An
622//[160; 161]
623data8 0x8BF24091E88B331D,0x00004000 // C50
624data8 0xB9ADE01187E65201,0x00004000 // C60
625data8 0xFAE4508F6E7625FE,0x00004000 // C70
626data8 0xAD516668AD6D7367,0x00004001 // C80
627data8 0xF8F5FF171154F637,0x00004001 // C90
628data8 0xC461321268990C82,0x00004002 // CA0
629data8 0xC3B693F344B0E6FE,0x0000439A // An
630//
631//[176; 177]
632data8 0x868545EB42A258ED,0x00004000 // C50
633data8 0xB2EF04ACE8BA0E6E,0x00004000 // C60
634data8 0xF247D22C22E69230,0x00004000 // C70
635data8 0xA7A1AB93E3981A90,0x00004001 // C80
636data8 0xF10951733E2C697F,0x00004001 // C90
637data8 0xBE3359BFAD128322,0x00004002 // CA0
638data8 0x8000000000000000,0x00003fff
639//
640//[160; 161] for negatives
641data8 0xA76DBD55B2E32D71,0x00003C63 // 1/An
642//
643// sin(pi*x)/pi
644data8 0xBCBC4342112F52A2,0x00003FDE // S21
645data8 0xFAFCECB86536F655,0x0000BFE3 // S19
646data8 0x87E4C97F9CF09B92,0x00003FE9 // S17
647data8 0xEA124C68E704C5CB,0x0000BFED // S15
648data8 0x9BA38CFD59C8AA1D,0x00003FF2 // S13
649data8 0x99C0B552303D5B21,0x0000BFF6 // S11
650//
651//[176; 177] for negatives
652data8 0xBA5D5869211696FF,0x00003BEC // 1/An
653//
654// sin(pi*x)/pi
655data8 0xD63402E79A853175,0x00003FF9 // S9
656data8 0xC354723906DB36BA,0x0000BFFC // S7
657data8 0xCFCE5A015E236291,0x00003FFE // S5
658data8 0xD28D3312983E9918,0x0000BFFF // S3
659//
660//
661// [1.0;1.25]
662data8 0xA405530B067ECD3C,0x0000BFFC // A15
663data8 0xF5B5413F95E1C282,0x00003FFD // A14
664data8 0xC4DED71C782F76C8,0x0000BFFE // A13
665data8 0xECF7DDDFD27C9223,0x00003FFE // A12
666data8 0xFB73D31793068463,0x0000BFFE // A11
667data8 0xFF173B7E66FD1D61,0x00003FFE // A10
668data8 0xFFA5EF3959089E94,0x0000BFFE // A9
669data8 0xFF8153BD42E71A4F,0x00003FFE // A8
670data8 0xFEF9CAEE2CB5B533,0x0000BFFE // A7
671data8 0xFE3F02E5EDB6811E,0x00003FFE // A6
672data8 0xFB64074CED2658FB,0x0000BFFE // A5
673data8 0xFB52882A095B18A4,0x00003FFE // A4
674data8 0xE8508C7990A0DAC0,0x0000BFFE // A3
675data8 0xFD32C611D8A881D0,0x00003FFE // A2
676data8 0x93C467E37DB0C536,0x0000BFFE // A1
677data8 0x8000000000000000,0x00003FFF // A0
678//
679// [1.25;1.5]
680data8 0xD038092400619677,0x0000BFF7 // A15
681data8 0xEA6DE925E6EB8C8F,0x00003FF3 // A14
682data8 0xC53F83645D4597FC,0x0000BFF7 // A13
683data8 0xE366DB2FB27B7ECD,0x00003FF7 // A12
684data8 0xAC8FD5E11F6EEAD8,0x0000BFF8 // A11
685data8 0xFB14010FB3697785,0x00003FF8 // A10
686data8 0xB6F91CB5C371177B,0x0000BFF9 // A9
687data8 0x85A262C6F8FEEF71,0x00003FFA // A8
688data8 0xC038E6E3261568F9,0x0000BFFA // A7
689data8 0x8F4BDE8883232364,0x00003FFB // A6
690data8 0xBCFBBD5786537E9A,0x0000BFFB // A5
691data8 0xA4C08BAF0A559479,0x00003FFC // A4
692data8 0x85D74FA063E81476,0x0000BFFC // A3
693data8 0xDB629FB9BBDC1C4E,0x00003FFD // A2
694data8 0xF4F8FBC7C0C9D317,0x00003FC6 // A1
695data8 0xE2B6E4153A57746C,0x00003FFE // A0
696//
697// [1.5;2.0]
698data8 0x9533F9D3723B448C,0x0000BFF2 // A15
699data8 0xF1F75D3C561CBBAF,0x00003FF5 // A14
700data8 0xBA55A9A1FC883523,0x0000BFF8 // A13
701data8 0xB5D5E9E5104FA995,0x00003FFA // A12
702data8 0xFD84F35B70CD9AE2,0x0000BFFB // A11
703data8 0x87445235F4688CC5,0x00003FFD // A10
704data8 0xE7F236EBFB9F774E,0x0000BFFD // A9
705data8 0xA6605F2721F787CE,0x00003FFE // A8
706data8 0xCF579312AD7EAD72,0x0000BFFE // A7
707data8 0xE96254A2407A5EAC,0x00003FFE // A6
708data8 0xF41312A8572ED346,0x0000BFFE // A5
709data8 0xF9535027C1B1F795,0x00003FFE // A4
710data8 0xE7E82D0C613A8DE4,0x0000BFFE // A3
711data8 0xFD23CD9741B460B8,0x00003FFE // A2
712data8 0x93C30FD9781DBA88,0x0000BFFE // A1
713data8 0xFFFFF1781FDBEE84,0x00003FFE // A0
714LOCAL_OBJECT_END(tgamma_data)
715
716
717//==============================================================
718// Code
719//==============================================================
720
721.section .text
722GLOBAL_LIBM_ENTRY(tgamma)
723{ .mfi
724 getf.exp GR_Sign_Exp = f8
725 fma.s1 FR_1m2X = f8,f1,f8 // 2x
726 addl GR_ad_Data = @ltoff(tgamma_data), gp
727}
728{ .mfi
729 mov GR_ExpOf8 = 0x10002 // 8
730 fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
731 mov GR_ExpOf05 = 0xFFFE // 0.5
732};;
733{ .mfi
734 getf.sig GR_Sig = f8
735 fma.s1 FR_2 = f1,f1,f1 // 2
736 mov GR_Addr_Mask1 = 0x780
737}
738{ .mlx
739 setf.exp FR_8 = GR_ExpOf8
740 movl GR_10 = 0x4024000000000000
741};;
742{ .mfi
743 ld8 GR_ad_Data = [GR_ad_Data]
744 fcmp.lt.s1 p14,p15 = f8,f0
745 tbit.z p12,p13 = GR_Sign_Exp,0x10 // p13 if x >= 2
746}
747{ .mlx
748 and GR_Bit2 = 4,GR_Sign_Exp
749 movl GR_12 = 0x4028000000000000
750};;
751{ .mfi
752 setf.d FR_10 = GR_10
753 fma.s1 FR_r02 = f8,f1,f0
754 extr.u GR_Tbl_Offs = GR_Sig,58,6
755}
756{ .mfi
757(p12) mov GR_Addr_Mask1 = r0
758 fma.s1 FR_NormX = f8,f1,f0
759 cmp.ne p8,p0 = GR_Bit2,r0
760};;
761{ .mfi
762(p8) shladd GR_Tbl_Offs = GR_Tbl_Offs,4,r0
763 fclass.m p10,p0 = f8,0x1E7 // Test x for NaTVal, NaN, +/-0, +/-INF
764 tbit.nz p11,p0 = GR_Sign_Exp,1
765}
766{ .mlx
767 add GR_Addr_Mask2 = GR_Addr_Mask1,GR_Addr_Mask1
768 movl GR_14 = 0x402C000000000000
769};;
770.pred.rel "mutex",p14,p15
771{ .mfi
772 setf.d FR_12 = GR_12
773(p14) fma.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x|
774 tbit.nz p8,p9 = GR_Sign_Exp,0
775}
776{ .mfi
777 ldfpd FR_OvfBound,FR_Xmin = [GR_ad_Data],16
778(p15) fms.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x|
779(p11) shladd GR_Tbl_Offs = GR_Tbl_Offs,2,r0
780};;
781.pred.rel "mutex",p9,p8
782{ .mfi
783 setf.d FR_14 = GR_14
784 fma.s1 FR_4 = FR_2,FR_2,f0
785(p8) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask1
786}
787{ .mfi
788 setf.exp FR_05 = GR_ExpOf05
789 fma.s1 FR_6 = FR_2,FR_2,FR_2
790(p9) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask2
791};;
792.pred.rel "mutex",p9,p8
793{ .mfi
794(p8) shladd GR_ad_Co = GR_Tbl_Offs,1,GR_ad_Data
795 fcvt.xf FR_Xt = FR_iXt // [x]
796(p15) tbit.z.unc p11,p0 = GR_Sign_Exp,0x10 // p11 if 0 < x < 2
797}
798{ .mfi
799(p9) add GR_ad_Co = GR_ad_Data,GR_Tbl_Offs
800 fma.s1 FR_5 = FR_2,FR_2,f1
801(p15) cmp.lt.unc p7,p6 = GR_ExpOf05,GR_Sign_Exp // p7 if 0 < x < 1
802};;
803{ .mfi
804 add GR_ad_Ce = 16,GR_ad_Co
805(p11) frcpa.s1 FR_Rcp0,p0 = f1,f8
806 sub GR_Tbl_Offs = GR_ad_Co,GR_ad_Data
807}
808{ .mfb
809 ldfe FR_C01 = [GR_ad_Co],32
0347518d 810(p7) fms.s1 FR_r02 = FR_r02,f1,f1
d5efd131
MF
811 // jump if x is NaTVal, NaN, +/-0, +/-INF
812(p10) br.cond.spnt tgamma_spec
813};;
814.pred.rel "mutex",p14,p15
815{ .mfi
816 ldfe FR_C11 = [GR_ad_Ce],32
817(p14) fms.s1 FR_X2pX = f8,f8,f8 // RA=x^2+|x|
818 shr GR_Tbl_Ind = GR_Tbl_Offs,8
819}
820{ .mfb
821 ldfe FR_C21 = [GR_ad_Co],32
822(p15) fma.s1 FR_X2pX = f8,f8,f8 // RA=x^2+x
823 // jump if 0 < x < 2
824(p11) br.cond.spnt tgamma_from_0_to_2
825};;
826{ .mfi
827 ldfe FR_C31 = [GR_ad_Ce],32
828 fma.s1 FR_Rq2 = FR_2,f1,FR_1m2X // 2 + B
829 cmp.ltu p7,p0=0xB,GR_Tbl_Ind
830}
831{ .mfb
832 ldfe FR_C41 = [GR_ad_Co],32
833 fma.s1 FR_Rq3 = FR_2,FR_2,FR_1m2X // 4 + B
834 // jump if GR_Tbl_Ind > 11, i.e |x| is more than 192
835(p7) br.cond.spnt tgamma_spec_res
836};;
837{ .mfi
838 ldfe FR_C51 = [GR_ad_Ce],32
839 fma.s1 FR_Rq4 = FR_6,f1,FR_1m2X // 6 + B
840 shr GR_Tbl_Offs = GR_Tbl_Offs,1
841}
842{ .mfi
843 ldfe FR_C61 = [GR_ad_Co],32
844 fma.s1 FR_Rq5 = FR_4,FR_2,FR_1m2X // 8 + B
845 nop.i 0
846};;
847{ .mfi
848 ldfe FR_C71 = [GR_ad_Ce],32
849(p14) fms.s1 FR_r = FR_Xt,f1,f8 // r = |x| - [|x|]
850 shr GR_Tbl_16xInd = GR_Tbl_Offs,3
851}
852{ .mfi
853 ldfe FR_C81 = [GR_ad_Co],32
854(p15) fms.s1 FR_r = f8,f1,FR_Xt // r = x - [x]
855 add GR_ad_Data = 0xC00,GR_ad_Data
856};;
857{ .mfi
858 ldfe FR_C91 = [GR_ad_Ce],32
859 fma.s1 FR_Rq6 = FR_5,FR_2,FR_1m2X // 10 + B
860(p14) mov GR_0x30033 = 0x30033
861}
862{ .mfi
863 ldfe FR_CA1 = [GR_ad_Co],32
864 fma.s1 FR_Rq7 = FR_6,FR_2,FR_1m2X // 12 + B
865 sub GR_Tbl_Offs = GR_Tbl_Offs,GR_Tbl_16xInd
866};;
867{ .mfi
868 ldfe FR_C00 = [GR_ad_Ce],32
869 fma.s1 FR_Rq1 = FR_Rq1,FR_2,FR_X2pX // (x-1)*(x-2)
870(p13) cmp.eq.unc p8,p0 = r0,GR_Tbl_16xInd // index is 0 i.e. arg from [2;16)
871}
872{ .mfi
873 ldfe FR_C10 = [GR_ad_Co],32
874(p14) fms.s1 FR_AbsX = f0,f0,FR_NormX // absolute value of argument
875 add GR_ad_Co7 = GR_ad_Data,GR_Tbl_Offs
876};;
877{ .mfi
878 ldfe FR_C20 = [GR_ad_Ce],32
879 fma.s1 FR_Rq2 = FR_Rq2,FR_4,FR_X2pX // (x-3)*(x-4)
880 add GR_ad_Ce7 = 16,GR_ad_Co7
881}
882{ .mfi
883 ldfe FR_C30 = [GR_ad_Co],32
884 fma.s1 FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6)
0347518d 885 nop.i 0
d5efd131
MF
886};;
887{ .mfi
888 ldfe FR_C40 = [GR_ad_Ce],32
889 fma.s1 FR_Rq4 = FR_Rq4,FR_8,FR_X2pX // (x-7)*(x-8)
890(p14) cmp.leu.unc p7,p0 = GR_0x30033,GR_Sign_Exp
891}
892{ .mfb
893 ldfe FR_C50 = [GR_ad_Co7],32
894 fma.s1 FR_Rq5 = FR_Rq5,FR_10,FR_X2pX // (x-9)*(x-10)
895 // jump if x is less or equal to -2^52, i.e. x is big negative integer
896(p7) br.cond.spnt tgamma_singularity
897};;
898{ .mfi
899 ldfe FR_C60 = [GR_ad_Ce7],32
900 fma.s1 FR_C01 = FR_C01,f1,FR_r
901 add GR_ad_Ce = 0x560,GR_ad_Data
902}
903{ .mfi
904 ldfe FR_C70 = [GR_ad_Co7],32
0347518d 905 fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x)
d5efd131
MF
906 add GR_ad_Co = 0x550,GR_ad_Data
907};;
908{ .mfi
909 ldfe FR_C80 = [GR_ad_Ce7],32
910 fma.s1 FR_C11 = FR_C11,f1,FR_r
911 nop.i 0
912}
913{ .mfi
914 ldfe FR_C90 = [GR_ad_Co7],32
915 fma.s1 FR_C21 = FR_C21,f1,FR_r
916 nop.i 0
917};;
918.pred.rel "mutex",p12,p13
919{ .mfi
920(p13) getf.sig GR_iSig = FR_iXt
921 fcmp.lt.s1 p11,p0 = FR_05,FR_r
922 mov GR_185 = 185
923}
924{ .mfi
925 nop.m 0
926 fma.s1 FR_Rq6 = FR_Rq6,FR_12,FR_X2pX // (x-11)*(x-12)
927 nop.i 0
928};;
929{ .mfi
930 ldfe FR_CA0 = [GR_ad_Ce7],32
931 fma.s1 FR_C31 = FR_C31,f1,FR_r
932(p12) mov GR_iSig = 0
933}
934{ .mfi
935 ldfe FR_An = [GR_ad_Co7],0x80
936 fma.s1 FR_C41 = FR_C41,f1,FR_r
937 nop.i 0
938};;
939{ .mfi
940(p14) getf.sig GR_Sig = FR_r
941 fma.s1 FR_C51 = FR_C51,f1,FR_r
942(p14) sub GR_iSig = r0,GR_iSig
943}
944{ .mfi
945 ldfe FR_S21 = [GR_ad_Co],32
946 fma.s1 FR_C61 = FR_C61,f1,FR_r
947 nop.i 0
948};;
949{ .mfi
950 ldfe FR_S19 = [GR_ad_Ce],32
951 fma.s1 FR_C71 = FR_C71,f1,FR_r
952 and GR_SigRqLin = 0xF,GR_iSig
953}
954{ .mfi
955 ldfe FR_S17 = [GR_ad_Co],32
956 fma.s1 FR_C81 = FR_C81,f1,FR_r
957 mov GR_2 = 2
958};;
959{ .mfi
960(p14) ldfe FR_InvAn = [GR_ad_Co7]
961 fma.s1 FR_C91 = FR_C91,f1,FR_r
962 // if significand of r is 0 then argument is negative integer
963(p14) cmp.eq.unc p12,p0 = r0,GR_Sig
964}
965{ .mfb
966(p8) sub GR_SigRqLin = GR_SigRqLin,GR_2 // subtract 2 if 2 <= x < 16
967 fma.s1 FR_CA1 = FR_CA1,f1,FR_r
968 // jump if x is negative integer such that -2^52 < x < -185
969(p12) br.cond.spnt tgamma_singularity
970};;
971{ .mfi
972 setf.sig FR_Xt = GR_SigRqLin
973(p11) fms.s1 FR_rs = f1,f1,FR_r
974(p14) cmp.ltu.unc p7,p0 = GR_185,GR_iSig
975}
976{ .mfb
977 ldfe FR_S15 = [GR_ad_Ce],32
978 fma.s1 FR_Rq7 = FR_Rq7,FR_14,FR_X2pX // (x-13)*(x-14)
979 // jump if x is noninteger such that -2^52 < x < -185
980(p7) br.cond.spnt tgamma_underflow
981};;
982{ .mfi
983 ldfe FR_S13 = [GR_ad_Co],48
984 fma.s1 FR_C01 = FR_C01,FR_r,FR_C00
985 and GR_Sig2 = 0xE,GR_SigRqLin
986}
987{ .mfi
988 ldfe FR_S11 = [GR_ad_Ce],48
989 fma.s1 FR_C11 = FR_C11,FR_r,FR_C10
990 nop.i 0
991};;
992{ .mfi
993 ldfe FR_S9 = [GR_ad_Co],32
994 fma.s1 FR_C21 = FR_C21,FR_r,FR_C20
995 // should we mul by polynomial of recursion?
996 cmp.eq p13,p12 = r0,GR_SigRqLin
997}
998{ .mfi
999 ldfe FR_S7 = [GR_ad_Ce],32
1000 fma.s1 FR_C31 = FR_C31,FR_r,FR_C30
1001 nop.i 0
1002};;
1003{ .mfi
1004 ldfe FR_S5 = [GR_ad_Co],32
1005 fma.s1 FR_C41 = FR_C41,FR_r,FR_C40
1006 nop.i 0
1007}
1008{ .mfi
1009 ldfe FR_S3 = [GR_ad_Ce],32
1010 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50
1011 nop.i 0
1012};;
1013{ .mfi
1014 nop.m 0
1015 fma.s1 FR_C61 = FR_C61,FR_r,FR_C60
1016 nop.i 0
1017}
1018{ .mfi
1019 nop.m 0
1020 fma.s1 FR_C71 = FR_C71,FR_r,FR_C70
1021 nop.i 0
1022};;
1023{ .mfi
1024 nop.m 0
1025 fma.s1 FR_C81 = FR_C81,FR_r,FR_C80
1026 nop.i 0
1027}
1028{ .mfi
1029 nop.m 0
1030 fma.s1 FR_C91 = FR_C91,FR_r,FR_C90
1031 nop.i 0
1032};;
1033{ .mfi
1034 nop.m 0
1035 fma.s1 FR_CA1 = FR_CA1,FR_r,FR_CA0
1036 nop.i 0
1037}
1038{ .mfi
0347518d 1039 nop.m 0
d5efd131
MF
1040 fma.s1 FR_C01 = FR_C01,FR_C11,f0
1041 nop.i 0
1042};;
1043{ .mfi
0347518d 1044 nop.m 0
d5efd131
MF
1045 fma.s1 FR_C21 = FR_C21,FR_C31,f0
1046 nop.i 0
1047}
1048{ .mfi
1049 nop.m 0
1050 fma.s1 FR_rs2 = FR_rs,FR_rs,f0
1051(p12) cmp.lt.unc p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2?
1052};;
1053{ .mfi
0347518d 1054 nop.m 0
d5efd131 1055 fma.s1 FR_C41 = FR_C41,FR_C51,f0
0347518d 1056 nop.i 0
d5efd131
MF
1057}
1058{ .mfi
1059 nop.m 0
1060(p7) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0
1061(p12) cmp.lt.unc p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4?
1062};;
1063{ .mfi
0347518d 1064 nop.m 0
d5efd131
MF
1065 fma.s1 FR_C61 = FR_C61,FR_C71,f0
1066(p15) cmp.eq p11,p0 = r0,r0
1067}
1068{ .mfi
1069 nop.m 0
1070(p9) fma.s1 FR_Rq3 = FR_Rq3,FR_Rq4,f0
1071(p12) cmp.lt.unc p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6?
1072};;
1073{ .mfi
0347518d 1074 nop.m 0
d5efd131
MF
1075 fma.s1 FR_C81 = FR_C81,FR_C91,f0
1076 nop.i 0
1077}
1078{ .mfi
1079 nop.m 0
1080(p8) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq6,f0
1081(p14) cmp.ltu p0,p11 = 0x9,GR_Tbl_Ind
1082};;
1083{ .mfi
0347518d
MF
1084 nop.m 0
1085 fcvt.xf FR_RqLin = FR_Xt
d5efd131
MF
1086 nop.i 0
1087}
1088{ .mfi
1089 nop.m 0
1090(p11) fma.s1 FR_CA1 = FR_CA1,FR_An,f0
1091 nop.i 0
1092};;
1093{ .mfi
1094 nop.m 0
1095 fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S19
1096 nop.i 0
1097}
1098{ .mfi
1099 nop.m 0
1100 fma.s1 FR_S17 = FR_S17,FR_rs2,FR_S15
1101 nop.i 0
1102};;
1103{ .mfi
0347518d 1104 nop.m 0
d5efd131
MF
1105 fma.s1 FR_C01 = FR_C01,FR_C21,f0
1106 nop.i 0
1107}
1108{ .mfi
0347518d 1109 nop.m 0
d5efd131
MF
1110 fma.s1 FR_rs4 = FR_rs2,FR_rs2,f0
1111(p12) cmp.lt.unc p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3?
1112};;
1113{ .mfi
1114 nop.m 0
1115(p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq3,f0
1116 nop.i 0
1117}
1118{ .mfi
1119 nop.m 0
1120 fma.s1 FR_S13 = FR_S13,FR_rs2,FR_S11
1121(p12) cmp.lt.unc p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7?
1122};;
1123{ .mfi
0347518d 1124 nop.m 0
d5efd131
MF
1125 fma.s1 FR_C41 = FR_C41,FR_C61,f0
1126 nop.i 0
1127}
1128{ .mfi
0347518d 1129 nop.m 0
d5efd131 1130(p9) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq7,f0
0347518d 1131 nop.i 0
d5efd131
MF
1132};;
1133{ .mfi
0347518d 1134 nop.m 0
d5efd131 1135 fma.s1 FR_C81 = FR_C81,FR_CA1,f0
0347518d 1136 nop.i 0
d5efd131
MF
1137}
1138{ .mfi
1139 nop.m 0
1140 fma.s1 FR_S9 = FR_S9,FR_rs2,FR_S7
1141 nop.i 0
1142};;
1143{ .mfi
1144 nop.m 0
1145 fma.s1 FR_S5 = FR_S5,FR_rs2,FR_S3
1146 nop.i 0
1147};;
1148{ .mfi
1149 nop.m 0
1150 fma.s1 FR_rs3 = FR_rs2,FR_rs,f0
1151(p12) tbit.nz.unc p6,p0 = GR_SigRqLin,0
1152}
1153{ .mfi
1154 nop.m 0
1155 fma.s1 FR_rs8 = FR_rs4,FR_rs4,f0
1156 nop.i 0
1157};;
1158{ .mfi
1159 nop.m 0
1160 fma.s1 FR_S21 = FR_S21,FR_rs4,FR_S17
1161 mov GR_ExpOf1 = 0x2FFFF
1162}
1163{ .mfi
0347518d 1164 nop.m 0
d5efd131
MF
1165(p6) fms.s1 FR_RqLin = FR_AbsX,f1,FR_RqLin
1166(p12) cmp.lt.unc p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5?
1167};;
1168{ .mfi
0347518d 1169 nop.m 0
d5efd131
MF
1170 fma.s1 FR_C01 = FR_C01,FR_C41,f0
1171 nop.i 0
1172}
1173{ .mfi
1174 nop.m 0
1175(p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq5,f0
1176(p14) cmp.gtu.unc p7,p0 = GR_Sign_Exp,GR_ExpOf1
1177};;
1178{ .mfi
1179 nop.m 0
1180 fma.s1 FR_S13 = FR_S13,FR_rs4,FR_S9
1181 nop.i 0
1182}
1183{ .mfi
1184 nop.m 0
1185(p7) fma.s1 FR_C81 = FR_C81,FR_AbsX,f0
1186 nop.i 0
1187};;
1188{ .mfi
1189 nop.m 0
1190(p14) fma.s1 FR_AbsXp1 = f1,f1,FR_AbsX // |x|+1
1191 nop.i 0
1192}
1193{ .mfi
1194 nop.m 0
0347518d 1195(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary
d5efd131
MF
1196 nop.i 0
1197};;
1198{ .mfi
1199 nop.m 0
1200 fma.s1 FR_rs7 = FR_rs4,FR_rs3,f0
1201 nop.i 0
1202}
1203{ .mfi
1204 nop.m 0
1205 fma.s1 FR_S5 = FR_S5,FR_rs3,FR_rs
1206 nop.i 0
1207};;
1208{ .mib
1209(p14) cmp.lt p13,p0 = r0,r0 // set p13 to 0 if x < 0
1210(p12) cmp.eq.unc p8,p9 = 1,GR_SigRqLin
1211(p10) br.cond.spnt tgamma_spec_res
1212};;
1213{ .mfi
1214 getf.sig GR_Sig = FR_iXt
1215(p6) fma.s1 FR_Rq1 = FR_Rq1,FR_RqLin,f0
1216 // should we mul by polynomial of recursion?
1217(p15) cmp.eq.unc p0,p11 = r0,GR_SigRqLin
1218}
1219{ .mfb
0347518d 1220 nop.m 0
d5efd131
MF
1221 fma.s1 FR_GAMMA = FR_C01,FR_C81,f0
1222(p11) br.cond.spnt tgamma_positives
1223};;
1224{ .mfi
1225 nop.m 0
1226 fma.s1 FR_S21 = FR_S21,FR_rs8,FR_S13
1227 nop.i 0
1228}
1229{ .mfb
1230 nop.m 0
1231(p13) fma.d.s0 f8 = FR_C01,FR_C81,f0
1232(p13) br.ret.spnt b0
1233};;
1234.pred.rel "mutex",p8,p9
1235{ .mfi
0347518d 1236 nop.m 0
d5efd131
MF
1237(p9) fma.s1 FR_GAMMA = FR_GAMMA,FR_Rq1,f0
1238 tbit.z p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0
1239}
1240{ .mfi
0347518d 1241 nop.m 0
d5efd131
MF
1242(p8) fma.s1 FR_GAMMA = FR_GAMMA,FR_RqLin,f0
1243 nop.i 0
1244};;
1245{ .mfi
1246 nop.m 0
1247 fma.s1 FR_S21 = FR_S21,FR_rs7,FR_S5
1248 nop.i 0
1249};;
1250.pred.rel "mutex",p6,p7
1251{ .mfi
0347518d
MF
1252 nop.m 0
1253(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0
1254 nop.i 0
d5efd131
MF
1255}
1256{ .mfi
0347518d 1257 nop.m 0
d5efd131
MF
1258(p7) fma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0
1259 mov GR_Sig2 = 1
1260};;
1261{ .mfi
1262 nop.m 0
1263 frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA
1264 cmp.ltu p13,p0 = GR_Sign_Exp,GR_ExpOf1
1265};;
1266// NR method: iteration #1
1267{ .mfi
1268(p13) getf.exp GR_Sign_Exp = FR_AbsX
1269 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1 // t = 1 - r0*x
1270(p13) shl GR_Sig2 = GR_Sig2,63
1271};;
1272{ .mfi
1273(p13) getf.sig GR_Sig = FR_AbsX
1274 nop.f 0
1275(p13) mov GR_NzOvfBound = 0xFBFF
1276};;
1277{ .mfi
1278(p13) cmp.ltu.unc p8,p0 = GR_Sign_Exp,GR_NzOvfBound // p8 <- overflow
1279 nop.f 0
1280(p13) cmp.eq.unc p9,p0 = GR_Sign_Exp,GR_NzOvfBound
1281};;
1282{ .mfb
1283 nop.m 0
1284(p13) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags
1285(p8) br.cond.spnt tgamma_ovf_near_0 //tgamma_neg_overflow
1286};;
1287{ .mib
1288 nop.m 0
1289(p9) cmp.eq.unc p8,p0 = GR_Sig,GR_Sig2
1290(p8) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_neg_overflow
1291};;
1292{ .mfi
1293 nop.m 0
1294 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
0347518d 1295 nop.i 0
d5efd131
MF
1296};;
1297// NR method: iteration #2
1298{ .mfi
1299 nop.m 0
1300 fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1 // t = 1 - r1*x
1301 nop.i 0
1302};;
1303{ .mfi
1304 nop.m 0
1305 fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1306 nop.i 0
1307};;
1308// NR method: iteration #3
1309{ .mfi
1310 nop.m 0
1311 fnma.s1 FR_Rcp3 = FR_Rcp2,FR_GAMMA,f1 // t = 1 - r2*x
1312 nop.i 0
1313}
1314{ .mfi
1315 nop.m 0
1316(p13) fma.s1 FR_Rcp2 = FR_Rcp2,FR_AbsXp1,f0
1317(p14) cmp.ltu p10,p11 = 0x9,GR_Tbl_Ind
1318};;
1319.pred.rel "mutex",p10,p11
1320{ .mfi
1321 nop.m 0
1322(p10) fma.s1 FR_GAMMA = FR_Rcp2,FR_Rcp3,FR_Rcp2
1323 nop.i 0
1324}
1325{ .mfi
1326 nop.m 0
1327(p11) fma.d.s0 f8 = FR_Rcp2,FR_Rcp3,FR_Rcp2
1328 nop.i 0
1329};;
1330{ .mfb
1331 nop.m 0
1332(p10) fma.d.s0 f8 = FR_GAMMA,FR_InvAn,f0
1333 br.ret.sptk b0
1334};;
1335
1336
1337// here if x >= 3
1338//--------------------------------------------------------------------
// tgamma_positives: last step of the positive-argument path that still
// needs a recursion factor applied to FR_GAMMA.
// The main path sets p8/p9 with "cmp.eq.unc p8,p9 = 1,GR_SigRqLin", so
//   p8: GR_SigRqLin == 1  -> f8 = FR_GAMMA * FR_RqLin
//   p9: GR_SigRqLin != 1  -> f8 = FR_GAMMA * FR_Rq1
// The product is rounded to double under status field s0 and the routine
// returns through b0.
1339.align 32
1340tgamma_positives:
1341.pred.rel "mutex",p8,p9
1342{ .mfi
0347518d 1343 nop.m 0
d5efd131
MF
1344(p9) fma.d.s0 f8 = FR_GAMMA,FR_Rq1,f0
1345 nop.i 0
1346}
1347{ .mfb
0347518d 1348 nop.m 0
d5efd131
MF
1349(p8) fma.d.s0 f8 = FR_GAMMA,FR_RqLin,f0
1350 br.ret.sptk b0
1351};;
1352
1353// here if 0 < x < 1
1354//--------------------------------------------------------------------
// NOTE(review): the label says "from 0 to 2" while the comment above says
// 0 < x < 1. The code below has two outcomes: under p6 the polynomial is
// multiplied by a reciprocal of FR_NormX, under p7 the polynomial itself is
// the result. Presumably p6 is the 0 < x < 1 sub-case and p7 is 1 <= x < 2;
// p6/p7 are set before this label - TODO confirm against the caller.
//
// Outline of this section:
//   1. Pick a coefficient table at GR_ad_Data + 0x1200 (+0x100 or +0x200
//      depending on the exponent of FR_r02) and load A15..A0 from it.
//   2. Under p6 only: check for overflow of the result near 0 and refine
//      FR_Rcp0 with three Newton-Raphson steps into FR_Rcp3.
//      FR_Rcp0 is not written in this section; presumably it is the frcpa
//      seed for 1/FR_NormX computed earlier - TODO confirm.
//   3. Evaluate a degree-15 polynomial in FR_r as four 4-term Horner
//      chains combined with FR_r4 and FR_r8.
//   4. p7: f8 = polynomial;  p6: f8 = polynomial * FR_Rcp3.
1355.align 32
1356tgamma_from_0_to_2:
// GR_Sign_Exp <- sign/exponent field of FR_r02; FR_r = FR_r02 - FR_Xmin.
1357{ .mfi
1358 getf.exp GR_Sign_Exp = FR_r02
1359 fms.s1 FR_r = FR_r02,f1,FR_Xmin
1360 mov GR_ExpOf025 = 0xFFFD
1361}
1362{ .mfi
1363 add GR_ad_Co = 0x1200,GR_ad_Data
1364(p6) fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1 // t = 1 - r0*x
1365(p6) mov GR_Sig2 = 1
1366};;
// p6: GR_Sig <- significand of FR_NormX, GR_Sig2 <- 1 << 63; both are used
// below for the exact-boundary overflow test.
1367{ .mfi
1368(p6) getf.sig GR_Sig = FR_NormX
0347518d 1369 nop.f 0
d5efd131
MF
1370(p6) shl GR_Sig2 = GR_Sig2,63
1371}
1372{ .mfi
1373 add GR_ad_Ce = 0x1210,GR_ad_Data
1374 nop.f 0
1375(p6) mov GR_NzOvfBound = 0xFBFF
1376};;
// Table selection: p8 if the exponent equals GR_ExpOf05, p9 if it equals
// GR_ExpOf025, p10 is the complement of p9.
// Overflow screen (p6 only): p11 if exponent < 0xFBFF, p12 if exponent ==
// 0xFBFF (0xFBFF = 0xFFFF - 0x400, presumably the exponent of 2^-1024).
1377{ .mfi
0347518d 1378 cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2
d5efd131 1379 nop.f 0
0347518d 1380 cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4
d5efd131
MF
1381}
1382{ .mfi
1383(p6) cmp.ltu.unc p11,p0 = GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow
1384 nop.f 0
1385(p6) cmp.eq.unc p12,p0 = GR_Sign_Exp,GR_NzOvfBound
1386};;
// Advance both coefficient pointers by 0x200 (p8) or 0x100 (p9).
// The p6 fma into FR_X only serves to raise flags, per its own comment.
1387.pred.rel "mutex",p8,p9
1388{ .mfi
1389(p8) add GR_ad_Co = 0x200,GR_ad_Co
1390(p6) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags
1391(p9) add GR_ad_Co = 0x100,GR_ad_Co
1392}
1393{ .mib
1394(p8) add GR_ad_Ce = 0x200,GR_ad_Ce
1395(p9) add GR_ad_Ce = 0x100,GR_ad_Ce
1396(p11) br.cond.spnt tgamma_ovf_near_0 //tgamma_spec_res
1397};;
// Coefficient loads: GR_ad_Co walks the odd-numbered A's, GR_ad_Ce the
// even-numbered ones, each with a 32-byte post-increment.
// p13: exponent is exactly at the bound and the significand equals 1<<63,
// i.e. the argument sits exactly on the overflow boundary.
1398{ .mfi
0347518d 1399 ldfe FR_A15 = [GR_ad_Co],32
d5efd131
MF
1400 nop.f 0
1401(p12) cmp.eq.unc p13,p0 = GR_Sig,GR_Sig2
1402}
1403{ .mfb
0347518d 1404 ldfe FR_A14 = [GR_ad_Ce],32
d5efd131
MF
1405 nop.f 0
1406(p13) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_spec_res
1407};;
1408{ .mfi
0347518d 1409 ldfe FR_A13 = [GR_ad_Co],32
d5efd131
MF
1410 nop.f 0
1411 nop.i 0
1412}
1413{ .mfi
0347518d 1414 ldfe FR_A12 = [GR_ad_Ce],32
d5efd131
MF
1415 nop.f 0
1416 nop.i 0
1417};;
// FR_r2 is the square of the polynomial argument: FR_r02 under p10,
// FR_r (= FR_r02 - FR_Xmin) under p9.
1418.pred.rel "mutex",p9,p10
1419{ .mfi
0347518d
MF
1420 ldfe FR_A11 = [GR_ad_Co],32
1421(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0
d5efd131
MF
1422 nop.i 0
1423}
1424{ .mfi
0347518d
MF
1425 ldfe FR_A10 = [GR_ad_Ce],32
1426(p9) fma.s1 FR_r2 = FR_r,FR_r,f0
d5efd131
MF
1427 nop.i 0
1428};;
// NR step 1 completes: Rcp1 = Rcp0 + Rcp0*t.
// Under p10 the polynomial argument FR_r is replaced by FR_r02 itself.
1429{ .mfi
0347518d 1430 ldfe FR_A9 = [GR_ad_Co],32
d5efd131
MF
1431(p6) fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
1432 nop.i 0
1433}
1434{ .mfi
0347518d 1435 ldfe FR_A8 = [GR_ad_Ce],32
d5efd131
MF
1436(p10) fma.s1 FR_r = f0,f0,FR_r02
1437 nop.i 0
1438};;
1439{ .mfi
0347518d 1440 ldfe FR_A7 = [GR_ad_Co],32
d5efd131
MF
1441 nop.f 0
1442 nop.i 0
1443}
1444{ .mfi
0347518d
MF
1445 ldfe FR_A6 = [GR_ad_Ce],32
1446 nop.f 0
d5efd131
MF
1447 nop.i 0
1448};;
1449{ .mfi
0347518d 1450 ldfe FR_A5 = [GR_ad_Co],32
d5efd131
MF
1451 nop.f 0
1452 nop.i 0
1453}
1454{ .mfi
0347518d 1455 ldfe FR_A4 = [GR_ad_Ce],32
d5efd131
MF
1456 nop.f 0
1457 nop.i 0
1458};;
1459{ .mfi
0347518d 1460 ldfe FR_A3 = [GR_ad_Co],32
d5efd131
MF
1461 nop.f 0
1462 nop.i 0
1463}
1464{ .mfi
0347518d 1465 ldfe FR_A2 = [GR_ad_Ce],32
d5efd131
MF
1466 nop.f 0
1467 nop.i 0
1468};;
1469{ .mfi
0347518d
MF
1470 ldfe FR_A1 = [GR_ad_Co],32
1471 fma.s1 FR_r4 = FR_r2,FR_r2,f0
d5efd131
MF
1472 nop.i 0
1473}
1474{ .mfi
0347518d 1475 ldfe FR_A0 = [GR_ad_Ce],32
d5efd131
MF
1476 nop.f 0
1477 nop.i 0
1478};;
// NR step 2 starts (p6 only).
1479{ .mfi
1480 nop.m 0
1481(p6) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1 // t = 1 - r1*x
1482 nop.i 0
1483};;
// Horner chains, first level: A15 <- A15*r + A14, A11 <- A11*r + A10.
1484{ .mfi
1485 nop.m 0
1486 fma.s1 FR_A15 = FR_A15,FR_r,FR_A14
1487 nop.i 0
1488}
1489{ .mfi
1490 nop.m 0
1491 fma.s1 FR_A11 = FR_A11,FR_r,FR_A10
1492 nop.i 0
1493};;
1494{ .mfi
1495 nop.m 0
0347518d 1496 fma.s1 FR_r8 = FR_r4,FR_r4,f0
d5efd131
MF
1497 nop.i 0
1498};;
// NR step 2 completes: Rcp2 = Rcp1 + Rcp1*t.
1499{ .mfi
1500 nop.m 0
1501(p6) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1502 nop.i 0
1503};;
// A7 <- A7*r + A6, A3 <- A3*r + A2.
1504{ .mfi
1505 nop.m 0
1506 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6
1507 nop.i 0
1508}
1509{ .mfi
1510 nop.m 0
1511 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2
1512 nop.i 0
1513};;
// Second level: fold in A13 and A9.
1514{ .mfi
1515 nop.m 0
1516 fma.s1 FR_A15 = FR_A15,FR_r,FR_A13
1517 nop.i 0
1518}
1519{ .mfi
1520 nop.m 0
1521 fma.s1 FR_A11 = FR_A11,FR_r,FR_A9
1522 nop.i 0
1523};;
// NR step 3 starts (p6 only); the residual uses FR_Rcp2.
1524{ .mfi
1525 nop.m 0
1526(p6) fnma.s1 FR_Rcp3 = FR_Rcp2,FR_NormX,f1 // t = 1 - r2*x
1527 nop.i 0
1528};;
// Second level: fold in A5 and A1.
1529{ .mfi
1530 nop.m 0
1531 fma.s1 FR_A7 = FR_A7,FR_r,FR_A5
1532 nop.i 0
1533}
1534{ .mfi
1535 nop.m 0
1536 fma.s1 FR_A3 = FR_A3,FR_r,FR_A1
1537 nop.i 0
1538};;
// Third level: fold in A12 and A8.
1539{ .mfi
1540 nop.m 0
1541 fma.s1 FR_A15 = FR_A15,FR_r,FR_A12
1542 nop.i 0
1543}
1544{ .mfi
1545 nop.m 0
1546 fma.s1 FR_A11 = FR_A11,FR_r,FR_A8
1547 nop.i 0
1548};;
// NR step 3 completes: Rcp3 = Rcp2 + Rcp2*t.
1549{ .mfi
1550 nop.m 0
1551(p6) fma.s1 FR_Rcp3 = FR_Rcp2,FR_Rcp3,FR_Rcp2
1552 nop.i 0
1553};;
// Third level: fold in A4 and A0.
1554{ .mfi
1555 nop.m 0
1556 fma.s1 FR_A7 = FR_A7,FR_r,FR_A4
1557 nop.i 0
1558}
1559{ .mfi
1560 nop.m 0
1561 fma.s1 FR_A3 = FR_A3,FR_r,FR_A0
1562 nop.i 0
1563};;
// Combine the four chains pairwise with r^4 ...
1564{ .mfi
1565 nop.m 0
1566 fma.s1 FR_A15 = FR_A15,FR_r4,FR_A11
1567 nop.i 0
1568}
1569{ .mfi
1570 nop.m 0
1571 fma.s1 FR_A7 = FR_A7,FR_r4,FR_A3
1572 nop.i 0
1573};;
// ... and the two halves with r^8. Under p7 this is already the final
// result (rounded to double into f8); under p6 it stays in FR_A15.
1574.pred.rel "mutex",p6,p7
1575{ .mfi
0347518d 1576 nop.m 0
d5efd131
MF
1577(p6) fma.s1 FR_A15 = FR_A15,FR_r8,FR_A7
1578 nop.i 0
1579}
1580{ .mfi
0347518d 1581 nop.m 0
d5efd131
MF
1582(p7) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7
1583 nop.i 0
1584};;
// p6: result = polynomial * refined reciprocal of FR_NormX; then return.
1585{ .mfb
0347518d 1586 nop.m 0
d5efd131
MF
1587(p6) fma.d.s0 f8 = FR_A15,FR_Rcp3,f0
1588 br.ret.sptk b0
1589};;
1590
1591// overflow
1592//--------------------------------------------------------------------
// tgamma_ovf_near_0_boundary: the argument sits exactly on the overflow
// boundary near 0 (callers branch here when the exponent equals
// GR_NzOvfBound and the significand equals 1<<63).
// Whether the result overflows then depends on the rounding mode:
//   - builds r8 = (0x7ff << 52) - 1 for p15 or (0xfff << 52) - 1 for p14,
//     i.e. the bit pattern of the largest finite double with the sign
//     chosen by p14/p15 (p14 is the negative-argument predicate, p15 the
//     positive one, per the comments below);
//   - extracts the 2-bit field at bit 10 of ar.fpsr (the rounding mode);
//   - if the mode rounds toward zero in magnitude for that sign, returns
//     r8 as the result without raising overflow;
//   - otherwise falls through into tgamma_ovf_near_0.
1593.align 32
1594tgamma_ovf_near_0_boundary:
1595.pred.rel "mutex",p14,p15
1596{ .mfi
1597 mov GR_fpsr = ar.fpsr
1598 nop.f 0
1599(p15) mov r8 = 0x7ff
1600}
1601{ .mfi
1602 nop.m 0
1603 nop.f 0
1604(p14) mov r8 = 0xfff
1605};;
1606{ .mfi
1607 nop.m 0
1608 nop.f 0
0347518d 1609 shl r8 = r8,52
d5efd131
MF
1610};;
// Three-operand sub: r8 = r8 - r0 - 1, i.e. r8 - 1.
1611{ .mfi
1612 sub r8 = r8,r0,1
1613 nop.f 0
1614 extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode
1615};;
1616.pred.rel "mutex",p14,p15
1617{ .mfi
1618 // set p8 to 0 in case of overflow and to 1 otherwise
0347518d 1619 // for negative arg:
d5efd131
MF
1620 // no overflow if rounding mode either Z or +Inf, i.e.
1621 // GR_fpsr > 1
1622(p14) cmp.lt p8,p0 = 1,GR_fpsr
1623 nop.f 0
0347518d 1624 // for positive arg:
d5efd131
MF
1625 // no overflow if rounding mode either Z or -Inf, i.e.
1626 // (GR_fpsr & 1) == 1 (tbit.z writes "bit is set" to its second target, p8)
1627(p15) tbit.z p0,p8 = GR_fpsr,0
1628};;
1629{ .mib
1630(p8) setf.d f8 = r8 // set result to 0x7fefffffffffffff (0xffefffffffffffff under p14) without
1631 // OVERFLOW flag raising
1632 nop.i 0
1633(p8) br.ret.sptk b0
1634};;
// tgamma_ovf_near_0: overflow for an argument close to 0.
// Also reached by fall-through from tgamma_ovf_near_0_boundary when p8 is
// clear there.
//   - f9 gets exponent field 0x1FFFE via setf.exp;
//   - FR_X keeps a copy of the original argument (f8) for the error handler;
//   - GR_TAG = 258 (overflow);
//   - f8 = +f9*f9 (p15) or -(f9*f9) (p14), rounded to double under s0, which
//     per the existing comments yields +/-INF and sets the I and O flags;
//   - continues at tgamma_libm_err.
1635.align 32
1636tgamma_ovf_near_0:
1637{ .mfi
1638 mov r8 = 0x1FFFE
0347518d 1639 nop.f 0
d5efd131
MF
1640 nop.i 0
1641};;
1642{ .mfi
1643 setf.exp f9 = r8
1644 fmerge.s FR_X = f8,f8
1645 mov GR_TAG = 258 // overflow
1646};;
1647.pred.rel "mutex",p14,p15
1648{ .mfi
0347518d 1649 nop.m 0
d5efd131 1650(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result
0347518d 1651 nop.i 0
d5efd131
MF
1652}
1653{ .mfb
0347518d 1654 nop.m 0
d5efd131
MF
1655(p14) fnma.d.s0 f8 = f9,f9,f0 // Set I,O and -INF result
1656 br.cond.sptk tgamma_libm_err
1657};;
1658// overflow or absolute value of x is too big
1659//--------------------------------------------------------------------
// tgamma_spec_res: dispatch for arguments whose magnitude is too large.
//   p14 (x < 0):
//     p10: f8 == FR_Xt                      -> tgamma_singularity
//          (presumably FR_Xt is x truncated to an integer, so this is the
//          negative-integer case - TODO confirm, FR_Xt is set elsewhere)
//     p11 (not equal), then unsigned compare of GR_Sign_Exp with 0x30033:
//       p7: 0x30033 <  GR_Sign_Exp          -> tgamma_singularity
//       p8: otherwise                       -> tgamma_underflow
//   p15 (x > 0): the .unc compares leave p10, p7 and p8 clear, so control
//     falls through to the overflow tail: FR_X = x, GR_TAG = 258,
//     f8 = f9*f9 with f9's exponent field set to 0x1FFFE, then
//     tgamma_libm_err.
1660.align 32
1661tgamma_spec_res:
1662{ .mfi
1663 mov GR_0x30033 = 0x30033
1664(p14) fcmp.eq.unc.s1 p10,p11 = f8,FR_Xt
1665(p15) mov r8 = 0x1FFFE
1666};;
1667{ .mfi
1668(p15) setf.exp f9 = r8
1669 nop.f 0
1670 nop.i 0
1671};;
1672{ .mfb
1673(p11) cmp.ltu.unc p7,p8 = GR_0x30033,GR_Sign_Exp
0347518d 1674 nop.f 0
d5efd131
MF
1675(p10) br.cond.spnt tgamma_singularity
1676};;
1677.pred.rel "mutex",p7,p8
1678{ .mbb
1679 nop.m 0
1680(p7) br.cond.spnt tgamma_singularity
1681(p8) br.cond.spnt tgamma_underflow
1682};;
// Only the positive-argument case reaches this point.
1683{ .mfi
1684 nop.m 0
1685 fmerge.s FR_X = f8,f8
1686 mov GR_TAG = 258 // overflow
1687}
1688{ .mfb
0347518d 1689 nop.m 0
d5efd131
MF
1690(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result
1691 br.cond.sptk tgamma_libm_err
1692};;
1693
1694// x is negative integer or +/-0
1695//--------------------------------------------------------------------
// tgamma_singularity: saves the original argument in FR_X for the error
// handler, sets GR_TAG = 259, replaces f8 with the result of
// frcpa.s0 on f0/f0 (presumably a quiet NaN with the invalid flag raised
// - confirm against the frcpa definition), then continues at
// tgamma_libm_err.
1696.align 32
1697tgamma_singularity:
1698{ .mfi
1699 nop.m 0
1700 fmerge.s FR_X = f8,f8
1701 mov GR_TAG = 259 // negative
1702}
1703{ .mfb
1704 nop.m 0
1705 frcpa.s0 f8,p0 = f0,f0
1706 br.cond.sptk tgamma_libm_err
1707};;
1708// x is negative noninteger with big absolute value
1709//--------------------------------------------------------------------
// tgamma_underflow: produces a signed tiny result and returns directly
// (no call into the error handler).
//   - GR_Sig <- significand of FR_iXt; bit 0 selects the sign below
//     (the main path uses the same test with the comment
//     "p6 if sin<0, p7 if sin>0");
//   - f9 gets exponent field 1 via setf.exp, i.e. a very small magnitude;
//   - p6: f8 = f9*f9 - f9,  p7: f8 = f9*f9 + f9, rounded to double under s0
//     (presumably chosen so the rounding raises underflow/inexact - confirm).
1710.align 32
1711tgamma_underflow:
1712{ .mmi
1713 getf.sig GR_Sig = FR_iXt
1714 mov r11 = 0x00001
1715 nop.i 0
1716};;
1717{ .mfi
1718 setf.exp f9 = r11
1719 nop.f 0
1720 nop.i 0
1721};;
1722{ .mfi
1723 nop.m 0
1724 nop.f 0
1725 tbit.z p6,p7 = GR_Sig,0
1726};;
1727.pred.rel "mutex",p6,p7
1728{ .mfi
1729 nop.m 0
1730(p6) fms.d.s0 f8 = f9,f9,f9
1731 nop.i 0
1732}
1733{ .mfb
1734 nop.m 0
1735(p7) fma.d.s0 f8 = f9,f9,f9
1736 br.ret.sptk b0
1737};;
1738
1739// x for natval, nan, +/-inf or +/-0
1740//--------------------------------------------------------------------
// tgamma_spec: special-operand dispatch.
//   p6 (natval, nan, +inf): return f8*1 + f8 rounded to double under s0.
//   p7 (+/-0): GR_TAG = 259 and f8 = frcpa(1, x); control then falls
//       through into tgamma_libm_err, which follows this section.
//   p8 (not a zero; presumably only -inf is left at this point - confirm):
//       branch to tgamma_singularity.
// FR_X receives a copy of the original argument before f8 is overwritten.
1741.align 32
1742tgamma_spec:
1743{ .mfi
1744 nop.m 0
1745 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
1746 nop.i 0
1747};;
1748{ .mfi
1749 nop.m 0
1750 fclass.m p7,p8 = f8,0x7 // +/-0
1751 nop.i 0
1752};;
1753{ .mfi
1754 nop.m 0
1755 fmerge.s FR_X = f8,f8
1756 nop.i 0
1757}
1758{ .mfb
1759 nop.m 0
1760(p6) fma.d.s0 f8 = f8,f1,f8
1761(p6) br.ret.spnt b0
1762};;
1763.pred.rel "mutex",p7,p8
1764{ .mfi
1765(p7) mov GR_TAG = 259 // negative
1766(p7) frcpa.s0 f8,p0 = f1,f8
0347518d 1767 nop.i 0
d5efd131
MF
1768}
1769{ .mib
1770 nop.m 0
1771 nop.i 0
1772(p8) br.cond.spnt tgamma_singularity
1773};;
1774
// tgamma_libm_err: common entry to the error-reporting tail.
// Allocates a register frame (alloc operands: 1 input, 4 locals, 4 outputs,
// 0 rotating), saving the previous ar.pfs in r32, and copies the error tag
// chosen by the caller into the outgoing GR_Parameter_TAG register.
// Execution then runs on into __libm_error_region, which follows.
1775.align 32
1776tgamma_libm_err:
1777{ .mfi
1778 alloc r32 = ar.pfs,1,4,4,0
1779 nop.f 0
1780 mov GR_Parameter_TAG = GR_TAG
1781};;
1782
1783GLOBAL_LIBM_END(tgamma)
1784
// __libm_error_region: builds a 64-byte stack frame, spills the argument(s)
// and the provisional result to it, calls __libm_error_support with the
// three slot addresses (plus GR_Parameter_TAG set by the code before this
// region), then reloads the possibly updated result into f8 and returns.
//
// Frame layout relative to the new sp (sp_old - 64):
//   sp + 16 : FR_X       (Parameter 1)
//   sp + 32 : FR_Y       (Parameter 2; NOTE(review): tgamma takes a single
//                         argument, so FR_Y is presumably unused filler here)
//   sp + 48 : FR_RESULT  (Parameter 3, reloaded into f8 after the call)
// ar.pfs, gp and b0 are saved in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
// across the call and restored afterwards.
1785LOCAL_LIBM_ENTRY(__libm_error_region)
1786.prologue
1787{ .mfi
1788 add GR_Parameter_Y=-32,sp // Parameter 2 value
1789 nop.f 0
1790.save ar.pfs,GR_SAVE_PFS
0347518d 1791 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
d5efd131
MF
1792}
1793{ .mfi
0347518d 1794.fframe 64
d5efd131
MF
1795 add sp=-64,sp // Create new stack
1796 nop.f 0
1797 mov GR_SAVE_GP=gp // Save gp
1798};;
// The post-increment by 16 leaves GR_Parameter_Y pointing at the result
// slot (new sp + 48).
1799{ .mmi
1800 stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
1801 add GR_Parameter_X = 16,sp // Parameter 1 address
0347518d
MF
1802.save b0, GR_SAVE_B0
1803 mov GR_SAVE_B0=b0 // Save b0
d5efd131
MF
1804};;
1805.body
1806{ .mib
0347518d
MF
1807 stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
1808 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
d5efd131
MF
1809 nop.b 0
1810}
// After the result is stored, GR_Parameter_Y is moved back by 16 so it
// points at the Parameter 2 slot again for the call.
1811{ .mib
1812 stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
0347518d 1813 add GR_Parameter_Y = -16,GR_Parameter_Y
d5efd131
MF
1814 br.call.sptk b0=__libm_error_support# // Call error handling function
1815};;
// Recompute the result slot address from sp after the call.
1816{ .mmi
1817 nop.m 0
1818 nop.m 0
1819 add GR_Parameter_RESULT = 48,sp
1820};;
1821{ .mmi
1822 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
1823.restore sp
1824 add sp = 64,sp // Restore stack pointer
1825 mov b0 = GR_SAVE_B0 // Restore return address
1826};;
1827{ .mib
0347518d 1828 mov gp = GR_SAVE_GP // Restore gp
d5efd131
MF
1829 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
1830 br.ret.sptk b0 // Return
0347518d 1831};;
d5efd131
MF
1832
1833LOCAL_LIBM_END(__libm_error_region)
1834.type __libm_error_support#,@function
1835.global __libm_error_support#
1836