]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | .file "tgamma.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2001 - 2005, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
7 | // Contributed 2001 by the Intel Numerics Group, Intel Corporation | |
8 | // | |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | ||
0347518d MF |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT | |
d5efd131 | 26 | // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0347518d | 27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
d5efd131 MF |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, |
29 | // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, | |
0347518d MF |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
d5efd131 | 32 | // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING |
0347518d MF |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
34 | // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
d5efd131 | 36 | // Intel Corporation is the author of this code,and requests that all |
0347518d | 37 | // problem reports or change requests be submitted to it directly at |
d5efd131 MF |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. |
39 | // | |
40 | //********************************************************************* | |
41 | // | |
0347518d | 42 | // History: |
d5efd131 MF |
43 | // 10/12/01 Initial version |
44 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
45 | // 02/10/03 Reordered header: .section, .global, .proc, .align | |
46 | // 04/04/03 Changed error codes for overflow and negative integers | |
47 | // 04/10/03 Changed code for overflow near zero handling | |
48 | // 03/31/05 Reformatted delimiters between data tables | |
49 | // | |
50 | //********************************************************************* | |
51 | // | |
52 | //********************************************************************* | |
53 | // | |
54 | // Function: tgamma(x) computes the principle value of the GAMMA | |
55 | // function of x. | |
56 | // | |
57 | //********************************************************************* | |
58 | // | |
59 | // Resources Used: | |
60 | // | |
61 | // Floating-Point Registers: f8-f15 | |
62 | // f33-f87 | |
63 | // | |
64 | // General Purpose Registers: | |
65 | // r8-r11 | |
66 | // r14-r28 | |
67 | // r32-r36 | |
68 | // r37-r40 (Used to pass arguments to error handling routine) | |
69 | // | |
70 | // Predicate Registers: p6-p15 | |
71 | // | |
72 | //********************************************************************* | |
73 | // | |
74 | // IEEE Special Conditions: | |
75 | // | |
76 | // tgamma(+inf) = +inf | |
0347518d MF |
77 | // tgamma(-inf) = QNaN |
78 | // tgamma(+/-0) = +/-inf | |
d5efd131 MF |
79 | // tgamma(x<0, x - integer) = QNaN |
80 | // tgamma(SNaN) = QNaN | |
81 | // tgamma(QNaN) = QNaN | |
82 | // | |
83 | //********************************************************************* | |
84 | // | |
85 | // Overview | |
86 | // | |
87 | // The method consists of three cases. | |
0347518d | 88 | // |
d5efd131 MF |
89 | // If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular; |
90 | // else if 0 < x < 2 use case tgamma_from_0_to_2; | |
91 | // else if -(i+1) < x < -i, i = 0...184 use case tgamma_negatives; | |
92 | // | |
93 | // Case 2 <= x < OVERFLOW_BOUNDARY | |
94 | // ------------------------------- | |
95 | // Here we use algorithm based on the recursive formula | |
96 | // GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval | |
97 | // [2; OVERFLOW_BOUNDARY] into intervals [16*n; 16*(n+1)] and | |
98 | // approximate GAMMA(x) by polynomial of 22th degree on each | |
99 | // [16*n; 16*n+1], recursive formula is used to expand GAMMA(x) | |
100 | // to [16*n; 16*n+1]. In other words we need to find n, i and r | |
101 | // such that x = 16 * n + i + r where n and i are integer numbers | |
102 | // and r is fractional part of x. So GAMMA(x) = GAMMA(16*n+i+r) = | |
103 | // = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) = | |
104 | // = (x-1)*(x-2)*...*(x-i)*GAMMA(16*n+r) ~ | |
105 | // ~ (x-1)*(x-2)*...*(x-i)*P22n(r). | |
106 | // | |
107 | // Step 1: Reduction | |
108 | // ----------------- | |
109 | // N = [x] with truncate | |
110 | // r = x - N, note 0 <= r < 1 | |
111 | // | |
112 | // n = N & ~0xF - index of table that contains coefficient of | |
0347518d | 113 | // polynomial approximation |
d5efd131 | 114 | // i = N & 0xF - is used in recursive formula |
0347518d | 115 | // |
d5efd131 MF |
116 | // |
117 | // Step 2: Approximation | |
118 | // --------------------- | |
119 | // We use factorized minimax approximation polynomials | |
120 | // P22n(r) = A22*(r^2+C01(n)*R+C00(n))* | |
121 | // *(r^2+C11(n)*R+C10(n))*...*(r^2+CA1(n)*R+CA0(n)) | |
122 | // | |
123 | // Step 3: Recursion | |
124 | // ----------------- | |
125 | // In case when i > 0 we need to multiply P22n(r) by product | |
126 | // R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions | |
0347518d | 127 | // we can calculate R as follow: |
d5efd131 MF |
128 | // R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is |
129 | // even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))* | |
130 | // *(i-1) if i is odd. In both cases we need to calculate | |
131 | // R2(i) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) = | |
132 | // = (x^2-3*x+2)*(x^2-7*x+12)*...*((x^2+x)+2*j*(2*(j-1)+(1-2*x))) = | |
133 | // = (RA+2*(2-RB))*(RA+4*(4-RB))*...*(RA+2*j*(2*(j-1)+RB)) | |
134 | // where j = 1..[i/2], RA = x^2+x, RB = 1-2*x. | |
135 | // | |
136 | // Step 4: Reconstruction | |
137 | // ---------------------- | |
138 | // Reconstruction is just simple multiplication i.e. | |
139 | // GAMMA(x) = P22n(r)*R(i) | |
140 | // | |
141 | // Case 0 < x < 2 | |
142 | // -------------- | |
143 | // To calculate GAMMA(x) on this interval we do following | |
144 | // if 1 <= x < 1.25 than GAMMA(x) = P15(x-1) | |
145 | // if 1.25 <= x < 1.5 than GAMMA(x) = P15(x-x_min) where | |
146 | // x_min is point of local minimum on [1; 2] interval. | |
147 | // if 1.5 <= x < 2.0 than GAMMA(x) = P15(x-1.5) | |
0347518d | 148 | // and |
d5efd131 MF |
149 | // if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x |
150 | // | |
151 | // Case -(i+1) < x < -i, i = 0...184 | |
152 | // ---------------------------------- | |
153 | // Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and | |
154 | // so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of | |
155 | // GAMMA(x) is described above. | |
156 | // | |
157 | // Step 1: Reduction | |
158 | // ----------------- | |
0347518d MF |
159 | // Note that period of sin(PI*x) is 2 and range reduction for |
160 | // sin(PI*x) is like to range reduction for GAMMA(x) | |
d5efd131 MF |
161 | // i.e r = x - [x] with exception of cases |
162 | // when r > 0.5 (in such cases r = 1 - (x - [x])). | |
163 | // | |
164 | // Step 2: Approximation | |
165 | // --------------------- | |
0347518d | 166 | // To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI = |
d5efd131 MF |
167 | // = (-1)^n*sin(PI*r)/PI Taylor series is used. |
168 | // sin(PI*r)/PI ~ S21(r). | |
169 | // | |
170 | // Step 3: Division | |
171 | // ---------------- | |
172 | // To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction | |
173 | // with following Newton-Raphson interations. | |
0347518d | 174 | // |
d5efd131 MF |
175 | // |
176 | //********************************************************************* | |
177 | ||
178 | GR_Sig = r8 | |
179 | GR_TAG = r8 | |
180 | GR_ad_Data = r9 | |
181 | GR_SigRqLin = r10 | |
182 | GR_iSig = r11 | |
183 | GR_ExpOf1 = r11 | |
184 | GR_ExpOf8 = r11 | |
185 | ||
186 | ||
187 | GR_Sig2 = r14 | |
188 | GR_Addr_Mask1 = r15 | |
189 | GR_Sign_Exp = r16 | |
190 | GR_Tbl_Offs = r17 | |
191 | GR_Addr_Mask2 = r18 | |
192 | GR_ad_Co = r19 | |
193 | GR_Bit2 = r19 | |
194 | GR_ad_Ce = r20 | |
195 | GR_ad_Co7 = r21 | |
196 | GR_NzOvfBound = r21 | |
197 | GR_ad_Ce7 = r22 | |
198 | GR_Tbl_Ind = r23 | |
199 | GR_Tbl_16xInd = r24 | |
200 | GR_ExpOf025 = r24 | |
201 | GR_ExpOf05 = r25 | |
202 | GR_0x30033 = r26 | |
203 | GR_10 = r26 | |
204 | GR_12 = r27 | |
205 | GR_185 = r27 | |
206 | GR_14 = r28 | |
207 | GR_2 = r28 | |
208 | GR_fpsr = r28 | |
209 | ||
210 | GR_SAVE_B0 = r33 | |
211 | GR_SAVE_PFS = r34 | |
212 | GR_SAVE_GP = r35 | |
213 | GR_SAVE_SP = r36 | |
214 | ||
215 | GR_Parameter_X = r37 | |
216 | GR_Parameter_Y = r38 | |
217 | GR_Parameter_RESULT = r39 | |
218 | GR_Parameter_TAG = r40 | |
219 | ||
220 | ||
221 | ||
222 | FR_X = f10 | |
223 | FR_Y = f1 // tgamma is single argument function | |
224 | FR_RESULT = f8 | |
225 | ||
226 | FR_AbsX = f9 | |
227 | FR_NormX = f9 | |
228 | FR_r02 = f11 | |
229 | FR_AbsXp1 = f12 | |
230 | FR_X2pX = f13 | |
231 | FR_1m2X = f14 | |
232 | FR_Rq1 = f14 | |
233 | FR_Xt = f15 | |
234 | ||
235 | FR_r = f33 | |
236 | FR_OvfBound = f34 | |
237 | FR_Xmin = f35 | |
238 | FR_2 = f36 | |
239 | FR_Rcp1 = f36 | |
240 | FR_Rcp3 = f36 | |
241 | FR_4 = f37 | |
242 | FR_5 = f38 | |
243 | FR_6 = f39 | |
244 | FR_8 = f40 | |
245 | FR_10 = f41 | |
246 | FR_12 = f42 | |
247 | FR_14 = f43 | |
248 | FR_GAMMA = f43 | |
249 | FR_05 = f44 | |
250 | ||
251 | FR_Rq2 = f45 | |
252 | FR_Rq3 = f46 | |
253 | FR_Rq4 = f47 | |
254 | FR_Rq5 = f48 | |
255 | FR_Rq6 = f49 | |
256 | FR_Rq7 = f50 | |
257 | FR_RqLin = f51 | |
258 | ||
259 | FR_InvAn = f52 | |
260 | ||
261 | FR_C01 = f53 | |
262 | FR_A15 = f53 | |
263 | FR_C11 = f54 | |
264 | FR_A14 = f54 | |
265 | FR_C21 = f55 | |
266 | FR_A13 = f55 | |
267 | FR_C31 = f56 | |
268 | FR_A12 = f56 | |
269 | FR_C41 = f57 | |
270 | FR_A11 = f57 | |
271 | FR_C51 = f58 | |
272 | FR_A10 = f58 | |
273 | FR_C61 = f59 | |
274 | FR_A9 = f59 | |
275 | FR_C71 = f60 | |
276 | FR_A8 = f60 | |
277 | FR_C81 = f61 | |
278 | FR_A7 = f61 | |
279 | FR_C91 = f62 | |
280 | FR_A6 = f62 | |
281 | FR_CA1 = f63 | |
282 | FR_A5 = f63 | |
283 | FR_C00 = f64 | |
284 | FR_A4 = f64 | |
285 | FR_rs2 = f64 | |
286 | FR_C10 = f65 | |
287 | FR_A3 = f65 | |
288 | FR_rs3 = f65 | |
289 | FR_C20 = f66 | |
290 | FR_A2 = f66 | |
291 | FR_rs4 = f66 | |
292 | FR_C30 = f67 | |
293 | FR_A1 = f67 | |
294 | FR_rs7 = f67 | |
295 | FR_C40 = f68 | |
296 | FR_A0 = f68 | |
297 | FR_rs8 = f68 | |
298 | FR_C50 = f69 | |
299 | FR_r2 = f69 | |
300 | FR_C60 = f70 | |
301 | FR_r3 = f70 | |
302 | FR_C70 = f71 | |
303 | FR_r4 = f71 | |
304 | FR_C80 = f72 | |
305 | FR_r7 = f72 | |
306 | FR_C90 = f73 | |
307 | FR_r8 = f73 | |
308 | FR_CA0 = f74 | |
309 | FR_An = f75 | |
310 | ||
311 | FR_S21 = f76 | |
312 | FR_S19 = f77 | |
313 | FR_Rcp0 = f77 | |
314 | FR_Rcp2 = f77 | |
315 | FR_S17 = f78 | |
316 | FR_S15 = f79 | |
317 | FR_S13 = f80 | |
318 | FR_S11 = f81 | |
319 | FR_S9 = f82 | |
320 | FR_S7 = f83 | |
321 | FR_S5 = f84 | |
322 | FR_S3 = f85 | |
323 | ||
324 | FR_iXt = f86 | |
325 | FR_rs = f87 | |
326 | ||
327 | ||
328 | // Data tables | |
329 | //============================================================== | |
330 | RODATA | |
331 | .align 16 | |
332 | ||
333 | LOCAL_OBJECT_START(tgamma_data) | |
334 | data8 0x406573FAE561F648 // overflow boundary (171.624376956302739927196) | |
335 | data8 0x3FDD8B618D5AF8FE // point of local minium (0.461632144968362356785) | |
336 | // | |
337 | //[2; 3] | |
338 | data8 0xEF0E85C9AE40ABE2,0x00004000 // C01 | |
339 | data8 0xCA2049DDB4096DD8,0x00004000 // C11 | |
340 | data8 0x99A203B4DC2D1A8C,0x00004000 // C21 | |
341 | data8 0xBF5D9D9C0C295570,0x00003FFF // C31 | |
342 | data8 0xE8DD037DEB833BAB,0x00003FFD // C41 | |
343 | data8 0xB6AE39A2A36AA03A,0x0000BFFE // C51 | |
344 | data8 0x804960DC2850277B,0x0000C000 // C61 | |
345 | data8 0xD9F3973841C09F80,0x0000C000 // C71 | |
346 | data8 0x9C198A676F8A2239,0x0000C001 // C81 | |
347 | data8 0xC98B7DAE02BE3226,0x0000C001 // C91 | |
348 | data8 0xE9CAF31AC69301BA,0x0000C001 // CA1 | |
349 | data8 0xFBBDD58608A0D172,0x00004000 // C00 | |
350 | data8 0xFDD0316D1E078301,0x00004000 // C10 | |
351 | data8 0x8630B760468C15E4,0x00004001 // C20 | |
352 | data8 0x93EDE20E47D9152E,0x00004001 // C30 | |
353 | data8 0xA86F3A38C77D6B19,0x00004001 // C40 | |
354 | //[16; 17] | |
355 | data8 0xF87F757F365EE813,0x00004000 // C01 | |
356 | data8 0xECA84FBA92759DA4,0x00004000 // C11 | |
357 | data8 0xD4E0A55E07A8E913,0x00004000 // C21 | |
358 | data8 0xB0EB45E94C8A5F7B,0x00004000 // C31 | |
359 | data8 0x8050D6B4F7C8617D,0x00004000 // C41 | |
360 | data8 0x8471B111AA691E5A,0x00003FFF // C51 | |
361 | data8 0xADAF462AF96585C9,0x0000BFFC // C61 | |
362 | data8 0xD327C7A587A8C32B,0x0000BFFF // C71 | |
363 | data8 0xDEF5192B4CF5E0F1,0x0000C000 // C81 | |
364 | data8 0xBADD64BB205AEF02,0x0000C001 // C91 | |
365 | data8 0x9330A24AA67D6860,0x0000C002 // CA1 | |
366 | data8 0xF57EEAF36D8C47BE,0x00004000 // C00 | |
367 | data8 0x807092E12A251B38,0x00004001 // C10 | |
368 | data8 0x8C458F80DEE7ED1C,0x00004001 // C20 | |
369 | data8 0x9F30C731DC77F1A6,0x00004001 // C30 | |
370 | data8 0xBAC4E7E099C3A373,0x00004001 // C40 | |
371 | //[32; 33] | |
372 | data8 0xC3059A415F142DEF,0x00004000 // C01 | |
373 | data8 0xB9C1DAC24664587A,0x00004000 // C11 | |
374 | data8 0xA7101D910992FFB2,0x00004000 // C21 | |
375 | data8 0x8A9522B8E4AA0AB4,0x00004000 // C31 | |
376 | data8 0xC76A271E4BA95DCC,0x00003FFF // C41 | |
377 | data8 0xC5D6DE2A38DB7FF2,0x00003FFE // C51 | |
378 | data8 0xDBA42086997818B2,0x0000BFFC // C61 | |
379 | data8 0xB8EDDB1424C1C996,0x0000BFFF // C71 | |
380 | data8 0xBF7372FB45524B5D,0x0000C000 // C81 | |
381 | data8 0xA03DDE759131580A,0x0000C001 // C91 | |
382 | data8 0xFDA6FC4022C1FFE3,0x0000C001 // CA1 | |
383 | data8 0x9759ABF797B2533D,0x00004000 // C00 | |
384 | data8 0x9FA160C6CF18CEC5,0x00004000 // C10 | |
385 | data8 0xB0EFF1E3530E0FCD,0x00004000 // C20 | |
386 | data8 0xCCD60D5C470165D1,0x00004000 // C30 | |
387 | data8 0xF5E53F6307B0B1C1,0x00004000 // C40 | |
388 | //[48; 49] | |
389 | data8 0xAABE577FBCE37F5E,0x00004000 // C01 | |
390 | data8 0xA274CAEEB5DF7172,0x00004000 // C11 | |
391 | data8 0x91B90B6646C1B924,0x00004000 // C21 | |
392 | data8 0xF06718519CA256D9,0x00003FFF // C31 | |
393 | data8 0xAA9EE181C0E30263,0x00003FFF // C41 | |
394 | data8 0xA07BDB5325CB28D2,0x00003FFE // C51 | |
395 | data8 0x86C8B873204F9219,0x0000BFFD // C61 | |
396 | data8 0xB0192C5D3E4787D6,0x0000BFFF // C71 | |
397 | data8 0xB1E0A6263D4C19EF,0x0000C000 // C81 | |
398 | data8 0x93BA32A118EAC9AE,0x0000C001 // C91 | |
399 | data8 0xE942A39CD9BEE887,0x0000C001 // CA1 | |
400 | data8 0xE838B0957B0D3D0D,0x00003FFF // C00 | |
401 | data8 0xF60E0F00074FCF34,0x00003FFF // C10 | |
402 | data8 0x89869936AE00C2A5,0x00004000 // C20 | |
403 | data8 0xA0FE4E8AA611207F,0x00004000 // C30 | |
404 | data8 0xC3B1229CFF1DDAFE,0x00004000 // C40 | |
405 | //[64; 65] | |
406 | data8 0x9C00DDF75CDC6183,0x00004000 // C01 | |
407 | data8 0x9446AE9C0F6A833E,0x00004000 // C11 | |
408 | data8 0x84ABC5083310B774,0x00004000 // C21 | |
409 | data8 0xD9BA3A0977B1ED83,0x00003FFF // C31 | |
410 | data8 0x989B18C99411D300,0x00003FFF // C41 | |
411 | data8 0x886E66402318CE6F,0x00003FFE // C51 | |
412 | data8 0x99028C2468F18F38,0x0000BFFD // C61 | |
413 | data8 0xAB72D17DCD40CCE1,0x0000BFFF // C71 | |
414 | data8 0xA9D9AC9BE42C2EF9,0x0000C000 // C81 | |
415 | data8 0x8C11D983AA177AD2,0x0000C001 // C91 | |
416 | data8 0xDC779E981C1F0F06,0x0000C001 // CA1 | |
417 | data8 0xC1FD4AC85965E8D6,0x00003FFF // C00 | |
418 | data8 0xCE3D2D909D389EC2,0x00003FFF // C10 | |
419 | data8 0xE7F79980AD06F5D8,0x00003FFF // C20 | |
420 | data8 0x88DD9F73C8680B5D,0x00004000 // C30 | |
421 | data8 0xA7D6CB2CB2D46F9D,0x00004000 // C40 | |
422 | //[80; 81] | |
423 | data8 0x91C7FF4E993430D0,0x00004000 // C01 | |
424 | data8 0x8A6E7AB83E45A7E9,0x00004000 // C11 | |
425 | data8 0xF72D6382E427BEA9,0x00003FFF // C21 | |
426 | data8 0xC9E2E4F9B3B23ED6,0x00003FFF // C31 | |
427 | data8 0x8BEFEF56AE05D775,0x00003FFF // C41 | |
428 | data8 0xEE9666AB6A185560,0x00003FFD // C51 | |
429 | data8 0xA6AFAF5CEFAEE04D,0x0000BFFD // C61 | |
430 | data8 0xA877EAFEF1F9C880,0x0000BFFF // C71 | |
431 | data8 0xA45BD433048ECA15,0x0000C000 // C81 | |
432 | data8 0x86BD1636B774CC2E,0x0000C001 // C91 | |
433 | data8 0xD3721BE006E10823,0x0000C001 // CA1 | |
434 | data8 0xA97EFABA91854208,0x00003FFF // C00 | |
435 | data8 0xB4AF0AEBB3F97737,0x00003FFF // C10 | |
436 | data8 0xCC38241936851B0B,0x00003FFF // C20 | |
437 | data8 0xF282A6261006EA84,0x00003FFF // C30 | |
438 | data8 0x95B8E9DB1BD45BAF,0x00004000 // C40 | |
439 | //[96; 97] | |
440 | data8 0x8A1FA3171B35A106,0x00004000 // C01 | |
441 | data8 0x830D5B8843890F21,0x00004000 // C11 | |
442 | data8 0xE98B0F1616677A23,0x00003FFF // C21 | |
443 | data8 0xBDF8347F5F67D4EC,0x00003FFF // C31 | |
444 | data8 0x825F15DE34EC055D,0x00003FFF // C41 | |
445 | data8 0xD4846186B8AAC7BE,0x00003FFD // C51 | |
446 | data8 0xB161093AB14919B1,0x0000BFFD // C61 | |
447 | data8 0xA65758EEA4800EF4,0x0000BFFF // C71 | |
448 | data8 0xA046B67536FA329C,0x0000C000 // C81 | |
449 | data8 0x82BBEC1BCB9E9068,0x0000C001 // C91 | |
450 | data8 0xCC9DE2B23BA91B0B,0x0000C001 // CA1 | |
451 | data8 0x983B16148AF77F94,0x00003FFF // C00 | |
452 | data8 0xA2A4D8EE90FEE5DD,0x00003FFF // C10 | |
453 | data8 0xB89446FA37FF481C,0x00003FFF // C20 | |
454 | data8 0xDC5572648485FB01,0x00003FFF // C30 | |
455 | data8 0x88CD5D7DB976129A,0x00004000 // C40 | |
456 | //[112; 113] | |
457 | data8 0x8417098FD62AC5E3,0x00004000 // C01 | |
458 | data8 0xFA7896486B779CBB,0x00003FFF // C11 | |
459 | data8 0xDEC98B14AF5EEBD1,0x00003FFF // C21 | |
460 | data8 0xB48E153C6BF0B5A3,0x00003FFF // C31 | |
461 | data8 0xF597B038BC957582,0x00003FFE // C41 | |
462 | data8 0xBFC6F0884A415694,0x00003FFD // C51 | |
463 | data8 0xBA075A1392BDB5E5,0x0000BFFD // C61 | |
464 | data8 0xA4B79E01B44C7DB4,0x0000BFFF // C71 | |
465 | data8 0x9D12FA7711BFAB0F,0x0000C000 // C81 | |
466 | data8 0xFF24C47C8E108AB4,0x0000C000 // C91 | |
467 | data8 0xC7325EC86562606A,0x0000C001 // CA1 | |
468 | data8 0x8B47DCD9E1610938,0x00003FFF // C00 | |
469 | data8 0x9518B111B70F88B8,0x00003FFF // C10 | |
470 | data8 0xA9CC197206F68682,0x00003FFF // C20 | |
471 | data8 0xCB98294CC0D7A6A6,0x00003FFF // C30 | |
472 | data8 0xFE09493EA9165181,0x00003FFF // C40 | |
473 | //[128; 129] | |
474 | data8 0xFE53D03442270D90,0x00003FFF // C01 | |
475 | data8 0xF0F857BAEC1993E4,0x00003FFF // C11 | |
476 | data8 0xD5FF6D70DBBC2FD3,0x00003FFF // C21 | |
477 | data8 0xACDAA5F4988B1074,0x00003FFF // C31 | |
478 | data8 0xE92E069F8AD75B54,0x00003FFE // C41 | |
479 | data8 0xAEBB64645BD94234,0x00003FFD // C51 | |
480 | data8 0xC13746249F39B43C,0x0000BFFD // C61 | |
481 | data8 0xA36B74F5B6297A1F,0x0000BFFF // C71 | |
482 | data8 0x9A77860DF180F6E5,0x0000C000 // C81 | |
483 | data8 0xF9F8457D84410A0C,0x0000C000 // C91 | |
484 | data8 0xC2BF44C649EB8597,0x0000C001 // CA1 | |
485 | data8 0x81225E7489BCDC0E,0x00003FFF // C00 | |
486 | data8 0x8A788A09CE0EED11,0x00003FFF // C10 | |
487 | data8 0x9E2E6F86D1B1D89C,0x00003FFF // C20 | |
488 | data8 0xBE6866B21CF6CCB5,0x00003FFF // C30 | |
489 | data8 0xEE94426EC1486AAE,0x00003FFF // C40 | |
490 | //[144; 145] | |
491 | data8 0xF6113E09732A6497,0x00003FFF // C01 | |
492 | data8 0xE900D45931B04FC8,0x00003FFF // C11 | |
493 | data8 0xCE9FD58F745EBA5D,0x00003FFF // C21 | |
494 | data8 0xA663A9636C864C86,0x00003FFF // C31 | |
495 | data8 0xDEBF5315896CE629,0x00003FFE // C41 | |
496 | data8 0xA05FEA415EBD7737,0x00003FFD // C51 | |
497 | data8 0xC750F112BD9C4031,0x0000BFFD // C61 | |
498 | data8 0xA2593A35C51C6F6C,0x0000BFFF // C71 | |
499 | data8 0x9848E1DA7FB40C8C,0x0000C000 // C81 | |
500 | data8 0xF59FEE87A5759A4B,0x0000C000 // C91 | |
501 | data8 0xBF00203909E45A1D,0x0000C001 // CA1 | |
502 | data8 0xF1D8E157200127E5,0x00003FFE // C00 | |
503 | data8 0x81DD5397CB08D487,0x00003FFF // C10 | |
504 | data8 0x94C1DC271A8B766F,0x00003FFF // C20 | |
505 | data8 0xB3AFAF9B5D6EDDCF,0x00003FFF // C30 | |
506 | data8 0xE1FB4C57CA81BE1E,0x00003FFF // C40 | |
507 | //[160; 161] | |
508 | data8 0xEEFFE5122AC72FFD,0x00003FFF // C01 | |
509 | data8 0xE22F70BB52AD54B3,0x00003FFF // C11 | |
510 | data8 0xC84FF021FE993EEA,0x00003FFF // C21 | |
511 | data8 0xA0DA2208EB5B2752,0x00003FFF // C31 | |
512 | data8 0xD5CDD2FCF8AD2DF5,0x00003FFE // C41 | |
513 | data8 0x940BEC6DCD811A59,0x00003FFD // C51 | |
514 | data8 0xCC954EF4FD4EBB81,0x0000BFFD // C61 | |
515 | data8 0xA1712E29A8C04554,0x0000BFFF // C71 | |
516 | data8 0x966B55DFB243521A,0x0000C000 // C81 | |
517 | data8 0xF1E6A2B9CEDD0C4C,0x0000C000 // C91 | |
518 | data8 0xBBC87BCC031012DB,0x0000C001 // CA1 | |
519 | data8 0xE43974E6D2818583,0x00003FFE // C00 | |
520 | data8 0xF5702A516B64C5B7,0x00003FFE // C10 | |
521 | data8 0x8CEBCB1B32E19471,0x00003FFF // C20 | |
522 | data8 0xAAC10F05BB77E0AF,0x00003FFF // C30 | |
523 | data8 0xD776EFCAB205CC58,0x00003FFF // C40 | |
524 | //[176; 177] | |
525 | data8 0xE8DA614119811E5D,0x00003FFF // C01 | |
526 | data8 0xDC415E0288B223D8,0x00003FFF // C11 | |
527 | data8 0xC2D2243E44EC970E,0x00003FFF // C21 | |
528 | data8 0x9C086664B5307BEA,0x00003FFF // C31 | |
529 | data8 0xCE03D7A08B461156,0x00003FFE // C41 | |
530 | data8 0x894BE3BAAAB66ADC,0x00003FFD // C51 | |
531 | data8 0xD131EDD71A702D4D,0x0000BFFD // C61 | |
532 | data8 0xA0A907CDDBE10898,0x0000BFFF // C71 | |
533 | data8 0x94CC3CD9C765C808,0x0000C000 // C81 | |
534 | data8 0xEEA85F237815FC0D,0x0000C000 // C91 | |
535 | data8 0xB8FA04B023E43F91,0x0000C001 // CA1 | |
536 | data8 0xD8B2C7D9FCBD7EF9,0x00003FFE // C00 | |
537 | data8 0xE9566E93AAE7E38F,0x00003FFE // C10 | |
538 | data8 0x8646E78AABEF0255,0x00003FFF // C20 | |
539 | data8 0xA32AEDB62E304345,0x00003FFF // C30 | |
540 | data8 0xCE83E40280EE7DF0,0x00003FFF // C40 | |
541 | // | |
542 | //[2; 3] | |
543 | data8 0xC44FB47E90584083,0x00004001 // C50 | |
544 | data8 0xE863EE77E1C45981,0x00004001 // C60 | |
545 | data8 0x8AC15BE238B9D70E,0x00004002 // C70 | |
546 | data8 0xA5D94B6592350EF4,0x00004002 // C80 | |
547 | data8 0xC379DB3E20A148B3,0x00004002 // C90 | |
548 | data8 0xDACA49B73974F6C9,0x00004002 // CA0 | |
549 | data8 0x810E496A1AFEC895,0x00003FE1 // An | |
550 | //[16; 17] | |
551 | data8 0xE17C0357AAF3F817,0x00004001 // C50 | |
552 | data8 0x8BA8804750FBFBFE,0x00004002 // C60 | |
553 | data8 0xB18EAB3CB64BEBEE,0x00004002 // C70 | |
554 | data8 0xE90AB7015AF1C28F,0x00004002 // C80 | |
555 | data8 0xA0AB97CE9E259196,0x00004003 // C90 | |
556 | data8 0xF5E0E0A000C2D720,0x00004003 // CA0 | |
557 | data8 0xD97F0F87EC791954,0x00004005 // An | |
558 | //[32; 33] | |
559 | data8 0x980C293F3696040D,0x00004001 // C50 | |
560 | data8 0xC0DBFFBB948A9A4E,0x00004001 // C60 | |
561 | data8 0xFAB54625E9A588A2,0x00004001 // C70 | |
562 | data8 0xA7E08176D6050FBF,0x00004002 // C80 | |
563 | data8 0xEBAAEC4952270A9F,0x00004002 // C90 | |
564 | data8 0xB7479CDAD20550FE,0x00004003 // CA0 | |
565 | data8 0xAACD45931C3FF634,0x00004054 // An | |
566 | //[48; 49] | |
567 | data8 0xF5180F0000419AD5,0x00004000 // C50 | |
568 | data8 0x9D507D07BFBB2273,0x00004001 // C60 | |
569 | data8 0xCEB53F7A13A383E3,0x00004001 // C70 | |
570 | data8 0x8BAFEF9E0A49128F,0x00004002 // C80 | |
571 | data8 0xC58EF912D39E228C,0x00004002 // C90 | |
572 | data8 0x9A88118422BA208E,0x00004003 // CA0 | |
573 | data8 0xBD6C0E2477EC12CB,0x000040AC // An | |
574 | //[64; 65] | |
575 | data8 0xD410AC48BF7748DA,0x00004000 // C50 | |
576 | data8 0x89399B90AFEBD931,0x00004001 // C60 | |
577 | data8 0xB596DF8F77EB8560,0x00004001 // C70 | |
578 | data8 0xF6D9445A047FB4A6,0x00004001 // C80 | |
579 | data8 0xAF52F0DD65221357,0x00004002 // C90 | |
580 | data8 0x8989B45BFC881989,0x00004003 // CA0 | |
581 | data8 0xB7FCAE86E6E10D5A,0x0000410B // An | |
582 | //[80; 81] | |
583 | data8 0xBE759740E3B5AA84,0x00004000 // C50 | |
584 | data8 0xF8037B1B07D27609,0x00004000 // C60 | |
585 | data8 0xA4F6F6C7F0977D4F,0x00004001 // C70 | |
586 | data8 0xE131960233BF02C4,0x00004001 // C80 | |
587 | data8 0xA06DF43D3922BBE2,0x00004002 // C90 | |
588 | data8 0xFC266AB27255A360,0x00004002 // CA0 | |
589 | data8 0xD9F4B012EDAFEF2F,0x0000416F // An | |
590 | //[96; 97] | |
591 | data8 0xAEFC84CDA8E1EAA6,0x00004000 // C50 | |
592 | data8 0xE5009110DB5F3C8A,0x00004000 // C60 | |
593 | data8 0x98F5F48738E7B232,0x00004001 // C70 | |
594 | data8 0xD17EE64E21FFDC6B,0x00004001 // C80 | |
595 | data8 0x9596F7A7E36145CC,0x00004002 // C90 | |
596 | data8 0xEB64DBE50E125CAF,0x00004002 // CA0 | |
597 | data8 0xA090530D79E32D2E,0x000041D8 // An | |
598 | //[112; 113] | |
599 | data8 0xA33AEA22A16B2655,0x00004000 // C50 | |
600 | data8 0xD682B93BD7D7945C,0x00004000 // C60 | |
601 | data8 0x8FC854C6E6E30CC3,0x00004001 // C70 | |
602 | data8 0xC5754D828AFFDC7A,0x00004001 // C80 | |
603 | data8 0x8D41216B397139C2,0x00004002 // C90 | |
604 | data8 0xDE78D746848116E5,0x00004002 // CA0 | |
605 | data8 0xB8A297A2DC0630DB,0x00004244 // An | |
606 | //[128; 129] | |
607 | data8 0x99EB00F11D95E292,0x00004000 // C50 | |
608 | data8 0xCB005CB911EB779A,0x00004000 // C60 | |
609 | data8 0x8879AA2FDFF3A37A,0x00004001 // C70 | |
610 | data8 0xBBDA538AD40CAC2C,0x00004001 // C80 | |
611 | data8 0x8696D849D311B9DE,0x00004002 // C90 | |
612 | data8 0xD41E1C041481199F,0x00004002 // CA0 | |
613 | data8 0xEBA1A43D34EE61EE,0x000042B3 // An | |
614 | //[144; 145] | |
615 | data8 0x924F822578AA9F3D,0x00004000 // C50 | |
616 | data8 0xC193FAF9D3B36960,0x00004000 // C60 | |
617 | data8 0x827AE3A6B68ED0CA,0x00004001 // C70 | |
618 | data8 0xB3F52A27EED23F0B,0x00004001 // C80 | |
619 | data8 0x811A079FB3C94D79,0x00004002 // C90 | |
620 | data8 0xCB94415470B6F8D2,0x00004002 // CA0 | |
621 | data8 0x80A0260DCB3EC9AC,0x00004326 // An | |
622 | //[160; 161] | |
623 | data8 0x8BF24091E88B331D,0x00004000 // C50 | |
624 | data8 0xB9ADE01187E65201,0x00004000 // C60 | |
625 | data8 0xFAE4508F6E7625FE,0x00004000 // C70 | |
626 | data8 0xAD516668AD6D7367,0x00004001 // C80 | |
627 | data8 0xF8F5FF171154F637,0x00004001 // C90 | |
628 | data8 0xC461321268990C82,0x00004002 // CA0 | |
629 | data8 0xC3B693F344B0E6FE,0x0000439A // An | |
630 | // | |
631 | //[176; 177] | |
632 | data8 0x868545EB42A258ED,0x00004000 // C50 | |
633 | data8 0xB2EF04ACE8BA0E6E,0x00004000 // C60 | |
634 | data8 0xF247D22C22E69230,0x00004000 // C70 | |
635 | data8 0xA7A1AB93E3981A90,0x00004001 // C80 | |
636 | data8 0xF10951733E2C697F,0x00004001 // C90 | |
637 | data8 0xBE3359BFAD128322,0x00004002 // CA0 | |
638 | data8 0x8000000000000000,0x00003fff | |
639 | // | |
640 | //[160; 161] for negatives | |
641 | data8 0xA76DBD55B2E32D71,0x00003C63 // 1/An | |
642 | // | |
643 | // sin(pi*x)/pi | |
644 | data8 0xBCBC4342112F52A2,0x00003FDE // S21 | |
645 | data8 0xFAFCECB86536F655,0x0000BFE3 // S19 | |
646 | data8 0x87E4C97F9CF09B92,0x00003FE9 // S17 | |
647 | data8 0xEA124C68E704C5CB,0x0000BFED // S15 | |
648 | data8 0x9BA38CFD59C8AA1D,0x00003FF2 // S13 | |
649 | data8 0x99C0B552303D5B21,0x0000BFF6 // S11 | |
650 | // | |
651 | //[176; 177] for negatives | |
652 | data8 0xBA5D5869211696FF,0x00003BEC // 1/An | |
653 | // | |
654 | // sin(pi*x)/pi | |
655 | data8 0xD63402E79A853175,0x00003FF9 // S9 | |
656 | data8 0xC354723906DB36BA,0x0000BFFC // S7 | |
657 | data8 0xCFCE5A015E236291,0x00003FFE // S5 | |
658 | data8 0xD28D3312983E9918,0x0000BFFF // S3 | |
659 | // | |
660 | // | |
661 | // [1.0;1.25] | |
662 | data8 0xA405530B067ECD3C,0x0000BFFC // A15 | |
663 | data8 0xF5B5413F95E1C282,0x00003FFD // A14 | |
664 | data8 0xC4DED71C782F76C8,0x0000BFFE // A13 | |
665 | data8 0xECF7DDDFD27C9223,0x00003FFE // A12 | |
666 | data8 0xFB73D31793068463,0x0000BFFE // A11 | |
667 | data8 0xFF173B7E66FD1D61,0x00003FFE // A10 | |
668 | data8 0xFFA5EF3959089E94,0x0000BFFE // A9 | |
669 | data8 0xFF8153BD42E71A4F,0x00003FFE // A8 | |
670 | data8 0xFEF9CAEE2CB5B533,0x0000BFFE // A7 | |
671 | data8 0xFE3F02E5EDB6811E,0x00003FFE // A6 | |
672 | data8 0xFB64074CED2658FB,0x0000BFFE // A5 | |
673 | data8 0xFB52882A095B18A4,0x00003FFE // A4 | |
674 | data8 0xE8508C7990A0DAC0,0x0000BFFE // A3 | |
675 | data8 0xFD32C611D8A881D0,0x00003FFE // A2 | |
676 | data8 0x93C467E37DB0C536,0x0000BFFE // A1 | |
677 | data8 0x8000000000000000,0x00003FFF // A0 | |
678 | // | |
679 | // [1.25;1.5] | |
680 | data8 0xD038092400619677,0x0000BFF7 // A15 | |
681 | data8 0xEA6DE925E6EB8C8F,0x00003FF3 // A14 | |
682 | data8 0xC53F83645D4597FC,0x0000BFF7 // A13 | |
683 | data8 0xE366DB2FB27B7ECD,0x00003FF7 // A12 | |
684 | data8 0xAC8FD5E11F6EEAD8,0x0000BFF8 // A11 | |
685 | data8 0xFB14010FB3697785,0x00003FF8 // A10 | |
686 | data8 0xB6F91CB5C371177B,0x0000BFF9 // A9 | |
687 | data8 0x85A262C6F8FEEF71,0x00003FFA // A8 | |
688 | data8 0xC038E6E3261568F9,0x0000BFFA // A7 | |
689 | data8 0x8F4BDE8883232364,0x00003FFB // A6 | |
690 | data8 0xBCFBBD5786537E9A,0x0000BFFB // A5 | |
691 | data8 0xA4C08BAF0A559479,0x00003FFC // A4 | |
692 | data8 0x85D74FA063E81476,0x0000BFFC // A3 | |
693 | data8 0xDB629FB9BBDC1C4E,0x00003FFD // A2 | |
694 | data8 0xF4F8FBC7C0C9D317,0x00003FC6 // A1 | |
695 | data8 0xE2B6E4153A57746C,0x00003FFE // A0 | |
696 | // | |
697 | // [1.25;1.5] | |
698 | data8 0x9533F9D3723B448C,0x0000BFF2 // A15 | |
699 | data8 0xF1F75D3C561CBBAF,0x00003FF5 // A14 | |
700 | data8 0xBA55A9A1FC883523,0x0000BFF8 // A13 | |
701 | data8 0xB5D5E9E5104FA995,0x00003FFA // A12 | |
702 | data8 0xFD84F35B70CD9AE2,0x0000BFFB // A11 | |
703 | data8 0x87445235F4688CC5,0x00003FFD // A10 | |
704 | data8 0xE7F236EBFB9F774E,0x0000BFFD // A9 | |
705 | data8 0xA6605F2721F787CE,0x00003FFE // A8 | |
706 | data8 0xCF579312AD7EAD72,0x0000BFFE // A7 | |
707 | data8 0xE96254A2407A5EAC,0x00003FFE // A6 | |
708 | data8 0xF41312A8572ED346,0x0000BFFE // A5 | |
709 | data8 0xF9535027C1B1F795,0x00003FFE // A4 | |
710 | data8 0xE7E82D0C613A8DE4,0x0000BFFE // A3 | |
711 | data8 0xFD23CD9741B460B8,0x00003FFE // A2 | |
712 | data8 0x93C30FD9781DBA88,0x0000BFFE // A1 | |
713 | data8 0xFFFFF1781FDBEE84,0x00003FFE // A0 | |
714 | LOCAL_OBJECT_END(tgamma_data) | |
715 | ||
716 | ||
717 | //============================================================== | |
718 | // Code | |
719 | //============================================================== | |
720 | ||
721 | .section .text | |
722 | GLOBAL_LIBM_ENTRY(tgamma) | |
723 | { .mfi | |
724 | getf.exp GR_Sign_Exp = f8 | |
725 | fma.s1 FR_1m2X = f8,f1,f8 // 2x | |
726 | addl GR_ad_Data = @ltoff(tgamma_data), gp | |
727 | } | |
728 | { .mfi | |
729 | mov GR_ExpOf8 = 0x10002 // 8 | |
730 | fcvt.fx.trunc.s1 FR_iXt = f8 // [x] | |
731 | mov GR_ExpOf05 = 0xFFFE // 0.5 | |
732 | };; | |
733 | { .mfi | |
734 | getf.sig GR_Sig = f8 | |
735 | fma.s1 FR_2 = f1,f1,f1 // 2 | |
736 | mov GR_Addr_Mask1 = 0x780 | |
737 | } | |
738 | { .mlx | |
739 | setf.exp FR_8 = GR_ExpOf8 | |
740 | movl GR_10 = 0x4024000000000000 | |
741 | };; | |
742 | { .mfi | |
743 | ld8 GR_ad_Data = [GR_ad_Data] | |
744 | fcmp.lt.s1 p14,p15 = f8,f0 | |
745 | tbit.z p12,p13 = GR_Sign_Exp,0x10 // p13 if x >= 2 | |
746 | } | |
747 | { .mlx | |
748 | and GR_Bit2 = 4,GR_Sign_Exp | |
749 | movl GR_12 = 0x4028000000000000 | |
750 | };; | |
751 | { .mfi | |
752 | setf.d FR_10 = GR_10 | |
753 | fma.s1 FR_r02 = f8,f1,f0 | |
754 | extr.u GR_Tbl_Offs = GR_Sig,58,6 | |
755 | } | |
756 | { .mfi | |
757 | (p12) mov GR_Addr_Mask1 = r0 | |
758 | fma.s1 FR_NormX = f8,f1,f0 | |
759 | cmp.ne p8,p0 = GR_Bit2,r0 | |
760 | };; | |
761 | { .mfi | |
762 | (p8) shladd GR_Tbl_Offs = GR_Tbl_Offs,4,r0 | |
763 | fclass.m p10,p0 = f8,0x1E7 // Test x for NaTVal, NaN, +/-0, +/-INF | |
764 | tbit.nz p11,p0 = GR_Sign_Exp,1 | |
765 | } | |
766 | { .mlx | |
767 | add GR_Addr_Mask2 = GR_Addr_Mask1,GR_Addr_Mask1 | |
768 | movl GR_14 = 0x402C000000000000 | |
769 | };; | |
770 | .pred.rel "mutex",p14,p15 | |
771 | { .mfi | |
772 | setf.d FR_12 = GR_12 | |
773 | (p14) fma.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x| | |
774 | tbit.nz p8,p9 = GR_Sign_Exp,0 | |
775 | } | |
776 | { .mfi | |
777 | ldfpd FR_OvfBound,FR_Xmin = [GR_ad_Data],16 | |
778 | (p15) fms.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x| | |
779 | (p11) shladd GR_Tbl_Offs = GR_Tbl_Offs,2,r0 | |
780 | };; | |
781 | .pred.rel "mutex",p9,p8 | |
782 | { .mfi | |
783 | setf.d FR_14 = GR_14 | |
784 | fma.s1 FR_4 = FR_2,FR_2,f0 | |
785 | (p8) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask1 | |
786 | } | |
787 | { .mfi | |
788 | setf.exp FR_05 = GR_ExpOf05 | |
789 | fma.s1 FR_6 = FR_2,FR_2,FR_2 | |
790 | (p9) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask2 | |
791 | };; | |
792 | .pred.rel "mutex",p9,p8 | |
793 | { .mfi | |
794 | (p8) shladd GR_ad_Co = GR_Tbl_Offs,1,GR_ad_Data | |
795 | fcvt.xf FR_Xt = FR_iXt // [x] | |
796 | (p15) tbit.z.unc p11,p0 = GR_Sign_Exp,0x10 // p11 if 0 < x < 2 | |
797 | } | |
798 | { .mfi | |
799 | (p9) add GR_ad_Co = GR_ad_Data,GR_Tbl_Offs | |
800 | fma.s1 FR_5 = FR_2,FR_2,f1 | |
801 | (p15) cmp.lt.unc p7,p6 = GR_ExpOf05,GR_Sign_Exp // p7 if 0 < x < 1 | |
802 | };; | |
803 | { .mfi | |
804 | add GR_ad_Ce = 16,GR_ad_Co | |
805 | (p11) frcpa.s1 FR_Rcp0,p0 = f1,f8 | |
806 | sub GR_Tbl_Offs = GR_ad_Co,GR_ad_Data | |
807 | } | |
808 | { .mfb | |
809 | ldfe FR_C01 = [GR_ad_Co],32 | |
0347518d | 810 | (p7) fms.s1 FR_r02 = FR_r02,f1,f1 |
d5efd131 MF |
811 | // jump if x is NaTVal, NaN, +/-0, +/-INF |
812 | (p10) br.cond.spnt tgamma_spec | |
813 | };; | |
814 | .pred.rel "mutex",p14,p15 | |
815 | { .mfi | |
816 | ldfe FR_C11 = [GR_ad_Ce],32 | |
817 | (p14) fms.s1 FR_X2pX = f8,f8,f8 // RA=x^2+|x| | |
818 | shr GR_Tbl_Ind = GR_Tbl_Offs,8 | |
819 | } | |
820 | { .mfb | |
821 | ldfe FR_C21 = [GR_ad_Co],32 | |
822 | (p15) fma.s1 FR_X2pX = f8,f8,f8 // RA=x^2+x | |
823 | // jump if 0 < x < 2 | |
824 | (p11) br.cond.spnt tgamma_from_0_to_2 | |
825 | };; | |
826 | { .mfi | |
827 | ldfe FR_C31 = [GR_ad_Ce],32 | |
828 | fma.s1 FR_Rq2 = FR_2,f1,FR_1m2X // 2 + B | |
829 | cmp.ltu p7,p0=0xB,GR_Tbl_Ind | |
830 | } | |
831 | { .mfb | |
832 | ldfe FR_C41 = [GR_ad_Co],32 | |
833 | fma.s1 FR_Rq3 = FR_2,FR_2,FR_1m2X // 4 + B | |
834 | // jump if GR_Tbl_Ind > 11, i.e |x| is more than 192 | |
835 | (p7) br.cond.spnt tgamma_spec_res | |
836 | };; | |
837 | { .mfi | |
838 | ldfe FR_C51 = [GR_ad_Ce],32 | |
839 | fma.s1 FR_Rq4 = FR_6,f1,FR_1m2X // 6 + B | |
840 | shr GR_Tbl_Offs = GR_Tbl_Offs,1 | |
841 | } | |
842 | { .mfi | |
843 | ldfe FR_C61 = [GR_ad_Co],32 | |
844 | fma.s1 FR_Rq5 = FR_4,FR_2,FR_1m2X // 8 + B | |
845 | nop.i 0 | |
846 | };; | |
847 | { .mfi | |
848 | ldfe FR_C71 = [GR_ad_Ce],32 | |
849 | (p14) fms.s1 FR_r = FR_Xt,f1,f8 // r = |x| - [|x|] | |
850 | shr GR_Tbl_16xInd = GR_Tbl_Offs,3 | |
851 | } | |
852 | { .mfi | |
853 | ldfe FR_C81 = [GR_ad_Co],32 | |
854 | (p15) fms.s1 FR_r = f8,f1,FR_Xt // r = x - [x] | |
855 | add GR_ad_Data = 0xC00,GR_ad_Data | |
856 | };; | |
857 | { .mfi | |
858 | ldfe FR_C91 = [GR_ad_Ce],32 | |
859 | fma.s1 FR_Rq6 = FR_5,FR_2,FR_1m2X // 10 + B | |
860 | (p14) mov GR_0x30033 = 0x30033 | |
861 | } | |
862 | { .mfi | |
863 | ldfe FR_CA1 = [GR_ad_Co],32 | |
864 | fma.s1 FR_Rq7 = FR_6,FR_2,FR_1m2X // 12 + B | |
865 | sub GR_Tbl_Offs = GR_Tbl_Offs,GR_Tbl_16xInd | |
866 | };; | |
867 | { .mfi | |
868 | ldfe FR_C00 = [GR_ad_Ce],32 | |
869 | fma.s1 FR_Rq1 = FR_Rq1,FR_2,FR_X2pX // (x-1)*(x-2) | |
870 | (p13) cmp.eq.unc p8,p0 = r0,GR_Tbl_16xInd // index is 0 i.e. arg from [2;16) | |
871 | } | |
872 | { .mfi | |
873 | ldfe FR_C10 = [GR_ad_Co],32 | |
874 | (p14) fms.s1 FR_AbsX = f0,f0,FR_NormX // absolute value of argument | |
875 | add GR_ad_Co7 = GR_ad_Data,GR_Tbl_Offs | |
876 | };; | |
877 | { .mfi | |
878 | ldfe FR_C20 = [GR_ad_Ce],32 | |
879 | fma.s1 FR_Rq2 = FR_Rq2,FR_4,FR_X2pX // (x-3)*(x-4) | |
880 | add GR_ad_Ce7 = 16,GR_ad_Co7 | |
881 | } | |
882 | { .mfi | |
883 | ldfe FR_C30 = [GR_ad_Co],32 | |
884 | fma.s1 FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6) | |
0347518d | 885 | nop.i 0 |
d5efd131 MF |
886 | };; |
887 | { .mfi | |
888 | ldfe FR_C40 = [GR_ad_Ce],32 | |
889 | fma.s1 FR_Rq4 = FR_Rq4,FR_8,FR_X2pX // (x-7)*(x-8) | |
890 | (p14) cmp.leu.unc p7,p0 = GR_0x30033,GR_Sign_Exp | |
891 | } | |
892 | { .mfb | |
893 | ldfe FR_C50 = [GR_ad_Co7],32 | |
894 | fma.s1 FR_Rq5 = FR_Rq5,FR_10,FR_X2pX // (x-9)*(x-10) | |
895 | // jump if x is less or equal to -2^52, i.e. x is big negative integer | |
896 | (p7) br.cond.spnt tgamma_singularity | |
897 | };; | |
898 | { .mfi | |
899 | ldfe FR_C60 = [GR_ad_Ce7],32 | |
900 | fma.s1 FR_C01 = FR_C01,f1,FR_r | |
901 | add GR_ad_Ce = 0x560,GR_ad_Data | |
902 | } | |
903 | { .mfi | |
904 | ldfe FR_C70 = [GR_ad_Co7],32 | |
0347518d | 905 | fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x) |
d5efd131 MF |
906 | add GR_ad_Co = 0x550,GR_ad_Data |
907 | };; | |
908 | { .mfi | |
909 | ldfe FR_C80 = [GR_ad_Ce7],32 | |
910 | fma.s1 FR_C11 = FR_C11,f1,FR_r | |
911 | nop.i 0 | |
912 | } | |
913 | { .mfi | |
914 | ldfe FR_C90 = [GR_ad_Co7],32 | |
915 | fma.s1 FR_C21 = FR_C21,f1,FR_r | |
916 | nop.i 0 | |
917 | };; | |
918 | .pred.rel "mutex",p12,p13 | |
919 | { .mfi | |
920 | (p13) getf.sig GR_iSig = FR_iXt | |
921 | fcmp.lt.s1 p11,p0 = FR_05,FR_r | |
922 | mov GR_185 = 185 | |
923 | } | |
924 | { .mfi | |
925 | nop.m 0 | |
926 | fma.s1 FR_Rq6 = FR_Rq6,FR_12,FR_X2pX // (x-11)*(x-12) | |
927 | nop.i 0 | |
928 | };; | |
929 | { .mfi | |
930 | ldfe FR_CA0 = [GR_ad_Ce7],32 | |
931 | fma.s1 FR_C31 = FR_C31,f1,FR_r | |
932 | (p12) mov GR_iSig = 0 | |
933 | } | |
934 | { .mfi | |
935 | ldfe FR_An = [GR_ad_Co7],0x80 | |
936 | fma.s1 FR_C41 = FR_C41,f1,FR_r | |
937 | nop.i 0 | |
938 | };; | |
939 | { .mfi | |
940 | (p14) getf.sig GR_Sig = FR_r | |
941 | fma.s1 FR_C51 = FR_C51,f1,FR_r | |
942 | (p14) sub GR_iSig = r0,GR_iSig | |
943 | } | |
944 | { .mfi | |
945 | ldfe FR_S21 = [GR_ad_Co],32 | |
946 | fma.s1 FR_C61 = FR_C61,f1,FR_r | |
947 | nop.i 0 | |
948 | };; | |
949 | { .mfi | |
950 | ldfe FR_S19 = [GR_ad_Ce],32 | |
951 | fma.s1 FR_C71 = FR_C71,f1,FR_r | |
952 | and GR_SigRqLin = 0xF,GR_iSig | |
953 | } | |
954 | { .mfi | |
955 | ldfe FR_S17 = [GR_ad_Co],32 | |
956 | fma.s1 FR_C81 = FR_C81,f1,FR_r | |
957 | mov GR_2 = 2 | |
958 | };; | |
959 | { .mfi | |
960 | (p14) ldfe FR_InvAn = [GR_ad_Co7] | |
961 | fma.s1 FR_C91 = FR_C91,f1,FR_r | |
962 | // if significand of r is 0 tnan argument is negative integer | |
963 | (p14) cmp.eq.unc p12,p0 = r0,GR_Sig | |
964 | } | |
965 | { .mfb | |
966 | (p8) sub GR_SigRqLin = GR_SigRqLin,GR_2 // subtract 2 if 2 <= x < 16 | |
967 | fma.s1 FR_CA1 = FR_CA1,f1,FR_r | |
968 | // jump if x is negative integer such that -2^52 < x < -185 | |
969 | (p12) br.cond.spnt tgamma_singularity | |
970 | };; | |
971 | { .mfi | |
972 | setf.sig FR_Xt = GR_SigRqLin | |
973 | (p11) fms.s1 FR_rs = f1,f1,FR_r | |
974 | (p14) cmp.ltu.unc p7,p0 = GR_185,GR_iSig | |
975 | } | |
976 | { .mfb | |
977 | ldfe FR_S15 = [GR_ad_Ce],32 | |
978 | fma.s1 FR_Rq7 = FR_Rq7,FR_14,FR_X2pX // (x-13)*(x-14) | |
979 | // jump if x is noninteger such that -2^52 < x < -185 | |
980 | (p7) br.cond.spnt tgamma_underflow | |
981 | };; | |
982 | { .mfi | |
983 | ldfe FR_S13 = [GR_ad_Co],48 | |
984 | fma.s1 FR_C01 = FR_C01,FR_r,FR_C00 | |
985 | and GR_Sig2 = 0xE,GR_SigRqLin | |
986 | } | |
987 | { .mfi | |
988 | ldfe FR_S11 = [GR_ad_Ce],48 | |
989 | fma.s1 FR_C11 = FR_C11,FR_r,FR_C10 | |
990 | nop.i 0 | |
991 | };; | |
992 | { .mfi | |
993 | ldfe FR_S9 = [GR_ad_Co],32 | |
994 | fma.s1 FR_C21 = FR_C21,FR_r,FR_C20 | |
995 | // should we mul by polynomial of recursion? | |
996 | cmp.eq p13,p12 = r0,GR_SigRqLin | |
997 | } | |
998 | { .mfi | |
999 | ldfe FR_S7 = [GR_ad_Ce],32 | |
1000 | fma.s1 FR_C31 = FR_C31,FR_r,FR_C30 | |
1001 | nop.i 0 | |
1002 | };; | |
1003 | { .mfi | |
1004 | ldfe FR_S5 = [GR_ad_Co],32 | |
1005 | fma.s1 FR_C41 = FR_C41,FR_r,FR_C40 | |
1006 | nop.i 0 | |
1007 | } | |
1008 | { .mfi | |
1009 | ldfe FR_S3 = [GR_ad_Ce],32 | |
1010 | fma.s1 FR_C51 = FR_C51,FR_r,FR_C50 | |
1011 | nop.i 0 | |
1012 | };; | |
1013 | { .mfi | |
1014 | nop.m 0 | |
1015 | fma.s1 FR_C61 = FR_C61,FR_r,FR_C60 | |
1016 | nop.i 0 | |
1017 | } | |
1018 | { .mfi | |
1019 | nop.m 0 | |
1020 | fma.s1 FR_C71 = FR_C71,FR_r,FR_C70 | |
1021 | nop.i 0 | |
1022 | };; | |
1023 | { .mfi | |
1024 | nop.m 0 | |
1025 | fma.s1 FR_C81 = FR_C81,FR_r,FR_C80 | |
1026 | nop.i 0 | |
1027 | } | |
1028 | { .mfi | |
1029 | nop.m 0 | |
1030 | fma.s1 FR_C91 = FR_C91,FR_r,FR_C90 | |
1031 | nop.i 0 | |
1032 | };; | |
1033 | { .mfi | |
1034 | nop.m 0 | |
1035 | fma.s1 FR_CA1 = FR_CA1,FR_r,FR_CA0 | |
1036 | nop.i 0 | |
1037 | } | |
1038 | { .mfi | |
0347518d | 1039 | nop.m 0 |
d5efd131 MF |
1040 | fma.s1 FR_C01 = FR_C01,FR_C11,f0 |
1041 | nop.i 0 | |
1042 | };; | |
1043 | { .mfi | |
0347518d | 1044 | nop.m 0 |
d5efd131 MF |
1045 | fma.s1 FR_C21 = FR_C21,FR_C31,f0 |
1046 | nop.i 0 | |
1047 | } | |
1048 | { .mfi | |
1049 | nop.m 0 | |
1050 | fma.s1 FR_rs2 = FR_rs,FR_rs,f0 | |
1051 | (p12) cmp.lt.unc p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2? | |
1052 | };; | |
1053 | { .mfi | |
0347518d | 1054 | nop.m 0 |
d5efd131 | 1055 | fma.s1 FR_C41 = FR_C41,FR_C51,f0 |
0347518d | 1056 | nop.i 0 |
d5efd131 MF |
1057 | } |
1058 | { .mfi | |
1059 | nop.m 0 | |
1060 | (p7) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0 | |
1061 | (p12) cmp.lt.unc p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4? | |
1062 | };; | |
1063 | { .mfi | |
0347518d | 1064 | nop.m 0 |
d5efd131 MF |
1065 | fma.s1 FR_C61 = FR_C61,FR_C71,f0 |
1066 | (p15) cmp.eq p11,p0 = r0,r0 | |
1067 | } | |
1068 | { .mfi | |
1069 | nop.m 0 | |
1070 | (p9) fma.s1 FR_Rq3 = FR_Rq3,FR_Rq4,f0 | |
1071 | (p12) cmp.lt.unc p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6? | |
1072 | };; | |
1073 | { .mfi | |
0347518d | 1074 | nop.m 0 |
d5efd131 MF |
1075 | fma.s1 FR_C81 = FR_C81,FR_C91,f0 |
1076 | nop.i 0 | |
1077 | } | |
1078 | { .mfi | |
1079 | nop.m 0 | |
1080 | (p8) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq6,f0 | |
1081 | (p14) cmp.ltu p0,p11 = 0x9,GR_Tbl_Ind | |
1082 | };; | |
1083 | { .mfi | |
0347518d MF |
1084 | nop.m 0 |
1085 | fcvt.xf FR_RqLin = FR_Xt | |
d5efd131 MF |
1086 | nop.i 0 |
1087 | } | |
1088 | { .mfi | |
1089 | nop.m 0 | |
1090 | (p11) fma.s1 FR_CA1 = FR_CA1,FR_An,f0 | |
1091 | nop.i 0 | |
1092 | };; | |
1093 | { .mfi | |
1094 | nop.m 0 | |
1095 | fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S19 | |
1096 | nop.i 0 | |
1097 | } | |
1098 | { .mfi | |
1099 | nop.m 0 | |
1100 | fma.s1 FR_S17 = FR_S17,FR_rs2,FR_S15 | |
1101 | nop.i 0 | |
1102 | };; | |
1103 | { .mfi | |
0347518d | 1104 | nop.m 0 |
d5efd131 MF |
1105 | fma.s1 FR_C01 = FR_C01,FR_C21,f0 |
1106 | nop.i 0 | |
1107 | } | |
1108 | { .mfi | |
0347518d | 1109 | nop.m 0 |
d5efd131 MF |
1110 | fma.s1 FR_rs4 = FR_rs2,FR_rs2,f0 |
1111 | (p12) cmp.lt.unc p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3? | |
1112 | };; | |
1113 | { .mfi | |
1114 | nop.m 0 | |
1115 | (p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq3,f0 | |
1116 | nop.i 0 | |
1117 | } | |
1118 | { .mfi | |
1119 | nop.m 0 | |
1120 | fma.s1 FR_S13 = FR_S13,FR_rs2,FR_S11 | |
1121 | (p12) cmp.lt.unc p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7? | |
1122 | };; | |
1123 | { .mfi | |
0347518d | 1124 | nop.m 0 |
d5efd131 MF |
1125 | fma.s1 FR_C41 = FR_C41,FR_C61,f0 |
1126 | nop.i 0 | |
1127 | } | |
1128 | { .mfi | |
0347518d | 1129 | nop.m 0 |
d5efd131 | 1130 | (p9) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq7,f0 |
0347518d | 1131 | nop.i 0 |
d5efd131 MF |
1132 | };; |
1133 | { .mfi | |
0347518d | 1134 | nop.m 0 |
d5efd131 | 1135 | fma.s1 FR_C81 = FR_C81,FR_CA1,f0 |
0347518d | 1136 | nop.i 0 |
d5efd131 MF |
1137 | } |
1138 | { .mfi | |
1139 | nop.m 0 | |
1140 | fma.s1 FR_S9 = FR_S9,FR_rs2,FR_S7 | |
1141 | nop.i 0 | |
1142 | };; | |
1143 | { .mfi | |
1144 | nop.m 0 | |
1145 | fma.s1 FR_S5 = FR_S5,FR_rs2,FR_S3 | |
1146 | nop.i 0 | |
1147 | };; | |
1148 | { .mfi | |
1149 | nop.m 0 | |
1150 | fma.s1 FR_rs3 = FR_rs2,FR_rs,f0 | |
1151 | (p12) tbit.nz.unc p6,p0 = GR_SigRqLin,0 | |
1152 | } | |
1153 | { .mfi | |
1154 | nop.m 0 | |
1155 | fma.s1 FR_rs8 = FR_rs4,FR_rs4,f0 | |
1156 | nop.i 0 | |
1157 | };; | |
1158 | { .mfi | |
1159 | nop.m 0 | |
1160 | fma.s1 FR_S21 = FR_S21,FR_rs4,FR_S17 | |
1161 | mov GR_ExpOf1 = 0x2FFFF | |
1162 | } | |
1163 | { .mfi | |
0347518d | 1164 | nop.m 0 |
d5efd131 MF |
1165 | (p6) fms.s1 FR_RqLin = FR_AbsX,f1,FR_RqLin |
1166 | (p12) cmp.lt.unc p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5? | |
1167 | };; | |
1168 | { .mfi | |
0347518d | 1169 | nop.m 0 |
d5efd131 MF |
1170 | fma.s1 FR_C01 = FR_C01,FR_C41,f0 |
1171 | nop.i 0 | |
1172 | } | |
1173 | { .mfi | |
1174 | nop.m 0 | |
1175 | (p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq5,f0 | |
1176 | (p14) cmp.gtu.unc p7,p0 = GR_Sign_Exp,GR_ExpOf1 | |
1177 | };; | |
1178 | { .mfi | |
1179 | nop.m 0 | |
1180 | fma.s1 FR_S13 = FR_S13,FR_rs4,FR_S9 | |
1181 | nop.i 0 | |
1182 | } | |
1183 | { .mfi | |
1184 | nop.m 0 | |
1185 | (p7) fma.s1 FR_C81 = FR_C81,FR_AbsX,f0 | |
1186 | nop.i 0 | |
1187 | };; | |
1188 | { .mfi | |
1189 | nop.m 0 | |
1190 | (p14) fma.s1 FR_AbsXp1 = f1,f1,FR_AbsX // |x|+1 | |
1191 | nop.i 0 | |
1192 | } | |
1193 | { .mfi | |
1194 | nop.m 0 | |
0347518d | 1195 | (p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary |
d5efd131 MF |
1196 | nop.i 0 |
1197 | };; | |
1198 | { .mfi | |
1199 | nop.m 0 | |
1200 | fma.s1 FR_rs7 = FR_rs4,FR_rs3,f0 | |
1201 | nop.i 0 | |
1202 | } | |
1203 | { .mfi | |
1204 | nop.m 0 | |
1205 | fma.s1 FR_S5 = FR_S5,FR_rs3,FR_rs | |
1206 | nop.i 0 | |
1207 | };; | |
1208 | { .mib | |
1209 | (p14) cmp.lt p13,p0 = r0,r0 // set p13 to 0 if x < 0 | |
1210 | (p12) cmp.eq.unc p8,p9 = 1,GR_SigRqLin | |
1211 | (p10) br.cond.spnt tgamma_spec_res | |
1212 | };; | |
1213 | { .mfi | |
1214 | getf.sig GR_Sig = FR_iXt | |
1215 | (p6) fma.s1 FR_Rq1 = FR_Rq1,FR_RqLin,f0 | |
1216 | // should we mul by polynomial of recursion? | |
1217 | (p15) cmp.eq.unc p0,p11 = r0,GR_SigRqLin | |
1218 | } | |
1219 | { .mfb | |
0347518d | 1220 | nop.m 0 |
d5efd131 MF |
1221 | fma.s1 FR_GAMMA = FR_C01,FR_C81,f0 |
1222 | (p11) br.cond.spnt tgamma_positives | |
1223 | };; | |
1224 | { .mfi | |
1225 | nop.m 0 | |
1226 | fma.s1 FR_S21 = FR_S21,FR_rs8,FR_S13 | |
1227 | nop.i 0 | |
1228 | } | |
1229 | { .mfb | |
1230 | nop.m 0 | |
1231 | (p13) fma.d.s0 f8 = FR_C01,FR_C81,f0 | |
1232 | (p13) br.ret.spnt b0 | |
1233 | };; | |
1234 | .pred.rel "mutex",p8,p9 | |
1235 | { .mfi | |
0347518d | 1236 | nop.m 0 |
d5efd131 MF |
1237 | (p9) fma.s1 FR_GAMMA = FR_GAMMA,FR_Rq1,f0 |
1238 | tbit.z p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0 | |
1239 | } | |
1240 | { .mfi | |
0347518d | 1241 | nop.m 0 |
d5efd131 MF |
1242 | (p8) fma.s1 FR_GAMMA = FR_GAMMA,FR_RqLin,f0 |
1243 | nop.i 0 | |
1244 | };; | |
1245 | { .mfi | |
1246 | nop.m 0 | |
1247 | fma.s1 FR_S21 = FR_S21,FR_rs7,FR_S5 | |
1248 | nop.i 0 | |
1249 | };; | |
1250 | .pred.rel "mutex",p6,p7 | |
1251 | { .mfi | |
0347518d MF |
1252 | nop.m 0 |
1253 | (p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 | |
1254 | nop.i 0 | |
d5efd131 MF |
1255 | } |
1256 | { .mfi | |
0347518d | 1257 | nop.m 0 |
d5efd131 MF |
1258 | (p7) fma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 |
1259 | mov GR_Sig2 = 1 | |
1260 | };; | |
1261 | { .mfi | |
1262 | nop.m 0 | |
1263 | frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA | |
1264 | cmp.ltu p13,p0 = GR_Sign_Exp,GR_ExpOf1 | |
1265 | };; | |
1266 | // NR method: ineration #1 | |
1267 | { .mfi | |
1268 | (p13) getf.exp GR_Sign_Exp = FR_AbsX | |
1269 | fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1 // t = 1 - r0*x | |
1270 | (p13) shl GR_Sig2 = GR_Sig2,63 | |
1271 | };; | |
1272 | { .mfi | |
1273 | (p13) getf.sig GR_Sig = FR_AbsX | |
1274 | nop.f 0 | |
1275 | (p13) mov GR_NzOvfBound = 0xFBFF | |
1276 | };; | |
1277 | { .mfi | |
1278 | (p13) cmp.ltu.unc p8,p0 = GR_Sign_Exp,GR_NzOvfBound // p8 <- overflow | |
1279 | nop.f 0 | |
1280 | (p13) cmp.eq.unc p9,p0 = GR_Sign_Exp,GR_NzOvfBound | |
1281 | };; | |
1282 | { .mfb | |
1283 | nop.m 0 | |
1284 | (p13) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags | |
1285 | (p8) br.cond.spnt tgamma_ovf_near_0 //tgamma_neg_overflow | |
1286 | };; | |
1287 | { .mib | |
1288 | nop.m 0 | |
1289 | (p9) cmp.eq.unc p8,p0 = GR_Sig,GR_Sig2 | |
1290 | (p8) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_neg_overflow | |
1291 | };; | |
1292 | { .mfi | |
1293 | nop.m 0 | |
1294 | fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 | |
0347518d | 1295 | nop.i 0 |
d5efd131 MF |
1296 | };; |
1297 | // NR method: ineration #2 | |
1298 | { .mfi | |
1299 | nop.m 0 | |
1300 | fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1 // t = 1 - r1*x | |
1301 | nop.i 0 | |
1302 | };; | |
1303 | { .mfi | |
1304 | nop.m 0 | |
1305 | fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1 | |
1306 | nop.i 0 | |
1307 | };; | |
1308 | // NR method: ineration #3 | |
1309 | { .mfi | |
1310 | nop.m 0 | |
1311 | fnma.s1 FR_Rcp3 = FR_Rcp2,FR_GAMMA,f1 // t = 1 - r2*x | |
1312 | nop.i 0 | |
1313 | } | |
1314 | { .mfi | |
1315 | nop.m 0 | |
1316 | (p13) fma.s1 FR_Rcp2 = FR_Rcp2,FR_AbsXp1,f0 | |
1317 | (p14) cmp.ltu p10,p11 = 0x9,GR_Tbl_Ind | |
1318 | };; | |
1319 | .pred.rel "mutex",p10,p11 | |
1320 | { .mfi | |
1321 | nop.m 0 | |
1322 | (p10) fma.s1 FR_GAMMA = FR_Rcp2,FR_Rcp3,FR_Rcp2 | |
1323 | nop.i 0 | |
1324 | } | |
1325 | { .mfi | |
1326 | nop.m 0 | |
1327 | (p11) fma.d.s0 f8 = FR_Rcp2,FR_Rcp3,FR_Rcp2 | |
1328 | nop.i 0 | |
1329 | };; | |
1330 | { .mfb | |
1331 | nop.m 0 | |
1332 | (p10) fma.d.s0 f8 = FR_GAMMA,FR_InvAn,f0 | |
1333 | br.ret.sptk b0 | |
1334 | };; | |
1335 | ||
1336 | ||
1337 | // here if x >= 3 | |
1338 | //-------------------------------------------------------------------- | |
1339 | .align 32 | |
1340 | tgamma_positives: | |
1341 | .pred.rel "mutex",p8,p9 | |
1342 | { .mfi | |
0347518d | 1343 | nop.m 0 |
d5efd131 MF |
1344 | (p9) fma.d.s0 f8 = FR_GAMMA,FR_Rq1,f0 |
1345 | nop.i 0 | |
1346 | } | |
1347 | { .mfb | |
0347518d | 1348 | nop.m 0 |
d5efd131 MF |
1349 | (p8) fma.d.s0 f8 = FR_GAMMA,FR_RqLin,f0 |
1350 | br.ret.sptk b0 | |
1351 | };; | |
1352 | ||
1353 | // here if 0 < x < 1 | |
1354 | //-------------------------------------------------------------------- | |
1355 | .align 32 | |
1356 | tgamma_from_0_to_2: | |
1357 | { .mfi | |
1358 | getf.exp GR_Sign_Exp = FR_r02 | |
1359 | fms.s1 FR_r = FR_r02,f1,FR_Xmin | |
1360 | mov GR_ExpOf025 = 0xFFFD | |
1361 | } | |
1362 | { .mfi | |
1363 | add GR_ad_Co = 0x1200,GR_ad_Data | |
1364 | (p6) fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1 // t = 1 - r0*x | |
1365 | (p6) mov GR_Sig2 = 1 | |
1366 | };; | |
1367 | { .mfi | |
1368 | (p6) getf.sig GR_Sig = FR_NormX | |
0347518d | 1369 | nop.f 0 |
d5efd131 MF |
1370 | (p6) shl GR_Sig2 = GR_Sig2,63 |
1371 | } | |
1372 | { .mfi | |
1373 | add GR_ad_Ce = 0x1210,GR_ad_Data | |
1374 | nop.f 0 | |
1375 | (p6) mov GR_NzOvfBound = 0xFBFF | |
1376 | };; | |
1377 | { .mfi | |
0347518d | 1378 | cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2 |
d5efd131 | 1379 | nop.f 0 |
0347518d | 1380 | cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4 |
d5efd131 MF |
1381 | } |
1382 | { .mfi | |
1383 | (p6) cmp.ltu.unc p11,p0 = GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow | |
1384 | nop.f 0 | |
1385 | (p6) cmp.eq.unc p12,p0 = GR_Sign_Exp,GR_NzOvfBound | |
1386 | };; | |
1387 | .pred.rel "mutex",p8,p9 | |
1388 | { .mfi | |
1389 | (p8) add GR_ad_Co = 0x200,GR_ad_Co | |
1390 | (p6) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags | |
1391 | (p9) add GR_ad_Co = 0x100,GR_ad_Co | |
1392 | } | |
1393 | { .mib | |
1394 | (p8) add GR_ad_Ce = 0x200,GR_ad_Ce | |
1395 | (p9) add GR_ad_Ce = 0x100,GR_ad_Ce | |
1396 | (p11) br.cond.spnt tgamma_ovf_near_0 //tgamma_spec_res | |
1397 | };; | |
1398 | { .mfi | |
0347518d | 1399 | ldfe FR_A15 = [GR_ad_Co],32 |
d5efd131 MF |
1400 | nop.f 0 |
1401 | (p12) cmp.eq.unc p13,p0 = GR_Sig,GR_Sig2 | |
1402 | } | |
1403 | { .mfb | |
0347518d | 1404 | ldfe FR_A14 = [GR_ad_Ce],32 |
d5efd131 MF |
1405 | nop.f 0 |
1406 | (p13) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_spec_res | |
1407 | };; | |
1408 | { .mfi | |
0347518d | 1409 | ldfe FR_A13 = [GR_ad_Co],32 |
d5efd131 MF |
1410 | nop.f 0 |
1411 | nop.i 0 | |
1412 | } | |
1413 | { .mfi | |
0347518d | 1414 | ldfe FR_A12 = [GR_ad_Ce],32 |
d5efd131 MF |
1415 | nop.f 0 |
1416 | nop.i 0 | |
1417 | };; | |
1418 | .pred.rel "mutex",p9,p10 | |
1419 | { .mfi | |
0347518d MF |
1420 | ldfe FR_A11 = [GR_ad_Co],32 |
1421 | (p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0 | |
d5efd131 MF |
1422 | nop.i 0 |
1423 | } | |
1424 | { .mfi | |
0347518d MF |
1425 | ldfe FR_A10 = [GR_ad_Ce],32 |
1426 | (p9) fma.s1 FR_r2 = FR_r,FR_r,f0 | |
d5efd131 MF |
1427 | nop.i 0 |
1428 | };; | |
1429 | { .mfi | |
0347518d | 1430 | ldfe FR_A9 = [GR_ad_Co],32 |
d5efd131 MF |
1431 | (p6) fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 |
1432 | nop.i 0 | |
1433 | } | |
1434 | { .mfi | |
0347518d | 1435 | ldfe FR_A8 = [GR_ad_Ce],32 |
d5efd131 MF |
1436 | (p10) fma.s1 FR_r = f0,f0,FR_r02 |
1437 | nop.i 0 | |
1438 | };; | |
1439 | { .mfi | |
0347518d | 1440 | ldfe FR_A7 = [GR_ad_Co],32 |
d5efd131 MF |
1441 | nop.f 0 |
1442 | nop.i 0 | |
1443 | } | |
1444 | { .mfi | |
0347518d MF |
1445 | ldfe FR_A6 = [GR_ad_Ce],32 |
1446 | nop.f 0 | |
d5efd131 MF |
1447 | nop.i 0 |
1448 | };; | |
1449 | { .mfi | |
0347518d | 1450 | ldfe FR_A5 = [GR_ad_Co],32 |
d5efd131 MF |
1451 | nop.f 0 |
1452 | nop.i 0 | |
1453 | } | |
1454 | { .mfi | |
0347518d | 1455 | ldfe FR_A4 = [GR_ad_Ce],32 |
d5efd131 MF |
1456 | nop.f 0 |
1457 | nop.i 0 | |
1458 | };; | |
1459 | { .mfi | |
0347518d | 1460 | ldfe FR_A3 = [GR_ad_Co],32 |
d5efd131 MF |
1461 | nop.f 0 |
1462 | nop.i 0 | |
1463 | } | |
1464 | { .mfi | |
0347518d | 1465 | ldfe FR_A2 = [GR_ad_Ce],32 |
d5efd131 MF |
1466 | nop.f 0 |
1467 | nop.i 0 | |
1468 | };; | |
1469 | { .mfi | |
0347518d MF |
1470 | ldfe FR_A1 = [GR_ad_Co],32 |
1471 | fma.s1 FR_r4 = FR_r2,FR_r2,f0 | |
d5efd131 MF |
1472 | nop.i 0 |
1473 | } | |
1474 | { .mfi | |
0347518d | 1475 | ldfe FR_A0 = [GR_ad_Ce],32 |
d5efd131 MF |
1476 | nop.f 0 |
1477 | nop.i 0 | |
1478 | };; | |
1479 | { .mfi | |
1480 | nop.m 0 | |
1481 | (p6) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1 // t = 1 - r1*x | |
1482 | nop.i 0 | |
1483 | };; | |
1484 | { .mfi | |
1485 | nop.m 0 | |
1486 | fma.s1 FR_A15 = FR_A15,FR_r,FR_A14 | |
1487 | nop.i 0 | |
1488 | } | |
1489 | { .mfi | |
1490 | nop.m 0 | |
1491 | fma.s1 FR_A11 = FR_A11,FR_r,FR_A10 | |
1492 | nop.i 0 | |
1493 | };; | |
1494 | { .mfi | |
1495 | nop.m 0 | |
0347518d | 1496 | fma.s1 FR_r8 = FR_r4,FR_r4,f0 |
d5efd131 MF |
1497 | nop.i 0 |
1498 | };; | |
1499 | { .mfi | |
1500 | nop.m 0 | |
1501 | (p6) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1 | |
1502 | nop.i 0 | |
1503 | };; | |
1504 | { .mfi | |
1505 | nop.m 0 | |
1506 | fma.s1 FR_A7 = FR_A7,FR_r,FR_A6 | |
1507 | nop.i 0 | |
1508 | } | |
1509 | { .mfi | |
1510 | nop.m 0 | |
1511 | fma.s1 FR_A3 = FR_A3,FR_r,FR_A2 | |
1512 | nop.i 0 | |
1513 | };; | |
1514 | { .mfi | |
1515 | nop.m 0 | |
1516 | fma.s1 FR_A15 = FR_A15,FR_r,FR_A13 | |
1517 | nop.i 0 | |
1518 | } | |
1519 | { .mfi | |
1520 | nop.m 0 | |
1521 | fma.s1 FR_A11 = FR_A11,FR_r,FR_A9 | |
1522 | nop.i 0 | |
1523 | };; | |
1524 | { .mfi | |
1525 | nop.m 0 | |
1526 | (p6) fnma.s1 FR_Rcp3 = FR_Rcp2,FR_NormX,f1 // t = 1 - r1*x | |
1527 | nop.i 0 | |
1528 | };; | |
1529 | { .mfi | |
1530 | nop.m 0 | |
1531 | fma.s1 FR_A7 = FR_A7,FR_r,FR_A5 | |
1532 | nop.i 0 | |
1533 | } | |
1534 | { .mfi | |
1535 | nop.m 0 | |
1536 | fma.s1 FR_A3 = FR_A3,FR_r,FR_A1 | |
1537 | nop.i 0 | |
1538 | };; | |
1539 | { .mfi | |
1540 | nop.m 0 | |
1541 | fma.s1 FR_A15 = FR_A15,FR_r,FR_A12 | |
1542 | nop.i 0 | |
1543 | } | |
1544 | { .mfi | |
1545 | nop.m 0 | |
1546 | fma.s1 FR_A11 = FR_A11,FR_r,FR_A8 | |
1547 | nop.i 0 | |
1548 | };; | |
1549 | { .mfi | |
1550 | nop.m 0 | |
1551 | (p6) fma.s1 FR_Rcp3 = FR_Rcp2,FR_Rcp3,FR_Rcp2 | |
1552 | nop.i 0 | |
1553 | };; | |
1554 | { .mfi | |
1555 | nop.m 0 | |
1556 | fma.s1 FR_A7 = FR_A7,FR_r,FR_A4 | |
1557 | nop.i 0 | |
1558 | } | |
1559 | { .mfi | |
1560 | nop.m 0 | |
1561 | fma.s1 FR_A3 = FR_A3,FR_r,FR_A0 | |
1562 | nop.i 0 | |
1563 | };; | |
1564 | { .mfi | |
1565 | nop.m 0 | |
1566 | fma.s1 FR_A15 = FR_A15,FR_r4,FR_A11 | |
1567 | nop.i 0 | |
1568 | } | |
1569 | { .mfi | |
1570 | nop.m 0 | |
1571 | fma.s1 FR_A7 = FR_A7,FR_r4,FR_A3 | |
1572 | nop.i 0 | |
1573 | };; | |
1574 | .pred.rel "mutex",p6,p7 | |
1575 | { .mfi | |
0347518d | 1576 | nop.m 0 |
d5efd131 MF |
1577 | (p6) fma.s1 FR_A15 = FR_A15,FR_r8,FR_A7 |
1578 | nop.i 0 | |
1579 | } | |
1580 | { .mfi | |
0347518d | 1581 | nop.m 0 |
d5efd131 MF |
1582 | (p7) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7 |
1583 | nop.i 0 | |
1584 | };; | |
1585 | { .mfb | |
0347518d | 1586 | nop.m 0 |
d5efd131 MF |
1587 | (p6) fma.d.s0 f8 = FR_A15,FR_Rcp3,f0 |
1588 | br.ret.sptk b0 | |
1589 | };; | |
1590 | ||
1591 | // overflow | |
1592 | //-------------------------------------------------------------------- | |
1593 | .align 32 | |
1594 | tgamma_ovf_near_0_boundary: | |
1595 | .pred.rel "mutex",p14,p15 | |
1596 | { .mfi | |
1597 | mov GR_fpsr = ar.fpsr | |
1598 | nop.f 0 | |
1599 | (p15) mov r8 = 0x7ff | |
1600 | } | |
1601 | { .mfi | |
1602 | nop.m 0 | |
1603 | nop.f 0 | |
1604 | (p14) mov r8 = 0xfff | |
1605 | };; | |
1606 | { .mfi | |
1607 | nop.m 0 | |
1608 | nop.f 0 | |
0347518d | 1609 | shl r8 = r8,52 |
d5efd131 MF |
1610 | };; |
1611 | { .mfi | |
1612 | sub r8 = r8,r0,1 | |
1613 | nop.f 0 | |
1614 | extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode | |
1615 | };; | |
1616 | .pred.rel "mutex",p14,p15 | |
1617 | { .mfi | |
1618 | // set p8 to 0 in case of overflow and to 1 otherwise | |
0347518d | 1619 | // for negative arg: |
d5efd131 MF |
1620 | // no overflow if rounding mode either Z or +Inf, i.e. |
1621 | // GR_fpsr > 1 | |
1622 | (p14) cmp.lt p8,p0 = 1,GR_fpsr | |
1623 | nop.f 0 | |
0347518d | 1624 | // for positive arg: |
d5efd131 MF |
1625 | // no overflow if rounding mode either Z or -Inf, i.e. |
1626 | // (GR_fpsr & 1) == 0 | |
1627 | (p15) tbit.z p0,p8 = GR_fpsr,0 | |
1628 | };; | |
1629 | { .mib | |
1630 | (p8) setf.d f8 = r8 // set result to 0x7fefffffffffffff without | |
1631 | // OVERFLOW flag raising | |
1632 | nop.i 0 | |
1633 | (p8) br.ret.sptk b0 | |
1634 | };; | |
1635 | .align 32 | |
1636 | tgamma_ovf_near_0: | |
1637 | { .mfi | |
1638 | mov r8 = 0x1FFFE | |
0347518d | 1639 | nop.f 0 |
d5efd131 MF |
1640 | nop.i 0 |
1641 | };; | |
1642 | { .mfi | |
1643 | setf.exp f9 = r8 | |
1644 | fmerge.s FR_X = f8,f8 | |
1645 | mov GR_TAG = 258 // overflow | |
1646 | };; | |
1647 | .pred.rel "mutex",p14,p15 | |
1648 | { .mfi | |
0347518d | 1649 | nop.m 0 |
d5efd131 | 1650 | (p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result |
0347518d | 1651 | nop.i 0 |
d5efd131 MF |
1652 | } |
1653 | { .mfb | |
0347518d | 1654 | nop.m 0 |
d5efd131 MF |
1655 | (p14) fnma.d.s0 f8 = f9,f9,f0 // Set I,O and -INF result |
1656 | br.cond.sptk tgamma_libm_err | |
1657 | };; | |
1658 | // overflow or absolute value of x is too big | |
1659 | //-------------------------------------------------------------------- | |
1660 | .align 32 | |
1661 | tgamma_spec_res: | |
1662 | { .mfi | |
1663 | mov GR_0x30033 = 0x30033 | |
1664 | (p14) fcmp.eq.unc.s1 p10,p11 = f8,FR_Xt | |
1665 | (p15) mov r8 = 0x1FFFE | |
1666 | };; | |
1667 | { .mfi | |
1668 | (p15) setf.exp f9 = r8 | |
1669 | nop.f 0 | |
1670 | nop.i 0 | |
1671 | };; | |
1672 | { .mfb | |
1673 | (p11) cmp.ltu.unc p7,p8 = GR_0x30033,GR_Sign_Exp | |
0347518d | 1674 | nop.f 0 |
d5efd131 MF |
1675 | (p10) br.cond.spnt tgamma_singularity |
1676 | };; | |
1677 | .pred.rel "mutex",p7,p8 | |
1678 | { .mbb | |
1679 | nop.m 0 | |
1680 | (p7) br.cond.spnt tgamma_singularity | |
1681 | (p8) br.cond.spnt tgamma_underflow | |
1682 | };; | |
1683 | { .mfi | |
1684 | nop.m 0 | |
1685 | fmerge.s FR_X = f8,f8 | |
1686 | mov GR_TAG = 258 // overflow | |
1687 | } | |
1688 | { .mfb | |
0347518d | 1689 | nop.m 0 |
d5efd131 MF |
1690 | (p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result |
1691 | br.cond.sptk tgamma_libm_err | |
1692 | };; | |
1693 | ||
1694 | // x is negative integer or +/-0 | |
1695 | //-------------------------------------------------------------------- | |
1696 | .align 32 | |
1697 | tgamma_singularity: | |
1698 | { .mfi | |
1699 | nop.m 0 | |
1700 | fmerge.s FR_X = f8,f8 | |
1701 | mov GR_TAG = 259 // negative | |
1702 | } | |
1703 | { .mfb | |
1704 | nop.m 0 | |
1705 | frcpa.s0 f8,p0 = f0,f0 | |
1706 | br.cond.sptk tgamma_libm_err | |
1707 | };; | |
1708 | // x is negative noninteger with big absolute value | |
1709 | //-------------------------------------------------------------------- | |
1710 | .align 32 | |
1711 | tgamma_underflow: | |
1712 | { .mmi | |
1713 | getf.sig GR_Sig = FR_iXt | |
1714 | mov r11 = 0x00001 | |
1715 | nop.i 0 | |
1716 | };; | |
1717 | { .mfi | |
1718 | setf.exp f9 = r11 | |
1719 | nop.f 0 | |
1720 | nop.i 0 | |
1721 | };; | |
1722 | { .mfi | |
1723 | nop.m 0 | |
1724 | nop.f 0 | |
1725 | tbit.z p6,p7 = GR_Sig,0 | |
1726 | };; | |
1727 | .pred.rel "mutex",p6,p7 | |
1728 | { .mfi | |
1729 | nop.m 0 | |
1730 | (p6) fms.d.s0 f8 = f9,f9,f9 | |
1731 | nop.i 0 | |
1732 | } | |
1733 | { .mfb | |
1734 | nop.m 0 | |
1735 | (p7) fma.d.s0 f8 = f9,f9,f9 | |
1736 | br.ret.sptk b0 | |
1737 | };; | |
1738 | ||
1739 | // x for natval, nan, +/-inf or +/-0 | |
1740 | //-------------------------------------------------------------------- | |
1741 | .align 32 | |
1742 | tgamma_spec: | |
1743 | { .mfi | |
1744 | nop.m 0 | |
1745 | fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf | |
1746 | nop.i 0 | |
1747 | };; | |
1748 | { .mfi | |
1749 | nop.m 0 | |
1750 | fclass.m p7,p8 = f8,0x7 // +/-0 | |
1751 | nop.i 0 | |
1752 | };; | |
1753 | { .mfi | |
1754 | nop.m 0 | |
1755 | fmerge.s FR_X = f8,f8 | |
1756 | nop.i 0 | |
1757 | } | |
1758 | { .mfb | |
1759 | nop.m 0 | |
1760 | (p6) fma.d.s0 f8 = f8,f1,f8 | |
1761 | (p6) br.ret.spnt b0 | |
1762 | };; | |
1763 | .pred.rel "mutex",p7,p8 | |
1764 | { .mfi | |
1765 | (p7) mov GR_TAG = 259 // negative | |
1766 | (p7) frcpa.s0 f8,p0 = f1,f8 | |
0347518d | 1767 | nop.i 0 |
d5efd131 MF |
1768 | } |
1769 | { .mib | |
1770 | nop.m 0 | |
1771 | nop.i 0 | |
1772 | (p8) br.cond.spnt tgamma_singularity | |
1773 | };; | |
1774 | ||
1775 | .align 32 | |
1776 | tgamma_libm_err: | |
1777 | { .mfi | |
1778 | alloc r32 = ar.pfs,1,4,4,0 | |
1779 | nop.f 0 | |
1780 | mov GR_Parameter_TAG = GR_TAG | |
1781 | };; | |
1782 | ||
1783 | GLOBAL_LIBM_END(tgamma) | |
1784 | ||
1785 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
1786 | .prologue | |
1787 | { .mfi | |
1788 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
1789 | nop.f 0 | |
1790 | .save ar.pfs,GR_SAVE_PFS | |
0347518d | 1791 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs |
d5efd131 MF |
1792 | } |
1793 | { .mfi | |
0347518d | 1794 | .fframe 64 |
d5efd131 MF |
1795 | add sp=-64,sp // Create new stack |
1796 | nop.f 0 | |
1797 | mov GR_SAVE_GP=gp // Save gp | |
1798 | };; | |
1799 | { .mmi | |
1800 | stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack | |
1801 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
0347518d MF |
1802 | .save b0, GR_SAVE_B0 |
1803 | mov GR_SAVE_B0=b0 // Save b0 | |
d5efd131 MF |
1804 | };; |
1805 | .body | |
1806 | { .mib | |
0347518d MF |
1807 | stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack |
1808 | add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address | |
d5efd131 MF |
1809 | nop.b 0 |
1810 | } | |
1811 | { .mib | |
1812 | stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack | |
0347518d | 1813 | add GR_Parameter_Y = -16,GR_Parameter_Y |
d5efd131 MF |
1814 | br.call.sptk b0=__libm_error_support# // Call error handling function |
1815 | };; | |
1816 | { .mmi | |
1817 | nop.m 0 | |
1818 | nop.m 0 | |
1819 | add GR_Parameter_RESULT = 48,sp | |
1820 | };; | |
1821 | { .mmi | |
1822 | ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack | |
1823 | .restore sp | |
1824 | add sp = 64,sp // Restore stack pointer | |
1825 | mov b0 = GR_SAVE_B0 // Restore return address | |
1826 | };; | |
1827 | { .mib | |
0347518d | 1828 | mov gp = GR_SAVE_GP // Restore gp |
d5efd131 MF |
1829 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs |
1830 | br.ret.sptk b0 // Return | |
0347518d | 1831 | };; |
d5efd131 MF |
1832 | |
1833 | LOCAL_LIBM_END(__libm_error_region) | |
1834 | .type __libm_error_support#,@function | |
1835 | .global __libm_error_support# | |
1836 |