]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_atan2f.S
2.5-18.1
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_atan2f.S
CommitLineData
8da2915d
UD
1.file "atan2f.s"
2
0ecb606c
JJ
3
4// Copyright (c) 2000 - 2003, Intel Corporation
8da2915d
UD
5// All rights reserved.
6//
0ecb606c 7// Contributed 2000 by the Intel Numerics Group, Intel Corporation
8da2915d 8//
aeb25823
AJ
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
0ecb606c 23
8da2915d
UD
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Intel Corporation is the author of this code, and requests that all
37// problem reports or change requests be submitted to it directly at
0ecb606c 38// http://www.intel.com/software/products/opensource/libraries/num.htm.
8da2915d
UD
39
40// History
41//==============================================================
0ecb606c
JJ
42// 06/01/00 Initial version
43// 08/15/00 Bundle added after call to __libm_error_support to properly
8da2915d 44// set [the previously overwritten] GR_Parameter_RESULT.
0ecb606c 45// 08/17/00 Changed predicate register macro-usage to direct predicate
8da2915d 46// names due to an assembler bug.
0ecb606c
JJ
47// 01/05/01 Fixed flag settings for denormal input.
48// 01/19/01 Added documentation
49// 01/30/01 Improved speed
50// 02/06/02 Corrected .section statement
51// 05/20/02 Cleaned up namespace and sf0 syntax
52// 02/06/03 Reordered header: .section, .global, .proc, .align
8da2915d
UD
53
54// Description
55//=========================================
56// The atan2 function computes the principal value of the arc tangent of y/x using
57// the signs of both arguments to determine the quadrant of the return value.
58// A domain error may occur if both arguments are zero.
59
60// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
61
62//..
63//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
64//..v and u can be negative. We state the relationship between atan2(y,x) and
65//..atan(v/u).
66//..
67//..Let swap = false if v = y, and swap = true if v = x.
68//..Define C according to the matrix
69//..
70//.. TABLE FOR C
71//.. x +ve x -ve
72//.. no swap (swap = false) sgn(y)*0 sgn(y)*pi
73//.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2
74//..
75//.. atan2(y,x) = C + atan(v/u) if no swap
76//.. atan2(y,x) = C - atan(v/u) if swap
77//..
78//..This relationship is more efficient to compute as we accommodate signs in v and u
79//..saving the need to obtain the absolute value before computation can proceed.
80//..
81//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
82//..A = y * frcpa(x) (so A = (y/x)(1 - beta))
83//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
84//..a correction.
85//..atan(A) is approximated by a polynomial
86//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
87//..atan(G) is approximated as follows:
88//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
89//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
90//..
91//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
92//..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
93//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
94//..a correction.
95//..atan(Z) is approximated by a polynomial
96//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
97//..atan(T) is approximated as follows:
98//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
99//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
100//..
101//..
102//..A = y * frcpa(x)
103//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
104//..
105//..This polynomial is computed as follows:
106//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
107//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
108//..
109//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
110//..poly_A1 = poly_A2 + A4 * poly_A1
111//..poly_A1 = poly_A3 + A4 * poly_A1
112//..
113//..poly_A4 = p1 * A
114//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
115//..poly_A5 = p2 + Asq * poly_A5
116//..poly_A4 = poly_A4 + A5 * poly_A5
117//..
118//..atan_A = poly_A4 + A11 * poly_A1
119//..
120//..atan(G) is approximated as follows:
121//..G_numer = y - A*x, G_denom = x + A*y
122//..H1 = frcpa(G_denom)
123//..H_beta = 1 - H1 * G_denom
124//..H2 = H1 + H1 * H_beta
125//..H_beta2 = H_beta*H_beta
126//..H3 = H2 + H2*H_beta2
127//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
128//..atan_G = G_numer*H3 + atan_G
129//..
130//..
131//..A = y * frcpa(x)
132//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
133//..
134//..This polynomial is computed as follows:
135//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
136//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
137//..
138//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
139//..poly_A1 = poly_A2 + A4 * poly_A1
140//..poly_A1 = poly_A3 + A4 * poly_A1
141//..
142//..poly_A4 = p1 * A
143//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
144//..poly_A5 = p2 + Asq * poly_A5
145//..poly_A4 = poly_A4 + A5 * poly_A5
146//..
147//..atan_A = poly_A4 + A11 * poly_A1
148//..
149//..
150//..====================================================================
151//.. COEFFICIENTS USED IN THE COMPUTATION
152//..====================================================================
153
154//coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
155//
156// coef_p1 = -.3333332707155439167401311806315789E+00
157// coef_p1 in dbl = BFD5 5555 1219 1621
158//
159// coef_p2 = .1999967670926658391827857030875748E+00
160// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
161//
162// coef_p3 = -.1427989384500152360161563301087296E+00
163// coef_p3 in dbl = BFC2 473C 5145 EE38
164//
165// coef_p4 = .1105852823460720770079031213661163E+00
166// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
167//
168// coef_p5 = -.8811839915595312348625710228448363E-01
169// coef_p5 in dbl = BFB6 8EED 6A8C FA32
170//
171// coef_p6 = .6742329836955067042153645159059714E-01
172// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
173//
174// coef_p7 = -.4468571068774672908561591262231909E-01
175// coef_p7 in dbl = BFA6 E10B A401 393F
176//
177// coef_p8 = .2252333246746511135532726960586493E-01
178// coef_p8 in dbl = 3F97 105B 4160 F86B
179//
180// coef_p9 = -.7303884867007574742501716845542314E-02
181// coef_p9 in dbl = BF7D EAAD AA33 6451
182//
183// coef_p10 = .1109686868355312093949039454619058E-02
184// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
185//
186
187// Special values
188//==============================================================
189// Y x Result
190// +number +inf +0
191// -number +inf -0
192// +number -inf +pi
193// -number -inf -pi
194//
195// +inf +number +pi/2
196// -inf +number -pi/2
197// +inf -number +pi/2
198// -inf -number -pi/2
199//
200// +inf +inf +pi/4
201// -inf +inf -pi/4
202// +inf -inf +3pi/4
203// -inf -inf -3pi/4
204//
205// +1 +1 +pi/4
206// -1 +1 -pi/4
207// +1 -1 +3pi/4
208// -1 -1 -3pi/4
209//
210// +number +0 +pi/2 // does not raise DBZ
211// -number +0 -pi/2 // does not raise DBZ
212// +number -0 +pi/2 // does not raise DBZ
213// -number -0 -pi/2 // does not raise DBZ
214//
215// +0 +number +0
216// -0 +number -0
217// +0 -number +pi
218// -0 -number -pi
219//
220// +0 +0 +0 // does not raise invalid
221// -0 +0 -0 // does not raise invalid
222// +0 -0 +pi // does not raise invalid
223// -0 -0 -pi // does not raise invalid
224//
225// Nan anything quiet Y
226// anything NaN quiet X
227
228// atan2(+-0,+-0) sets double error tag to 37
229// atan2f(+-0,+-0) sets single error tag to 38
230// These are domain errors.
231
8da2915d
UD
232
233//
234// Assembly macros
235//=========================================
236
237
238// integer registers
// Symbolic register names used by atan2f and __libm_error_region.
// atan2f allocates its frame with "alloc r32 = ar.pfs,1,5,4,0" (1 input,
// 5 locals, 4 outputs), so r33-r37 are locals and r38-r41 are the output
// registers that carry the arguments passed to __libm_error_support.
239atan2f_GR_Addr_1 = r33
240atan2f_GR_Addr_2 = r34
241GR_SAVE_B0 = r35
242
243GR_SAVE_PFS = r36
244GR_SAVE_GP = r37
245
// Output registers: addresses of the stacked x/y/result slots and the error tag.
246GR_Parameter_X = r38
247GR_Parameter_Y = r39
248GR_Parameter_RESULT = r40
249GR_Parameter_TAG = r41
250
251// floating point registers
// Polynomial coefficients p1..p10, listed in the order the ldfpd pair loads
// fill them.
252atan2f_coef_p1 = f32
253atan2f_coef_p10 = f33
254atan2f_coef_p7 = f34
255atan2f_coef_p6 = f35
256
257atan2f_coef_p3 = f36
258atan2f_coef_p2 = f37
259atan2f_coef_p9 = f38
260atan2f_coef_p8 = f39
261atan2f_coef_p5 = f40
262
263atan2f_coef_p4 = f41
// Multiples of pi used to build the quadrant constant C and the special-case
// results.
264atan2f_const_piby2 = f42
265atan2f_const_pi = f43
266atan2f_const_piby4 = f44
267atan2f_const_3piby4 = f45
268
// Products of the inputs; const_1 is sgn(y)*0 or sgn(y)*1; sgn_Y is +-1.0 with
// the sign of y.
269atan2f_xsq = f46
270atan2f_ysq = f47
271atan2f_xy = f48
272atan2f_const_1 = f49
273atan2f_sgn_Y = f50
274
// Z0 ~ 1/y, A0 ~ 1/x (frcpa results); Z = x*Z0, A = y*A0; C is the quadrant
// constant.
275atan2f_Z0 = f51
276atan2f_A0 = f52
277atan2f_Z = f53
278atan2f_A = f54
279atan2f_C = f55
280
// U is the polynomial argument selected from A or -Z, followed by its even
// powers.
281atan2f_U = f56
282atan2f_Usq = f57
283atan2f_U4 = f58
284atan2f_U6 = f59
285atan2f_U8 = f60
286
// Partial sums of the atan polynomial; the suffix names the coefficients
// covered.
287atan2f_poly_u109 = f61
288atan2f_poly_u87 = f62
289atan2f_poly_u65 = f63
290atan2f_poly_u43 = f64
291atan2f_poly_u21 = f65
292
293atan2f_poly_u10to7 = f66
294atan2f_poly_u6to3 = f67
295atan2f_poly_u10to3 = f68
296atan2f_poly_u10to0 = f69
297atan2f_poly_u210 = f70
298
// Numerator/denominator of the correction term for the swapped (T) and
// unswapped (G) cases; p1rnum is the selected numerator times p1.
299atan2f_T_numer = f71
300atan2f_T_denom = f72
301atan2f_G_numer = f73
302atan2f_G_denom = f74
303atan2f_p1rnum = f75
304
// R_* hold whichever of T_*/G_* applies; pR, pRC and pQRC accumulate the
// correction term plus C.
305atan2f_R_denom = f76
306atan2f_R_numer = f77
307atan2f_pR = f78
308atan2f_pRC = f79
309atan2f_pQRC = f80
310
// Refinement of the reciprocal of R_denom: Q1 (frcpa) -> Q2 -> Q3.
311atan2f_Q1 = f81
312atan2f_Q_beta = f82
313atan2f_Q2 = f83
314atan2f_Q_beta2 = f84
315atan2f_Q3 = f85
316
// r = Q1 * R_numer (low-precision quotient) and its square.
317atan2f_r = f86
318atan2f_rsq = f87
// NOTE(review): atan2f_poly_atan_U is defined but not referenced by the code
// shown in this file.
319atan2f_poly_atan_U = f88
320
321
322// predicate registers
323//atan2f_Pred_Swap = p6 // |y| > |x|
324//atan2f_Pred_noSwap = p7 // |y| <= |x|
325//atan2f_Pred_Xpos = p8 // x >= 0
326//atan2f_Pred_Xneg = p9 // x < 0
327
328
0ecb606c 329RODATA
8da2915d
UD
330
331.align 16
332
// Polynomial coefficients p1..p10 and multiples of pi, stored as IEEE double
// bit patterns. Entries are ordered in the pairs fetched by the ldfpd loads in
// atan2f, not in index order.
// table1 holds 6 data8 words (0x30 bytes). atan2f derives its second pointer
// by adding 0x30 to table1's address and uses it to load the table2 values, so
// table2 must immediately follow table1 and table1 must keep its size.
0ecb606c 333LOCAL_OBJECT_START(atan2f_coef_table1)
8da2915d
UD
334data8 0xBFD5555512191621 // p1
335data8 0x3F522E5D33BC9BAA // p10
336data8 0xBFA6E10BA401393F // p7
337data8 0x3FB142A73D7C54E3 // p6
338data8 0xBFC2473C5145EE38 // p3
339data8 0x3FC9997E7AFBFF4E // p2
0ecb606c 340LOCAL_OBJECT_END(atan2f_coef_table1)
8da2915d 341
// Second table: remaining coefficients, then the pi constants used for the
// quadrant constant C and for the special-value results.
0ecb606c 342LOCAL_OBJECT_START(atan2f_coef_table2)
8da2915d
UD
343data8 0xBF7DEAADAA336451 // p9
344data8 0x3F97105B4160F86B // p8
345data8 0xBFB68EED6A8CFA32 // p5
346data8 0x3FBC4F512B1865F5 // p4
347data8 0x3ff921fb54442d18 // pi/2
348data8 0x400921fb54442d18 // pi
349data8 0x3fe921fb54442d18 // pi/4
350data8 0x4002d97c7f3321d2 // 3pi/4
0ecb606c 351LOCAL_OBJECT_END(atan2f_coef_table2)
8da2915d
UD
352
353
8da2915d 354
0ecb606c
JJ
355.section .text
// atan2f(y, x): y is read from f8 and x from f9; the single-precision result
// is written to f8 before returning through b0.
// If either input is NaN, infinity or zero, control leaves this main path for
// ATAN2F_XY_INF_NAN_ZERO. Otherwise both the "swap" (|y| > |x|) and "no swap"
// candidates are started speculatively and predicates p6/p7 select between
// them once ysq and xsq have been compared.
356GLOBAL_IEEE754_ENTRY(atan2f)
8da2915d
UD
357
// Allocate the frame, start the 1/y approximation, form the table address
// (linkage-table entry relative to gp) and x*x.
358{ .mfi
359 alloc r32 = ar.pfs,1,5,4,0
360 frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
361 nop.i 999
362}
363{ .mfi
364 addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
365 fma.s1 atan2f_xsq = f9,f9,f0
366 nop.i 999 ;;
367}
368
369
// Fetch the actual address of atan2f_coef_table1 from the linkage table entry;
// start the 1/x approximation and y*y.
370{ .mfi
371 ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1]
372 frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
373 nop.i 999
374}
375{ .mfi
376 nop.m 999
377 fma.s1 atan2f_ysq = f8,f8,f0
378 nop.i 999 ;;
379}
380
381{ .mfi
382 nop.m 999
383 fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
384 nop.i 999
385}
386{ .mfi
387 nop.m 999
388 fma.s1 atan2f_xy = f9,f8,f0
389 nop.i 999 ;;
390}
391
392
// Addr_2 = Addr_1 + 0x30 (the size of table1), used below to load the values
// defined in atan2f_coef_table2. sgn_Y = 1.0 with the sign of y.
393{ .mfi
394 add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1
395 fmerge.s atan2f_sgn_Y = f8,f1
396 nop.i 999 ;;
397}
398
// Load coefficient pairs from both tables in parallel (post-increment by 16)
// while classifying x.
399{ .mmf
400 ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
401 ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
402 fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
403}
404;;
405
// Swap case: T_denom = y + Z0*x*x, Z = x*Z0.
406{ .mfi
407 ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
408 fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8
409 nop.i 999
410}
411{ .mfi
412 ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
413 fma.s1 atan2f_Z = atan2f_Z0,f9,f0
414 nop.i 999 ;;
415}
416
417
// No-swap case: G_denom = x + A0*y*y, A = y*A0.
418{ .mfi
419 ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
420 fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9
421 nop.i 999
422}
423{ .mfi
424 ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
425 fma.s1 atan2f_A = atan2f_A0,f8,f0
426 nop.i 999 ;;
427}
428
// The last constant pair is loaded and y is classified before the first
// special-case branch, so sgn_Y and all pi constants are available on the
// ATAN2F_XY_INF_NAN_ZERO path. T_numer = x - Z0*x*y.
429{ .mfi
430 ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
431 fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
432 nop.i 999
433}
434{ .mfb
435 nop.m 999
436 fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9
437(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
438}
439
440
441// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
442{ .mfi
443 nop.m 999
444 fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
445 nop.i 999
446}
// G_numer = y - A0*x*y.
447{ .mfb
448 nop.m 999
449 fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8
450(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
451}
452
453
// const_1 = sgn(y)*0 when x >= 0 and sgn(y)*1 when x < 0; it is multiplied by
// pi below to form C in the no-swap case.
454{ .mfi
455 nop.m 999
456(p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
457 nop.i 999
458}
459{ .mfi
460 nop.m 999
461(p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
462 nop.i 999 ;;
463}
464
465
// Swap: U = -Z (the negation implements "C - atan(v/u)"), Usq = Z*Z.
466{ .mfi
467 nop.m 999
468(p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0
469 nop.i 999
470}
471{ .mfi
472 nop.m 999
473(p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0
474 nop.i 999 ;;
475}
476
477
// No swap: U = A, Usq = A*A.
478{ .mfi
479 nop.m 999
480(p7) fma.s1 atan2f_U = atan2f_A,f1,f0
481 nop.i 999
482}
483{ .mfi
484 nop.m 999
485(p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0
486 nop.i 999 ;;
487}
488
489
// Q1 = frcpa approximation to 1/R_denom, where R_denom is T_denom (swap) or
// G_denom (no swap).
490{ .mfi
491 nop.m 999
492(p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom
493 nop.i 999
494}
495{ .mfi
496 nop.m 999
497(p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0
498 nop.i 999 ;;
499}
500
501
502{ .mfi
503 nop.m 999
504(p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom
505 nop.i 999
506}
507{ .mfi
508 nop.m 999
509(p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0
510 nop.i 999 ;;
511}
512
513
// R_numer = -T_numer (swap, negated like U) or G_numer (no swap).
514{ .mfi
515 nop.m 999
516(p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0
517 nop.i 999
518}
519{ .mfi
520 nop.m 999
521(p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0
522 nop.i 999 ;;
523}
524
525
// p1rnum = p1 * R_numer, with the same sign handling as R_numer.
526{ .mfi
527 nop.m 999
528(p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0
529 nop.i 999 ;;
530}
531{ .mfi
532 nop.m 999
533(p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0
534 nop.i 999 ;;
535}
536
537
// Polynomial in Usq, evaluated as pairs: poly_uJK = pK + Usq*pJ.
538{ .mfi
539 nop.m 999
540 fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0
541 nop.i 999
542}
543{ .mfi
544 nop.m 999
545 fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
546 nop.i 999 ;;
547}
548
549{ .mfi
550 nop.m 999
551 fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
552 nop.i 999
553}
554{ .mfi
555 nop.m 999
556 fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
557 nop.i 999 ;;
558}
559
560
// Q_beta = 1 - Q1*R_denom, the error of the reciprocal approximation.
561{ .mfi
562 nop.m 999
563 fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
564 nop.i 999
565}
566{ .mfi
567 nop.m 999
568 fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1
569 nop.i 999 ;;
570}
571
572
// r = Q1 * R_numer, the limited-precision quotient used for the cubic
// correction.
573{ .mfi
574 nop.m 999
575 fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
576 nop.i 999
577}
578{ .mfi
579 nop.m 999
580 fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0
581 nop.i 999 ;;
582}
583
// Quadrant constant C: sgn(y)*pi/2 when swapped, const_1*pi otherwise
// (sgn(y)*0 for x >= 0, sgn(y)*pi for x < 0).
584{ .mfi
585 nop.m 999
586(p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
587 nop.i 999
588}
589{ .mfi
590 nop.m 999
591(p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
592 nop.i 999 ;;
593}
594
595{ .mfi
596 nop.m 999
597 fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0
598 nop.i 999
599}
600{ .mfi
601 nop.m 999
602 fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0
603 nop.i 999 ;;
604}
605
// poly_u10to7 = poly_u87 + U4*poly_u109; pR = p1 * R_numer * Q1.
606{ .mfi
607 nop.m 999
608 fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
609 nop.i 999
610}
611{ .mfi
612 nop.m 999
613 fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0
614 nop.i 999 ;;
615}
616
// poly_u6to3 = poly_u43 + U4*poly_u65; Q2 = Q1 + Q1*Q_beta (first reciprocal
// refinement).
617{ .mfi
618 nop.m 999
619 fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
620 nop.i 999
621}
622{ .mfi
623 nop.m 999
624 fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1
625 nop.i 999 ;;
626}
627
628{ .mfi
629 nop.m 999
630 fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0
631 nop.i 999
632}
633{ .mfi
634 nop.m 999
635 fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0
636 nop.i 999 ;;
637}
638
// poly_u210 = 1 + Usq*(p1 + Usq*p2).
639{ .mfi
640 nop.m 999
641 fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1
642 nop.i 999 ;;
643}
644
// The s0 compare exists only for its status-flag side effect. It overwrites
// p8, which has no further readers on this path (atan2f_const_1 was already
// formed).
645{ .mfi
646 nop.m 999
647 fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
648 nop.i 999
649}
650{ .mfi
651 nop.m 999
652 fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
653 nop.i 999 ;;
654}
655
// Q3 = Q2 + Q2*Q_beta^2 (second reciprocal refinement);
// pRC = C + p1*R_numer*Q1*r^2.
656{ .mfi
657 nop.m 999
658 fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2
659 nop.i 999
660}
661{ .mfi
662 nop.m 999
663 fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C
664 nop.i 999 ;;
665}
666
// poly_u10to0 = poly_u210 + U6*poly_u10to3 = 1 + p1*U^2 + ... + p10*U^20.
667{ .mfi
668 nop.m 999
669 fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
670 nop.i 999 ;;
671}
672
// pQRC = C + correction term (R_numer*Q3 + p1*R_numer*Q1*r^2).
673{ .mfi
674 nop.m 999
675 fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC
676 nop.i 999 ;;
677}
678
// Final result = U*poly_u10to0 + pQRC, computed with the single-precision .s
// completer under status field s0, then return.
679{ .mfb
680 nop.m 999
681 fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
682 br.ret.sptk b0 ;;
683}
684
685
686
// Special-value path of atan2f, reached when x or y is NaN, infinity or zero.
// On entry f8 = y, f9 = x, and atan2f_sgn_Y and the pi constants have already
// been set up by the main path. Cases are peeled off in this order: y NaN,
// x NaN, x = +inf, y = inf, x = -inf, then the zero cases. Predicates are
// reused freely; the .unc compares clear both targets when their qualifying
// predicate is false, so stale values cannot trigger later instructions.
687ATAN2F_XY_INF_NAN_ZERO:
688
689{ .mfi
690 nop.m 999
691 fclass.m p10,p0 = f8,0xc3 // Is y nan
692 nop.i 999
693}
694;;
695
696{ .mfi
697 nop.m 999
698 fclass.m p12,p0 = f9,0xc3 // Is x nan
699 nop.i 999
700}
701;;
702
703{ .mfi
704 nop.m 999
705 fclass.m p6,p0 = f9,0x21 // Is x +inf
706 nop.i 999
707}
708{ .mfb
709 nop.m 999
0ecb606c 710(p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan
8da2915d
UD
711(p10) br.ret.spnt b0 // Exit if y is nan
712}
713;;
714
715
// For x = +inf: p7 = y is inf, p8 = y is not inf. Both are cleared if x is
// not +inf.
716{ .mfi
717 nop.m 999
718(p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf
719 nop.i 999
720}
721{ .mfb
722 nop.m 999
0ecb606c 723(p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan
8da2915d
UD
724(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
725}
726;;
727
728// Here if x or y inf, or x or y zero
729{ .mfi
730 nop.m 999
731 fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs
732 nop.i 999
733}
734;;
735
// p11 = x is -inf, p12 = x is not -inf.
736{ .mfi
737 nop.m 999
738 fclass.m p11,p12 = f9,0x22 // Is x -inf
739 nop.i 999
740}
741{ .mfb
742 nop.m 999
0ecb606c 743(p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
8da2915d
UD
744(p7) br.ret.spnt b0 // Exit if x +inf and y inf
745}
746;;
747
// fmerge.s keeps the sign of y and takes the magnitude of f0, giving +-0.
748{ .mfb
749 nop.m 999
750(p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0
751(p8) br.ret.spnt b0 // Exit if x +inf and y not inf
752}
753;;
754
755{ .mfi
756 nop.m 999
757(p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf
758 nop.i 999
759}
760;;
761
// For x = -inf: p14 = y is inf, p15 = y is not inf.
762{ .mfi
763 nop.m 999
764(p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf
765 nop.i 999
766}
767;;
768
// p6 = x is zero, p7 = x is not zero (used after the infinity exits below).
769{ .mfi
770 nop.m 999
771 fclass.m p6,p7 = f9,0x7 // Is x zero
772 nop.i 999
773}
774{ .mfb
775 nop.m 999
0ecb606c 776(p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
8da2915d
UD
777(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
778}
779;;
780
// x = -inf: exactly one of p14/p15 is set, so one of the two results below is
// written before the shared return on p11.
781{ .mfi
782 nop.m 999
0ecb606c 783(p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
8da2915d
UD
784 nop.i 999
785}
786{ .mfb
787 nop.m 999
0ecb606c 788(p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
8da2915d
UD
789(p11) br.ret.spnt b0 // Exit if x -inf
790}
791;;
792
793// Here if x or y zero
// Class mask 0x19 selects positive normal/unnormal values, so p8 = x > 0 and
// p9 = x < 0.
794{ .mfi
795 nop.m 999
796(p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero
797 nop.i 999
798}
799;;
800
// For x = 0: p10 = y is also zero (error case), p11 = y is nonzero.
801{ .mfi
802 nop.m 999
803(p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero
804 nop.i 999
805}
806;;
807
808{ .mfi
809 nop.m 999
810(p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero
811 nop.i 999
812}
813{ .mfb
814 nop.m 999
0ecb606c 815(p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
8da2915d
UD
816(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
817}
818;;
819
// The p8, p9 and p11 cases all leave through this single return.
820{ .mfb
821 nop.m 999
0ecb606c 822(p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
8da2915d
UD
823 br.ret.sptk b0 // Final special case exit
824}
825;;
826
827
0ecb606c 828GLOBAL_IEEE754_END(atan2f)
8da2915d
UD
829
830
// Error path for atan2f(+-0, +-0), entered from ATAN2F_XY_INF_NAN_ZERO with
// f8 = y and f9 = x. It computes the default result in f10, spills the two
// inputs and that result into a new 64-byte stack frame, calls
// __libm_error_support with their addresses and error tag 38, then returns
// whatever value is in the result slot after the call.
// Frame layout relative to the new sp: +16 holds f8, +32 holds f9, +48 holds
// the result.
0ecb606c 831LOCAL_LIBM_ENTRY(__libm_error_region)
8da2915d
UD
832.prologue
833 mov GR_Parameter_TAG = 38
834 fclass.m p10,p11 = f9,0x5 // @zero | @pos
835;;
// Default result: +-0 (sign of y) when x is +0, otherwise sgn(y)*pi.
836(p10) fmerge.s f10 = f8, f0
0ecb606c 837(p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0
8da2915d
UD
838;;
839
840{ .mfi
841 add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
842 nop.f 999
843.save ar.pfs,GR_SAVE_PFS
844 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
845}
846
847{ .mfi
848.fframe 64
849 add sp=-64,sp // Create new stack
850 nop.f 0
851 mov GR_SAVE_GP=gp // Save gp
852}
853;;
854
// The post-increment leaves GR_Parameter_Y at new sp+48, the result slot.
855{ .mmi
856 stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack
857 add GR_Parameter_X = 16,sp // Parameter 1 address
858.save b0, GR_SAVE_B0
859 mov GR_SAVE_B0=b0 // Save b0
860}
861;;
862
863
864.body
865{ .mib
866 stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
867 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
868 nop.b 0
869}
// Store the default result, then step GR_Parameter_Y back to the slot holding
// f9 (new sp+32) before the call.
870{ .mib
871 stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
872 add GR_Parameter_Y = -16,GR_Parameter_Y
873 br.call.sptk b0=__libm_error_support# // Call error handling function
874}
875;;
// Recompute the result-slot address after the call instead of relying on the
// output register (see the 08/15/00 History entry).
876{ .mmi
877 nop.m 0
878 nop.m 0
879 add GR_Parameter_RESULT = 48,sp
880};;
881
882{ .mmi
883 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
884.restore sp
885 add sp = 64,sp // Restore stack pointer
886 mov b0 = GR_SAVE_B0 // Restore return address
887}
888;;
889
890{ .mib
891 mov gp = GR_SAVE_GP // Restore gp
892 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
893 br.ret.sptk b0 // Return
894}
895;;
896
0ecb606c 897LOCAL_LIBM_END(__libm_error_region)
8da2915d
UD
898
899.type __libm_error_support#,@function
900.global __libm_error_support#