]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | .file "atan2f.s" |
2 | ||
0ecb606c JJ |
3 | |
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
8da2915d UD |
5 | // All rights reserved. |
6 | // | |
0ecb606c | 7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation |
8da2915d | 8 | // |
aeb25823 AJ |
9 | // Redistribution and use in source and binary forms, with or without |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
0ecb606c | 23 | |
8da2915d UD |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code, and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
0ecb606c | 38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. |
8da2915d UD |
39 | |
40 | // History | |
41 | //============================================================== | |
0ecb606c JJ |
42 | // 06/01/00 Initial version |
43 | // 08/15/00 Bundle added after call to __libm_error_support to properly | |
8da2915d | 44 | // set [the previously overwritten] GR_Parameter_RESULT. |
0ecb606c | 45 | // 08/17/00 Changed predicate register macro-usage to direct predicate |
8da2915d | 46 | // names due to an assembler bug. |
0ecb606c JJ |
47 | // 01/05/01 Fixed flag settings for denormal input. |
48 | // 01/19/01 Added documentation | |
49 | // 01/30/01 Improved speed | |
50 | // 02/06/02 Corrected .section statement | |
51 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
52 | // 02/06/03 Reordered header: .section, .global, .proc, .align | |
8da2915d UD |
53 | |
54 | // Description | |
55 | //========================================= | |
56 | // The atan2 function computes the principal value of the arc tangent of y/x using | |
57 | // the signs of both arguments to determine the quadrant of the return value. | |
58 | // A domain error may occur if both arguments are zero. | |
59 | ||
60 | // The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians. | |
61 | ||
62 | //.. | |
63 | //..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that | |
64 | //..v and u can be negative. We state the relationship between atan2(y,x) and | |
65 | //..atan(v/u). | |
66 | //.. | |
67 | //..Let swap = false if v = y, and swap = true if v = x. | |
68 | //..Define C according to the matrix | |
69 | //.. | |
70 | //.. TABLE FOR C | |
71 | //.. x +ve x -ve | |
72 | //.. no swap (swap = false) sgn(y)*0 sgn(y)*pi | |
73 | //.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2 | |
74 | //.. | |
75 | //.. atan2(y,x) = C + atan(v/u) if no swap | |
76 | //.. atan2(y,x) = C - atan(v/u) if swap | |
77 | //.. | |
78 | //..This relationship is more efficient to compute as we accommodate signs in v and u | |
79 | //..saving the need to obtain the absolute value before computation can proceed. | |
80 | //.. | |
81 | //..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows: | |
82 | //..A = y * frcpa(x) (so A = (y/x)(1 - beta)) | |
83 | //..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is | |
84 | //..a correction. | |
85 | //..atan(A) is approximated by a polynomial | |
86 | //..A + p1 A^3 + p2 A^5 + ... + p10 A^21, | |
87 | //..atan(G) is approximated as follows: | |
88 | //..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3 | |
89 | //..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay). | |
90 | //.. | |
91 | //..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows: | |
92 | //..Z = x * frcpa(y) (so Z = (x/y)(1 - beta)) | |
93 | //..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is | |
94 | //..a correction. | |
95 | //..atan(Z) is approximated by a polynomial | |
96 | //..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21, | |
97 | //..atan(T) is approximated as follows: | |
98 | //..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3 | |
99 | //..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx). | |
100 | //.. | |
101 | //.. | |
102 | //..A = y * frcpa(x) | |
103 | //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21 | |
104 | //.. | |
105 | //..This polynomial is computed as follows: | |
106 | //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq | |
107 | //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6 | |
108 | //.. | |
109 | //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6 | |
110 | //..poly_A1 = poly_A2 + A4 * poly_A1 | |
111 | //..poly_A1 = poly_A3 + A4 * poly_A1 | |
112 | //.. | |
113 | //..poly_A4 = p1 * A | |
114 | //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4 | |
115 | //..poly_A5 = p2 + Asq * poly_A5 | |
116 | //..poly_A4 = poly_A4 + A5 * poly_A5 | |
117 | //.. | |
118 | //..atan_A = poly_A4 + A11 * poly_A1 | |
119 | //.. | |
120 | //..atan(G) is approximated as follows: | |
121 | //..G_numer = y - A*x, G_denom = x + A*y | |
122 | //..H1 = frcpa(G_denom) | |
123 | //..H_beta = 1 - H1 * G_denom | |
124 | //..H2 = H1 + H1 * H_beta | |
125 | //..H_beta2 = H_beta*H_beta | |
126 | //..H3 = H2 + H2*H_beta2 | |
127 | //..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq | |
128 | //..atan_G = G_numer*H3 + atan_G | |
129 | //.. | |
130 | //.. | |
131 | //..A = y * frcpa(x) | |
132 | //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21 | |
133 | //.. | |
134 | //..This polynomial is computed as follows: | |
135 | //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq | |
136 | //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6 | |
137 | //.. | |
138 | //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6 | |
139 | //..poly_A1 = poly_A2 + A4 * poly_A1 | |
140 | //..poly_A1 = poly_A3 + A4 * poly_A1 | |
141 | //.. | |
142 | //..poly_A4 = p1 * A | |
143 | //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4 | |
144 | //..poly_A5 = p2 + Asq * poly_A5 | |
145 | //..poly_A4 = poly_A4 + A5 * poly_A5 | |
146 | //.. | |
147 | //..atan_A = poly_A4 + A11 * poly_A1 | |
148 | //.. | |
149 | //.. | |
150 | //..==================================================================== | |
151 | //.. COEFFICIENTS USED IN THE COMPUTATION | |
152 | //..==================================================================== | |
153 | ||
154 | //coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21 | |
155 | // | |
156 | // coef_p1 = -.3333332707155439167401311806315789E+00 | |
157 | // coef_p1 in dbl = BFD5 5555 1219 1621 | |
158 | // | |
159 | // coef_p2 = .1999967670926658391827857030875748E+00 | |
160 | // coef_p2 in dbl = 3FC9 997E 7AFB FF4E | |
161 | // | |
162 | // coef_p3 = -.1427989384500152360161563301087296E+00 | |
163 | // coef_p3 in dbl = BFC2 473C 5145 EE38 | |
164 | // | |
165 | // coef_p4 = .1105852823460720770079031213661163E+00 | |
166 | // coef_p4 in dbl = 3FBC 4F51 2B18 65F5 | |
167 | // | |
168 | // coef_p5 = -.8811839915595312348625710228448363E-01 | |
169 | // coef_p5 in dbl = BFB6 8EED 6A8C FA32 | |
170 | // | |
171 | // coef_p6 = .6742329836955067042153645159059714E-01 | |
172 | // coef_p6 in dbl = 3FB1 42A7 3D7C 54E3 | |
173 | // | |
174 | // coef_p7 = -.4468571068774672908561591262231909E-01 | |
175 | // coef_p7 in dbl = BFA6 E10B A401 393F | |
176 | // | |
177 | // coef_p8 = .2252333246746511135532726960586493E-01 | |
178 | // coef_p8 in dbl = 3F97 105B 4160 F86B | |
179 | // | |
180 | // coef_p9 = -.7303884867007574742501716845542314E-02 | |
181 | // coef_p9 in dbl = BF7D EAAD AA33 6451 | |
182 | // | |
183 | // coef_p10 = .1109686868355312093949039454619058E-02 | |
184 | // coef_p10 in dbl = 3F52 2E5D 33BC 9BAA | |
185 | // | |
186 | ||
187 | // Special values | |
188 | //============================================================== | |
189 | // Y x Result | |
190 | // +number +inf +0 | |
191 | // -number +inf -0 | |
192 | // +number -inf +pi | |
193 | // -number -inf -pi | |
194 | // | |
195 | // +inf +number +pi/2 | |
196 | // -inf +number -pi/2 | |
197 | // +inf -number +pi/2 | |
198 | // -inf -number -pi/2 | |
199 | // | |
200 | // +inf +inf +pi/4 | |
201 | // -inf +inf -pi/4 | |
202 | // +inf -inf +3pi/4 | |
203 | // -inf -inf -3pi/4 | |
204 | // | |
205 | // +1 +1 +pi/4 | |
206 | // -1 +1 -pi/4 | |
207 | // +1 -1 +3pi/4 | |
208 | // -1 -1 -3pi/4 | |
209 | // | |
210 | // +number +0 +pi/2 // does not raise DBZ | |
211 | // -number +0 -pi/2 // does not raise DBZ | |
212 | // +number -0 +pi/2 // does not raise DBZ | |
213 | // -number -0 -pi/2 // does not raise DBZ | |
214 | // | |
215 | // +0 +number +0 | |
216 | // -0 +number -0 | |
217 | // +0 -number +pi | |
218 | // -0 -number -pi | |
219 | // | |
220 | // +0 +0 +0 // does not raise invalid | |
221 | // -0 +0 -0 // does not raise invalid | |
222 | // +0 -0 +pi // does not raise invalid | |
223 | // -0 -0 -pi // does not raise invalid | |
224 | // | |
225 | // NaN anything quiet Y | |
226 | // anything NaN quiet X | |
227 | ||
228 | // atan2(+-0/+-0) sets double error tag to 37 | |
229 | // atan2f(+-0/+-0) sets single error tag to 38 | |
230 | // These are domain errors. | |
231 | ||
8da2915d UD |
232 | |
233 | // | |
234 | // Assembly macros | |
235 | //========================================= | |
236 | ||
237 | ||
238 | // integer registers: atan2f's alloc declares 1 input (r32), 5 locals (r33-r37), 4 outputs (r38-r41); Addr_1/Addr_2 walk the coefficient tables, the SAVE_* and Parameter_* names are used only in __libm_error_region | |
239 | atan2f_GR_Addr_1 = r33 | |
240 | atan2f_GR_Addr_2 = r34 | |
241 | GR_SAVE_B0 = r35 | |
242 | ||
243 | GR_SAVE_PFS = r36 | |
244 | GR_SAVE_GP = r37 | |
245 | ||
246 | GR_Parameter_X = r38 | |
247 | GR_Parameter_Y = r39 | |
248 | GR_Parameter_RESULT = r40 | |
249 | GR_Parameter_TAG = r41 | |
250 | ||
251 | // floating point registers: the inputs f8 (y) and f9 (x) are referenced directly; the aliases below cover f32-f88 (coefficients and constants first, then intermediates of the atan polynomial and of the correction term) | |
252 | atan2f_coef_p1 = f32 | |
253 | atan2f_coef_p10 = f33 | |
254 | atan2f_coef_p7 = f34 | |
255 | atan2f_coef_p6 = f35 | |
256 | ||
257 | atan2f_coef_p3 = f36 | |
258 | atan2f_coef_p2 = f37 | |
259 | atan2f_coef_p9 = f38 | |
260 | atan2f_coef_p8 = f39 | |
261 | atan2f_coef_p5 = f40 | |
262 | ||
263 | atan2f_coef_p4 = f41 | |
264 | atan2f_const_piby2 = f42 | |
265 | atan2f_const_pi = f43 | |
266 | atan2f_const_piby4 = f44 | |
267 | atan2f_const_3piby4 = f45 | |
268 | ||
269 | atan2f_xsq = f46 | |
270 | atan2f_ysq = f47 | |
271 | atan2f_xy = f48 | |
272 | atan2f_const_1 = f49 | |
273 | atan2f_sgn_Y = f50 | |
274 | ||
275 | atan2f_Z0 = f51 | |
276 | atan2f_A0 = f52 | |
277 | atan2f_Z = f53 | |
278 | atan2f_A = f54 | |
279 | atan2f_C = f55 | |
280 | ||
281 | atan2f_U = f56 | |
282 | atan2f_Usq = f57 | |
283 | atan2f_U4 = f58 | |
284 | atan2f_U6 = f59 | |
285 | atan2f_U8 = f60 | |
286 | ||
287 | atan2f_poly_u109 = f61 | |
288 | atan2f_poly_u87 = f62 | |
289 | atan2f_poly_u65 = f63 | |
290 | atan2f_poly_u43 = f64 | |
291 | atan2f_poly_u21 = f65 | |
292 | ||
293 | atan2f_poly_u10to7 = f66 | |
294 | atan2f_poly_u6to3 = f67 | |
295 | atan2f_poly_u10to3 = f68 | |
296 | atan2f_poly_u10to0 = f69 | |
297 | atan2f_poly_u210 = f70 | |
298 | ||
299 | atan2f_T_numer = f71 | |
300 | atan2f_T_denom = f72 | |
301 | atan2f_G_numer = f73 | |
302 | atan2f_G_denom = f74 | |
303 | atan2f_p1rnum = f75 | |
304 | ||
305 | atan2f_R_denom = f76 | |
306 | atan2f_R_numer = f77 | |
307 | atan2f_pR = f78 | |
308 | atan2f_pRC = f79 | |
309 | atan2f_pQRC = f80 | |
310 | ||
311 | atan2f_Q1 = f81 | |
312 | atan2f_Q_beta = f82 | |
313 | atan2f_Q2 = f83 | |
314 | atan2f_Q_beta2 = f84 | |
315 | atan2f_Q3 = f85 | |
316 | ||
317 | atan2f_r = f86 | |
318 | atan2f_rsq = f87 | |
319 | atan2f_poly_atan_U = f88 | |
320 | ||
321 | ||
322 | // predicate registers: the aliases below are kept only as documentation (commented out); the code names p6-p9 directly | |
323 | //atan2f_Pred_Swap = p6 // |y| > |x| | |
324 | //atan2f_Pred_noSwap = p7 // |y| <= |x| | |
325 | //atan2f_Pred_Xpos = p8 // x >= 0 | |
326 | //atan2f_Pred_Xneg = p9 // x < 0 | |
327 | ||
328 | ||
0ecb606c | 329 | RODATA |
8da2915d UD |
330 | |
331 | .align 16 | |
332 | ||
0ecb606c | 333 | LOCAL_OBJECT_START(atan2f_coef_table1) |
8da2915d UD |
334 | data8 0xBFD5555512191621 // p1 (table1 = 6 doubles = 0x30 bytes, stored in the pair order the ldfpd loads consume) |
335 | data8 0x3F522E5D33BC9BAA // p10 | |
336 | data8 0xBFA6E10BA401393F // p7 | |
337 | data8 0x3FB142A73D7C54E3 // p6 | |
338 | data8 0xBFC2473C5145EE38 // p3 | |
339 | data8 0x3FC9997E7AFBFF4E // p2 | |
0ecb606c | 340 | LOCAL_OBJECT_END(atan2f_coef_table1) |
8da2915d | 341 | |
0ecb606c | 342 | LOCAL_OBJECT_START(atan2f_coef_table2) |
8da2915d UD |
343 | data8 0xBF7DEAADAA336451 // p9 (atan2f addresses table2 as table1 + 0x30, so table2 must directly follow table1) |
344 | data8 0x3F97105B4160F86B // p8 | |
345 | data8 0xBFB68EED6A8CFA32 // p5 | |
346 | data8 0x3FBC4F512B1865F5 // p4 | |
347 | data8 0x3ff921fb54442d18 // pi/2 (this and the three constants below are double-precision bit patterns) | |
348 | data8 0x400921fb54442d18 // pi | |
349 | data8 0x3fe921fb54442d18 // pi/4 | |
350 | data8 0x4002d97c7f3321d2 // 3pi/4 | |
0ecb606c | 351 | LOCAL_OBJECT_END(atan2f_coef_table2) |
8da2915d UD |
352 | |
353 | ||
8da2915d | 354 | |
0ecb606c JJ |
355 | .section .text |
356 | GLOBAL_IEEE754_ENTRY(atan2f) | |
8da2915d UD |
357 | |
358 | { .mfi | |
359 | alloc r32 = ar.pfs,1,5,4,0 | |
360 | frcpa.s1 atan2f_Z0,p0 = f1,f8 // Z0 = approx to 1/y (f8 holds y) | |
361 | nop.i 999 | |
362 | } | |
363 | { .mfi | |
364 | addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp | |
365 | fma.s1 atan2f_xsq = f9,f9,f0 | |
366 | nop.i 999 ;; | |
367 | } | |
368 | ||
369 | ||
370 | { .mfi | |
371 | ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] | |
372 | frcpa.s1 atan2f_A0,p0 = f1,f9 // A0 = approx to 1/x (f9 holds x) | |
373 | nop.i 999 | |
374 | } | |
375 | { .mfi | |
376 | nop.m 999 | |
377 | fma.s1 atan2f_ysq = f8,f8,f0 | |
378 | nop.i 999 ;; | |
379 | } | |
380 | ||
381 | { .mfi | |
382 | nop.m 999 | |
383 | fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0; selects const_1 = 0 (p8) or sgn(y) (p9) below | |
384 | nop.i 999 | |
385 | } | |
386 | { .mfi | |
387 | nop.m 999 | |
388 | fma.s1 atan2f_xy = f9,f8,f0 | |
389 | nop.i 999 ;; | |
390 | } | |
391 | ||
392 | ||
393 | { .mfi | |
394 | add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 | |
395 | fmerge.s atan2f_sgn_Y = f8,f1 | |
396 | nop.i 999 ;; | |
397 | } | |
398 | ||
399 | { .mmf | |
400 | ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16 | |
401 | ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16 | |
402 | fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero; p10 gates the first special-case branch | |
403 | } | |
404 | ;; | |
405 | ||
406 | { .mfi | |
407 | ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16 | |
408 | fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8 | |
409 | nop.i 999 | |
410 | } | |
411 | { .mfi | |
412 | ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16 | |
413 | fma.s1 atan2f_Z = atan2f_Z0,f9,f0 | |
414 | nop.i 999 ;; | |
415 | } | |
416 | ||
417 | ||
418 | { .mfi | |
419 | ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16 | |
420 | fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9 | |
421 | nop.i 999 | |
422 | } | |
423 | { .mfi | |
424 | ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16 | |
425 | fma.s1 atan2f_A = atan2f_A0,f8,f0 | |
426 | nop.i 999 ;; | |
427 | } | |
428 | ||
429 | { .mfi | |
430 | ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2] | |
431 | fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero; p11 gates the second special-case branch | |
432 | nop.i 999 | |
433 | } | |
434 | { .mfb | |
435 | nop.m 999 | |
436 | fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9 | |
437 | (p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero | |
438 | } | |
439 | ||
440 | ||
441 | // p6 if |y|>|x| (swap: U = -Z, C = sgn(y)*pi/2), p7 if |x|>=|y| (no swap: U = A, C = const_1*pi), use xsq and ysq for test | |
442 | { .mfi | |
443 | nop.m 999 | |
444 | fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq | |
445 | nop.i 999 | |
446 | } | |
447 | { .mfb | |
448 | nop.m 999 | |
449 | fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8 | |
450 | (p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero | |
451 | } | |
452 | ||
453 | ||
454 | { .mfi | |
455 | nop.m 999 | |
456 | (p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0 | |
457 | nop.i 999 | |
458 | } | |
459 | { .mfi | |
460 | nop.m 999 | |
461 | (p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0 | |
462 | nop.i 999 ;; | |
463 | } | |
464 | ||
465 | ||
466 | { .mfi | |
467 | nop.m 999 | |
468 | (p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0 | |
469 | nop.i 999 | |
470 | } | |
471 | { .mfi | |
472 | nop.m 999 | |
473 | (p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0 | |
474 | nop.i 999 ;; | |
475 | } | |
476 | ||
477 | ||
478 | { .mfi | |
479 | nop.m 999 | |
480 | (p7) fma.s1 atan2f_U = atan2f_A,f1,f0 | |
481 | nop.i 999 | |
482 | } | |
483 | { .mfi | |
484 | nop.m 999 | |
485 | (p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0 | |
486 | nop.i 999 ;; | |
487 | } | |
488 | ||
489 | ||
490 | { .mfi | |
491 | nop.m 999 | |
492 | (p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom | |
493 | nop.i 999 | |
494 | } | |
495 | { .mfi | |
496 | nop.m 999 | |
497 | (p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0 | |
498 | nop.i 999 ;; | |
499 | } | |
500 | ||
501 | ||
502 | { .mfi | |
503 | nop.m 999 | |
504 | (p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom | |
505 | nop.i 999 | |
506 | } | |
507 | { .mfi | |
508 | nop.m 999 | |
509 | (p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0 | |
510 | nop.i 999 ;; | |
511 | } | |
512 | ||
513 | ||
514 | { .mfi | |
515 | nop.m 999 | |
516 | (p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0 | |
517 | nop.i 999 | |
518 | } | |
519 | { .mfi | |
520 | nop.m 999 | |
521 | (p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0 | |
522 | nop.i 999 ;; | |
523 | } | |
524 | ||
525 | ||
526 | { .mfi | |
527 | nop.m 999 | |
528 | (p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0 | |
529 | nop.i 999 ;; | |
530 | } | |
531 | { .mfi | |
532 | nop.m 999 | |
533 | (p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0 | |
534 | nop.i 999 ;; | |
535 | } | |
536 | ||
537 | ||
538 | { .mfi | |
539 | nop.m 999 | |
540 | fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0 | |
541 | nop.i 999 | |
542 | } | |
543 | { .mfi | |
544 | nop.m 999 | |
545 | fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9 | |
546 | nop.i 999 ;; | |
547 | } | |
548 | ||
549 | { .mfi | |
550 | nop.m 999 | |
551 | fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7 | |
552 | nop.i 999 | |
553 | } | |
554 | { .mfi | |
555 | nop.m 999 | |
556 | fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5 | |
557 | nop.i 999 ;; | |
558 | } | |
559 | ||
560 | ||
561 | { .mfi | |
562 | nop.m 999 | |
563 | fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3 | |
564 | nop.i 999 | |
565 | } | |
566 | { .mfi | |
567 | nop.m 999 | |
568 | fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1 | |
569 | nop.i 999 ;; | |
570 | } | |
571 | ||
572 | ||
573 | { .mfi | |
574 | nop.m 999 | |
575 | fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1 | |
576 | nop.i 999 | |
577 | } | |
578 | { .mfi | |
579 | nop.m 999 | |
580 | fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0 | |
581 | nop.i 999 ;; | |
582 | } | |
583 | ||
584 | { .mfi | |
585 | nop.m 999 | |
586 | (p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0 | |
587 | nop.i 999 | |
588 | } | |
589 | { .mfi | |
590 | nop.m 999 | |
591 | (p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0 | |
592 | nop.i 999 ;; | |
593 | } | |
594 | ||
595 | { .mfi | |
596 | nop.m 999 | |
597 | fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0 | |
598 | nop.i 999 | |
599 | } | |
600 | { .mfi | |
601 | nop.m 999 | |
602 | fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0 | |
603 | nop.i 999 ;; | |
604 | } | |
605 | ||
606 | { .mfi | |
607 | nop.m 999 | |
608 | fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87 | |
609 | nop.i 999 | |
610 | } | |
611 | { .mfi | |
612 | nop.m 999 | |
613 | fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 | |
614 | nop.i 999 ;; | |
615 | } | |
616 | ||
617 | { .mfi | |
618 | nop.m 999 | |
619 | fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43 | |
620 | nop.i 999 | |
621 | } | |
622 | { .mfi | |
623 | nop.m 999 | |
624 | fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 | |
625 | nop.i 999 ;; | |
626 | } | |
627 | ||
628 | { .mfi | |
629 | nop.m 999 | |
630 | fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0 | |
631 | nop.i 999 | |
632 | } | |
633 | { .mfi | |
634 | nop.m 999 | |
635 | fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0 | |
636 | nop.i 999 ;; | |
637 | } | |
638 | ||
639 | { .mfi | |
640 | nop.m 999 | |
641 | fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 | |
642 | nop.i 999 ;; | |
643 | } | |
644 | ||
645 | { .mfi | |
646 | nop.m 999 | |
647 | fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs (p8 is overwritten here, but it is not read again on this path) | |
648 | nop.i 999 | |
649 | } | |
650 | { .mfi | |
651 | nop.m 999 | |
652 | fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3 | |
653 | nop.i 999 ;; | |
654 | } | |
655 | ||
656 | { .mfi | |
657 | nop.m 999 | |
658 | fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 | |
659 | nop.i 999 | |
660 | } | |
661 | { .mfi | |
662 | nop.m 999 | |
663 | fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C | |
664 | nop.i 999 ;; | |
665 | } | |
666 | ||
667 | { .mfi | |
668 | nop.m 999 | |
669 | fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210 | |
670 | nop.i 999 ;; | |
671 | } | |
672 | ||
673 | { .mfi | |
674 | nop.m 999 | |
675 | fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC | |
676 | nop.i 999 ;; | |
677 | } | |
678 | ||
679 | { .mfb | |
680 | nop.m 999 | |
681 | fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC | |
682 | br.ret.sptk b0 ;; | |
683 | } | |
684 | ||
685 | ||
686 | ||
687 | ATAN2F_XY_INF_NAN_ZERO: | |
688 | ||
689 | { .mfi | |
690 | nop.m 999 | |
691 | fclass.m p10,p0 = f8,0xc3 // Is y nan (snan or qnan, either sign) | |
692 | nop.i 999 | |
693 | } | |
694 | ;; | |
695 | ||
696 | { .mfi | |
697 | nop.m 999 | |
698 | fclass.m p12,p0 = f9,0xc3 // Is x nan (snan or qnan, either sign) | |
699 | nop.i 999 | |
700 | } | |
701 | ;; | |
702 | ||
703 | { .mfi | |
704 | nop.m 999 | |
705 | fclass.m p6,p0 = f9,0x21 // Is x +inf | |
706 | nop.i 999 | |
707 | } | |
708 | { .mfb | |
709 | nop.m 999 | |
0ecb606c | 710 | (p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan (computed as x*y) |
8da2915d UD |
711 | (p10) br.ret.spnt b0 // Exit if y is nan |
712 | } | |
713 | ;; | |
714 | ||
715 | ||
716 | { .mfi | |
717 | nop.m 999 | |
718 | (p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf (p7: yes, p8: no; both cleared when x is not +inf) | |
719 | nop.i 999 | |
720 | } | |
721 | { .mfb | |
722 | nop.m 999 | |
0ecb606c | 723 | (p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan |
8da2915d UD |
724 | (p12) br.ret.spnt b0 // Exit if x is nan, y not nan |
725 | } | |
726 | ;; | |
727 | ||
728 | // Here if x or y inf, or x or y zero (neither x nor y is nan) | |
729 | { .mfi | |
730 | nop.m 999 | |
731 | fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs (the p15 result is not used; p15 is rewritten by the .unc test on y below) | |
732 | nop.i 999 | |
733 | } | |
734 | ;; | |
735 | ||
736 | { .mfi | |
737 | nop.m 999 | |
738 | fclass.m p11,p12 = f9,0x22 // Is x -inf (p11: yes, p12: no) | |
739 | nop.i 999 | |
740 | } | |
741 | { .mfb | |
742 | nop.m 999 | |
0ecb606c | 743 | (p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4 (sign of y) |
8da2915d UD |
744 | (p7) br.ret.spnt b0 // Exit if x +inf and y inf |
745 | } | |
746 | ;; | |
747 | ||
748 | { .mfb | |
749 | nop.m 999 | |
750 | (p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0 (sign of y on a zero magnitude) | |
751 | (p8) br.ret.spnt b0 // Exit if x +inf and y not inf | |
752 | } | |
753 | ;; | |
754 | ||
755 | { .mfi | |
756 | nop.m 999 | |
757 | (p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf (x +inf has already exited above) | |
758 | nop.i 999 | |
759 | } | |
760 | ;; | |
761 | ||
762 | { .mfi | |
763 | nop.m 999 | |
764 | (p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf (p14: yes, p15: no; both cleared when x is not -inf) | |
765 | nop.i 999 | |
766 | } | |
767 | ;; | |
768 | ||
769 | { .mfi | |
770 | nop.m 999 | |
771 | fclass.m p6,p7 = f9,0x7 // Is x zero (p6: yes, p7: no) | |
772 | nop.i 999 | |
773 | } | |
774 | { .mfb | |
775 | nop.m 999 | |
0ecb606c | 776 | (p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2 (sign of y) |
8da2915d UD |
777 | (p13) br.ret.spnt b0 // Exit if x not -inf and y inf |
778 | } | |
779 | ;; | |
780 | ||
781 | { .mfi | |
782 | nop.m 999 | |
0ecb606c | 783 | (p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4 (x -inf, y inf) |
8da2915d UD |
784 | nop.i 999 |
785 | } | |
786 | { .mfb | |
787 | nop.m 999 | |
0ecb606c | 788 | (p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi (x -inf, y not inf) |
8da2915d UD |
789 | (p11) br.ret.spnt b0 // Exit if x -inf |
790 | } | |
791 | ;; | |
792 | ||
793 | // Here if x or y zero (all nan and inf cases have returned above) | |
794 | { .mfi | |
795 | nop.m 999 | |
796 | (p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero (p8: x > 0, p9: x < 0) | |
797 | nop.i 999 | |
798 | } | |
799 | ;; | |
800 | ||
801 | { .mfi | |
802 | nop.m 999 | |
803 | (p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero (p10: yes, p11: no) | |
804 | nop.i 999 | |
805 | } | |
806 | ;; | |
807 | ||
808 | { .mfi | |
809 | nop.m 999 | |
810 | (p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero (sign of y) | |
811 | nop.i 999 | |
812 | } | |
813 | { .mfb | |
814 | nop.m 999 | |
0ecb606c | 815 | (p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi |
8da2915d UD |
816 | (p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero |
817 | } | |
818 | ;; | |
819 | ||
820 | { .mfb | |
821 | nop.m 999 | |
0ecb606c | 822 | (p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero, result +-pi/2 |
8da2915d UD |
823 | br.ret.sptk b0 // Final special case exit |
824 | } | |
825 | ;; | |
826 | ||
827 | ||
0ecb606c | 828 | GLOBAL_IEEE754_END(atan2f) |
8da2915d UD |
829 | |
830 | ||
0ecb606c | 831 | LOCAL_LIBM_ENTRY(__libm_error_region) |
8da2915d UD |
832 | .prologue |
833 | mov GR_Parameter_TAG = 38 | |
834 | fclass.m p10,p11 = f9,0x5 // @zero | @pos: p10 if x is +0, p11 if x is -0 (reached only with x and y both zero) | |
835 | ;; | |
836 | (p10) fmerge.s f10 = f8, f0 | |
0ecb606c | 837 | (p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0 |
8da2915d UD |
838 | ;; |
839 | ||
840 | { .mfi | |
841 | add GR_Parameter_Y=-32,sp // Parameter 2 value (this address equals sp+32 once the 64-byte frame below is created) | |
842 | nop.f 999 | |
843 | .save ar.pfs,GR_SAVE_PFS | |
844 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
845 | } | |
846 | ||
847 | { .mfi | |
848 | .fframe 64 | |
849 | add sp=-64,sp // Create new stack | |
850 | nop.f 0 | |
851 | mov GR_SAVE_GP=gp // Save gp | |
852 | } | |
853 | ;; | |
854 | ||
855 | { .mmi | |
856 | stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 (f9 = x) on stack; post-increment leaves the pointer at sp+48 | |
857 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
858 | .save b0, GR_SAVE_B0 | |
859 | mov GR_SAVE_B0=b0 // Save b0 | |
860 | } | |
861 | ;; | |
862 | ||
863 | ||
864 | .body | |
865 | { .mib | |
866 | stfs [GR_Parameter_X] = f8 // Store Parameter 1 (f8 = y) on stack | |
867 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
868 | nop.b 0 // Parameter 3 address | |
869 | } | |
870 | { .mib | |
871 | stfs [GR_Parameter_Y] = f10 // Store Parameter 3 (the default result computed in f10) on stack | |
872 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
873 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
874 | } | |
875 | ;; | |
876 | { .mmi | |
877 | nop.m 0 | |
878 | nop.m 0 | |
879 | add GR_Parameter_RESULT = 48,sp | |
880 | };; | |
881 | ||
882 | { .mmi | |
883 | ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack (sp+48, the slot written with f10 before the call) | |
884 | .restore sp | |
885 | add sp = 64,sp // Restore stack pointer | |
886 | mov b0 = GR_SAVE_B0 // Restore return address | |
887 | } | |
888 | ;; | |
889 | ||
890 | { .mib | |
891 | mov gp = GR_SAVE_GP // Restore gp | |
892 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
893 | br.ret.sptk b0 // Return | |
894 | } | |
895 | ;; | |
896 | ||
0ecb606c | 897 | LOCAL_LIBM_END(__libm_error_region) |
8da2915d UD |
898 | |
899 | .type __libm_error_support#,@function | |
900 | .global __libm_error_support# |