]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | .file "atan.s" |
2 | ||
0ecb606c JJ |
3 | |
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
8da2915d | 5 | // All rights reserved. |
0ecb606c JJ |
6 | // |
7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation | |
aeb25823 AJ |
8 | // |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
0ecb606c JJ |
23 | |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
8da2915d | 26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0ecb606c | 27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
8da2915d | 28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
0ecb606c JJ |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
8da2915d | 32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
0ecb606c JJ |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
8da2915d | 36 | // Intel Corporation is the author of this code, and requests that all |
0ecb606c JJ |
37 | // problem reports or change requests be submitted to it directly at |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
8da2915d UD |
39 | // |
40 | // History | |
41 | //============================================================== | |
0ecb606c JJ |
42 | // 02/02/00 Initial version |
43 | // 04/13/00 Improved speed | |
44 | // 04/19/00 Removed the qualifying predicate from the fmerge.s that | |
45 | // takes the absolute value. | |
46 | // 06/16/00 Reassigned FP registers to eliminate stalls on loads | |
47 | // 08/30/00 Saved 5 cycles in main path by rearranging large argument logic | |
48 | // and delaying use of result of fcmp in load by 1 group | |
49 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
50 | // 08/20/02 Use atan2 algorithm with x=1 for better accuracy | |
51 | // 02/06/03 Reordered header: .section, .global, .proc, .align | |
8da2915d UD |
52 | // |
53 | // API | |
54 | //============================================================== | |
0ecb606c | 55 | // double atan(double Y) |
8da2915d UD |
56 | // |
57 | // Overview of operation | |
58 | //============================================================== | |
8da2915d | 59 | // |
0ecb606c | 60 | // The atan function returns values in the interval [-pi/2,+pi/2]. |
8da2915d | 61 | // |
0ecb606c | 62 | // The algorithm used is the atan2(Y,X) algorithm where we fix X=1.0. |
8da2915d | 63 | // |
0ecb606c JJ |
64 | // There are two basic paths: swap true and swap false. |
65 | // atan2(Y,X) ==> atan(V/U) where U >= V. If |Y| > |X|, we must swap. | |
8da2915d | 66 | // |
0ecb606c JJ |
67 | // p6 swap True |Y| > |X| |
68 | // p7 swap False |Y| <= |X| | |
8da2915d | 69 | // |
8da2915d | 70 | // |
0ecb606c JJ |
71 | // Simple trigonometric identities show |
72 | // Region 1 | |
73 | // |Y|<=1.0, V=Y, U=1.0 atan2(Y,X) = sgnY * (0 + atan(V/U)) | |
8da2915d | 74 | // |
0ecb606c JJ |
75 | // Region 2 |
76 | // |Y|>1.0, V=1.0, U=Y atan2(Y,X) = sgnY * (pi/2 - atan(V/U)) | |
77 | // | |
78 | // | |
79 | // We compute atan(V/U) from the identity | |
80 | // atan(z) + atan([(V/U)-z] / [1+(V/U)z]) | |
81 | // where z is a limited precision approximation (16 bits) to V/U | |
82 | // | |
83 | // z is calculated with the assistance of the frcpa instruction. | |
84 | // | |
85 | // atan(z) is calculated by a polynomial z + z^3 * p(w), w=z^2 | |
86 | // where p(w) = P0+P1*w+...+P22*w^22 | |
87 | // | |
88 | // Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z) | |
89 | // | |
90 | // Approximate atan(d) by d + P0*d^3 | |
91 | // Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8. | |
92 | // Compute q(a) = 1 + a + ... + a^5. | |
93 | // Then F*q(a) approximates the reciprocal to more than 50 bits. | |
8da2915d | 94 | |
0ecb606c | 95 | // Special values |
8da2915d UD |
96 | //============================================================== |
97 | // atan(QNAN) = QNAN | |
98 | // atan(SNAN) = quieted SNAN | |
0ecb606c | 99 | // atan(+-inf) = +- pi/2 |
8da2915d UD |
100 | // atan(+-0) = +-0 |
101 | ||
8da2915d UD |
102 | // Registers used |
103 | //============================================================== | |
104 | ||
0ecb606c JJ |
105 | // predicate registers used: |
106 | // p6 -> p15 | |
8da2915d | 107 | |
0ecb606c JJ |
108 | // floating-point registers used: |
109 | // f8, input | |
110 | // f32 -> f116 | |
8da2915d UD |
111 | |
112 | // general registers used | |
0ecb606c | 113 | // r14 -> r16 |
8da2915d UD |
114 | |
115 | // Assembly macros | |
116 | //============================================================== | |
8da2915d | 117 | |
0ecb606c JJ |
// Symbolic names for the registers used by atan.
// General registers: EXP_AD_P1 is loaded with the address of atan2_tb1,
// EXP_AD_P2 is set to EXP_AD_P1 + 0xd0, and rsig_near_one holds the 64-bit
// significand pattern that is later moved into atan2_sig_near_one.
118 | EXP_AD_P1 = r14 |
119 | EXP_AD_P2 = r15 | |
120 | rsig_near_one = r16 | |
121 | ||
// atan2_Y is the argument/result register f8; atan2_X is aliased to f1, so
// X is fixed at 1.0 as described in the overview above.
122 | atan2_Y = f8 | |
123 | atan2_X = f1 | |
124 | ||
// Temporaries for the reciprocal approximations and the reduced argument z.
125 | atan2_u1_X = f32 | |
126 | atan2_u1_Y = f33 | |
127 | atan2_z2_X = f34 | |
128 | ||
129 | atan2_two = f36 | |
130 | atan2_B1sq_Y = f37 | |
131 | atan2_z1_X = f38 | |
132 | atan2_B1X = f40 | |
133 | ||
134 | atan2_B1Y = f41 | |
135 | atan2_wp_X = f42 | |
136 | atan2_B1sq_X = f43 | |
137 | atan2_z = f44 | |
138 | atan2_w = f45 | |
139 | ||
// Polynomial coefficients P0..P22, filled by the ldfe loads in atan.
140 | atan2_P0 = f46 | |
141 | atan2_P1 = f47 | |
142 | atan2_P2 = f48 | |
143 | atan2_P3 = f49 | |
144 | atan2_P4 = f50 | |
145 | ||
146 | atan2_P5 = f51 | |
147 | atan2_P6 = f52 | |
148 | atan2_P7 = f53 | |
149 | atan2_P8 = f54 | |
150 | atan2_P9 = f55 | |
151 | ||
152 | atan2_P10 = f56 | |
153 | atan2_P11 = f57 | |
154 | atan2_P12 = f58 | |
155 | atan2_P13 = f59 | |
156 | atan2_P14 = f60 | |
157 | ||
158 | atan2_P15 = f61 | |
159 | atan2_P16 = f62 | |
160 | atan2_P17 = f63 | |
161 | atan2_P18 = f64 | |
162 | atan2_P19 = f65 | |
163 | ||
164 | atan2_P20 = f66 | |
165 | atan2_P21 = f67 | |
166 | atan2_P22 = f68 | |
// atan2_pi_by_2 and atan2_sgn_pi_by_2 intentionally name the same register
// (f69): the signed value is written over the unsigned constant.
167 | atan2_pi_by_2 = f69 | |
168 | atan2_sgn_pi_by_2 = f69 | |
// Intermediate values of the polynomial evaluation (V*/W*) and of the
// correction term d (E, F, gV, gVF, alpha*, Cp, C).
169 | atan2_V13 = f70 | |
170 | ||
171 | atan2_W11 = f71 | |
172 | atan2_E = f72 | |
173 | atan2_wp_Y = f73 | |
174 | atan2_V11 = f74 | |
175 | atan2_V12 = f75 | |
176 | ||
177 | atan2_V7 = f76 | |
178 | atan2_V8 = f77 | |
179 | atan2_W7 = f78 | |
180 | atan2_W8 = f79 | |
181 | atan2_W3 = f80 | |
182 | ||
183 | atan2_W4 = f81 | |
184 | atan2_V3 = f82 | |
185 | atan2_V4 = f83 | |
186 | atan2_F = f84 | |
187 | atan2_gV = f85 | |
188 | ||
189 | atan2_V10 = f86 | |
190 | atan2_zcub = f87 | |
191 | atan2_V6 = f88 | |
192 | atan2_V9 = f89 | |
193 | atan2_W10 = f90 | |
194 | ||
195 | atan2_W6 = f91 | |
196 | atan2_W2 = f92 | |
197 | atan2_V2 = f93 | |
198 | atan2_alpha = f94 | |
199 | atan2_alpha_1 = f95 | |
200 | ||
201 | atan2_gVF = f96 | |
202 | atan2_V5 = f97 | |
203 | atan2_W12 = f98 | |
204 | atan2_W5 = f99 | |
205 | atan2_alpha_sq = f100 | |
206 | ||
207 | atan2_Cp = f101 | |
208 | atan2_V1 = f102 | |
209 | atan2_ysq = f103 | |
210 | atan2_W1 = f104 | |
211 | atan2_alpha_cub = f105 | |
212 | ||
213 | atan2_C = f106 | |
214 | atan2_d = f108 | |
215 | atan2_A_hi = f109 | |
216 | atan2_dsq = f110 | |
217 | ||
218 | atan2_pd = f111 | |
219 | atan2_A_lo = f112 | |
220 | atan2_A = f113 | |
221 | atan2_Pp = f114 | |
222 | atan2_sgnY = f115 | |
223 | ||
// atan2_sig_near_one and atan2_near_one also share one register (f116): the
// raw significand is turned into the near-one constant in place.
224 | atan2_sig_near_one = f116 | |
225 | atan2_near_one = f116 | |
8da2915d UD |
226 | |
227 | ///////////////////////////////////////////////////////////// | |
228 | ||
229 | ||
0ecb606c | 230 | RODATA |
8da2915d UD |
231 | |
232 | .align 16 | |
233 | ||
0ecb606c JJ |
// First coefficient table: P11 down to P0, one 16-byte row per coefficient
// (two data8 words: significand, then sign/exponent), in the order atan reads
// them with post-incrementing ldfe loads through EXP_AD_P1.
// With the trailing pad row the object has 13 rows = 0xd0 bytes, which is the
// offset atan adds to EXP_AD_P1 to form EXP_AD_P2.
234 | LOCAL_OBJECT_START(atan2_tb1) |
235 | data8 0xA21922DC45605EA1 , 0x00003FFA // P11 | |
236 | data8 0xB199DD6D2675C40F , 0x0000BFFA // P10 | |
237 | data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9 | |
238 | data8 0xD78F28FC2A592781 , 0x0000BFFA // P8 | |
239 | data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7 | |
240 | data8 0x88887EBB209E3543 , 0x0000BFFB // P6 | |
241 | data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5 | |
242 | data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4 | |
243 | data8 0xE38E38E320A8A098 , 0x00003FFB // P3 | |
244 | data8 0x9249249247E37913 , 0x0000BFFC // P2 | |
245 | data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1 | |
246 | data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0 | |
247 | data8 0x0000000000000000 , 0x00000000 // pad to avoid bank conflict | |
248 | LOCAL_OBJECT_END(atan2_tb1) | |
249 | ||
// Second coefficient table, read through EXP_AD_P2 in exactly this order:
// P21 down to P12, then P22, then the constant pi/2.
// NOTE(review): atan reaches this table as atan2_tb1 + 0xd0, so it presumably
// must stay immediately after atan2_tb1 in the section -- confirm before
// moving or resizing either table.
250 | LOCAL_OBJECT_START(atan2_tb2) | |
251 | data8 0xCE585A259BD8374C , 0x00003FF0 // P21 | |
252 | data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20 | |
253 | data8 0x9D3436AABE218776 , 0x00003FF5 // P19 | |
254 | data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18 | |
255 | data8 0xF396268151CFB11C , 0x00003FF7 // P17 | |
256 | data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16 | |
257 | data8 0xA2270D30A90AA220 , 0x00003FF9 // P15 | |
258 | data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14 | |
259 | data8 0x80D601879218B53A , 0x00003FFA // P13 | |
260 | data8 0x9297B23CCFFB291F , 0x0000BFFA // P12 | |
261 | data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22 | |
262 | data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2 | |
263 | LOCAL_OBJECT_END(atan2_tb2) | |
8da2915d UD |
264 | |
265 | ||
8da2915d | 266 | |
8da2915d | 267 | |
0ecb606c JJ |
268 | .section .text |
// double atan(double Y)
// The argument is read from f8 (atan2_Y) and the result is written to f8.
// X is the register alias for f1, so the atan2-style computation below runs
// with X = 1.0.  Early exits: NaN (p10), infinity (p9), zero (p8).
// Main paths: p6 = swap (1.0 < Y*Y), p7 = no swap (1.0 >= Y*Y).
269 | GLOBAL_LIBM_ENTRY(atan) | |
8da2915d | 270 |
// Start the reciprocal approximation u1_Y ~ 1/Y, form the constant 2.0 and
// fetch the linkage-table slot that holds the address of atan2_tb1.
0ecb606c JJ |
271 | { .mfi |
272 | nop.m 999 | |
273 | frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y | |
274 | nop.i 999 | |
8da2915d | 275 | }
0ecb606c JJ |
276 | { .mfi |
277 | addl EXP_AD_P1 = @ltoff(atan2_tb1), gp | |
278 | fma.s1 atan2_two = f1,f1,f1 | |
279 | nop.i 999 | |
8da2915d | 280 | ;;
8da2915d | 281 | }
8da2915d | 282 |
// Load the table address; u1_X ~ 1/X; ysq = Y*Y.
0ecb606c JJ |
283 | { .mfi |
284 | ld8 EXP_AD_P1 = [EXP_AD_P1] | |
285 | frcpa.s1 atan2_u1_X,p6 = f1,atan2_X | |
286 | nop.i 999 | |
8da2915d | 287 | }
0ecb606c JJ |
288 | { .mfi |
289 | nop.m 999 | |
290 | fma.s1 atan2_ysq = atan2_Y,atan2_Y,f0 | |
291 | nop.i 999 | |
8da2915d UD |
292 | } |
293 | ;; | |
294 | ||
// Second table pointer = first pointer + 0xd0 (13 rows of 16 bytes).
// sgnY takes the sign of Y and the exponent/significand of f1, i.e. +-1.0.
0ecb606c JJ |
295 | { .mfi |
296 | add EXP_AD_P2 = 0xd0,EXP_AD_P1 | |
297 | fmerge.s atan2_sgnY = atan2_Y,f1 | |
298 | nop.i 999 | |
8da2915d | 299 | }
8da2915d | 300 | ;;
8da2915d UD |
301 | |
302 | ||
// From here on every group issues two ldfe loads, each post-incrementing its
// pointer by 16, interleaved with the floating-point work.
303 | { .mfi | |
0ecb606c JJ |
304 | ldfe atan2_P11 = [EXP_AD_P1],16 |
305 | fclass.m p10,p0 = atan2_Y, 0xc3 // Test for y=nan | |
306 | nop.i 999 | |
8da2915d UD |
307 | } |
308 | { .mfi | |
0ecb606c JJ |
309 | ldfe atan2_P21 = [EXP_AD_P2],16 |
310 | nop.f 999 | |
311 | nop.i 999 | |
312 | ;; | |
8da2915d UD |
313 | } |
314 | ||
315 | ||
// B1Y = 2 - u1_Y*Y ; wp_Y = u1_Y^2
316 | { .mfi | |
0ecb606c JJ |
317 | ldfe atan2_P10 = [EXP_AD_P1],16 |
318 | fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two | |
319 | nop.i 999 | |
8da2915d UD |
320 | } |
321 | { .mfi | |
0ecb606c JJ |
322 | ldfe atan2_P20 = [EXP_AD_P2],16 |
323 | fma.s1 atan2_wp_Y = atan2_u1_Y, atan2_u1_Y, f0 | |
324 | nop.i 999 | |
8da2915d UD |
325 | ;; |
326 | } | |
327 | ||
// z1_X = u1_X*Y ; B1X = 2 - u1_X*X
328 | { .mfi | |
0ecb606c JJ |
329 | ldfe atan2_P9 = [EXP_AD_P1],16 |
330 | fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0 | |
331 | nop.i 999 | |
8da2915d UD |
332 | } |
333 | { .mfi | |
0ecb606c JJ |
334 | ldfe atan2_P19 = [EXP_AD_P2],16 |
335 | fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two | |
336 | nop.i 999 | |
8da2915d | 337 | }
0ecb606c | 338 | ;;
8da2915d UD |
339 | |
// z2_X = u1_X*Y^2.  NaN input: return Y*X rounded to double and exit.
340 | { .mfi | |
0ecb606c JJ |
341 | ldfe atan2_P8 = [EXP_AD_P1],16 |
342 | fma.s1 atan2_z2_X = atan2_u1_X, atan2_ysq, f0 | |
343 | nop.i 999 | |
8da2915d | 344 | }
0ecb606c JJ |
345 | { .mfb |
346 | ldfe atan2_P18 = [EXP_AD_P2],16 | |
347 | (p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quieted y |
348 | (p10) br.ret.spnt b0 // Exit if y=nan | |
8da2915d | 349 | }
0ecb606c | 350 | ;;
8da2915d | 351 |
0ecb606c JJ |
352 | // p6 true if swap, means |y| > 1.0 or ysq > 1.0 |
353 | // p7 true if no swap, means 1.0 >= |y| or 1.0 >= ysq | |
8da2915d | 354 | { .mfi
0ecb606c JJ |
355 | ldfe atan2_P7 = [EXP_AD_P1],16 |
356 | fcmp.ge.s1 p7,p6 = f1, atan2_ysq | |
357 | nop.i 999 | |
8da2915d | 358 | }
0ecb606c JJ |
359 | { .mmf |
360 | ldfe atan2_P17 = [EXP_AD_P2],16 | |
361 | nop.m 999 | |
362 | nop.f 999 | |
8da2915d | 363 | }
0ecb606c | 364 | ;;
8da2915d UD |
365 | |
// E = u1_Y*B1Y + Y (overwritten further down when p7 is set); B1sq_Y = B1Y^2
366 | { .mfi | |
0ecb606c JJ |
367 | ldfe atan2_P6 = [EXP_AD_P1],16 |
368 | fma.s1 atan2_E = atan2_u1_Y, atan2_B1Y, atan2_Y | |
369 | nop.i 999 | |
8da2915d UD |
370 | } |
371 | { .mfi | |
0ecb606c JJ |
372 | ldfe atan2_P16 = [EXP_AD_P2],16 |
373 | fma.s1 atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0 | |
374 | nop.i 999 | |
375 | ;; | |
8da2915d UD |
376 | } |
377 | ||
// No-swap path: wp_X = z1_X^2 ; B1sq_X = B1X^2
378 | { .mfi | |
0ecb606c JJ |
379 | ldfe atan2_P5 = [EXP_AD_P1],16 |
380 | (p7) fma.s1 atan2_wp_X = atan2_z1_X, atan2_z1_X, f0 | |
381 | nop.i 999 | |
8da2915d UD |
382 | } |
383 | { .mfi | |
0ecb606c JJ |
384 | ldfe atan2_P15 = [EXP_AD_P2],16 |
385 | (p7) fma.s1 atan2_B1sq_X = atan2_B1X, atan2_B1X, f0 | |
386 | nop.i 999 | |
8da2915d UD |
387 | ;; |
388 | } | |
389 | ||
// Swap path: z = u1_Y*B1Y.  No-swap path: E = z2_X*B1X + X.
390 | { .mfi | |
0ecb606c JJ |
391 | ldfe atan2_P4 = [EXP_AD_P1],16 |
392 | (p6) fma.s1 atan2_z = atan2_u1_Y, atan2_B1Y, f0 | |
393 | nop.i 999 | |
8da2915d UD |
394 | } |
395 | { .mfi | |
0ecb606c JJ |
396 | ldfe atan2_P14 = [EXP_AD_P2],16 |
397 | (p7) fma.s1 atan2_E = atan2_z2_X, atan2_B1X, atan2_X | |
398 | nop.i 999 | |
399 | ;; | |
8da2915d UD |
400 | } |
401 | ||
402 | ||
// The s0 compare exists only for its status-flag side effects (p14/p15 are
// not read anywhere in this routine).  No-swap path: z = z1_X*B1X.
403 | { .mfi | |
0ecb606c JJ |
404 | ldfe atan2_P3 = [EXP_AD_P1],16 |
405 | fcmp.eq.s0 p14,p15=atan2_X,atan2_Y // Dummy for denorm and invalid | |
406 | nop.i 999 | |
8da2915d | 407 | }
0ecb606c JJ |
408 | { .mmf |
409 | ldfe atan2_P13 = [EXP_AD_P2],16 | |
410 | nop.m 999 | |
411 | (p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0 | |
412 | ;; | |
8da2915d UD |
413 | } |
414 | ||
// Swap path: w = wp_Y*B1sq_Y, i.e. (u1_Y*B1Y)^2.
8da2915d | 415 | { .mfi
0ecb606c JJ |
416 | ldfe atan2_P2 = [EXP_AD_P1],16 |
417 | (p6) fma.s1 atan2_w = atan2_wp_Y, atan2_B1sq_Y,f0 | |
418 | nop.i 999 | |
8da2915d | 419 | }
0ecb606c JJ |
420 | { .mlx |
421 | ldfe atan2_P12 = [EXP_AD_P2],16 | |
422 | movl rsig_near_one = 0x8000000000000001 // signif near 1.0 | |
423 | ;; | |
8da2915d UD |
424 | } |
425 | ||
// p9 is set when Y is infinite.  No-swap path: w = wp_X*B1sq_X.
8da2915d | 426 | { .mfi
0ecb606c JJ |
427 | ldfe atan2_P1 = [EXP_AD_P1],16 |
428 | fclass.m p9,p0 = atan2_Y, 0x23 // test if y inf | |
429 | nop.i 999 | |
8da2915d UD |
430 | } |
431 | { .mfi | |
0ecb606c JJ |
432 | ldfe atan2_P22 = [EXP_AD_P2],16 |
433 | (p7) fma.s1 atan2_w = atan2_wp_X, atan2_B1sq_X,f0 | |
434 | nop.i 999 | |
435 | ;; | |
8da2915d UD |
436 | } |
437 | ||
// F ~ 1/E (reciprocal approximation).  Swap path: gV = X - Y*z.
438 | { .mfi | |
0ecb606c JJ |
439 | ldfe atan2_P0 = [EXP_AD_P1],16 |
440 | frcpa.s1 atan2_F,p0 = f1, atan2_E | |
441 | nop.i 999 | |
8da2915d UD |
442 | } |
443 | { .mfi | |
0ecb606c JJ |
444 | ldfe atan2_pi_by_2 = [EXP_AD_P2],16 |
445 | (p6) fnma.s1 atan2_gV = atan2_Y, atan2_z, atan2_X | |
446 | nop.i 999 | |
447 | ;; | |
8da2915d UD |
448 | } |
449 | ||
// Move the near-one significand into an FP register.
// No-swap path: gV = Y - X*z.  Infinite Y: return sgnY*pi/2 and exit.
450 | { .mfi | |
0ecb606c JJ |
451 | setf.sig atan2_sig_near_one = rsig_near_one |
452 | (p7) fnma.s1 atan2_gV = atan2_X, atan2_z, atan2_Y | |
453 | nop.i 999 | |
8da2915d | 454 | }
0ecb606c JJ |
455 | { .mfb |
456 | nop.m 999 | |
457 | (p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // +-pi/2 if y inf | |
458 | (p9) br.ret.spnt b0 // exit if y inf, result is +-pi/2 | |
459 | ;; | |
8da2915d UD |
460 | } |
461 | ||
// Polynomial p(w): first form the linear pairs P(2k+1)*w + P(2k).
// V* accumulate P0..P11, W* accumulate P12..P22.
// V13 = w*P11 + P10 ; W11 = w*P21 + P20
8da2915d | 462 | { .mfi
0ecb606c JJ |
463 | nop.m 999 |
464 | fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10 | |
465 | nop.i 999 | |
8da2915d UD |
466 | } |
467 | { .mfi | |
0ecb606c JJ |
468 | nop.m 999 |
469 | fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20 | |
470 | nop.i 999 | |
471 | ;; | |
8da2915d UD |
472 | } |
473 | ||
// V11 = w*P9 + P8 ; V12 = w^2
8da2915d | 474 | { .mfi
0ecb606c JJ |
475 | nop.m 999 |
476 | fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8 | |
477 | nop.i 999 | |
8da2915d UD |
478 | } |
479 | { .mfi | |
0ecb606c JJ |
480 | nop.m 999 |
481 | fma.s1 atan2_V12 = atan2_w, atan2_w, f0 | |
482 | nop.i 999 | |
483 | ;; | |
8da2915d UD |
484 | } |
485 | ||
// V8 = w*P7 + P6 ; W8 = w*P19 + P18
8da2915d | 486 | { .mfi
0ecb606c JJ |
487 | nop.m 999 |
488 | fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6 | |
489 | nop.i 999 | |
8da2915d UD |
490 | } |
491 | { .mfi | |
0ecb606c JJ |
492 | nop.m 999 |
493 | fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18 | |
494 | nop.i 999 | |
495 | ;; | |
8da2915d UD |
496 | } |
497 | ||
// alpha = 1 - E*F ; alpha_1 = 2 - E*F
8da2915d | 498 | { .mfi
0ecb606c JJ |
499 | nop.m 999 |
500 | fnma.s1 atan2_alpha = atan2_E, atan2_F, f1 | |
501 | nop.i 999 | |
8da2915d UD |
502 | } |
503 | { .mfi | |
0ecb606c JJ |
504 | nop.m 999 |
505 | fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two | |
506 | nop.i 999 | |
507 | ;; | |
8da2915d UD |
508 | } |
509 | ||
510 | ||
// V7 = w*P5 + P4 ; W7 = w*P17 + P16
511 | { .mfi | |
0ecb606c JJ |
512 | nop.m 999 |
513 | fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4 | |
514 | nop.i 999 | |
8da2915d UD |
515 | } |
516 | { .mfi | |
0ecb606c JJ |
517 | nop.m 999 |
518 | fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16 | |
519 | nop.i 999 | |
8da2915d UD |
520 | ;; |
521 | } | |
522 | ||
// V4 = w*P3 + P2 ; W4 = w*P15 + P14
8da2915d | 523 | { .mfi
0ecb606c JJ |
524 | nop.m 999 |
525 | fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2 | |
526 | nop.i 999 | |
8da2915d UD |
527 | } |
528 | { .mfi | |
0ecb606c JJ |
529 | nop.m 999 |
530 | fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14 | |
531 | nop.i 999 | |
532 | ;; | |
8da2915d UD |
533 | } |
534 | ||
// V3 = w*P1 + P0 ; W3 = w*P13 + P12
8da2915d | 535 | { .mfi
0ecb606c JJ |
536 | nop.m 999 |
537 | fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0 | |
538 | nop.i 999 | |
8da2915d UD |
539 | } |
540 | { .mfi | |
0ecb606c JJ |
541 | nop.m 999 |
542 | fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12 | |
543 | nop.i 999 | |
544 | ;; | |
8da2915d UD |
545 | } |
546 | ||
// V10 = V12*V13 + V11 ; gVF = gV*F
8da2915d | 547 | { .mfi
0ecb606c JJ |
548 | nop.m 999 |
549 | fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11 | |
550 | nop.i 999 | |
8da2915d UD |
551 | } |
552 | { .mfi | |
0ecb606c JJ |
553 | nop.m 999 |
554 | fma.s1 atan2_gVF = atan2_gV, atan2_F, f0 | |
555 | nop.i 999 | |
556 | ;; | |
8da2915d UD |
557 | } |
558 | ||
// alpha_sq = alpha^2 ; Cp = alpha*alpha_1 + 1
8da2915d | 559 | { .mfi
0ecb606c JJ |
560 | nop.m 999 |
561 | fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0 | |
562 | nop.i 999 | |
8da2915d UD |
563 | } |
564 | { .mfi | |
0ecb606c JJ |
565 | nop.m 999 |
566 | fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1 | |
567 | nop.i 999 | |
568 | ;; | |
8da2915d UD |
569 | } |
570 | ||
// V9 = V12^2 (w^4) ; W10 = V12*P22 + W11
8da2915d | 571 | { .mfi
0ecb606c JJ |
572 | nop.m 999 |
573 | fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0 | |
574 | nop.i 999 | |
8da2915d UD |
575 | } |
576 | { .mfi | |
0ecb606c JJ |
577 | nop.m 999 |
578 | fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11 | |
579 | nop.i 999 | |
580 | ;; | |
8da2915d UD |
581 | } |
582 | ||
// V6 = V12*V8 + V7 ; W6 = V12*W8 + W7
583 | { .mfi | |
0ecb606c JJ |
584 | nop.m 999 |
585 | fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7 | |
586 | nop.i 999 | |
8da2915d UD |
587 | } |
588 | { .mfi | |
0ecb606c JJ |
589 | nop.m 999 |
590 | fma.s1 atan2_W6 = atan2_V12, atan2_W8 , atan2_W7 | |
591 | nop.i 999 | |
592 | ;; | |
8da2915d UD |
593 | } |
594 | ||
// V2 = V12*V4 + V3 ; W2 = V12*W4 + W3
595 | { .mfi | |
0ecb606c JJ |
596 | nop.m 999 |
597 | fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3 | |
598 | nop.i 999 | |
8da2915d UD |
599 | } |
600 | { .mfi | |
0ecb606c JJ |
601 | nop.m 999 |
602 | fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3 | |
603 | nop.i 999 | |
604 | ;; | |
8da2915d UD |
605 | } |
606 | ||
// alpha_cub = alpha^3 ; C = gVF*Cp
8da2915d | 607 | { .mfi
0ecb606c JJ |
608 | nop.m 999 |
609 | fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0 | |
610 | nop.i 999 | |
8da2915d UD |
611 | } |
612 | { .mfi | |
0ecb606c JJ |
613 | nop.m 999 |
614 | fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0 | |
615 | nop.i 999 | |
616 | ;; | |
8da2915d UD |
617 | } |
618 | ||
// W12 = V9^2 (w^8); multiplied by V9 again below to give w^12
8da2915d | 619 | { .mfi
0ecb606c JJ |
620 | nop.m 999 |
621 | fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0 | |
622 | nop.i 999 | |
623 | ;; | |
8da2915d UD |
624 | } |
625 | ||
// V5 = V9*V10 + V6 ; W5 = V9*W10 + W6
8da2915d | 626 | { .mfi
0ecb606c JJ |
627 | nop.m 999 |
628 | fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6 | |
629 | nop.i 999 | |
8da2915d UD |
630 | } |
631 | { .mfi | |
0ecb606c JJ |
632 | nop.m 999 |
633 | fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6 | |
634 | nop.i 999 | |
635 | ;; | |
8da2915d UD |
636 | } |
637 | ||
// p8 is set when Y is zero.  d = alpha_cub*C + C
8da2915d | 638 | { .mfi
0ecb606c JJ |
639 | nop.m 999 |
640 | fclass.m p8,p0 = atan2_Y, 0x07 // Test for y=0 | |
641 | nop.i 999 | |
8da2915d UD |
642 | } |
643 | { .mfi | |
0ecb606c JJ |
644 | nop.m 999 |
645 | fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C | |
646 | nop.i 999 | |
8da2915d | 647 | }
0ecb606c | 648 | ;;
8da2915d UD |
649 | |
// W12 = V9*W12 (w^12)
650 | { .mfi | |
0ecb606c JJ |
651 | nop.m 999 |
652 | fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0 | |
653 | nop.i 999 | |
8da2915d | 654 | }
0ecb606c | 655 | ;;
8da2915d UD |
656 | |
// V1 = V9*V5 + V2 (terms P0..P11) ; W1 = V9*W5 + W2 (terms P12..P22)
657 | { .mfi | |
0ecb606c JJ |
658 | nop.m 999 |
659 | fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2 | |
660 | nop.i 999 | |
8da2915d UD |
661 | } |
662 | { .mfi | |
0ecb606c JJ |
663 | nop.m 999 |
664 | fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2 | |
665 | nop.i 999 | |
666 | ;; | |
8da2915d UD |
667 | } |
668 | ||
// Zero input: return a zero carrying the sign of Y and exit.
// Otherwise zcub = z*w.
8da2915d | 669 | { .mfi
0ecb606c JJ |
670 | nop.m 999 |
671 | (p8) fmerge.s f8 = atan2_sgnY, f0 // +-0 if y=0 | |
672 | nop.i 999 | |
8da2915d | 673 | }
0ecb606c JJ |
674 | { .mfb |
675 | nop.m 999 | |
676 | fma.s1 atan2_zcub = atan2_z, atan2_w, f0 | |
677 | (p8) br.ret.spnt b0 // Exit if y=0 | |
678 | ;; | |
8da2915d UD |
679 | } |
680 | ||
// pd = P0*d ; dsq = d^2
8da2915d | 681 | { .mfi
0ecb606c JJ |
682 | nop.m 999 |
683 | fma.s1 atan2_pd = atan2_P0, atan2_d, f0 | |
684 | nop.i 999 | |
8da2915d UD |
685 | } |
686 | { .mfi | |
0ecb606c JJ |
687 | nop.m 999 |
688 | fma.s1 atan2_dsq = atan2_d, atan2_d, f0 | |
689 | nop.i 999 | |
690 | ;; | |
8da2915d UD |
691 | } |
692 | ||
693 | ||
// near_one: sign and exponent of f1 combined with the significand
// 0x8000000000000001.  Pp = W12*W1 + V1 is the full polynomial p(w).
694 | { .mfi | |
0ecb606c JJ |
695 | nop.m 999 |
696 | fmerge.se atan2_near_one = f1, atan2_sig_near_one // Const ~1.0 | |
697 | nop.i 999 | |
8da2915d UD |
698 | } |
699 | { .mfi | |
0ecb606c JJ |
700 | nop.m 999 |
701 | fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1 | |
702 | nop.i 999 | |
703 | ;; | |
8da2915d UD |
704 | } |
705 | ||
// sgn_pi_by_2 = pi/2 * sgnY (written over the pi/2 constant, same register).
// A_lo = pd*dsq + d = d + P0*d^3
8da2915d | 706 | { .mfi
0ecb606c JJ |
707 | nop.m 999 |
708 | fma.s1 atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0 | |
709 | nop.i 999 | |
8da2915d UD |
710 | } |
711 | { .mfi | |
0ecb606c JJ |
712 | nop.m 999 |
713 | fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d | |
714 | nop.i 999 | |
715 | ;; | |
8da2915d UD |
716 | } |
717 | ||
718 | ||
// A_hi = zcub*Pp + z
719 | { .mfi | |
0ecb606c JJ |
720 | nop.m 999 |
721 | fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z | |
722 | nop.i 999 | |
723 | ;; | |
8da2915d UD |
724 | } |
725 | ||
726 | ||
// Swap path: keep A = A_hi + A_lo for the final subtraction from +-pi/2.
727 | { .mfi | |
0ecb606c JJ |
728 | nop.m 999 |
729 | (p6) fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo | |
730 | nop.i 999 | |
8da2915d | 731 | }
0ecb606c | 732 | // For |Y| <= |X| and X > 0, result is A_hi + A_lo
8da2915d | 733 | { .mfi
0ecb606c JJ |
734 | nop.m 999 |
735 | (p7) fma.d.s0 f8 = atan2_A_hi, f1, atan2_A_lo | |
736 | nop.i 999 | |
737 | ;; | |
8da2915d UD |
738 | } |
739 | ||
0ecb606c JJ |
740 | // For |Y| > |X|, result is +- pi/2 - (A_hi + A_lo) |
741 | // We perturb A by multiplying by 1.0+1ulp as we produce the result | |
742 | // in order to get symmetrically rounded results in directed rounding modes. | |
743 | // If we don't do this, there are a few cases where the trailing 11 bits of | |
744 | // the significand of the result, before converting to double, are zero. These | |
745 | // cases do not round symmetrically in round to +infinity or round to -infinity. | |
// Swap path result: f8 = sgn_pi_by_2 - A*near_one; both paths return here.
8da2915d | 746 | { .mfb
0ecb606c JJ |
747 | nop.m 999 |
748 | (p6) fnma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi_by_2 | |
749 | br.ret.sptk b0 | |
750 | ;; | |
8da2915d UD |
751 | } |
752 | ||
0ecb606c | 753 | GLOBAL_LIBM_END(atan)