]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | .file "asinf.s" |
2 | ||
a334319f | 3 | // Copyright (C) 2000, 2001, Intel Corporation |
8da2915d UD |
4 | // All rights reserved. |
5 | // | |
a334319f UD |
6 | // Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, |
7 | // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. | |
8da2915d | 8 | // |
aeb25823 AJ |
9 | // Redistribution and use in source and binary forms, with or without |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
a334319f | 23 | // |
8da2915d UD |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code, and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
a334319f | 38 | // http://developer.intel.com/opensource. |
8da2915d UD |
39 | |
40 | // History | |
41 | //============================================================== | |
a334319f UD |
42 | // 2/02/00 Initial revision |
43 | // 6/28/00 Improved speed | |
44 | // 6/31/00 Changed register allocation because of some duplicate macros | |
8da2915d | 45 | // moved nan exit bundle up to gain a cycle. |
a334319f UD |
46 | // 8/08/00 Improved speed by avoiding SIR flush. |
47 | // 8/15/00 Bundle added after call to __libm_error_support to properly | |
8da2915d | 48 | // set [the previously overwritten] GR_Parameter_RESULT. |
a334319f | 49 | // 8/17/00 Changed predicate register macro-usage to direct predicate |
8da2915d UD |
50 | // names due to an assembler bug. |
51 | // 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal. | |
52 | ||
53 | // Description | |
54 | //========================================= | |
55 | // The asinf function computes the arc sine of x in the range [-pi/2,+pi/2]. | |
56 | // A domain error occurs for arguments not in the range [-1,+1]. | |
57 | // asinf(+-0) returns +-0 | |
58 | // asinf(x) returns a Nan and raises the invalid exception for |x| >1 | |
59 | ||
60 | // The acosf function returns the arc cosine in the range [0, +pi] radians. | |
61 | // A domain error occurs for arguments not in the range [-1,+1]. | |
62 | // acosf(1) returns +0 | |
63 | // acosf(x) returns a Nan and raises the invalid exception for |x| >1 | |
64 | ||
65 | ||
66 | // |x| <= sqrt(2)/2. get Ax and Bx | |
67 | ||
68 | // poly_p1 = x p1 | |
69 | // poly_p3 = x2 p4 + p3 | |
70 | // poly_p1 = x2 (poly_p1) + x = x2(x p1) + x | |
71 | // poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2 | |
72 | ||
73 | // poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x | |
74 | // = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x | |
75 | ||
76 | // poly_p7 = x2 p8 + p7 | |
77 | // poly_p5 = x2 p6 + p5 | |
78 | ||
79 | // poly_p7 = x4 p9 + (poly_p7) | |
80 | // poly_p7 = x4 p9 + (x2 p8 + p7) | |
81 | // poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5 | |
82 | ||
83 | // answer1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x | |
84 | // = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x | |
85 | ||
86 | ||
87 | ||
88 | // |x| > sqrt(2)/2 | |
89 | ||
90 | // Get z = sqrt(1-x2) | |
91 | ||
92 | // Get polynomial in t = 1-x2 | |
93 | ||
94 | // t2 = t t | |
95 | // t4 = t2 t2 | |
96 | ||
97 | // poly_p4 = t p5 + p4 | |
98 | // poly_p1 = t p1 + 1 | |
99 | ||
100 | // poly_p6 = t p7 + p6 | |
101 | // poly_p2 = t p3 + p2 | |
102 | ||
103 | // poly_p8 = t p9 + p8 | |
104 | ||
105 | // poly_p4 = t2 poly_p6 + poly_p4 | |
106 | // = t2 (t p7 + p6) + (t p5 + p4) | |
107 | ||
108 | // poly_p2 = t2 poly_p2 + poly_p1 | |
109 | // = t2 (t p3 + p2) + (t p1 + 1) | |
110 | ||
111 | // poly_p4 = t4 poly_p8 + poly_p4 | |
112 | // = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)) | |
113 | ||
114 | // P(t) = poly_p2 + t4 poly_p4 | |
115 | // = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))) | |
116 | // = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4 | |
117 | ||
118 | ||
119 | // answer2 = - sign(x) z P(t) + (sign(x) pi/2) | |
120 | // | |
121 | ||
a334319f | 122 | #include "libm_support.h" |
8da2915d UD |
123 | |
124 | // Assembly macros | |
125 | //========================================= | |
126 | ||
127 | // predicate registers | |
128 | //asinf_pred_LEsqrt2by2 = p7 | |
129 | //asinf_pred_GTsqrt2by2 = p8 | |
130 | ||
131 | // integer registers | |
// Symbolic names for the stacked general registers r33-r44.  asinf's
// "alloc r32 = ar.pfs,1,8,4,0" creates a frame with 1 input (r32),
// 8 locals (r33-r40) and 4 outputs (r41-r44), so:
//   ASINF_Addr1/2, ASINF_GR_*   - locals used for the coefficient-table
//                                 addresses and for the bit patterns of the
//                                 single-precision constants 0.5, 1.5, 2.5
//   GR_SAVE_B0/PFS/GP           - locals that hold b0, ar.pfs and gp while
//                                 __libm_error_region calls out
//   GR_Parameter_X/Y/RESULT/TAG - the four output registers, set up in
//                                 __libm_error_region (TAG is set in asinf)
132 | ASINF_Addr1 = r33 | |
133 | ASINF_Addr2 = r34 | |
134 | ASINF_GR_1by2 = r35 | |
135 | ||
136 | ASINF_GR_3by2 = r36 | |
137 | ASINF_GR_5by2 = r37 | |
138 | ||
139 | GR_SAVE_B0 = r38 | |
140 | GR_SAVE_PFS = r39 | |
141 | GR_SAVE_GP = r40 | |
142 | ||
143 | GR_Parameter_X = r41 | |
144 | GR_Parameter_Y = r42 | |
145 | GR_Parameter_RESULT = r43 | |
146 | GR_Parameter_TAG = r44 | |
147 | ||
148 | // floating point registers (f32 and up; f8 carries the argument and the result) | |
149 | ||
150 | asinf_y = f32 | |
151 | asinf_abs_x = f33 | |
// NOTE(review): asinf_x2 is assigned here (f34) and again further down (f48).
// Presumably the later assignment is the one in effect where asinf_x2 is
// used, which would leave f34 unused - confirm with the assembler in use.
152 | asinf_x2 = f34 | |
153 | asinf_sgn_x = f35 | |
154 | ||
// Constants 0.5, 1.5, 2.5 (loaded with setf.s) and the polynomial
// coefficients P1..P9 (loaded from the two coefficient tables).
155 | asinf_1by2 = f36 | |
156 | asinf_3by2 = f37 | |
157 | asinf_5by2 = f38 | |
158 | asinf_coeff_P3 = f39 | |
159 | asinf_coeff_P8 = f40 | |
160 | ||
161 | asinf_coeff_P1 = f41 | |
162 | asinf_coeff_P4 = f42 | |
163 | asinf_coeff_P5 = f43 | |
164 | asinf_coeff_P2 = f44 | |
165 | asinf_coeff_P7 = f45 | |
166 | ||
167 | asinf_coeff_P6 = f46 | |
168 | asinf_coeff_P9 = f47 | |
// Powers of x used by the |x| <= sqrt(2)/2 path.
169 | asinf_x2 = f48 | |
170 | asinf_x3 = f49 | |
171 | asinf_x4 = f50 | |
172 | ||
173 | asinf_x8 = f51 | |
174 | asinf_x5 = f52 | |
175 | asinf_const_piby2 = f53 | |
176 | asinf_const_sqrt2by2 = f54 | |
177 | asinf_x11 = f55 | |
178 | ||
// Partial sums of the polynomial in x (|x| <= sqrt(2)/2 path).
179 | asinf_poly_p1 = f56 | |
180 | asinf_poly_p3 = f57 | |
181 | asinf_sinf1 = f58 | |
182 | asinf_poly_p2 = f59 | |
183 | asinf_poly_Ax = f60 | |
184 | ||
185 | asinf_poly_p7 = f61 | |
186 | asinf_poly_p5 = f62 | |
187 | asinf_sgnx_t4 = f63 | |
188 | asinf_poly_Bx = f64 | |
// t = 1 - x*x, and the intermediates used to form z = sqrt(t).
189 | asinf_t = f65 | |
190 | ||
191 | asinf_yby2 = f66 | |
192 | asinf_B = f67 | |
193 | asinf_B2 = f68 | |
194 | asinf_Az = f69 | |
195 | asinf_dz = f70 | |
196 | ||
197 | asinf_Sz = f71 | |
198 | asinf_d2z = f72 | |
199 | asinf_Fz = f73 | |
200 | asinf_z = f74 | |
201 | asinf_sgnx_z = f75 | |
202 | ||
// Powers of t and partial sums of the polynomial in t (|x| > sqrt(2)/2 path).
203 | asinf_t2 = f76 | |
204 | asinf_2poly_p4 = f77 | |
205 | asinf_2poly_p6 = f78 | |
206 | asinf_2poly_p1 = f79 | |
207 | asinf_2poly_p2 = f80 | |
208 | ||
209 | asinf_2poly_p8 = f81 | |
210 | asinf_t4 = f82 | |
211 | asinf_Pt = f83 | |
212 | asinf_sgnx_2poly_p2 = f84 | |
213 | asinf_sgn_x_piby2 = f85 | |
214 | ||
215 | asinf_poly_p7a = f86 | |
216 | asinf_2poly_p4a = f87 | |
217 | asinf_2poly_p4b = f88 | |
218 | asinf_2poly_p2a = f89 | |
219 | asinf_poly_p1a = f90 | |
220 | ||
221 | ||
222 | ||
223 | ||
224 | ||
225 | // Data tables | |
226 | //============================================================== | |
227 | ||
a334319f UD |
// Constant tables for asinf.  Every entry is an 8-byte value; asinf reads
// them two at a time with ldfpd (and the last entry of table 2 with ldfd),
// so the entry ORDER below is dictated by those paired loads and is not the
// numerical order of the coefficients.  Keep the order in step with the
// loads in asinf when editing.
228 | #ifdef _LIBC |
229 | .rodata | |
230 | #else | |
231 | .data | |
232 | #endif | |
8da2915d UD |
233 | |
234 | .align 16 | |
235 | ||
// Table 1, read through ASINF_Addr1 as the pairs (P1,P4), (P7,P6),
// (P9,sqrt(2)/2).
a334319f UD |
236 | asinf_coeff_1_table: |
237 | ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object) | |
8da2915d UD |
238 | data8 0x3FC5555607DCF816 // P1 |
239 | data8 0x3F9CF81AD9BAB2C6 // P4 | |
240 | data8 0x3FC59E0975074DF3 // P7 | |
241 | data8 0xBFA6F4CC2780AA1D // P6 | |
242 | data8 0x3FC2DD45292E93CB // P9 | |
243 | data8 0x3fe6a09e667f3bcd // sqrt(2)/2 | |
a334319f | 244 | ASM_SIZE_DIRECTIVE(asinf_coeff_1_table) |
8da2915d | 245 | |
// Table 2, read through ASINF_Addr2 as the pairs (P3,P8), (P5,P2), followed
// by the single value pi/2.
a334319f UD |
246 | asinf_coeff_2_table: |
247 | ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object) | |
8da2915d UD |
248 | data8 0x3FA6F108E31EFBA6 // P3 |
249 | data8 0xBFCA31BF175D82A0 // P8 | |
250 | data8 0x3FA30C0337F6418B // P5 | |
251 | data8 0x3FB332C9266CB1F9 // P2 | |
252 | data8 0x3ff921fb54442d18 // pi_by_2 | |
a334319f | 253 | ASM_SIZE_DIRECTIVE(asinf_coeff_2_table) |
8da2915d UD |
254 | |
255 | ||
a334319f UD |
256 | .align 32 |
257 | .global asinf | |
258 | ||
8da2915d | 259 | .section .text |
a334319f UD |
// asinf - single-precision arc sine.
//   In:   f8 = x
//   Out:  f8 = result
//   Exit paths, in the order they are tested:
//     x is NaN          -> returns x*1 (p8 from fclass)
//     x is +-0          -> returns f8 unchanged (p10 from fclass)
//     |x| == 1          -> L(ASINF_ABS_ONE): sign(x) * pi/2
//     |x| >  1          -> __libm_error_region with GR_Parameter_TAG = 62
//     |x| <= sqrt(2)/2  -> p7: x11*poly_Bx + poly_Ax   (odd polynomial in x)
//     |x| >  sqrt(2)/2  -> p8: sign(x)*pi/2 - z*sign(x)*P(t),
//                              t = 1 - x*x, z = sqrt(t)
//   Terms for both polynomial paths are started before the p7/p8 compare;
//   only the later combining steps are predicated.
260 | .proc asinf |
261 | .align 32 | |
262 | ||
263 | asinf: | |
8da2915d UD |
264 | |
265 | // Load the addresses of the two tables. | |
266 | // Then, load the coefficients and other constants. | |
267 | ||
// Frame: 1 input, 8 locals, 4 outputs.  t = 1 - x*x.  0x3f000000 is the
// single-precision bit pattern of 0.5.
268 | { .mfi | |
269 | alloc r32 = ar.pfs,1,8,4,0 | |
270 | fnma.s1 asinf_t = f8,f8,f1 | |
271 | dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000 | |
272 | } | |
// Linkage-table slots for the two coefficient tables; x2 = x*x.
273 | { .mfi | |
274 | addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp | |
275 | fma.s1 asinf_x2 = f8,f8,f0 | |
276 | addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;; | |
277 | } | |
278 | ||
279 | ||
// abs_x = |x| (sign taken from f1, i.e. +).  0x3fc00000 is 1.5 and
// 0x40200000 is 2.5 as single-precision bit patterns.
280 | { .mfi | |
281 | ld8 ASINF_Addr1 = [ASINF_Addr1] | |
282 | fmerge.s asinf_abs_x = f1,f8 | |
283 | dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 | |
284 | } | |
285 | { .mlx | |
286 | nop.m 999 | |
287 | movl ASINF_GR_5by2 = 0x40200000;; | |
288 | } | |
289 | ||
290 | ||
291 | ||
// sgn_x = 1.0 carrying the sign of x, i.e. +-1.0.
292 | { .mfi | |
293 | setf.s asinf_1by2 = ASINF_GR_1by2 | |
294 | fmerge.s asinf_sgn_x = f8,f1 | |
295 | nop.i 999 | |
296 | } | |
297 | { .mfi | |
298 | ld8 ASINF_Addr2 = [ASINF_Addr2] | |
299 | nop.f 0 | |
300 | nop.i 999;; | |
301 | } | |
302 | ||
303 | ||
// NOTE(review): p11/p12 (x < 0) are not referenced again in this routine.
304 | { .mfi | |
305 | setf.s asinf_5by2 = ASINF_GR_5by2 | |
306 | fcmp.lt.s1 p11,p12 = f8,f0 | |
307 | nop.i 999;; | |
308 | } | |
309 | ||
// p8 = x is a NaN.
310 | { .mmf | |
311 | ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16 | |
312 | setf.s asinf_3by2 = ASINF_GR_3by2 | |
313 | fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan | |
314 | } | |
315 | ||
316 | ||
// t2 = t*t, x4 = x2*x2.
317 | { .mfi | |
318 | ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16 | |
319 | fma.s1 asinf_t2 = asinf_t,asinf_t,f0 | |
320 | nop.i 999 | |
321 | } | |
322 | { .mfi | |
323 | ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16 | |
324 | fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0 | |
325 | nop.i 999;; | |
326 | } | |
327 | ||
328 | ||
// p10 = x is +-0.  x3 = x*x2.
329 | { .mfi | |
330 | ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1] | |
331 | fclass.m.unc p10,p0 = f8, 0x07 //@zero | |
332 | nop.i 999 | |
333 | } | |
334 | { .mfi | |
335 | ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16 | |
336 | fma.s1 asinf_x3 = f8,asinf_x2,f0 | |
337 | nop.i 999;; | |
338 | } | |
339 | ||
340 | ||
// B = initial approximation of 1/sqrt(t), refined further below.
341 | { .mfi | |
342 | ldfd asinf_const_piby2 = [ASINF_Addr2] | |
343 | frsqrta.s1 asinf_B,p0 = asinf_t | |
344 | nop.i 999 | |
345 | } | |
// NaN input: the result is x*1 + 0, then return.
346 | { .mfb | |
347 | nop.m 999 | |
a334319f | 348 | (p8) fma.s f8 = f8,f1,f0 |
8da2915d UD |
349 | (p8) br.ret.spnt b0 ;; // Exit if x=nan |
350 | } | |
351 | ||
352 | ||
// p6 = (|x| == 1).  A zero input returns with f8 untouched, so +-0 is
// returned as +-0.
353 | { .mfb | |
354 | nop.m 999 | |
355 | fcmp.eq.s1 p6,p0 = asinf_abs_x,f1 | |
356 | (p10) br.ret.spnt b0 ;; // Exit if x=0 | |
357 | } | |
358 | ||
// p9 = (|x| > 1): domain error.
359 | { .mfi | |
360 | nop.m 999 | |
361 | fcmp.gt.s1 p9,p0 = asinf_abs_x,f1 | |
362 | nop.i 999;; | |
363 | } | |
364 | ||
// x8 = x4*x4, t4 = t2*t2.
365 | { .mfi | |
366 | nop.m 999 | |
367 | fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0 | |
368 | nop.i 999 | |
369 | } | |
370 | { .mfb | |
371 | nop.m 999 | |
372 | fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0 | |
a334319f | 373 | (p6) br.cond.spnt L(ASINF_ABS_ONE) ;; // Branch if |x|=1 |
8da2915d UD |
374 | } |
375 | ||
// x5 = x2*x3.  yby2 = t/2.  For |x| > 1, tag 62 is placed in
// GR_Parameter_TAG before branching to the error path.
376 | { .mfi | |
377 | nop.m 999 | |
378 | fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0 | |
379 | nop.i 999 | |
380 | } | |
381 | { .mfb | |
382 | (p9) mov GR_Parameter_TAG = 62 | |
383 | fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0 | |
384 | (p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1 | |
385 | } | |
386 | ||
387 | ||
// Az = t*B (first approximation of sqrt(t)), B2 = B*B.
388 | { .mfi | |
389 | nop.m 999 | |
390 | fma.s1 asinf_Az = asinf_t,asinf_B,f0 | |
391 | nop.i 999 | |
392 | } | |
393 | { .mfi | |
394 | nop.m 999 | |
395 | fma.s1 asinf_B2 = asinf_B,asinf_B,f0 | |
396 | nop.i 999;; | |
397 | } | |
398 | ||
// From here to the region compare, each bundle pair starts one term of the
// polynomial in x (names asinf_poly_*) and one term of the polynomial in t
// (names asinf_2poly_*).
// poly_p1 = x*P1            2poly_p1 = P1*t + 1
399 | { .mfi | |
400 | nop.m 999 | |
401 | fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0 | |
402 | nop.i 999 | |
403 | } | |
404 | { .mfi | |
405 | nop.m 999 | |
406 | fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1 | |
407 | nop.i 999;; | |
408 | } | |
409 | ||
// poly_p3 = P4*x2 + P3      2poly_p6 = P7*t + P6
410 | { .mfi | |
411 | nop.m 999 | |
412 | fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3 | |
413 | nop.i 999 | |
414 | } | |
415 | { .mfi | |
416 | nop.m 999 | |
417 | fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6 | |
418 | nop.i 999;; | |
419 | } | |
420 | ||
// poly_p7 = x2*P8 + P7      2poly_p2 = P3*t + P2
421 | { .mfi | |
422 | nop.m 999 | |
423 | fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7 | |
424 | nop.i 999 | |
425 | } | |
426 | { .mfi | |
427 | nop.m 999 | |
428 | fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2 | |
429 | nop.i 999;; | |
430 | } | |
431 | ||
432 | ||
// poly_p5 = x2*P6 + P5      2poly_p4 = P5*t + P4
433 | { .mfi | |
434 | nop.m 999 | |
435 | fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5 | |
436 | nop.i 999 | |
437 | } | |
438 | { .mfi | |
439 | nop.m 999 | |
440 | fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4 | |
441 | nop.i 999;; | |
442 | } | |
443 | ||
444 | ||
// x11 = x8*x3.  dz = 1/2 - B2*(t/2) = (1 - t*B*B)/2, the error term used to
// correct the sqrt(t) approximation.
445 | { .mfi | |
446 | nop.m 999 | |
447 | fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0 | |
448 | nop.i 999 | |
449 | } | |
450 | { .mfi | |
451 | nop.m 999 | |
452 | fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2 | |
453 | nop.i 999;; | |
454 | } | |
455 | ||
456 | ||
// poly_p1a = x2*(x*P1) + x  2poly_p8 = P9*t + P8
457 | { .mfi | |
458 | nop.m 999 | |
459 | fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8 | |
460 | nop.i 999 | |
461 | } | |
462 | { .mfi | |
463 | nop.m 999 | |
464 | fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8 | |
465 | nop.i 999;; | |
466 | } | |
467 | ||
468 | ||
469 | // Determine the region in which |x| lies: | |
// p7 = (|x| <= sqrt(2)/2), p8 = its complement.  p8 is reused here; its
// earlier NaN meaning is dead because that path has already returned.
470 | ||
471 | { .mfi | |
472 | nop.m 999 | |
473 | fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2 | |
474 | nop.i 999 | |
475 | } | |
// poly_p2 = x2*poly_p3 + P2
476 | { .mfi | |
477 | nop.m 999 | |
478 | fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2 | |
479 | nop.i 999;; | |
480 | } | |
481 | ||
482 | ||
// poly_p7a = x4*P9 + poly_p7     2poly_p2a = 2poly_p2*t2 + 2poly_p1
483 | { .mfi | |
484 | nop.m 999 | |
485 | fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7 | |
486 | nop.i 999 | |
487 | } | |
488 | { .mfi | |
489 | nop.m 999 | |
490 | fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1 | |
491 | nop.i 999;; | |
492 | } | |
493 | ||
494 | ||
// (p8) sgnx_t4 = sign(x)*t4      2poly_p4a = 2poly_p6*t2 + 2poly_p4
495 | { .mfi | |
496 | nop.m 999 | |
497 | (p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0 | |
498 | nop.i 999 | |
499 | } | |
500 | { .mfi | |
501 | nop.m 999 | |
502 | (p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4 | |
503 | nop.i 999;; | |
504 | } | |
505 | ||
506 | ||
// (p8) Sz = 2.5*dz + 1.5, d2z = dz*dz.
507 | { .mfi | |
508 | nop.m 999 | |
509 | (p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2 | |
510 | nop.i 999 | |
511 | } | |
512 | { .mfi | |
513 | nop.m 999 | |
514 | (p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0 | |
515 | nop.i 999;; | |
516 | } | |
517 | ||
518 | ||
// (p8) sgn_x_piby2 = sign(x)*pi/2.   (p7) poly_Ax = x5*poly_p2 + poly_p1a.
519 | { .mfi | |
520 | nop.m 999 | |
521 | (p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0 | |
522 | nop.i 999 | |
523 | } | |
524 | { .mfi | |
525 | nop.m 999 | |
526 | (p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a | |
527 | nop.i 999;; | |
528 | } | |
529 | ||
// (p7) poly_Bx = x4*poly_p7a + poly_p5.  (p8) sgnx_2poly_p2 = sign(x)*2poly_p2a.
530 | { .mfi | |
531 | nop.m 999 | |
532 | (p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5 | |
533 | nop.i 999 | |
534 | } | |
535 | { .mfi | |
536 | nop.m 999 | |
537 | (p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0 | |
538 | nop.i 999;; | |
539 | } | |
540 | ||
// The compare below uses status field s0 (the computations above use s1).
// Its p6 result is not used; the |x|=1 branch on p6 has already been taken
// or skipped by this point.
541 | { .mfi | |
542 | nop.m 999 | |
543 | fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal | |
544 | nop.i 999 | |
545 | } | |
// (p8) 2poly_p4b = 2poly_p8*t4 + 2poly_p4a
546 | { .mfi | |
547 | nop.m 999 | |
548 | (p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a | |
549 | nop.i 999;; | |
550 | } | |
551 | ||
552 | ||
// (p8) Fz = d2z*Sz + dz = dz + 1.5*dz^2 + 2.5*dz^3
553 | { .mfi | |
554 | nop.m 999 | |
555 | (p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz | |
556 | nop.i 999;; | |
557 | } | |
558 | ||
559 | ||
// (p8) Pt = 2poly_p4b*(sign(x)*t4) + sign(x)*2poly_p2a, i.e. sign(x)*P(t).
560 | { .mfi | |
561 | nop.m 999 | |
562 | (p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2 | |
563 | nop.i 999;; | |
564 | } | |
565 | ||
// (p8) z = Az*Fz + Az = Az*(1 + Fz): the corrected sqrt(t).
566 | { .mfi | |
567 | nop.m 999 | |
568 | (p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az | |
569 | nop.i 999;; | |
570 | } | |
571 | ||
// Exactly one of the two final results is written to f8:
//   (p8) f8 = sign(x)*pi/2 - z*Pt
//   (p7) f8 = x11*poly_Bx + poly_Ax
572 | .pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2 | |
573 | { .mfi | |
574 | nop.m 999 | |
a334319f | 575 | (p8) fnma.s f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2 |
8da2915d UD |
576 | nop.i 999 |
577 | } | |
578 | ||
579 | { .mfb | |
580 | nop.m 999 | |
a334319f | 581 | (p7) fma.s f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax |
8da2915d UD |
582 | br.ret.sptk b0 ;; |
583 | } | |
584 | ||
a334319f | 585 | L(ASINF_ABS_ONE): |
8da2915d UD |
586 | // Here for short exit if |x|=1: result is sign(x) * pi/2 |
587 | { .mfb | |
588 | nop.m 999 | |
a334319f | 589 | fma.s f8 = asinf_sgn_x,asinf_const_piby2,f0 |
8da2915d UD |
590 | br.ret.sptk b0 |
591 | } | |
592 | ;; | |
593 | ||
a334319f UD |
594 | .endp asinf |
595 | ASM_SIZE_DIRECTIVE(asinf) | |
8da2915d UD |
596 | |
597 | // Stack operations when calling error support. | |
598 | // (1) (2) | |
599 | // sp -> + psp -> + | |
600 | // | | | |
601 | // | | <- GR_Y | |
602 | // | | | |
603 | // | <-GR_Y Y2->| | |
604 | // | | | |
605 | // | | <- GR_X | |
606 | // | | | |
607 | // sp-64 -> + sp -> + | |
608 | // save ar.pfs save b0 | |
609 | // save gp | |
610 | ||
611 | ||
612 | // Stack operations when calling error support. | |
613 | // (3) (call) (4) | |
614 | // psp -> + sp -> + | |
615 | // | | | |
616 | // R3 ->| <- GR_RESULT | -> f8 | |
617 | // | | | |
618 | // Y2 ->| <- GR_Y | | |
619 | // | | | |
620 | // X1 ->| | | |
621 | // | | | |
622 | // sp -> + + | |
623 | // restore gp | |
624 | // restore ar.pfs | |
625 | ||
a334319f UD |
// __libm_error_region - error path, entered from asinf by a branch (not a
// call) when |x| > 1.
//   On entry: f8 = x, GR_Parameter_TAG already set by asinf, b0 = return
//             address of asinf's caller.
//   Builds a 64-byte stack frame laid out (relative to the new sp) as
//     sp+16 : parameter 1 (x, from f8)
//     sp+32 : parameter 2 (f1 is stored here)
//     sp+48 : parameter 3 / result slot (f9 is stored here before the call)
//   passes the three slot addresses in GR_Parameter_X / _Y / _RESULT, calls
//   __libm_error_support, reloads f8 from sp+48 and returns to asinf's caller.
//   b0, ar.pfs and gp are kept in GR_SAVE_* across the call.
626 | .proc __libm_error_region |
627 | __libm_error_region: | |
8da2915d UD |
628 | .prologue |
// GR_Parameter_Y = old sp - 32, which is new sp + 32 once the frame exists.
629 | { .mfi | |
630 | add GR_Parameter_Y=-32,sp // Parameter 2 value | |
631 | nop.f 999 | |
632 | .save ar.pfs,GR_SAVE_PFS | |
633 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
634 | } | |
635 | { .mfi | |
636 | .fframe 64 | |
637 | add sp=-64,sp // Create new stack | |
638 | nop.f 0 | |
639 | mov GR_SAVE_GP=gp // Save gp | |
640 | };; | |
// The store post-increments GR_Parameter_Y by 16, so it then points at sp+48.
641 | { .mmi | |
642 | stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack | |
643 | add GR_Parameter_X = 16,sp // Parameter 1 address | |
644 | .save b0, GR_SAVE_B0 | |
645 | mov GR_SAVE_B0=b0 // Save b0 | |
646 | };; | |
647 | ||
648 | .body | |
// NOTE(review): frcpa of 0/0 under status field s0 presumably yields the
// default NaN and raises the invalid flag, matching the documented behaviour
// for |x| > 1 - confirm against the architecture manual.
649 | { .mfi | |
650 | nop.m 0 | |
651 | frcpa.s0 f9,p0 = f0,f0 | |
652 | nop.i 0 | |
653 | };; | |
654 | ||
// GR_Parameter_RESULT = sp+48 (the "Parameter 3 address" remark on the nop.b
// row below describes this add).
655 | { .mib | |
656 | stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack | |
657 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
658 | nop.b 0 // Parameter 3 address | |
659 | } | |
// f9 goes into the result slot; GR_Parameter_Y is then moved back to sp+32.
660 | { .mib | |
661 | stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack | |
662 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
663 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
664 | };; | |
// GR_Parameter_RESULT is recomputed after the call rather than relied upon
// (see the 8/15/00 history entry: it was previously overwritten).
665 | { .mmi | |
666 | nop.m 0 | |
667 | nop.m 0 | |
668 | add GR_Parameter_RESULT = 48,sp | |
669 | };; | |
670 | ||
671 | { .mmi | |
672 | ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack | |
673 | .restore sp | |
674 | add sp = 64,sp // Restore stack pointer | |
675 | mov b0 = GR_SAVE_B0 // Restore return address | |
676 | };; | |
677 | { .mib | |
678 | mov gp = GR_SAVE_GP // Restore gp | |
679 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
680 | br.ret.sptk b0 // Return | |
681 | };; | |
682 | ||
a334319f UD |
683 | .endp __libm_error_region |
684 | ASM_SIZE_DIRECTIVE(__libm_error_region) | |
8da2915d UD |
685 | |
686 | .type __libm_error_support#,@function | |
687 | .global __libm_error_support# |