]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ia64/fpu/e_asin.S
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_asin.S
1 .file "asin.s"
2
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 //
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
23 //
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 //
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at
38 // http://developer.intel.com/opensource.
39
40 // History
41 //==============================================================
42 // 2/02/00 Initial version
43 // 8/17/00 New and much faster algorithm.
44 // 8/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
45 // fixed mfb split issue stalls.
46 // 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
47
48 // Description
49 //=========================================
50 // The asin function computes the principal value of the arc sine of x.
51 // asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
52 // A domain error occurs for arguments not in the range [-1,+1].
53
54 // The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
55
56 #include "libm_support.h"
57
58 //
59 // Assembly macros
60 //=========================================
61
62
63 // predicate registers
64 //asin_pred_LEsqrt2by2 = p7
65 //asin_pred_GTsqrt2by2 = p8
66
67 // integer registers (asin allocates r32 as input, r33-r38 as locals, r39-r42 as outputs)
68 ASIN_Addr1 = r33 // walks asin_coeff_1_table (ldfe with 16-byte post-increment)
69 ASIN_Addr2 = r34 // walks asin_coeff_2_table (ldfe with 16-byte post-increment)
70 ASIN_FFFE = r35 // holds 0xFFFE, transferred to asin_1by2 with setf.exp
71 ASIN_lnorm_sig = r36 // all-ones significand for asin_eps (same register as GR_SAVE_B0)
72 ASIN_snorm_exp = r37 // exponent field 0x0c001 for asin_eps (same register as GR_SAVE_PFS)
73 // The GR_SAVE_* aliases are written only in __libm_error_region, which asin branches to before r36/r37 get their main-path values.
74 GR_SAVE_B0 = r36 // saved return branch register across the error-support call
75 GR_SAVE_PFS = r37 // saved ar.pfs across the error-support call
76 GR_SAVE_GP = r38 // saved gp across the error-support call
77 // Output registers carrying the arguments of __libm_error_support
78 GR_Parameter_X = r39 // address of the stack slot holding the input x
79 GR_Parameter_Y = r40 // address of the stack slot holding the second (placeholder) operand
80 GR_Parameter_RESULT = r41 // address of the stack slot holding the result
81 GR_Parameter_Tag = r42 // error tag; asin sets it to 61 when |x| > 1
82
83 // floating point registers (the argument and the result both live in f8)
84 asin_coeff_P1 = f32 // asin_coeff_P1..P21: polynomial coefficients loaded from the two data tables
85 asin_coeff_P2 = f33
86 asin_coeff_P3 = f34
87 asin_coeff_P4 = f35
88
89 asin_coeff_P5 = f36
90 asin_coeff_P6 = f37
91 asin_coeff_P7 = f38
92 asin_coeff_P8 = f39
93 asin_coeff_P9 = f40
94
95 asin_coeff_P10 = f41
96 asin_coeff_P11 = f42
97 asin_coeff_P12 = f43
98 asin_coeff_P13 = f44
99 asin_coeff_P14 = f45
100
101 asin_coeff_P15 = f46
102 asin_coeff_P16 = f47
103 asin_coeff_P17 = f48
104 asin_coeff_P18 = f49
105 asin_coeff_P19 = f50
106
107 asin_coeff_P20 = f51
108 asin_coeff_P21 = f52
109 asin_const_sqrt2by2 = f53 // sqrt(2)/2, last entry read from asin_coeff_2_table
110 asin_const_piby2 = f54 // pi/2, last entry read from asin_coeff_1_table
111 asin_abs_x = f55 // |x|, formed with fmerge.s f1,f8
112 // tx = x*x and its powers; the tx polynomial is consumed under p7 (|x| <= sqrt(2)/2)
113 asin_tx = f56
114 asin_tx2 = f57
115 asin_tx3 = f58
116 asin_tx4 = f59
117 asin_tx8 = f60
118
119 asin_tx11 = f61
120 asin_1poly_p8 = f62 // asin_1poly_pN: partial sums of the polynomial in tx, named by their lowest coefficient
121 asin_1poly_p19 = f63
122 asin_1poly_p4 = f64
123 asin_1poly_p15 = f65
124
125 asin_1poly_p6 = f66
126 asin_1poly_p17 = f67
127 asin_1poly_p0 = f68
128 asin_1poly_p11 = f69
129 asin_1poly_p2 = f70
130
131 asin_1poly_p13 = f71
132 asin_series_tx = f72 // tx11*asin_1poly_p11 + asin_1poly_p0: the full polynomial in tx
133 asin_t = f73 // t = 1 - x*x and its powers; the t polynomial is consumed under p8 (|x| > sqrt(2)/2)
134 asin_t2 = f74
135 asin_t3 = f75
136
137 asin_t4 = f76
138 asin_t8 = f77
139 asin_t11 = f78
140 asin_poly_p8 = f79 // asin_poly_pN: partial sums of the polynomial in t, named by their lowest coefficient
141 asin_poly_p19 = f80
142
143 asin_poly_p4 = f81
144 asin_poly_p15 = f82
145 asin_poly_p6 = f83
146 asin_poly_p17 = f84
147 asin_poly_p0 = f85
148
149 asin_poly_p11 = f86
150 asin_poly_p2 = f87
151 asin_poly_p13 = f88
152 asin_series_t = f89 // t11*asin_poly_p11 + asin_poly_p0: the full polynomial in t
153 asin_1by2 = f90 // 0.5, created with setf.exp; the asin_NbyM constants are derived from it by fma/fms
154
155 asin_3by2 = f91
156 asin_5by2 = f92
157 asin_11by4 = f93
158 asin_35by8 = f94
159 asin_63by8 = f95
160
161 asin_231by16 = f96
162 asin_y0 = f97 // frsqrta result for t
163 asin_H0 = f98 // y0 * 0.5
164 asin_S0 = f99 // y0 * t
165 asin_d = f100 // 0.5 - S0*H0
166
167 asin_l1 = f101 // (5/2)*d + 3/2
168 asin_d2 = f102 // d*d
169 asin_T0 = f103 // d*S0
170 asin_d1 = f104 // (35/8)*d
171 asin_e0 = f105 // d2*l1 + d
172
173 asin_l2 = f106 // (231/16)*d + 63/8
174 asin_d3 = f107 // d2*d
175 asin_T3 = f108 // d2*T0
176 asin_S1 = f109 // e0*S0 + S0
177 asin_e1 = f110 // d2*l2 + d1
178
179 asin_z = f111 // S1 + e1*T3 + l3*T6
180 answer2 = f112 // pi/2 - z*asin_series_t
181 asin_sgn_x = f113 // 1.0 carrying the sign of x, formed with fmerge.s f8,f1
182 asin_429by16 = f114
183 asin_18by4 = f115 // 18/4, used only to build 429/16
184
185 asin_3by4 = f116 // 3/4, used only to build 18/4
186 asin_l3 = f117 // (429/16)*d
187 asin_T6 = f118 // T3*d3
188 asin_eps_exp = f119 // receives the small exponent via setf.exp
189 asin_eps_sig = f120 // receives the all-ones significand via setf.sig
190 asin_eps = f120 // same register as asin_eps_sig: fmerge.se overwrites it in place with the combined value
191
192 // Data tables
193 //==============================================================
194
195 #ifdef _LIBC
196 .rodata
197 #else
198 .data
199 #endif
200 // The tables are placed in .rodata when _LIBC is defined and in .data otherwise.
201 .align 16
202 // Each entry is two data8 words (16 bytes): the 64-bit significand, then the word holding sign and exponent. asin reads the entries with ldfe and a 16-byte post-increment on ASIN_Addr1, so the order below must match the load order in the code.
203 asin_coeff_1_table:
204 ASM_TYPE_DIRECTIVE(asin_coeff_1_table,@object)
205 data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
206 data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
207 data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
208 data8 0xF75E381A323D4D94 , 0x0000C002 //P16
209 data8 0x8959C2629C1024C0 , 0x0000C002 //P20
210 data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
211 data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
212 data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
213 data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
214 data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
215 data8 0xF534912FF3E7B76F , 0x00003FFF //P21
216 data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
217 data8 0x0000000000000000 , 0x00000000 // pad to avoid data bank conflict
218 ASM_SIZE_DIRECTIVE(asin_coeff_1_table)
219 // Second table, same entry format as asin_coeff_1_table: significand word followed by the sign and exponent word.
220 // asin reads it with ldfe and a 16-byte post-increment on ASIN_Addr2, so the order below must match the load order in the code.
221 asin_coeff_2_table:
222 ASM_TYPE_DIRECTIVE(asin_coeff_2_table,@object)
223 data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
224 data8 0xB4F118A4B1015470 , 0x00004003 //P17
225 data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
226 data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
227 data8 0x92728015172CFE1C , 0x00004003 //P19
228 data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
229 data8 0x999999999952A5C3 , 0x00003FFB //P2
230 data8 0x855576BE6F0975EC , 0x00003FFF //P13
231 data8 0xF12420E778077D89 , 0x00003FFA //P11
232 data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
233 data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
234 ASM_SIZE_DIRECTIVE(asin_coeff_2_table)
235
236
237
238 .align 32
239 .global asin
240
241 .section .text
242 .proc asin
243 .align 32
244 // asin: the argument x is read from f8 and the result is written to f8 with fma.d.
245 // Exits: NaN -> x*1+0; zero -> f8 unchanged; |x| > 1 -> __libm_error_region with tag 61; |x| = 1 -> sgn(x)*pi/2; |x| <= sqrt(2)/2 -> x*P(x*x); otherwise sgn(x)*(pi/2 - z*P(1-x*x)).
246 asin:
247
248 { .mfi
249 alloc r32 = ar.pfs,1,6,4,0 // r32 receives ar.pfs; frame is 1 input, 6 locals, 4 outputs
250 fma.s1 asin_tx = f8,f8,f0 // tx = x*x
251 addl ASIN_Addr2 = @ltoff(asin_coeff_2_table),gp // linkage table slot for table 2
252 }
253 { .mfi
254 mov ASIN_FFFE = 0xFFFE
255 fnma.s1 asin_t = f8,f8,f1 // t = 1 - x*x
256 addl ASIN_Addr1 = @ltoff(asin_coeff_1_table),gp // linkage table slot for table 1
257 }
258 ;;
259
260 // Build 0.5, |x| and the sign of x, and fetch the table addresses from the linkage table slots.
261 { .mfi
262 setf.exp asin_1by2 = ASIN_FFFE // 0.5
263 fmerge.s asin_abs_x = f1,f8 // sign taken from f1, exponent and significand from x
264 nop.i 999 ;;
265 }
266
267 { .mmf
268 ld8 ASIN_Addr1 = [ASIN_Addr1]
269 ld8 ASIN_Addr2 = [ASIN_Addr2]
270 fmerge.s asin_sgn_x = f8,f1 ;;
271 }
272 // From here on the coefficient loads (16-byte post-increment through each table) are
273 // interleaved with the arithmetic; the load order must match the table layout.
274 { .mfi
275 ldfe asin_coeff_P7 = [ASIN_Addr1],16
276 fma.s1 asin_tx2 = asin_tx,asin_tx,f0
277 nop.i 999
278 }
279 { .mfi
280 ldfe asin_coeff_P6 = [ASIN_Addr2],16
281 fma.s1 asin_t2 = asin_t,asin_t,f0
282 nop.i 999;;
283 }
284
285
286 { .mmf
287 ldfe asin_coeff_P18 = [ASIN_Addr1],16
288 ldfe asin_coeff_P17 = [ASIN_Addr2],16
289 fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
290 }
291 ;;
292
293 { .mmf
294 ldfe asin_coeff_P5 = [ASIN_Addr1],16
295 ldfe asin_coeff_P4 = [ASIN_Addr2],16
296 frsqrta.s1 asin_y0,p0 = asin_t // y0: reciprocal square root approximation of t
297 }
298 ;;
299
300 { .mfi
301 ldfe asin_coeff_P16 = [ASIN_Addr1],16
302 fcmp.gt.s1 p9,p0 = asin_abs_x,f1 // p9: |x| > 1
303 nop.i 999
304 }
305 { .mfb
306 ldfe asin_coeff_P15 = [ASIN_Addr2],16
307 (p8) fma.d f8 = f8,f1,f0 // NaN input: result is x*1+0
308 (p8) br.ret.spnt b0
309 }
310 ;;
311
312
313 { .mmf
314 ldfe asin_coeff_P20 = [ASIN_Addr1],16
315 ldfe asin_coeff_P19 = [ASIN_Addr2],16
316 fclass.m.unc p8,p0 = f8, 0x07 //@zero
317 }
318 ;;
319
320
321 { .mfi
322 ldfe asin_coeff_P9 = [ASIN_Addr1],16
323 fma.s1 asin_t4 = asin_t2,asin_t2,f0
324 (p9) mov GR_Parameter_Tag = 61 // error tag handed to __libm_error_support
325 }
326 { .mfi
327 ldfe asin_coeff_P8 = [ASIN_Addr2],16
328 fma.s1 asin_3by2 = asin_1by2,f1,f1 // 0.5 + 1
329 nop.i 999;;
330 }
331
332
333 { .mfi
334 ldfe asin_coeff_P2 = [ASIN_Addr2],16
335 fma.s1 asin_tx4 = asin_tx2,asin_tx2,f0
336 nop.i 999
337 }
338 { .mfb
339 ldfe asin_coeff_P3 = [ASIN_Addr1],16
340 fma.s1 asin_t3 = asin_t,asin_t2,f0
341 (p8) br.ret.spnt b0 // zero input: return with f8 unchanged
342 }
343 ;;
344
345
346 { .mfi
347 ldfe asin_coeff_P13 = [ASIN_Addr2],16
348 fma.s1 asin_H0 = asin_y0,asin_1by2,f0 // H0 = y0/2
349 nop.i 999
350 }
351 { .mfb
352 ldfe asin_coeff_P14 = [ASIN_Addr1],16
353 fma.s1 asin_S0 = asin_y0,asin_t,f0 // S0 = y0*t
354 (p9) br.cond.spnt __libm_error_region // |x| > 1
355 }
356 ;;
357
358
359 { .mfi
360 ldfe asin_coeff_P11 = [ASIN_Addr2],16
361 fcmp.eq.s1 p6,p0 = asin_abs_x,f1 // p6: |x| = 1
362 nop.i 999
363 }
364 { .mfi
365 ldfe asin_coeff_P12 = [ASIN_Addr1],16
366 fma.s1 asin_tx3 = asin_tx,asin_tx2,f0
367 nop.i 999;;
368 }
369
370
371 { .mfi
372 ldfe asin_coeff_P10 = [ASIN_Addr2],16
373 fma.s1 asin_1poly_p6 = asin_tx,asin_coeff_P7,asin_coeff_P6
374 nop.i 999
375 }
376 { .mfi
377 ldfe asin_coeff_P1 = [ASIN_Addr1],16
378 fma.s1 asin_poly_p6 = asin_t,asin_coeff_P7,asin_coeff_P6
379 nop.i 999;;
380 }
381
382
383 { .mfi
384 ldfe asin_const_sqrt2by2 = [ASIN_Addr2],16
385 fma.s1 asin_5by2 = asin_3by2,f1,f1 // 3/2 + 1
386 nop.i 999
387 }
388 { .mfi
389 ldfe asin_coeff_P21 = [ASIN_Addr1],16
390 fma.s1 asin_11by4 = asin_3by2,asin_3by2,asin_1by2 // 9/4 + 1/2
391 nop.i 999;;
392 }
393
394
395 { .mfi
396 ldfe asin_const_piby2 = [ASIN_Addr1],16
397 fma.s1 asin_poly_p17 = asin_t,asin_coeff_P18,asin_coeff_P17
398 nop.i 999
399 }
400 { .mfb
401 nop.m 999
402 fma.s1 asin_3by4 = asin_3by2,asin_1by2,f0
403 (p6) br.cond.spnt L(ASIN_ABS_1) // Branch to short exit if |x|=1
404 }
405 ;;
406 // Main path: NaN, zero, |x| > 1 and |x| = 1 have all left by now.
407 // Both polynomials (in t and in tx) are evaluated; p7/p8 pick the result at the end.
408 { .mfi
409 addl ASIN_lnorm_sig = -0x1,r0 // Form significand 0xffffffffffffffff
410 fma.s1 asin_poly_p15 = asin_t,asin_coeff_P16,asin_coeff_P15
411 nop.i 999
412 }
413 { .mfi
414 addl ASIN_snorm_exp = 0x0c001,r0 // Form small exponent
415 fnma.s1 asin_d = asin_S0,asin_H0,asin_1by2 // d = 1/2 - S0*H0
416 nop.i 999;;
417 }
418
419
420 // Form the exponent and significand of a small number
421 { .mfi
422 setf.sig asin_eps_sig = ASIN_lnorm_sig
423 fma.s1 asin_poly_p19 = asin_t,asin_coeff_P20,asin_coeff_P19
424 nop.i 999
425 }
426 { .mfi
427 setf.exp asin_eps_exp = ASIN_snorm_exp
428 fma.s1 asin_poly_p4 = asin_t,asin_coeff_P5,asin_coeff_P4
429 nop.i 999;;
430 }
431
432
433 { .mfi
434 nop.m 999
435 fma.s1 asin_1poly_p17 = asin_tx,asin_coeff_P18,asin_coeff_P17
436 nop.i 999
437 }
438 { .mfi
439 nop.m 999
440 fma.s1 asin_poly_p8 = asin_t,asin_coeff_P9,asin_coeff_P8
441 nop.i 999;;
442 }
443
444
445 { .mfi
446 nop.m 999
447 fms.s1 asin_35by8 = asin_5by2,asin_11by4,asin_5by2 // 55/8 - 5/2
448 nop.i 999
449 }
450 { .mfi
451 nop.m 999
452 fma.s1 asin_63by8 = asin_5by2,asin_11by4,f1 // 55/8 + 1
453 nop.i 999;;
454 }
455
456
457 { .mfi
458 nop.m 999
459 fma.s1 asin_poly_p13 = asin_t,asin_coeff_P14,asin_coeff_P13
460 nop.i 999
461 }
462 { .mfi
463 nop.m 999
464 fma.s1 asin_18by4 = asin_3by2,asin_5by2,asin_3by4 // 15/4 + 3/4
465 nop.i 999;;
466 }
467
468
469 { .mfi
470 nop.m 999
471 fma.s1 asin_l1 = asin_5by2,asin_d,asin_3by2
472 nop.i 999
473 }
474 { .mfi
475 nop.m 999
476 fma.s1 asin_d2 = asin_d,asin_d,f0
477 nop.i 999;;
478 }
479
480
481 { .mfi
482 nop.m 999
483 fma.s1 asin_poly_p15 = asin_t2,asin_poly_p17,asin_poly_p15
484 nop.i 999
485 }
486 { .mfi
487 nop.m 999
488 fma.s1 asin_T0 = asin_d,asin_S0,f0
489 nop.i 999;;
490 }
491
492
493 { .mfi
494 nop.m 999
495 fma.s1 asin_poly_p19 = asin_t2,asin_coeff_P21,asin_poly_p19
496 nop.i 999
497 }
498 { .mfi
499 nop.m 999
500 fma.s1 asin_poly_p4 = asin_t2,asin_poly_p6,asin_poly_p4
501 nop.i 999;;
502 }
503
504
505 { .mfi
506 nop.m 999
507 fma.s1 asin_d1 = asin_35by8,asin_d,f0
508 nop.i 999
509 }
510 { .mfi
511 nop.m 999
512 fma.s1 asin_231by16 = asin_3by2,asin_35by8,asin_63by8 // 105/16 + 126/16
513 nop.i 999;;
514 }
515
516
517 { .mfi
518 nop.m 999
519 fma.s1 asin_poly_p2 = asin_t,asin_coeff_P3,asin_coeff_P2
520 nop.i 999
521 }
522 { .mfi
523 nop.m 999
524 fma.s1 asin_poly_p8 = asin_t2,asin_coeff_P10,asin_poly_p8
525 nop.i 999;;
526 }
527
528
529 { .mfi
530 nop.m 999
531 fma.s1 asin_poly_p11 = asin_t,asin_coeff_P12,asin_coeff_P11
532 nop.i 999
533 }
534 { .mfi
535 nop.m 999
536 fma.s1 asin_e0 = asin_d2,asin_l1,asin_d // e0 = d + (3/2)d^2 + (5/2)d^3
537 nop.i 999;;
538 }
539
540
541 { .mfi
542 nop.m 999
543 fma.s1 asin_1poly_p15 = asin_tx,asin_coeff_P16,asin_coeff_P15
544 nop.i 999
545 }
546 { .mfi
547 nop.m 999
548 fma.s1 asin_poly_p0 = asin_t,asin_coeff_P1,f1 // constant term of the polynomial is 1
549 nop.i 999;;
550 }
551
552
553 { .mfi
554 nop.m 999
555 fma.s1 asin_1poly_p19 = asin_tx,asin_coeff_P20,asin_coeff_P19
556 nop.i 999
557 }
558 { .mfi
559 nop.m 999
560 fma.s1 asin_1poly_p4 = asin_tx,asin_coeff_P5,asin_coeff_P4
561 nop.i 999;;
562 }
563
564
565 { .mfi
566 nop.m 999
567 fma.s1 asin_1poly_p8 = asin_tx,asin_coeff_P9,asin_coeff_P8
568 nop.i 999
569 }
570 { .mfi
571 nop.m 999
572 fma.s1 asin_l2 = asin_231by16,asin_d,asin_63by8
573 nop.i 999;;
574 }
575
576
577 { .mfi
578 nop.m 999
579 fma.s1 asin_d3 = asin_d2,asin_d,f0
580 nop.i 999
581 }
582 { .mfi
583 nop.m 999
584 fma.s1 asin_T3 = asin_d2,asin_T0,f0 // T3 = d^3 * S0
585 nop.i 999;;
586 }
587
588
589 { .mfi
590 nop.m 999
591 fma.s1 asin_429by16 = asin_18by4,asin_11by4,asin_231by16 // 198/16 + 231/16
592 nop.i 999
593 }
594 { .mfi
595 nop.m 999
596 fma.s1 asin_S1 = asin_e0,asin_S0,asin_S0 // S1 = S0*(1 + e0)
597 nop.i 999;;
598 }
599
600
601 { .mfi
602 nop.m 999
603 fma.s1 asin_poly_p4 = asin_t4,asin_poly_p8,asin_poly_p4
604 nop.i 999
605 }
606 { .mfi
607 nop.m 999
608 fma.s1 asin_poly_p15 = asin_t4,asin_poly_p19,asin_poly_p15
609 nop.i 999;;
610 }
611
612
613 { .mfi
614 nop.m 999
615 fma.s1 asin_poly_p0 = asin_t2,asin_poly_p2,asin_poly_p0
616 nop.i 999
617 }
618 { .mfi
619 nop.m 999
620 fma.s1 asin_poly_p11 = asin_t2,asin_poly_p13,asin_poly_p11
621 nop.i 999;;
622 }
623
624
625 { .mfi
626 nop.m 999
627 fma.s1 asin_t8 = asin_t4,asin_t4,f0
628 nop.i 999
629 }
630 { .mfi
631 nop.m 999
632 fma.s1 asin_e1 = asin_d2,asin_l2,asin_d1 // e1 = (35/8)d + (63/8)d^2 + (231/16)d^3
633 nop.i 999;;
634 }
635
636
637 { .mfi
638 nop.m 999
639 fma.s1 asin_1poly_p4 = asin_tx2,asin_1poly_p6,asin_1poly_p4
640 nop.i 999
641 }
642 { .mfi
643 nop.m 999
644 fma.s1 asin_1poly_p15 = asin_tx2,asin_1poly_p17,asin_1poly_p15
645 nop.i 999;;
646 }
647
648
649 { .mfi
650 nop.m 999
651 fma.s1 asin_1poly_p8 = asin_tx2,asin_coeff_P10,asin_1poly_p8
652 nop.i 999
653 }
654 { .mfi
655 nop.m 999
656 fma.s1 asin_1poly_p19 = asin_tx2,asin_coeff_P21,asin_1poly_p19
657 nop.i 999;;
658 }
659
660
661 { .mfi
662 nop.m 999
663 fma.s1 asin_1poly_p2 = asin_tx,asin_coeff_P3,asin_coeff_P2
664 nop.i 999
665 }
666 { .mfi
667 nop.m 999
668 fma.s1 asin_1poly_p13 = asin_tx,asin_coeff_P14,asin_coeff_P13
669 nop.i 999;;
670 }
671
672
673 { .mfi
674 nop.m 999
675 fma.s1 asin_1poly_p0 = asin_tx,asin_coeff_P1,f1 // constant term of the polynomial is 1
676 nop.i 999
677 }
678 { .mfi
679 nop.m 999
680 fma.s1 asin_1poly_p11 = asin_tx,asin_coeff_P12,asin_coeff_P11
681 nop.i 999;;
682 }
683
684
685 { .mfi
686 nop.m 999
687 fma.s1 asin_l3 = asin_429by16,asin_d,f0
688 nop.i 999
689 }
690 { .mfi
691 nop.m 999
692 fma.s1 asin_z = asin_e1,asin_T3,asin_S1 // z = S1 + e1*T3 (terms up to d^6)
693 nop.i 999;;
694 }
695
696
697 { .mfi
698 nop.m 999
699 fma.s1 asin_poly_p11 = asin_t4,asin_poly_p15,asin_poly_p11 // P11 + P12*t + ... + P21*t^10
700 nop.i 999
701 }
702 { .mfi
703 nop.m 999
704 fma.s1 asin_T6 = asin_T3,asin_d3,f0 // T6 = d^6 * S0
705 nop.i 999;;
706 }
707
708
709 { .mfi
710 nop.m 999
711 fma.s1 asin_t11 = asin_t8,asin_t3,f0
712 nop.i 999
713 }
714 { .mfi
715 nop.m 999
716 fma.s1 asin_poly_p0 = asin_t4,asin_poly_p4,asin_poly_p0 // 1 + P1*t + ... + P10*t^10
717 nop.i 999;;
718 }
719
720
721 { .mfi
722 nop.m 999
723 fma.s1 asin_1poly_p4 = asin_tx4,asin_1poly_p8,asin_1poly_p4
724 nop.i 999
725 }
726 { .mfi
727 nop.m 999
728 fma.s1 asin_1poly_p15 = asin_tx4,asin_1poly_p19,asin_1poly_p15
729 nop.i 999;;
730 }
731
732
733 { .mfi
734 nop.m 999
735 fma.s1 asin_1poly_p0 = asin_tx2,asin_1poly_p2,asin_1poly_p0
736 nop.i 999
737 }
738 { .mfi
739 nop.m 999
740 fma.s1 asin_1poly_p11 = asin_tx2,asin_1poly_p13,asin_1poly_p11
741 nop.i 999;;
742 }
743
744 // Select the result path: p7 for |x| <= sqrt(2)/2, p8 otherwise.
745 { .mfi
746 nop.m 999
747 // fcmp.le.s1 asin_pred_LEsqrt2by2,asin_pred_GTsqrt2by2 = asin_abs_x,asin_const_sqrt2by2
748 fcmp.le.s1 p7,p8 = asin_abs_x,asin_const_sqrt2by2
749 nop.i 999
750 }
751 { .mfi
752 nop.m 999
753 fma.s1 asin_tx8 = asin_tx4,asin_tx4,f0
754 nop.i 999;;
755 }
756
757
758 // Form a small number to force inexact flag for small args
759 { .mfi
760 nop.m 999
761 fmerge.se asin_eps = asin_eps_exp,asin_eps_sig
762 nop.i 999
763 }
764 { .mfi
765 nop.m 999
766 fma.s1 asin_z = asin_l3,asin_T6,asin_z // z = S0*(1 + d + (3/2)d^2 + (5/2)d^3 + (35/8)d^4 + (63/8)d^5 + (231/16)d^6 + (429/16)d^7)
767 nop.i 999;;
768 }
769 // NOTE(review): the coefficients of z match the series of 1/sqrt(1-2d), so z looks like a refined sqrt(1 - x*x) - confirm against the algorithm notes.
770 { .mfi
771 nop.m 999
772 fma.s1 asin_series_t = asin_t11,asin_poly_p11,asin_poly_p0 // full polynomial in t
773 nop.i 999;;
774 }
775
776 { .mfi
777 nop.m 999
778 fma.s1 asin_1poly_p0 = asin_tx4,asin_1poly_p4,asin_1poly_p0
779 nop.i 999
780 }
781 { .mfi
782 nop.m 999
783 fma.s1 asin_1poly_p11 = asin_tx4,asin_1poly_p15,asin_1poly_p11
784 nop.i 999;;
785 }
786
787
788 { .mfi
789 nop.m 999
790 fma.s1 asin_tx11 = asin_tx8,asin_tx3,f0
791 nop.i 999;;
792 }
793
794 { .mfi
795 nop.m 999
796 //(asin_pred_GTsqrt2by2) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
797 (p8) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
798 nop.i 999;;
799 }
800
801 { .mfi
802 nop.m 999
803 fma.s1 asin_series_tx = asin_tx11,asin_1poly_p11,asin_1poly_p0 // full polynomial in tx
804 nop.i 999;;
805 }
806
807 { .mfi
808 nop.m 999
809 //(asin_pred_GTsqrt2by2) fma.d f8 = asin_sgn_x,answer2,f0
810 (p8) fma.d f8 = asin_sgn_x,answer2,f0
811 nop.i 999;;
812 }
813
814 // asin_eps is added only to force inexact and possibly underflow flag
815 // in case asin_series_tx is zero
816 //
817 { .mfi
818 nop.m 999
819 (p7) fma.d asin_eps = f8,asin_series_tx,asin_eps // value is discarded; executed for its status flags only
820 nop.i 999
821 }
822 { .mfb
823 nop.m 999
824 //(asin_pred_LEsqrt2by2) fma.d f8 = f8,asin_series_tx,f0
825 (p7) fma.d f8 = f8,asin_series_tx,f0
826 br.ret.sptk b0
827 }
828 ;;
829
830
831 L(ASIN_ABS_1):
832 // Here for short exit if |x|=1
833 { .mfb
834 nop.m 999
835 fma.d f8 = asin_sgn_x,asin_const_piby2,f0 // result is pi/2 with the sign of x
836 br.ret.sptk b0
837 }
838 ;;
839
840
841 .endp asin
842 ASM_SIZE_DIRECTIVE(asin)
843
844 .proc __libm_error_region
845 __libm_error_region:
846 .prologue
847 { .mfi
848 add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp - 32, which is new sp + 32 once the frame exists
849 nop.f 999
850 .save ar.pfs,GR_SAVE_PFS
851 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
852 }
853 { .mfi
854 .fframe 64
855 add sp=-64,sp // Create new stack
856 nop.f 0
857 mov GR_SAVE_GP=gp // Save gp
858 };;
859 { .mmi
860 stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 (f1, single precision) at sp+32; pointer then moves to sp+48
861 add GR_Parameter_X = 16,sp // Parameter 1 address
862 .save b0, GR_SAVE_B0
863 mov GR_SAVE_B0=b0 // Save b0
864 };;
865 // Entered from asin with x in f8 and GR_Parameter_Tag already set.
866 .body
867 frcpa.s0 f9,p0 = f0,f0 // f9 = reciprocal approximation of 0/0; NOTE(review): presumably yields NaN and raises invalid - confirm in the ISA manual
868 ;;
869 // Slots passed to the callee: X = sp+16, Y = sp+32, RESULT = sp+48.
870 { .mib
871 stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
872 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (Y still points at sp+48 here)
873 nop.b 0
874 }
875 { .mib
876 stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack; pointer then moves back to sp+32
877 adds r32 = 48,sp // r32 = address of the result slot, used to reload it after the call
878 br.call.sptk b0=__libm_error_support# // Call error handling function
879 };;
880 { .mmi
881 ldfd f8 = [r32] // Get return result off stack
882 .restore sp
883 add sp = 64,sp // Restore stack pointer
884 mov b0 = GR_SAVE_B0 // Restore return address
885 };;
886 { .mib
887 mov gp = GR_SAVE_GP // Restore gp
888 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
889 br.ret.sptk b0 // Return
890
891 };;
892
893 .endp __libm_error_region
894 ASM_SIZE_DIRECTIVE(__libm_error_region)
895 // External error handler called above.
896 .type __libm_error_support,@function
897 .global __libm_error_support