]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_sinh.S
ia64: move from main tree
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_sinh.S
CommitLineData
d5efd131
MF
1.file "sinh.s"
2
3
4// Copyright (c) 2000 - 2005, Intel Corporation
5// All rights reserved.
6//
7// Contributed 2000 by the Intel Numerics Group, Intel Corporation
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
23
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Intel Corporation is the author of this code, and requests that all
37// problem reports or change requests be submitted to it directly at
38// http://www.intel.com/software/products/opensource/libraries/num.htm.
39//
40// History
41//==============================================================
42// 02/02/00 Initial version
43// 04/04/00 Unwind support added
44// 08/15/00 Bundle added after call to __libm_error_support to properly
45// set [the previously overwritten] GR_Parameter_RESULT.
46// 10/12/00 Update to set denormal operand and underflow flags
47// 01/22/01 Fixed to set inexact flag for small args.
48// 05/02/01 Reworked to improve speed of all paths
49// 05/20/02 Cleaned up namespace and sf0 syntax
50// 11/20/02 Improved speed with new algorithm
51// 03/31/05 Reformatted delimiters between data tables
52
53// API
54//==============================================================
55// double sinh(double)
56
57// Overview of operation
58//==============================================================
59// Case 1: 0 < |x| < 2^-60
60// Result = x, computed by x+sgn(x)*x^2) to handle flags and rounding
61//
62// Case 2: 2^-60 < |x| < 0.25
63// Evaluate sinh(x) by a 13th order polynomial
64// Care is take for the order of multiplication; and A1 is not exactly 1/3!,
65// A2 is not exactly 1/5!, etc.
66// sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9 + A5*x^11 + A6*x^13)
67//
68// Case 3: 0.25 < |x| < 710.47586
69// Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
70// The algorithm for exp is described as below. There are a number of
71// economies from evaluating both exp(x) and exp(-x). Although we
72// are evaluating both quantities, only where the quantities diverge do we
73// duplicate the computations. The basic algorithm for exp(x) is described
74// below.
75//
76// Take the input x. w is "how many log2/128 in x?"
77// w = x * 128/log2
78// n = int(w)
79// x = n log2/128 + r + delta
80
81// n = 128M + index_1 + 2^4 index_2
82// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
83
84// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
85// Construct 2^M
86// Get 2^(index_1/128) from table_1;
87// Get 2^(index_2/8) from table_2;
88// Calculate exp(r) by 5th order polynomial
89// r = x - n (log2/128)_high
90// delta = - n (log2/128)_low
91// Calculate exp(delta) as 1 + delta
92
93
94// Special values
95//==============================================================
96// sinh(+0) = +0
97// sinh(-0) = -0
98
99// sinh(+qnan) = +qnan
100// sinh(-qnan) = -qnan
101// sinh(+snan) = +qnan
102// sinh(-snan) = -qnan
103
104// sinh(-inf) = -inf
105// sinh(+inf) = +inf
106
107// Overflow and Underflow
108//=======================
109// sinh(x) = largest double normal when
110// |x| = 710.47586 = 0x408633ce8fb9f87d
111//
112// Underflow is handled as described in case 1 above
113
114// Registers used
115//==============================================================
116// Floating Point registers used:
117// f8, input, output
118// f6 -> f15, f32 -> f61
119
120// General registers used:
121// r14 -> r40
122
123// Predicate registers used:
124// p6 -> p15
125
126// Assembly macros
127//==============================================================
128
129rRshf = r14
130rN_neg = r14
131rAD_TB1 = r15
132rAD_TB2 = r16
133rAD_P = r17
134rN = r18
135rIndex_1 = r19
136rIndex_2_16 = r20
137rM = r21
138rBiased_M = r21
139rSig_inv_ln2 = r22
140rIndex_1_neg = r22
141rExp_bias = r23
142rExp_bias_minus_1 = r23
143rExp_mask = r24
144rTmp = r24
145rGt_ln = r24
146rIndex_2_16_neg = r24
147rM_neg = r25
148rBiased_M_neg = r25
149rRshf_2to56 = r26
150rAD_T1_neg = r26
151rExp_2tom56 = r28
152rAD_T2_neg = r28
153rAD_T1 = r29
154rAD_T2 = r30
155rSignexp_x = r31
156rExp_x = r31
157
158GR_SAVE_B0 = r33
159GR_SAVE_PFS = r34
160GR_SAVE_GP = r35
161
162GR_Parameter_X = r37
163GR_Parameter_Y = r38
164GR_Parameter_RESULT = r39
165GR_Parameter_TAG = r40
166
167
168FR_X = f10
169FR_Y = f1
170FR_RESULT = f8
171
172fRSHF_2TO56 = f6
173fINV_LN2_2TO63 = f7
174fW_2TO56_RSH = f9
175f2TOM56 = f11
176fP5 = f12
177fP4 = f13
178fP3 = f14
179fP2 = f15
180
181fLn2_by_128_hi = f33
182fLn2_by_128_lo = f34
183
184fRSHF = f35
185fNfloat = f36
186fNormX = f37
187fR = f38
188fF = f39
189
190fRsq = f40
191f2M = f41
192fS1 = f42
193fT1 = f42
194fS2 = f43
195fT2 = f43
196fS = f43
197fWre_urm_f8 = f44
198fAbsX = f44
199
200fMIN_DBL_OFLOW_ARG = f45
201fMAX_DBL_NORM_ARG = f46
202fXsq = f47
203fX4 = f48
204fGt_pln = f49
205fTmp = f49
206
207fP54 = f50
208fP5432 = f50
209fP32 = f51
210fP = f52
211fP54_neg = f53
212fP5432_neg = f53
213fP32_neg = f54
214fP_neg = f55
215fF_neg = f56
216
217f2M_neg = f57
218fS1_neg = f58
219fT1_neg = f58
220fS2_neg = f59
221fT2_neg = f59
222fS_neg = f59
223fExp = f60
224fExp_neg = f61
225
226fA6 = f50
227fA65 = f50
228fA6543 = f50
229fA654321 = f50
230fA5 = f51
231fA4 = f52
232fA43 = f52
233fA3 = f53
234fA2 = f54
235fA21 = f54
236fA1 = f55
237fX3 = f56
238
239// Data tables
240//==============================================================
241
242RODATA
243.align 16
244
245// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
246
247// double-extended 1/ln(2)
248// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
249// 3fff b8aa 3b29 5c17 f0bc
250// For speed the significand will be loaded directly with a movl and setf.sig
251// and the exponent will be bias+63 instead of bias+0. Thus subsequent
252// computations need to scale appropriately.
253// The constant 128/ln(2) is needed for the computation of w. This is also
254// obtained by scaling the computations.
255//
256// Two shifting constants are loaded directly with movl and setf.d.
257// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
258// This constant is added to x*1/ln2 to shift the integer part of
259// x*128/ln2 into the rightmost bits of the significand.
260// The result of this fma is fW_2TO56_RSH.
261// 2. fRSHF = 1.1000..00 * 2^(63)
262// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
263// the integer part of w, n, as a floating-point number.
264// The result of this fms is fNfloat.
265
266
267LOCAL_OBJECT_START(exp_table_1)
268data8 0x408633ce8fb9f87e // smallest dbl overflow arg
269data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
270data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
271data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
272//
273// Table 1 is 2^(index_1/128) where
274// index_1 goes from 0 to 15
275//
276data8 0x8000000000000000 , 0x00003FFF
277data8 0x80B1ED4FD999AB6C , 0x00003FFF
278data8 0x8164D1F3BC030773 , 0x00003FFF
279data8 0x8218AF4373FC25EC , 0x00003FFF
280data8 0x82CD8698AC2BA1D7 , 0x00003FFF
281data8 0x8383594EEFB6EE37 , 0x00003FFF
282data8 0x843A28C3ACDE4046 , 0x00003FFF
283data8 0x84F1F656379C1A29 , 0x00003FFF
284data8 0x85AAC367CC487B15 , 0x00003FFF
285data8 0x8664915B923FBA04 , 0x00003FFF
286data8 0x871F61969E8D1010 , 0x00003FFF
287data8 0x87DB357FF698D792 , 0x00003FFF
288data8 0x88980E8092DA8527 , 0x00003FFF
289data8 0x8955EE03618E5FDD , 0x00003FFF
290data8 0x8A14D575496EFD9A , 0x00003FFF
291data8 0x8AD4C6452C728924 , 0x00003FFF
292LOCAL_OBJECT_END(exp_table_1)
293
294// Table 2 is 2^(index_1/8) where
295// index_2 goes from 0 to 7
296LOCAL_OBJECT_START(exp_table_2)
297data8 0x8000000000000000 , 0x00003FFF
298data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
299data8 0x9837F0518DB8A96F , 0x00003FFF
300data8 0xA5FED6A9B15138EA , 0x00003FFF
301data8 0xB504F333F9DE6484 , 0x00003FFF
302data8 0xC5672A115506DADD , 0x00003FFF
303data8 0xD744FCCAD69D6AF4 , 0x00003FFF
304data8 0xEAC0C6E7DD24392F , 0x00003FFF
305LOCAL_OBJECT_END(exp_table_2)
306
307
308LOCAL_OBJECT_START(exp_p_table)
309data8 0x3f8111116da21757 //P5
310data8 0x3fa55555d787761c //P4
311data8 0x3fc5555555555414 //P3
312data8 0x3fdffffffffffd6a //P2
313LOCAL_OBJECT_END(exp_p_table)
314
315LOCAL_OBJECT_START(sinh_p_table)
316data8 0xB08AF9AE78C1239F, 0x00003FDE // A6
317data8 0xB8EF1D28926D8891, 0x00003FEC // A4
318data8 0x8888888888888412, 0x00003FF8 // A2
319data8 0xD732377688025BE9, 0x00003FE5 // A5
320data8 0xD00D00D00D4D39F2, 0x00003FF2 // A3
321data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // A1
322LOCAL_OBJECT_END(sinh_p_table)
323
324
325.section .text
326GLOBAL_IEEE754_ENTRY(sinh)
327
328{ .mlx
329 getf.exp rSignexp_x = f8 // Must recompute if x unorm
330 movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
331}
332{ .mlx
333 addl rAD_TB1 = @ltoff(exp_table_1), gp
334 movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
335}
336;;
337
338{ .mfi
339 ld8 rAD_TB1 = [rAD_TB1]
340 fclass.m p6,p0 = f8,0x0b // Test for x=unorm
341 mov rExp_mask = 0x1ffff
342}
343{ .mfi
344 mov rExp_bias = 0xffff
345 fnorm.s1 fNormX = f8
346 mov rExp_2tom56 = 0xffff-56
347}
348;;
349
350// Form two constants we need
351// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
352// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
353
354{ .mfi
355 setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
356 fclass.m p8,p0 = f8,0x07 // Test for x=0
357 nop.i 999
358}
359{ .mlx
360 setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
361 movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
362}
363;;
364
365{ .mfi
366 ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
367 fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT
368 nop.i 0
369}
370{ .mfb
371 setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
372 nop.f 0
373(p6) br.cond.spnt SINH_UNORM // Branch if x=unorm
374}
375;;
376
377SINH_COMMON:
378{ .mfi
379 ldfe fLn2_by_128_hi = [rAD_TB1],16
380 nop.f 0
381 nop.i 0
382}
383{ .mfb
384 setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
385 nop.f 0
386(p8) br.ret.spnt b0 // Exit for x=0, result=x
387}
388;;
389
390{ .mfi
391 ldfe fLn2_by_128_lo = [rAD_TB1],16
392 nop.f 0
393 nop.i 0
394}
395{ .mfb
396 and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
397(p10) fma.d.s0 f8 = f8,f1,f0 // Result if x=inf, nan, NaT
398(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT
399}
400;;
401
402// After that last load rAD_TB1 points to the beginning of table 1
403{ .mfi
404 nop.m 0
405 fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
406 sub rExp_x = rExp_x, rExp_bias // True exponent of x
407}
408;;
409
410{ .mfi
411 nop.m 0
412 fmerge.s fAbsX = f0, fNormX // Form |x|
413 nop.i 0
414}
415{ .mfb
416 cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
417 fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
418(p7) br.cond.spnt SINH_SMALL // Branch if 0 < |x| < 2^-2
419}
420;;
421
422// W = X * Inv_log2_by_128
423// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
424// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
425
426{ .mfi
427 add rAD_P = 0x180, rAD_TB1
428 fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
429 add rAD_TB2 = 0x100, rAD_TB1
430}
431;;
432
433// Divide arguments into the following categories:
434// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG
435// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
436// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
437//
438// If the input is really a double arg, then there will never be
439// "Possible Overflow" arguments.
440//
441
442{ .mfi
443 ldfpd fP5, fP4 = [rAD_P] ,16
444 fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
445 nop.i 0
446}
447;;
448
449// Nfloat = round_int(W)
450// The signficand of fW_2TO56_RSH contains the rounded integer part of W,
451// as a twos complement number in the lower bits (that is, it may be negative).
452// That twos complement number (called N) is put into rN.
453
454// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
455// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
456// Thus, fNfloat contains the floating point version of N
457
458{ .mfi
459 ldfpd fP3, fP2 = [rAD_P]
460(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
461 nop.i 0
462}
463{ .mfb
464 nop.m 0
465 fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
466(p15) br.cond.spnt SINH_CERTAIN_OVERFLOW
467}
468;;
469
470{ .mfi
471 getf.sig rN = fW_2TO56_RSH
472 nop.f 0
473 mov rExp_bias_minus_1 = 0xfffe
474}
475;;
476
477// rIndex_1 has index_1
478// rIndex_2_16 has index_2 * 16
479// rBiased_M has M
480
481// rM has true M
482// r = x - Nfloat * ln2_by_128_hi
483// f = 1 - Nfloat * ln2_by_128_lo
484{ .mfi
485 and rIndex_1 = 0x0f, rN
486 fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
487 shr rM = rN, 0x7
488}
489{ .mfi
490 and rIndex_2_16 = 0x70, rN
491 fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
492 sub rN_neg = r0, rN
493}
494;;
495
496{ .mmi
497 and rIndex_1_neg = 0x0f, rN_neg
498 add rBiased_M = rExp_bias_minus_1, rM
499 shr rM_neg = rN_neg, 0x7
500}
501{ .mmi
502 and rIndex_2_16_neg = 0x70, rN_neg
503 add rAD_T2 = rAD_TB2, rIndex_2_16
504 shladd rAD_T1 = rIndex_1, 4, rAD_TB1
505}
506;;
507
508// rAD_T1 has address of T1
509// rAD_T2 has address if T2
510
511{ .mmi
512 setf.exp f2M = rBiased_M
513 ldfe fT2 = [rAD_T2]
514 nop.i 0
515}
516{ .mmi
517 add rBiased_M_neg = rExp_bias_minus_1, rM_neg
518 add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
519 shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
520}
521;;
522
523// Create Scale = 2^M
524// Load T1 and T2
525{ .mmi
526 ldfe fT1 = [rAD_T1]
527 nop.m 0
528 nop.i 0
529}
530{ .mmf
531 setf.exp f2M_neg = rBiased_M_neg
532 ldfe fT2_neg = [rAD_T2_neg]
533 fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1
534}
535;;
536
537{ .mfi
538 nop.m 0
539 fma.s1 fRsq = fR, fR, f0
540 nop.i 0
541}
542{ .mfi
543 ldfe fT1_neg = [rAD_T1_neg]
544 fma.s1 fP54 = fR, fP5, fP4
545 nop.i 0
546}
547;;
548
549{ .mfi
550 nop.m 0
551 fma.s1 fP32 = fR, fP3, fP2
552 nop.i 0
553}
554{ .mfi
555 nop.m 0
556 fnma.s1 fP54_neg = fR, fP5, fP4
557 nop.i 0
558}
559;;
560
561{ .mfi
562 nop.m 0
563 fnma.s1 fP32_neg = fR, fP3, fP2
564 nop.i 0
565}
566;;
567
568{ .mfi
569 nop.m 0
570 fma.s1 fP5432 = fRsq, fP54, fP32
571 nop.i 0
572}
573{ .mfi
574 nop.m 0
575 fma.s1 fS2 = fF,fT2,f0
576 nop.i 0
577}
578;;
579
580{ .mfi
581 nop.m 0
582 fma.s1 fS1 = f2M,fT1,f0
583 nop.i 0
584}
585{ .mfi
586 nop.m 0
587 fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg
588 nop.i 0
589}
590;;
591
592{ .mfi
593 nop.m 0
594 fma.s1 fS1_neg = f2M_neg,fT1_neg,f0
595 nop.i 0
596}
597{ .mfi
598 nop.m 0
599 fma.s1 fS2_neg = fF_neg,fT2_neg,f0
600 nop.i 0
601}
602;;
603
604{ .mfi
605 nop.m 0
606 fma.s1 fP = fRsq, fP5432, fR
607 nop.i 0
608}
609{ .mfi
610 nop.m 0
611 fma.s1 fS = fS1,fS2,f0
612 nop.i 0
613}
614;;
615
616{ .mfi
617 nop.m 0
618 fms.s1 fP_neg = fRsq, fP5432_neg, fR
619 nop.i 0
620}
621{ .mfi
622 nop.m 0
623 fma.s1 fS_neg = fS1_neg,fS2_neg,f0
624 nop.i 0
625}
626;;
627
628{ .mfb
629 nop.m 0
630 fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
631(p14) br.cond.spnt SINH_POSSIBLE_OVERFLOW
632}
633;;
634
635{ .mfi
636 nop.m 0
637 fma.s1 fExp = fS, fP, fS
638 nop.i 0
639}
640{ .mfi
641 nop.m 0
642 fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg
643 nop.i 0
644}
645;;
646
647{ .mfb
648 nop.m 0
649 fms.d.s0 f8 = fExp, f1, fExp_neg
650 br.ret.sptk b0 // Normal path exit
651}
652;;
653
654// Here if 0 < |x| < 0.25
655SINH_SMALL:
656{ .mfi
657 add rAD_T1 = 0x1a0, rAD_TB1
658 fcmp.lt.s1 p7, p8 = fNormX, f0 // Test sign of x
659 cmp.gt p6, p0 = -60, rExp_x // Test |x| < 2^(-60)
660}
661{ .mfi
662 add rAD_T2 = 0x1d0, rAD_TB1
663 nop.f 0
664 nop.i 0
665}
666;;
667
668{ .mmb
669 ldfe fA6 = [rAD_T1],16
670 ldfe fA5 = [rAD_T2],16
671(p6) br.cond.spnt SINH_VERY_SMALL // Branch if |x| < 2^(-60)
672}
673;;
674
675{ .mmi
676 ldfe fA4 = [rAD_T1],16
677 ldfe fA3 = [rAD_T2],16
678 nop.i 0
679}
680;;
681
682{ .mmi
683 ldfe fA2 = [rAD_T1]
684 ldfe fA1 = [rAD_T2]
685 nop.i 0
686}
687;;
688
689{ .mfi
690 nop.m 0
691 fma.s1 fX3 = fNormX, fXsq, f0
692 nop.i 0
693}
694{ .mfi
695 nop.m 0
696 fma.s1 fX4 = fXsq, fXsq, f0
697 nop.i 0
698}
699;;
700
701{ .mfi
702 nop.m 0
703 fma.s1 fA65 = fXsq, fA6, fA5
704 nop.i 0
705}
706{ .mfi
707 nop.m 0
708 fma.s1 fA43 = fXsq, fA4, fA3
709 nop.i 0
710}
711;;
712
713{ .mfi
714 nop.m 0
715 fma.s1 fA21 = fXsq, fA2, fA1
716 nop.i 0
717}
718;;
719
720{ .mfi
721 nop.m 0
722 fma.s1 fA6543 = fX4, fA65, fA43
723 nop.i 0
724}
725;;
726
727{ .mfi
728 nop.m 0
729 fma.s1 fA654321 = fX4, fA6543, fA21
730 nop.i 0
731}
732;;
733
734// Dummy multiply to generate inexact
735{ .mfi
736 nop.m 0
737 fmpy.s0 fTmp = fA6, fA6
738 nop.i 0
739}
740{ .mfb
741 nop.m 0
742 fma.d.s0 f8 = fA654321, fX3, fNormX
743 br.ret.sptk b0 // Exit if 2^-60 < |x| < 0.25
744}
745;;
746
747SINH_VERY_SMALL:
748// Here if 0 < |x| < 2^-60
749// Compute result by x + sgn(x)*x^2 to get properly rounded result
750.pred.rel "mutex",p7,p8
751{ .mfi
752 nop.m 0
753(p7) fnma.d.s0 f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
754 nop.i 0
755}
756{ .mfb
757 nop.m 0
758(p8) fma.d.s0 f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
759 br.ret.sptk b0 // Exit if |x| < 2^-60
760}
761;;
762
763
764SINH_POSSIBLE_OVERFLOW:
765
766// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
767// This cannot happen if input is a double, only if input higher precision.
768// Overflow is a possibility, not a certainty.
769
770// Recompute result using status field 2 with user's rounding mode,
771// and wre set. If result is larger than largest double, then we have
772// overflow
773
774{ .mfi
775 mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
776 fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
777 nop.i 0
778}
779;;
780
781{ .mfi
782 setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
783 fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
784 nop.i 0
785}
786;;
787
788{ .mfi
789 nop.m 0
790 fsetc.s2 0x7F,0x40 // Turn off wre in sf2
791 nop.i 0
792}
793;;
794
795{ .mfi
796 nop.m 0
797 fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
798 nop.i 0
799}
800;;
801
802{ .mfb
803 nop.m 0
804 nop.f 0
805(p6) br.cond.spnt SINH_CERTAIN_OVERFLOW // Branch if overflow
806}
807;;
808
809{ .mfb
810 nop.m 0
811 fma.d.s0 f8 = fS, fP, fS
812 br.ret.sptk b0 // Exit if really no overflow
813}
814;;
815
816SINH_CERTAIN_OVERFLOW:
817{ .mfi
818 sub rTmp = rExp_mask, r0, 1
819 fcmp.lt.s1 p6, p7 = fNormX, f0 // Test for x < 0
820 nop.i 0
821}
822;;
823
824{ .mmf
825 alloc r32=ar.pfs,1,4,4,0
826 setf.exp fTmp = rTmp
827 fmerge.s FR_X = f8,f8
828}
829;;
830
831{ .mfi
832 mov GR_Parameter_TAG = 127
833(p6) fnma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
834 nop.i 0
835}
836{ .mfb
837 nop.m 0
838(p7) fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
839 br.cond.sptk __libm_error_region
840}
841;;
842
843// Here if x unorm
844SINH_UNORM:
845{ .mfb
846 getf.exp rSignexp_x = fNormX // Must recompute if x unorm
847 fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
848 br.cond.sptk SINH_COMMON
849}
850;;
851
852GLOBAL_IEEE754_END(sinh)
853
854
855LOCAL_LIBM_ENTRY(__libm_error_region)
856.prologue
857{ .mfi
858 add GR_Parameter_Y=-32,sp // Parameter 2 value
859 nop.f 0
860.save ar.pfs,GR_SAVE_PFS
861 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
862}
863{ .mfi
864.fframe 64
865 add sp=-64,sp // Create new stack
866 nop.f 0
867 mov GR_SAVE_GP=gp // Save gp
868};;
869{ .mmi
870 stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
871 add GR_Parameter_X = 16,sp // Parameter 1 address
872.save b0, GR_SAVE_B0
873 mov GR_SAVE_B0=b0 // Save b0
874};;
875.body
876{ .mib
877 stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
878 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
879 nop.b 0
880}
881{ .mib
882 stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
883 add GR_Parameter_Y = -16,GR_Parameter_Y
884 br.call.sptk b0=__libm_error_support# // Call error handling function
885};;
886{ .mmi
887 add GR_Parameter_RESULT = 48,sp
888 nop.m 0
889 nop.i 0
890};;
891{ .mmi
892 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
893.restore sp
894 add sp = 64,sp // Restore stack pointer
895 mov b0 = GR_SAVE_B0 // Restore return address
896};;
897{ .mib
898 mov gp = GR_SAVE_GP // Restore gp
899 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
900 br.ret.sptk b0 // Return
901};;
902
903LOCAL_LIBM_END(__libm_error_region)
904.type __libm_error_support#,@function
905.global __libm_error_support#