]>
Commit | Line | Data |
---|---|---|
6698b8bf JM |
1 | /* |
2 | * Written by J.T. Conklin <jtc@netbsd.org>. | |
3 | * Public domain. | |
4 | * | |
5 | * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. | |
6 | */ | |
7 | ||
8 | /* | |
9 | * The 8087 method for the exponential function is to calculate | |
10 | * exp(x) = 2^(x log2(e)) | |
11 | * after separating integer and fractional parts | |
12 | * x log2(e) = i + f, |f| <= .5 | |
13 | * 2^i is immediate but f needs to be precise for long double accuracy. | |
14 | * Suppress range reduction error in computing f by the following. | |
15 | * Separate x into integer and fractional parts | |
16 | * x = xi + xf, |xf| <= .5 | |
17 | * Separate log2(e) into the sum of an exact number c0 and small part c1. | |
18 | * c0 + c1 = log2(e) to extra precision | |
19 | * Then | |
20 | * f = (c0 xi - i) + c0 xf + c1 x | |
21 | * where c0 xi is exact and so also is (c0 xi - i). | |
22 | * -- moshier@na-net.ornl.gov | |
23 | */ | |
24 | ||
25 | #include <machine/asm.h> | |
26 | ||
d8b82cad JM |
27 | #ifdef USE_AS_EXP10L /* Build variant computing 10^x. */ |
28 | # define IEEE754_EXPL __ieee754_exp10l | |
29 | # define EXPL_FINITE __exp10l_finite | |
30 | # define FLDLOG fldl2t /* x87 instruction that pushes log2(10) */ | |
495fd99f JM |
31 | #elif defined USE_AS_EXPM1L /* Build variant computing e^x - 1. */ |
32 | # define IEEE754_EXPL __expm1l | |
33 | # undef EXPL_FINITE /* this variant creates no alias through EXPL_FINITE at the end of the file */ | |
34 | # define FLDLOG fldl2e /* x87 instruction that pushes log2(e) */ | |
d8b82cad JM |
35 | #else /* Default build computing e^x. */ |
36 | # define IEEE754_EXPL __ieee754_expl | |
37 | # define EXPL_FINITE __expl_finite | |
38 | # define FLDLOG fldl2e /* x87 instruction that pushes log2(e) */ | |
39 | #endif | |
40 | ||
6698b8bf JM |
41 | .section .rodata.cst16,"aM",@progbits,16 /* mergeable read-only constants, 16-byte entries */ |
42 | ||
43 | .p2align 4 | |
d8b82cad JM |
44 | #ifdef USE_AS_EXP10L /* c0/c1 split of log2(10) for the exp10l build */ |
45 | ASM_TYPE_DIRECTIVE(c0,@object) | |
46 | c0: .byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40 /* 80-bit value, little-endian: significand 0xd49a << 48, exponent field 0x4000 (about 3.3219); low 48 significand bits are zero */ | |
47 | .byte 0, 0, 0, 0, 0, 0 /* pad the 10-byte value to 16 bytes */ | |
48 | ASM_SIZE_DIRECTIVE(c0) | |
49 | ASM_TYPE_DIRECTIVE(c1,@object) | |
50 | c1: .byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f /* small correction term (exponent field 0x3fef): c0 + c1 = log2(10) to extra precision */ | |
51 | .byte 0, 0, 0, 0, 0, 0 /* pad to 16 bytes */ | |
52 | ASM_SIZE_DIRECTIVE(c1) | |
53 | #else /* c0/c1 split of log2(e) for the expl and expm1l builds */ | |
6698b8bf JM |
54 | ASM_TYPE_DIRECTIVE(c0,@object) |
55 | c0: .byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f /* 80-bit value, little-endian: significand 0xb8aa << 48, exponent field 0x3fff (about 1.4427); low 48 significand bits are zero */ | |
56 | .byte 0, 0, 0, 0, 0, 0 /* pad the 10-byte value to 16 bytes */ | |
57 | ASM_SIZE_DIRECTIVE(c0) | |
58 | ASM_TYPE_DIRECTIVE(c1,@object) | |
59 | c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f /* small correction term (exponent field 0x3fed): c0 + c1 = log2(e) to extra precision */ | |
60 | .byte 0, 0, 0, 0, 0, 0 /* pad to 16 bytes */ | |
61 | ASM_SIZE_DIRECTIVE(c1) | |
d8b82cad | 62 | #endif
f17ac40d | 63 | #ifndef USE_AS_EXPM1L
41498f4d JM |
64 | ASM_TYPE_DIRECTIVE(csat,@object) |
65 | csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40 /* 2^15 = 32768.0 (significand 0x8000000000000000, exponent field 0x400e): magnitude substituted for finite out-of-range arguments */ | |
66 | .byte 0, 0, 0, 0, 0, 0 /* pad to 16 bytes */ | |
67 | ASM_SIZE_DIRECTIVE(csat) | |
f17ac40d | 68 | #endif
6698b8bf JM |
69 | |
70 | #ifdef PIC /* MO(sym) forms the memory operand used to read the constants c0, c1 and csat. */ | |
71 | # define MO(op) op##@GOTOFF(%ecx) /* PIC: GOT-relative offset from %ecx, which the function sets with LOAD_PIC_REG (cx) */ | |
72 | #else | |
73 | # define MO(op) op /* non-PIC: the symbol is addressed directly */ | |
74 | #endif | |
75 | ||
76 | .text | |
d8b82cad | 77 | ENTRY(IEEE754_EXPL) /* Computes base^x (or base^x - 1 when USE_AS_EXPM1L) as 2^(x log2(base)). In: long double x at 4(%esp). Out: result in %st(0). Writes %eax and %dh, plus %ecx under PIC. */
495fd99f JM |
78 | #ifdef USE_AS_EXPM1L |
79 | movzwl 4+8(%esp), %eax /* eax = sign/exponent word (bytes 8-9) of x, zero-extended */ | |
80 | xorb $0x80, %ah // invert sign bit (now 1 is "positive") | |
81 | cmpl $0xc006, %eax // is num positive and exp >= 6 (number is >= 128.0)? | |
82 | jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0) | |
83 | #endif | |
6698b8bf JM |
84 | fldt 4(%esp) /* st0 = x */ |
85 | /* I added the following ugly construct because expl(+-Inf) resulted | |
86 | in NaN. The ugliness results from the bright minds at Intel. | |
87 | For the i686 the code can be written better. | |
88 | -- drepper@cygnus.com. */ | |
89 | fxam /* Is NaN or +-Inf? */ | |
90 | #ifdef PIC | |
91 | LOAD_PIC_REG (cx) /* presumably loads the GOT base into %ecx for MO(); macro is defined outside this file */ | |
92 | #endif | |
f17ac40d JM |
93 | #ifdef USE_AS_EXPM1L |
94 | xorb $0x80, %ah /* undo the sign inversion done at entry: bit 15 of eax is the real sign again */ | |
95 | cmpl $0xc006, %eax /* negative with exponent field >= 0x4006? The flags are consumed by the jb below. */ | |
96 | fstsw %ax /* ax = FPU status word holding the fxam classification */ | |
97 | movb $0x45, %dh /* mask selecting C3, C2 and C0 in the high status byte */ | |
98 | jb 4f /* not a large-magnitude negative value: take the in-range path */ | |
99 | ||
100 | /* Below -64.0 (may be -NaN or -Inf). */ /* NOTE(review): exponent field 0x4006 is 2^7, so normalized inputs reaching here are <= -128.0; confirm the 64.0 wording. */ | |
101 | andb %ah, %dh /* dh = fxam class bits */ | |
102 | cmpb $0x01, %dh /* only C0 set means NaN */ | |
103 | je 2f /* Is +-NaN, jump. */ | |
104 | jmp 1f /* -large, possibly -Inf. */ | |
105 | ||
106 | 4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */ | |
107 | /* Test for +-0 as argument. */ | |
108 | andb %ah, %dh /* dh = fxam class bits */ | |
109 | cmpb $0x40, %dh /* only C3 set means zero */ | |
110 | je 2f /* +-0: return x itself, still in st0 */ | |
111 | #else | |
41498f4d JM |
112 | movzwl 4+8(%esp), %eax |
113 | andl $0x7fff, %eax /* drop the sign bit, keep the 15-bit exponent field */ | |
114 | cmpl $0x400d, %eax /* exponent field <= 0x400d, i.e. magnitude below 2^15? */ | |
115 | jle 3f /* yes: go straight to the computation */ | |
116 | /* Overflow, underflow or infinity or NaN as argument. */ | |
6698b8bf JM |
117 | fstsw %ax /* ax = FPU status word holding the fxam classification */ |
118 | movb $0x45, %dh /* mask selecting C3, C2 and C0 in the high status byte */ | |
119 | andb %ah, %dh /* dh = fxam class bits */ | |
120 | cmpb $0x05, %dh /* C2 and C0 set means infinity */ | |
121 | je 1f /* Is +-Inf, jump. */ | |
41498f4d JM |
122 | cmpb $0x01, %dh |
123 | je 2f /* Is +-NaN, jump. */ | |
124 | /* Overflow or underflow; saturate. */ | |
125 | fstp %st /* discard x */ | |
126 | fldt MO(csat) /* st0 = +csat, which takes the place of x below */ | |
127 | andb $2, %ah /* isolate C1, the sign of x as reported by fxam */ | |
128 | jz 3f /* x was positive: keep +csat */ | |
129 | fchs /* x was negative: use -csat */ | |
495fd99f | 130 | #endif
f17ac40d | 131 | 3: FLDLOG /* 1 log2(base) */
d8b82cad | 132 | fmul %st(1), %st /* 1 x log2(base) */
6698b8bf JM |
133 | frndint /* 1 i */ |
134 | fld %st(1) /* 2 x */ | |
135 | frndint /* 2 xi */ | |
136 | fld %st(1) /* 3 i */ | |
137 | fldt MO(c0) /* 4 c0 */ | |
138 | fld %st(2) /* 5 xi */ | |
139 | fmul %st(1), %st /* 5 c0 xi */ | |
140 | fsubp %st, %st(2) /* 4 f = c0 xi - i */ | |
141 | fld %st(4) /* 5 x */ | |
142 | fsub %st(3), %st /* 5 xf = x - xi */ | |
143 | fmulp %st, %st(1) /* 4 c0 xf */ | |
144 | faddp %st, %st(1) /* 3 f = f + c0 xf */ | |
145 | fldt MO(c1) /* 4 */ | |
146 | fmul %st(4), %st /* 4 c1 * x */ | |
147 | faddp %st, %st(1) /* 3 f = f + c1 * x */ | |
d8b82cad | 148 | f2xm1 /* 3 2^(fract(x * log2(base))) - 1 */
495fd99f JM |
149 | #ifdef USE_AS_EXPM1L |
150 | fstp %st(1) /* 2 */ | |
151 | fscale /* 2 scale factor is st(1); base^x - 2^i */ | |
152 | fxch /* 2 i */ | |
153 | fld1 /* 3 1.0 */ | |
154 | fscale /* 3 2^i */ | |
155 | fld1 /* 4 1.0 */ | |
156 | fsubrp %st, %st(1) /* 3 2^i - 1.0 */ | |
157 | fstp %st(1) /* 2 */ | |
158 | faddp %st, %st(1) /* 1 base^x - 1.0 */ | |
159 | #else | |
6698b8bf | 160 | fld1 /* 4 1.0 */
d8b82cad | 161 | faddp /* 3 2^(fract(x * log2(base))) */
6698b8bf | 162 | fstp %st(1) /* 2 */
d8b82cad | 163 | fscale /* 2 scale factor is st(1); base^x */
6698b8bf | 164 | fstp %st(1) /* 1 */
495fd99f | 165 | #endif
6698b8bf JM |
166 | fstp %st(1) /* 0 */ |
167 | jmp 2f | |
f17ac40d | 168 | 1:
495fd99f | 169 | #ifdef USE_AS_EXPM1L
f17ac40d JM |
170 | /* For expm1l, only negative sign gets here. */ |
171 | fstp %st /* discard x */ | |
495fd99f JM |
172 | fld1 |
173 | fchs /* result is -1.0 */ | |
174 | #else | |
f17ac40d JM |
175 | testl $0x200, %eax /* Test sign. */ |
176 | jz 2f /* If positive, jump. */ | |
177 | fstp %st /* discard the -Inf argument */ | |
6698b8bf | 178 | fldz /* Set result to 0. */
495fd99f | 179 | #endif
6698b8bf | 180 | 2: ret
d8b82cad | 181 | END(IEEE754_EXPL)
495fd99f JM |
182 | #ifdef USE_AS_EXPM1L /* Symbol aliases for the function above; the alias macros are defined outside this file. */ |
183 | libm_hidden_def (__expm1l) | |
184 | weak_alias (__expm1l, expm1l) /* presumably exports expm1l as a weak alias of __expm1l -- confirm against the macro definition */ | |
185 | #else | |
d8b82cad | 186 | strong_alias (IEEE754_EXPL, EXPL_FINITE) /* presumably exports the __*_finite name as a second name for the same entry point -- confirm against the macro definition */
495fd99f | 187 | #endif