]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ieee754/ldbl-128/e_powl.c
ldbl-128: Use L(x) macro for long double constants
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / e_powl.c
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
32
33 /* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67 #include <math.h>
68 #include <math_private.h>
69
70 static const _Float128 bp[] = {
71 1,
72 L(1.5),
73 };
74
75 /* log_2(1.5) */
76 static const _Float128 dp_h[] = {
77 0.0,
78 L(5.8496250072115607565592654282227158546448E-1)
79 };
80
81 /* Low part of log_2(1.5) */
82 static const _Float128 dp_l[] = {
83 0.0,
84 L(1.0579781240112554492329533686862998106046E-16)
85 };
86
87 static const _Float128 zero = 0,
88 one = 1,
89 two = 2,
90 two113 = L(1.0384593717069655257060992658440192E34),
91 huge = L(1.0e3000),
92 tiny = L(1.0e-3000);
93
94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98 static const _Float128 LN[] =
99 {
100 L(-3.0779177200290054398792536829702930623200E1),
101 L(6.5135778082209159921251824580292116201640E1),
102 L(-4.6312921812152436921591152809994014413540E1),
103 L(1.2510208195629420304615674658258363295208E1),
104 L(-9.9266909031921425609179910128531667336670E-1)
105 };
106 static const _Float128 LD[] =
107 {
108 L(-5.129862866715009066465422805058933131960E1),
109 L(1.452015077564081884387441590064272782044E2),
110 L(-1.524043275549860505277434040464085593165E2),
111 L(7.236063513651544224319663428634139768808E1),
112 L(-1.494198912340228235853027849917095580053E1)
113 /* 1.0E0 */
114 };
115
116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119 static const _Float128 PN[] =
120 {
121 L(5.081801691915377692446852383385968225675E8),
122 L(9.360895299872484512023336636427675327355E6),
123 L(4.213701282274196030811629773097579432957E4),
124 L(5.201006511142748908655720086041570288182E1),
125 L(9.088368420359444263703202925095675982530E-3),
126 };
127 static const _Float128 PD[] =
128 {
129 L(3.049081015149226615468111430031590411682E9),
130 L(1.069833887183886839966085436512368982758E8),
131 L(8.259257717868875207333991924545445705394E5),
132 L(1.872583833284143212651746812884298360922E3),
133 /* 1.0E0 */
134 };
135
136 static const _Float128
137 /* ln 2 */
138 lg2 = L(6.9314718055994530941723212145817656807550E-1),
139 lg2_h = L(6.9314718055994528622676398299518041312695E-1),
140 lg2_l = L(2.3190468138462996154948554638754786504121E-17),
141 ovt = L(8.0085662595372944372e-0017),
142 /* 2/(3*log(2)) */
143 cp = L(9.6179669392597560490661645400126142495110E-1),
144 cp_h = L(9.6179669392597555432899980587535537779331E-1),
145 cp_l = L(5.0577616648125906047157785230014751039424E-17);
146
147 _Float128
148 __ieee754_powl (_Float128 x, _Float128 y)
149 {
150 _Float128 z, ax, z_h, z_l, p_h, p_l;
151 _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
152 _Float128 s2, s_h, s_l, t_h, t_l, ay;
153 int32_t i, j, k, yisint, n;
154 u_int32_t ix, iy;
155 int32_t hx, hy;
156 ieee854_long_double_shape_type o, p, q;
157
158 p.value = x;
159 hx = p.parts32.w0;
160 ix = hx & 0x7fffffff;
161
162 q.value = y;
163 hy = q.parts32.w0;
164 iy = hy & 0x7fffffff;
165
166
167 /* y==zero: x**0 = 1 */
168 if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
169 return one;
170
171 /* 1.0**y = 1; -1.0**+-Inf = 1 */
172 if (x == one)
173 return one;
174 if (x == -1 && iy == 0x7fff0000
175 && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
176 return one;
177
178 /* +-NaN return x+y */
179 if ((ix > 0x7fff0000)
180 || ((ix == 0x7fff0000)
181 && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
182 || (iy > 0x7fff0000)
183 || ((iy == 0x7fff0000)
184 && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
185 return x + y;
186
187 /* determine if y is an odd int when x < 0
188 * yisint = 0 ... y is not an integer
189 * yisint = 1 ... y is an odd int
190 * yisint = 2 ... y is an even int
191 */
192 yisint = 0;
193 if (hx < 0)
194 {
195 if (iy >= 0x40700000) /* 2^113 */
196 yisint = 2; /* even integer y */
197 else if (iy >= 0x3fff0000) /* 1.0 */
198 {
199 if (__floorl (y) == y)
200 {
201 z = 0.5 * y;
202 if (__floorl (z) == z)
203 yisint = 2;
204 else
205 yisint = 1;
206 }
207 }
208 }
209
210 /* special value of y */
211 if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
212 {
213 if (iy == 0x7fff0000) /* y is +-inf */
214 {
215 if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
216 == 0)
217 return y - y; /* +-1**inf is NaN */
218 else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
219 return (hy >= 0) ? y : zero;
220 else /* (|x|<1)**-,+inf = inf,0 */
221 return (hy < 0) ? -y : zero;
222 }
223 if (iy == 0x3fff0000)
224 { /* y is +-1 */
225 if (hy < 0)
226 return one / x;
227 else
228 return x;
229 }
230 if (hy == 0x40000000)
231 return x * x; /* y is 2 */
232 if (hy == 0x3ffe0000)
233 { /* y is 0.5 */
234 if (hx >= 0) /* x >= +0 */
235 return __ieee754_sqrtl (x);
236 }
237 }
238
239 ax = fabsl (x);
240 /* special value of x */
241 if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
242 {
243 if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
244 {
245 z = ax; /*x is +-0,+-inf,+-1 */
246 if (hy < 0)
247 z = one / z; /* z = (1/|x|) */
248 if (hx < 0)
249 {
250 if (((ix - 0x3fff0000) | yisint) == 0)
251 {
252 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
253 }
254 else if (yisint == 1)
255 z = -z; /* (x<0)**odd = -(|x|**odd) */
256 }
257 return z;
258 }
259 }
260
261 /* (x<0)**(non-int) is NaN */
262 if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
263 return (x - x) / (x - x);
264
265 /* sgn (sign of result -ve**odd) = -1 else = 1 */
266 sgn = one;
267 if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
268 sgn = -one; /* (-ve)**(odd int) */
269
270 /* |y| is huge.
271 2^-16495 = 1/2 of smallest representable value.
272 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
273 if (iy > 0x401d654b)
274 {
275 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
276 if (iy > 0x407d654b)
277 {
278 if (ix <= 0x3ffeffff)
279 return (hy < 0) ? huge * huge : tiny * tiny;
280 if (ix >= 0x3fff0000)
281 return (hy > 0) ? huge * huge : tiny * tiny;
282 }
283 /* over/underflow if x is not close to one */
284 if (ix < 0x3ffeffff)
285 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
286 if (ix > 0x3fff0000)
287 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
288 }
289
290 ay = y > 0 ? y : -y;
291 if (ay < 0x1p-128)
292 y = y < 0 ? -0x1p-128 : 0x1p-128;
293
294 n = 0;
295 /* take care subnormal number */
296 if (ix < 0x00010000)
297 {
298 ax *= two113;
299 n -= 113;
300 o.value = ax;
301 ix = o.parts32.w0;
302 }
303 n += ((ix) >> 16) - 0x3fff;
304 j = ix & 0x0000ffff;
305 /* determine interval */
306 ix = j | 0x3fff0000; /* normalize ix */
307 if (j <= 0x3988)
308 k = 0; /* |x|<sqrt(3/2) */
309 else if (j < 0xbb67)
310 k = 1; /* |x|<sqrt(3) */
311 else
312 {
313 k = 0;
314 n += 1;
315 ix -= 0x00010000;
316 }
317
318 o.value = ax;
319 o.parts32.w0 = ix;
320 ax = o.value;
321
322 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
323 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
324 v = one / (ax + bp[k]);
325 s = u * v;
326 s_h = s;
327
328 o.value = s_h;
329 o.parts32.w3 = 0;
330 o.parts32.w2 &= 0xf8000000;
331 s_h = o.value;
332 /* t_h=ax+bp[k] High */
333 t_h = ax + bp[k];
334 o.value = t_h;
335 o.parts32.w3 = 0;
336 o.parts32.w2 &= 0xf8000000;
337 t_h = o.value;
338 t_l = ax - (t_h - bp[k]);
339 s_l = v * ((u - s_h * t_h) - s_h * t_l);
340 /* compute log(ax) */
341 s2 = s * s;
342 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
343 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
344 r = s2 * s2 * u / v;
345 r += s_l * (s_h + s);
346 s2 = s_h * s_h;
347 t_h = 3.0 + s2 + r;
348 o.value = t_h;
349 o.parts32.w3 = 0;
350 o.parts32.w2 &= 0xf8000000;
351 t_h = o.value;
352 t_l = r - ((t_h - 3.0) - s2);
353 /* u+v = s*(1+...) */
354 u = s_h * t_h;
355 v = s_l * t_h + t_l * s;
356 /* 2/(3log2)*(s+...) */
357 p_h = u + v;
358 o.value = p_h;
359 o.parts32.w3 = 0;
360 o.parts32.w2 &= 0xf8000000;
361 p_h = o.value;
362 p_l = v - (p_h - u);
363 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
364 z_l = cp_l * p_h + p_l * cp + dp_l[k];
365 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
366 t = (_Float128) n;
367 t1 = (((z_h + z_l) + dp_h[k]) + t);
368 o.value = t1;
369 o.parts32.w3 = 0;
370 o.parts32.w2 &= 0xf8000000;
371 t1 = o.value;
372 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
373
374 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
375 y1 = y;
376 o.value = y1;
377 o.parts32.w3 = 0;
378 o.parts32.w2 &= 0xf8000000;
379 y1 = o.value;
380 p_l = (y - y1) * t1 + y * t2;
381 p_h = y1 * t1;
382 z = p_l + p_h;
383 o.value = z;
384 j = o.parts32.w0;
385 if (j >= 0x400d0000) /* z >= 16384 */
386 {
387 /* if z > 16384 */
388 if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
389 return sgn * huge * huge; /* overflow */
390 else
391 {
392 if (p_l + ovt > z - p_h)
393 return sgn * huge * huge; /* overflow */
394 }
395 }
396 else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
397 {
398 /* z < -16495 */
399 if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
400 != 0)
401 return sgn * tiny * tiny; /* underflow */
402 else
403 {
404 if (p_l <= z - p_h)
405 return sgn * tiny * tiny; /* underflow */
406 }
407 }
408 /* compute 2**(p_h+p_l) */
409 i = j & 0x7fffffff;
410 k = (i >> 16) - 0x3fff;
411 n = 0;
412 if (i > 0x3ffe0000)
413 { /* if |z| > 0.5, set n = [z+0.5] */
414 n = __floorl (z + L(0.5));
415 t = n;
416 p_h -= t;
417 }
418 t = p_l + p_h;
419 o.value = t;
420 o.parts32.w3 = 0;
421 o.parts32.w2 &= 0xf8000000;
422 t = o.value;
423 u = t * lg2_h;
424 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
425 z = u + v;
426 w = v - (z - u);
427 /* exp(z) */
428 t = z * z;
429 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
430 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
431 t1 = z - t * u / v;
432 r = (z * t1) / (t1 - two) - (w + z * w);
433 z = one - (r - z);
434 o.value = z;
435 j = o.parts32.w0;
436 j += (n << 16);
437 if ((j >> 16) <= 0)
438 {
439 z = __scalbnl (z, n); /* subnormal output */
440 _Float128 force_underflow = z * z;
441 math_force_eval (force_underflow);
442 }
443 else
444 {
445 o.parts32.w0 = j;
446 z = o.value;
447 }
448 return sgn * z;
449 }
450 strong_alias (__ieee754_powl, __powl_finite)