]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ieee754/ldbl-128ibm/e_powl.c
Merge branch 'elf-move'
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128ibm / e_powl.c
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
32
33 /* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67 #include <math.h>
68 #include <math_private.h>
69
70 static const long double bp[] = {
71 1.0L,
72 1.5L,
73 };
74
75 /* log_2(1.5) */
76 static const long double dp_h[] = {
77 0.0,
78 5.8496250072115607565592654282227158546448E-1L
79 };
80
81 /* Low part of log_2(1.5) */
82 static const long double dp_l[] = {
83 0.0,
84 1.0579781240112554492329533686862998106046E-16L
85 };
86
87 static const long double zero = 0.0L,
88 one = 1.0L,
89 two = 2.0L,
90 two113 = 1.0384593717069655257060992658440192E34L,
91 huge = 1.0e300L,
92 tiny = 1.0e-300L;
93
94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98 static const long double LN[] =
99 {
100 -3.0779177200290054398792536829702930623200E1L,
101 6.5135778082209159921251824580292116201640E1L,
102 -4.6312921812152436921591152809994014413540E1L,
103 1.2510208195629420304615674658258363295208E1L,
104 -9.9266909031921425609179910128531667336670E-1L
105 };
106 static const long double LD[] =
107 {
108 -5.129862866715009066465422805058933131960E1L,
109 1.452015077564081884387441590064272782044E2L,
110 -1.524043275549860505277434040464085593165E2L,
111 7.236063513651544224319663428634139768808E1L,
112 -1.494198912340228235853027849917095580053E1L
113 /* 1.0E0 */
114 };
115
116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119 static const long double PN[] =
120 {
121 5.081801691915377692446852383385968225675E8L,
122 9.360895299872484512023336636427675327355E6L,
123 4.213701282274196030811629773097579432957E4L,
124 5.201006511142748908655720086041570288182E1L,
125 9.088368420359444263703202925095675982530E-3L,
126 };
127 static const long double PD[] =
128 {
129 3.049081015149226615468111430031590411682E9L,
130 1.069833887183886839966085436512368982758E8L,
131 8.259257717868875207333991924545445705394E5L,
132 1.872583833284143212651746812884298360922E3L,
133 /* 1.0E0 */
134 };
135
136 static const long double
137 /* ln 2 */
138 lg2 = 6.9314718055994530941723212145817656807550E-1L,
139 lg2_h = 6.9314718055994528622676398299518041312695E-1L,
140 lg2_l = 2.3190468138462996154948554638754786504121E-17L,
141 ovt = 8.0085662595372944372e-0017L,
142 /* 2/(3*log(2)) */
143 cp = 9.6179669392597560490661645400126142495110E-1L,
144 cp_h = 9.6179669392597555432899980587535537779331E-1L,
145 cp_l = 5.0577616648125906047157785230014751039424E-17L;
146
147 long double
148 __ieee754_powl (long double x, long double y)
149 {
150 long double z, ax, z_h, z_l, p_h, p_l;
151 long double y1, t1, t2, r, s, t, u, v, w;
152 long double s2, s_h, s_l, t_h, t_l;
153 int32_t i, j, k, yisint, n;
154 u_int32_t ix, iy;
155 int32_t hx, hy;
156 ieee854_long_double_shape_type o, p, q;
157
158 p.value = x;
159 hx = p.parts32.w0;
160 ix = hx & 0x7fffffff;
161
162 q.value = y;
163 hy = q.parts32.w0;
164 iy = hy & 0x7fffffff;
165
166
167 /* y==zero: x**0 = 1 */
168 if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
169 return one;
170
171 /* 1.0**y = 1; -1.0**+-Inf = 1 */
172 if (x == one)
173 return one;
174 if (x == -1.0L && iy == 0x7ff00000
175 && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
176 return one;
177
178 /* +-NaN return x+y */
179 if ((ix > 0x7ff00000)
180 || ((ix == 0x7ff00000)
181 && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
182 || (iy > 0x7ff00000)
183 || ((iy == 0x7ff00000)
184 && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
185 return x + y;
186
187 /* determine if y is an odd int when x < 0
188 * yisint = 0 ... y is not an integer
189 * yisint = 1 ... y is an odd int
190 * yisint = 2 ... y is an even int
191 */
192 yisint = 0;
193 if (hx < 0)
194 {
195 if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
196 yisint = 2; /* even integer y */
197 else if (iy >= 0x3ff00000) /* 1.0 */
198 {
199 if (__floorl (y) == y)
200 {
201 z = 0.5 * y;
202 if (__floorl (z) == z)
203 yisint = 2;
204 else
205 yisint = 1;
206 }
207 }
208 }
209
210 /* special value of y */
211 if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
212 {
213 if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */
214 {
215 if (((ix - 0x3ff00000) | p.parts32.w1
216 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
217 return y - y; /* inf**+-1 is NaN */
218 else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
219 /* (|x|>1)**+-inf = inf,0 */
220 return (hy >= 0) ? y : zero;
221 else
222 /* (|x|<1)**-,+inf = inf,0 */
223 return (hy < 0) ? -y : zero;
224 }
225 if (iy == 0x3ff00000)
226 { /* y is +-1 */
227 if (hy < 0)
228 return one / x;
229 else
230 return x;
231 }
232 if (hy == 0x40000000)
233 return x * x; /* y is 2 */
234 if (hy == 0x3fe00000)
235 { /* y is 0.5 */
236 if (hx >= 0) /* x >= +0 */
237 return __ieee754_sqrtl (x);
238 }
239 }
240
241 ax = fabsl (x);
242 /* special value of x */
243 if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
244 {
245 if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
246 {
247 z = ax; /*x is +-0,+-inf,+-1 */
248 if (hy < 0)
249 z = one / z; /* z = (1/|x|) */
250 if (hx < 0)
251 {
252 if (((ix - 0x3ff00000) | yisint) == 0)
253 {
254 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
255 }
256 else if (yisint == 1)
257 z = -z; /* (x<0)**odd = -(|x|**odd) */
258 }
259 return z;
260 }
261 }
262
263 /* (x<0)**(non-int) is NaN */
264 if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
265 return (x - x) / (x - x);
266
267 /* |y| is huge.
268 2^-16495 = 1/2 of smallest representable value.
269 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
270 if (iy > 0x41d654b0)
271 {
272 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
273 if (iy > 0x47d654b0)
274 {
275 if (ix <= 0x3fefffff)
276 return (hy < 0) ? huge * huge : tiny * tiny;
277 if (ix >= 0x3ff00000)
278 return (hy > 0) ? huge * huge : tiny * tiny;
279 }
280 /* over/underflow if x is not close to one */
281 if (ix < 0x3fefffff)
282 return (hy < 0) ? huge * huge : tiny * tiny;
283 if (ix > 0x3ff00000)
284 return (hy > 0) ? huge * huge : tiny * tiny;
285 }
286
287 n = 0;
288 /* take care subnormal number */
289 if (ix < 0x00100000)
290 {
291 ax *= two113;
292 n -= 113;
293 o.value = ax;
294 ix = o.parts32.w0;
295 }
296 n += ((ix) >> 20) - 0x3ff;
297 j = ix & 0x000fffff;
298 /* determine interval */
299 ix = j | 0x3ff00000; /* normalize ix */
300 if (j <= 0x39880)
301 k = 0; /* |x|<sqrt(3/2) */
302 else if (j < 0xbb670)
303 k = 1; /* |x|<sqrt(3) */
304 else
305 {
306 k = 0;
307 n += 1;
308 ix -= 0x00100000;
309 }
310
311 o.value = ax;
312 o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
313 ax = o.value;
314
315 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
316 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
317 v = one / (ax + bp[k]);
318 s = u * v;
319 s_h = s;
320
321 o.value = s_h;
322 o.parts32.w3 = 0;
323 o.parts32.w2 &= 0xffff8000;
324 s_h = o.value;
325 /* t_h=ax+bp[k] High */
326 t_h = ax + bp[k];
327 o.value = t_h;
328 o.parts32.w3 = 0;
329 o.parts32.w2 &= 0xffff8000;
330 t_h = o.value;
331 t_l = ax - (t_h - bp[k]);
332 s_l = v * ((u - s_h * t_h) - s_h * t_l);
333 /* compute log(ax) */
334 s2 = s * s;
335 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
336 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
337 r = s2 * s2 * u / v;
338 r += s_l * (s_h + s);
339 s2 = s_h * s_h;
340 t_h = 3.0 + s2 + r;
341 o.value = t_h;
342 o.parts32.w3 = 0;
343 o.parts32.w2 &= 0xffff8000;
344 t_h = o.value;
345 t_l = r - ((t_h - 3.0) - s2);
346 /* u+v = s*(1+...) */
347 u = s_h * t_h;
348 v = s_l * t_h + t_l * s;
349 /* 2/(3log2)*(s+...) */
350 p_h = u + v;
351 o.value = p_h;
352 o.parts32.w3 = 0;
353 o.parts32.w2 &= 0xffff8000;
354 p_h = o.value;
355 p_l = v - (p_h - u);
356 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
357 z_l = cp_l * p_h + p_l * cp + dp_l[k];
358 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
359 t = (long double) n;
360 t1 = (((z_h + z_l) + dp_h[k]) + t);
361 o.value = t1;
362 o.parts32.w3 = 0;
363 o.parts32.w2 &= 0xffff8000;
364 t1 = o.value;
365 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
366
367 /* s (sign of result -ve**odd) = -1 else = 1 */
368 s = one;
369 if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
370 s = -one; /* (-ve)**(odd int) */
371
372 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
373 y1 = y;
374 o.value = y1;
375 o.parts32.w3 = 0;
376 o.parts32.w2 &= 0xffff8000;
377 y1 = o.value;
378 p_l = (y - y1) * t1 + y * t2;
379 p_h = y1 * t1;
380 z = p_l + p_h;
381 o.value = z;
382 j = o.parts32.w0;
383 if (j >= 0x40d00000) /* z >= 16384 */
384 {
385 /* if z > 16384 */
386 if (((j - 0x40d00000) | o.parts32.w1
387 | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
388 return s * huge * huge; /* overflow */
389 else
390 {
391 if (p_l + ovt > z - p_h)
392 return s * huge * huge; /* overflow */
393 }
394 }
395 else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
396 {
397 /* z < -16495 */
398 if (((j - 0xc0d01bc0) | o.parts32.w1
399 | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
400 return s * tiny * tiny; /* underflow */
401 else
402 {
403 if (p_l <= z - p_h)
404 return s * tiny * tiny; /* underflow */
405 }
406 }
407 /* compute 2**(p_h+p_l) */
408 i = j & 0x7fffffff;
409 k = (i >> 20) - 0x3ff;
410 n = 0;
411 if (i > 0x3fe00000)
412 { /* if |z| > 0.5, set n = [z+0.5] */
413 n = __floorl (z + 0.5L);
414 t = n;
415 p_h -= t;
416 }
417 t = p_l + p_h;
418 o.value = t;
419 o.parts32.w3 = 0;
420 o.parts32.w2 &= 0xffff8000;
421 t = o.value;
422 u = t * lg2_h;
423 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
424 z = u + v;
425 w = v - (z - u);
426 /* exp(z) */
427 t = z * z;
428 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
429 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
430 t1 = z - t * u / v;
431 r = (z * t1) / (t1 - two) - (w + z * w);
432 z = one - (r - z);
433 z = __scalbnl (z, n);
434 return s * z;
435 }
436 strong_alias (__ieee754_powl, __powl_finite)