]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ieee754/ldbl-128ibm/e_powl.c
PowerPC floating point little-endian [3 of 15]
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128ibm / e_powl.c
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
32
33 /* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67 #include <math.h>
68 #include <math_private.h>
69
70 static const long double bp[] = {
71 1.0L,
72 1.5L,
73 };
74
75 /* log_2(1.5) */
76 static const long double dp_h[] = {
77 0.0,
78 5.8496250072115607565592654282227158546448E-1L
79 };
80
81 /* Low part of log_2(1.5) */
82 static const long double dp_l[] = {
83 0.0,
84 1.0579781240112554492329533686862998106046E-16L
85 };
86
87 static const long double zero = 0.0L,
88 one = 1.0L,
89 two = 2.0L,
90 two113 = 1.0384593717069655257060992658440192E34L,
91 huge = 1.0e300L,
92 tiny = 1.0e-300L;
93
94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98 static const long double LN[] =
99 {
100 -3.0779177200290054398792536829702930623200E1L,
101 6.5135778082209159921251824580292116201640E1L,
102 -4.6312921812152436921591152809994014413540E1L,
103 1.2510208195629420304615674658258363295208E1L,
104 -9.9266909031921425609179910128531667336670E-1L
105 };
106 static const long double LD[] =
107 {
108 -5.129862866715009066465422805058933131960E1L,
109 1.452015077564081884387441590064272782044E2L,
110 -1.524043275549860505277434040464085593165E2L,
111 7.236063513651544224319663428634139768808E1L,
112 -1.494198912340228235853027849917095580053E1L
113 /* 1.0E0 */
114 };
115
116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119 static const long double PN[] =
120 {
121 5.081801691915377692446852383385968225675E8L,
122 9.360895299872484512023336636427675327355E6L,
123 4.213701282274196030811629773097579432957E4L,
124 5.201006511142748908655720086041570288182E1L,
125 9.088368420359444263703202925095675982530E-3L,
126 };
127 static const long double PD[] =
128 {
129 3.049081015149226615468111430031590411682E9L,
130 1.069833887183886839966085436512368982758E8L,
131 8.259257717868875207333991924545445705394E5L,
132 1.872583833284143212651746812884298360922E3L,
133 /* 1.0E0 */
134 };
135
136 static const long double
137 /* ln 2 */
138 lg2 = 6.9314718055994530941723212145817656807550E-1L,
139 lg2_h = 6.9314718055994528622676398299518041312695E-1L,
140 lg2_l = 2.3190468138462996154948554638754786504121E-17L,
141 ovt = 8.0085662595372944372e-0017L,
142 /* 2/(3*log(2)) */
143 cp = 9.6179669392597560490661645400126142495110E-1L,
144 cp_h = 9.6179669392597555432899980587535537779331E-1L,
145 cp_l = 5.0577616648125906047157785230014751039424E-17L;
146
147 long double
148 __ieee754_powl (long double x, long double y)
149 {
150 long double z, ax, z_h, z_l, p_h, p_l;
151 long double y1, t1, t2, r, s, t, u, v, w;
152 long double s2, s_h, s_l, t_h, t_l, ay;
153 int32_t i, j, k, yisint, n;
154 uint32_t ix, iy;
155 int32_t hx, hy, hax;
156 double ohi, xhi, xlo, yhi, ylo;
157 uint32_t lx, ly, lj;
158
159 ldbl_unpack (x, &xhi, &xlo);
160 EXTRACT_WORDS (hx, lx, xhi);
161 ix = hx & 0x7fffffff;
162
163 ldbl_unpack (y, &yhi, &ylo);
164 EXTRACT_WORDS (hy, ly, yhi);
165 iy = hy & 0x7fffffff;
166
167 /* y==zero: x**0 = 1 */
168 if ((iy | ly) == 0)
169 return one;
170
171 /* 1.0**y = 1; -1.0**+-Inf = 1 */
172 if (x == one)
173 return one;
174 if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
175 return one;
176
177 /* +-NaN return x+y */
178 if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
179 || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
180 return x + y;
181
182 /* determine if y is an odd int when x < 0
183 * yisint = 0 ... y is not an integer
184 * yisint = 1 ... y is an odd int
185 * yisint = 2 ... y is an even int
186 */
187 yisint = 0;
188 if (hx < 0)
189 {
190 uint32_t low_ye;
191
192 GET_HIGH_WORD (low_ye, ylo);
193 if ((low_ye & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
194 yisint = 2; /* even integer y */
195 else if (iy >= 0x3ff00000) /* 1.0 */
196 {
197 if (__floorl (y) == y)
198 {
199 z = 0.5 * y;
200 if (__floorl (z) == z)
201 yisint = 2;
202 else
203 yisint = 1;
204 }
205 }
206 }
207
208 ax = fabsl (x);
209
210 /* special value of y */
211 if (ly == 0)
212 {
213 if (iy == 0x7ff00000) /* y is +-inf */
214 {
215 if (ax > one)
216 /* (|x|>1)**+-inf = inf,0 */
217 return (hy >= 0) ? y : zero;
218 else
219 /* (|x|<1)**-,+inf = inf,0 */
220 return (hy < 0) ? -y : zero;
221 }
222 if (ylo == 0.0)
223 {
224 if (iy == 0x3ff00000)
225 { /* y is +-1 */
226 if (hy < 0)
227 return one / x;
228 else
229 return x;
230 }
231 if (hy == 0x40000000)
232 return x * x; /* y is 2 */
233 if (hy == 0x3fe00000)
234 { /* y is 0.5 */
235 if (hx >= 0) /* x >= +0 */
236 return __ieee754_sqrtl (x);
237 }
238 }
239 }
240
241 /* special value of x */
242 if (lx == 0)
243 {
244 if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
245 {
246 z = ax; /*x is +-0,+-inf,+-1 */
247 if (hy < 0)
248 z = one / z; /* z = (1/|x|) */
249 if (hx < 0)
250 {
251 if (((ix - 0x3ff00000) | yisint) == 0)
252 {
253 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
254 }
255 else if (yisint == 1)
256 z = -z; /* (x<0)**odd = -(|x|**odd) */
257 }
258 return z;
259 }
260 }
261
262 /* (x<0)**(non-int) is NaN */
263 if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
264 return (x - x) / (x - x);
265
266 /* |y| is huge.
267 2^-16495 = 1/2 of smallest representable value.
268 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
269 if (iy > 0x41d654b0)
270 {
271 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
272 if (iy > 0x47d654b0)
273 {
274 if (ix <= 0x3fefffff)
275 return (hy < 0) ? huge * huge : tiny * tiny;
276 if (ix >= 0x3ff00000)
277 return (hy > 0) ? huge * huge : tiny * tiny;
278 }
279 /* over/underflow if x is not close to one */
280 if (ix < 0x3fefffff)
281 return (hy < 0) ? huge * huge : tiny * tiny;
282 if (ix > 0x3ff00000)
283 return (hy > 0) ? huge * huge : tiny * tiny;
284 }
285
286 ay = y > 0 ? y : -y;
287 if (ay < 0x1p-117)
288 y = y < 0 ? -0x1p-117 : 0x1p-117;
289
290 n = 0;
291 /* take care subnormal number */
292 if (ix < 0x00100000)
293 {
294 ax *= two113;
295 n -= 113;
296 ohi = ldbl_high (ax);
297 GET_HIGH_WORD (ix, ohi);
298 }
299 n += ((ix) >> 20) - 0x3ff;
300 j = ix & 0x000fffff;
301 /* determine interval */
302 ix = j | 0x3ff00000; /* normalize ix */
303 if (j <= 0x39880)
304 k = 0; /* |x|<sqrt(3/2) */
305 else if (j < 0xbb670)
306 k = 1; /* |x|<sqrt(3) */
307 else
308 {
309 k = 0;
310 n += 1;
311 ix -= 0x00100000;
312 }
313
314 ohi = ldbl_high (ax);
315 GET_HIGH_WORD (hax, ohi);
316 ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
317
318 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
319 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
320 v = one / (ax + bp[k]);
321 s = u * v;
322 s_h = ldbl_high (s);
323
324 /* t_h=ax+bp[k] High */
325 t_h = ax + bp[k];
326 t_h = ldbl_high (t_h);
327 t_l = ax - (t_h - bp[k]);
328 s_l = v * ((u - s_h * t_h) - s_h * t_l);
329 /* compute log(ax) */
330 s2 = s * s;
331 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
332 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
333 r = s2 * s2 * u / v;
334 r += s_l * (s_h + s);
335 s2 = s_h * s_h;
336 t_h = 3.0 + s2 + r;
337 t_h = ldbl_high (t_h);
338 t_l = r - ((t_h - 3.0) - s2);
339 /* u+v = s*(1+...) */
340 u = s_h * t_h;
341 v = s_l * t_h + t_l * s;
342 /* 2/(3log2)*(s+...) */
343 p_h = u + v;
344 p_h = ldbl_high (p_h);
345 p_l = v - (p_h - u);
346 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
347 z_l = cp_l * p_h + p_l * cp + dp_l[k];
348 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
349 t = (long double) n;
350 t1 = (((z_h + z_l) + dp_h[k]) + t);
351 t1 = ldbl_high (t1);
352 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
353
354 /* s (sign of result -ve**odd) = -1 else = 1 */
355 s = one;
356 if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
357 s = -one; /* (-ve)**(odd int) */
358
359 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
360 y1 = ldbl_high (y);
361 p_l = (y - y1) * t1 + y * t2;
362 p_h = y1 * t1;
363 z = p_l + p_h;
364 ohi = ldbl_high (z);
365 EXTRACT_WORDS (j, lj, ohi);
366 if (j >= 0x40d00000) /* z >= 16384 */
367 {
368 /* if z > 16384 */
369 if (((j - 0x40d00000) | lj) != 0)
370 return s * huge * huge; /* overflow */
371 else
372 {
373 if (p_l + ovt > z - p_h)
374 return s * huge * huge; /* overflow */
375 }
376 }
377 else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
378 {
379 /* z < -16495 */
380 if (((j - 0xc0d01bc0) | lj) != 0)
381 return s * tiny * tiny; /* underflow */
382 else
383 {
384 if (p_l <= z - p_h)
385 return s * tiny * tiny; /* underflow */
386 }
387 }
388 /* compute 2**(p_h+p_l) */
389 i = j & 0x7fffffff;
390 k = (i >> 20) - 0x3ff;
391 n = 0;
392 if (i > 0x3fe00000)
393 { /* if |z| > 0.5, set n = [z+0.5] */
394 n = __floorl (z + 0.5L);
395 t = n;
396 p_h -= t;
397 }
398 t = p_l + p_h;
399 t = ldbl_high (t);
400 u = t * lg2_h;
401 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
402 z = u + v;
403 w = v - (z - u);
404 /* exp(z) */
405 t = z * z;
406 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
407 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
408 t1 = z - t * u / v;
409 r = (z * t1) / (t1 - two) - (w + z * w);
410 z = one - (r - z);
411 z = __scalbnl (z, n);
412 return s * z;
413 }
414 strong_alias (__ieee754_powl, __powl_finite)