]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/ldbl-128/e_powl.c
Optimize libm
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / e_powl.c
CommitLineData
1f5649f8
UD
1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
cc7375ce
RM
12/* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
0ac5ae23 14 and are incorporated herein by permission of the author. The author
9cd2726c 15 reserves the right to distribute this material elsewhere under different
0ac5ae23 16 copying permissions. These modifications are distributed here under
9cd2726c 17 the following terms:
cc7375ce
RM
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, write to the Free Software
31 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
1f5649f8
UD
32
33/* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67#include "math.h"
68#include "math_private.h"
69
70static const long double bp[] = {
71 1.0L,
72 1.5L,
73};
74
75/* log_2(1.5) */
76static const long double dp_h[] = {
77 0.0,
78 5.8496250072115607565592654282227158546448E-1L
79};
80
81/* Low part of log_2(1.5) */
82static const long double dp_l[] = {
83 0.0,
84 1.0579781240112554492329533686862998106046E-16L
85};
86
87static const long double zero = 0.0L,
88 one = 1.0L,
89 two = 2.0L,
90 two113 = 1.0384593717069655257060992658440192E34L,
91 huge = 1.0e3000L,
92 tiny = 1.0e-3000L;
93
94/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98static const long double LN[] =
99{
100 -3.0779177200290054398792536829702930623200E1L,
101 6.5135778082209159921251824580292116201640E1L,
102 -4.6312921812152436921591152809994014413540E1L,
103 1.2510208195629420304615674658258363295208E1L,
104 -9.9266909031921425609179910128531667336670E-1L
105};
106static const long double LD[] =
107{
108 -5.129862866715009066465422805058933131960E1L,
109 1.452015077564081884387441590064272782044E2L,
110 -1.524043275549860505277434040464085593165E2L,
111 7.236063513651544224319663428634139768808E1L,
112 -1.494198912340228235853027849917095580053E1L
113 /* 1.0E0 */
114};
115
116/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119static const long double PN[] =
120{
121 5.081801691915377692446852383385968225675E8L,
122 9.360895299872484512023336636427675327355E6L,
123 4.213701282274196030811629773097579432957E4L,
124 5.201006511142748908655720086041570288182E1L,
125 9.088368420359444263703202925095675982530E-3L,
126};
127static const long double PD[] =
128{
129 3.049081015149226615468111430031590411682E9L,
130 1.069833887183886839966085436512368982758E8L,
131 8.259257717868875207333991924545445705394E5L,
132 1.872583833284143212651746812884298360922E3L,
133 /* 1.0E0 */
134};
135
136static const long double
137 /* ln 2 */
138 lg2 = 6.9314718055994530941723212145817656807550E-1L,
139 lg2_h = 6.9314718055994528622676398299518041312695E-1L,
140 lg2_l = 2.3190468138462996154948554638754786504121E-17L,
141 ovt = 8.0085662595372944372e-0017L,
142 /* 2/(3*log(2)) */
143 cp = 9.6179669392597560490661645400126142495110E-1L,
144 cp_h = 9.6179669392597555432899980587535537779331E-1L,
145 cp_l = 5.0577616648125906047157785230014751039424E-17L;
146
1f5649f8
UD
147long double
148__ieee754_powl (long double x, long double y)
1f5649f8
UD
149{
150 long double z, ax, z_h, z_l, p_h, p_l;
151 long double y1, t1, t2, r, s, t, u, v, w;
152 long double s2, s_h, s_l, t_h, t_l;
153 int32_t i, j, k, yisint, n;
154 u_int32_t ix, iy;
155 int32_t hx, hy;
156 ieee854_long_double_shape_type o, p, q;
157
158 p.value = x;
159 hx = p.parts32.w0;
160 ix = hx & 0x7fffffff;
161
162 q.value = y;
163 hy = q.parts32.w0;
164 iy = hy & 0x7fffffff;
165
166
167 /* y==zero: x**0 = 1 */
168 if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
169 return one;
170
52e1b618
UD
171 /* 1.0**y = 1; -1.0**+-Inf = 1 */
172 if (x == one)
173 return one;
174 if (x == -1.0L && iy == 0x7fff0000
175 && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
176 return one;
177
1f5649f8
UD
178 /* +-NaN return x+y */
179 if ((ix > 0x7fff0000)
180 || ((ix == 0x7fff0000)
181 && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
182 || (iy > 0x7fff0000)
183 || ((iy == 0x7fff0000)
184 && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
185 return x + y;
186
187 /* determine if y is an odd int when x < 0
188 * yisint = 0 ... y is not an integer
189 * yisint = 1 ... y is an odd int
190 * yisint = 2 ... y is an even int
191 */
192 yisint = 0;
193 if (hx < 0)
194 {
195 if (iy >= 0x40700000) /* 2^113 */
196 yisint = 2; /* even integer y */
197 else if (iy >= 0x3fff0000) /* 1.0 */
198 {
199 if (__floorl (y) == y)
200 {
201 z = 0.5 * y;
202 if (__floorl (z) == z)
203 yisint = 2;
204 else
205 yisint = 1;
206 }
207 }
208 }
209
210 /* special value of y */
211 if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
212 {
213 if (iy == 0x7fff0000) /* y is +-inf */
214 {
215 if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
216 == 0)
f964490f 217 return y - y; /* +-1**inf is NaN */
1f5649f8
UD
218 else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
219 return (hy >= 0) ? y : zero;
220 else /* (|x|<1)**-,+inf = inf,0 */
221 return (hy < 0) ? -y : zero;
222 }
223 if (iy == 0x3fff0000)
224 { /* y is +-1 */
225 if (hy < 0)
226 return one / x;
227 else
228 return x;
229 }
230 if (hy == 0x40000000)
231 return x * x; /* y is 2 */
232 if (hy == 0x3ffe0000)
233 { /* y is 0.5 */
234 if (hx >= 0) /* x >= +0 */
235 return __ieee754_sqrtl (x);
236 }
237 }
238
239 ax = fabsl (x);
240 /* special value of x */
241 if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
242 {
243 if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
244 {
245 z = ax; /*x is +-0,+-inf,+-1 */
246 if (hy < 0)
247 z = one / z; /* z = (1/|x|) */
248 if (hx < 0)
249 {
250 if (((ix - 0x3fff0000) | yisint) == 0)
251 {
252 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
253 }
254 else if (yisint == 1)
255 z = -z; /* (x<0)**odd = -(|x|**odd) */
256 }
257 return z;
258 }
259 }
260
261 /* (x<0)**(non-int) is NaN */
262 if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
263 return (x - x) / (x - x);
264
265 /* |y| is huge.
266 2^-16495 = 1/2 of smallest representable value.
267 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
268 if (iy > 0x401d654b)
269 {
270 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
271 if (iy > 0x407d654b)
272 {
273 if (ix <= 0x3ffeffff)
274 return (hy < 0) ? huge * huge : tiny * tiny;
275 if (ix >= 0x3fff0000)
276 return (hy > 0) ? huge * huge : tiny * tiny;
277 }
278 /* over/underflow if x is not close to one */
279 if (ix < 0x3ffeffff)
280 return (hy < 0) ? huge * huge : tiny * tiny;
281 if (ix > 0x3fff0000)
282 return (hy > 0) ? huge * huge : tiny * tiny;
283 }
284
285 n = 0;
286 /* take care subnormal number */
287 if (ix < 0x00010000)
288 {
289 ax *= two113;
290 n -= 113;
291 o.value = ax;
292 ix = o.parts32.w0;
293 }
294 n += ((ix) >> 16) - 0x3fff;
295 j = ix & 0x0000ffff;
296 /* determine interval */
297 ix = j | 0x3fff0000; /* normalize ix */
298 if (j <= 0x3988)
299 k = 0; /* |x|<sqrt(3/2) */
300 else if (j < 0xbb67)
301 k = 1; /* |x|<sqrt(3) */
302 else
303 {
304 k = 0;
305 n += 1;
306 ix -= 0x00010000;
307 }
308
309 o.value = ax;
310 o.parts32.w0 = ix;
311 ax = o.value;
312
313 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
314 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
315 v = one / (ax + bp[k]);
316 s = u * v;
317 s_h = s;
318
319 o.value = s_h;
320 o.parts32.w3 = 0;
321 o.parts32.w2 &= 0xf8000000;
322 s_h = o.value;
323 /* t_h=ax+bp[k] High */
324 t_h = ax + bp[k];
325 o.value = t_h;
326 o.parts32.w3 = 0;
327 o.parts32.w2 &= 0xf8000000;
328 t_h = o.value;
329 t_l = ax - (t_h - bp[k]);
330 s_l = v * ((u - s_h * t_h) - s_h * t_l);
331 /* compute log(ax) */
332 s2 = s * s;
333 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
334 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
335 r = s2 * s2 * u / v;
336 r += s_l * (s_h + s);
337 s2 = s_h * s_h;
338 t_h = 3.0 + s2 + r;
339 o.value = t_h;
340 o.parts32.w3 = 0;
341 o.parts32.w2 &= 0xf8000000;
342 t_h = o.value;
343 t_l = r - ((t_h - 3.0) - s2);
344 /* u+v = s*(1+...) */
345 u = s_h * t_h;
346 v = s_l * t_h + t_l * s;
347 /* 2/(3log2)*(s+...) */
348 p_h = u + v;
349 o.value = p_h;
350 o.parts32.w3 = 0;
351 o.parts32.w2 &= 0xf8000000;
352 p_h = o.value;
353 p_l = v - (p_h - u);
354 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
355 z_l = cp_l * p_h + p_l * cp + dp_l[k];
356 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
357 t = (long double) n;
358 t1 = (((z_h + z_l) + dp_h[k]) + t);
359 o.value = t1;
360 o.parts32.w3 = 0;
361 o.parts32.w2 &= 0xf8000000;
362 t1 = o.value;
363 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
364
365 /* s (sign of result -ve**odd) = -1 else = 1 */
366 s = one;
367 if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
368 s = -one; /* (-ve)**(odd int) */
369
370 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
371 y1 = y;
372 o.value = y1;
373 o.parts32.w3 = 0;
374 o.parts32.w2 &= 0xf8000000;
375 y1 = o.value;
376 p_l = (y - y1) * t1 + y * t2;
377 p_h = y1 * t1;
378 z = p_l + p_h;
379 o.value = z;
380 j = o.parts32.w0;
381 if (j >= 0x400d0000) /* z >= 16384 */
382 {
383 /* if z > 16384 */
384 if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
385 return s * huge * huge; /* overflow */
386 else
387 {
388 if (p_l + ovt > z - p_h)
389 return s * huge * huge; /* overflow */
390 }
391 }
392 else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
393 {
394 /* z < -16495 */
395 if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
396 != 0)
397 return s * tiny * tiny; /* underflow */
398 else
399 {
400 if (p_l <= z - p_h)
401 return s * tiny * tiny; /* underflow */
402 }
403 }
404 /* compute 2**(p_h+p_l) */
405 i = j & 0x7fffffff;
406 k = (i >> 16) - 0x3fff;
407 n = 0;
408 if (i > 0x3ffe0000)
409 { /* if |z| > 0.5, set n = [z+0.5] */
410 n = __floorl (z + 0.5L);
411 t = n;
412 p_h -= t;
413 }
414 t = p_l + p_h;
415 o.value = t;
416 o.parts32.w3 = 0;
417 o.parts32.w2 &= 0xf8000000;
418 t = o.value;
419 u = t * lg2_h;
420 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
421 z = u + v;
422 w = v - (z - u);
423 /* exp(z) */
424 t = z * z;
425 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
426 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
427 t1 = z - t * u / v;
428 r = (z * t1) / (t1 - two) - (w + z * w);
429 z = one - (r - z);
430 o.value = z;
431 j = o.parts32.w0;
432 j += (n << 16);
433 if ((j >> 16) <= 0)
434 z = __scalbnl (z, n); /* subnormal output */
435 else
436 {
437 o.parts32.w0 = j;
438 z = o.value;
439 }
440 return s * z;
441}
0ac5ae23 442strong_alias (__ieee754_powl, __powl_finite)