sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math_private.h>
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e300L,
  92   tiny = 1.0e-300L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 long double
 148 __ieee754_powl (long double x, long double y)
 149 {
 150   long double z, ax, z_h, z_l, p_h, p_l;
 151   long double y1, t1, t2, r, s, t, u, v, w;
 152   long double s2, s_h, s_l, t_h, t_l;
 153   int32_t i, j, k, yisint, n;
 154   u_int32_t ix, iy;
 155   int32_t hx, hy;
 156   ieee854_long_double_shape_type o, p, q;
 157
 158   p.value = x;
 159   hx = p.parts32.w0;
 160   ix = hx & 0x7fffffff;
 161
 162   q.value = y;
 163   hy = q.parts32.w0;
 164   iy = hy & 0x7fffffff;
 165
 166
 167   /* y==zero: x**0 = 1 */
 168   if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 169     return one;
 170
 171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 172   if (x == one)
 173     return one;
 174   if (x == -1.0L && iy == 0x7ff00000
 175       && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 176     return one;
 177
 178   /* +-NaN return x+y */
 179   if ((ix > 0x7ff00000)
 180       || ((ix == 0x7ff00000)
 181           && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
 182       || (iy > 0x7ff00000)
 183       || ((iy == 0x7ff00000)
 184           && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
 185     return x + y;
 186
 187   /* determine if y is an odd int when x < 0
 188    * yisint = 0       ... y is not an integer
 189    * yisint = 1       ... y is an odd int
 190    * yisint = 2       ... y is an even int
 191    */
 192   yisint = 0;
 193   if (hx < 0)
 194     {
 195       if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000)    /* Low part >= 2^53 */
 196         yisint = 2;             /* even integer y */
 197       else if (iy >= 0x3ff00000)        /* 1.0 */
 198         {
 199           if (__floorl (y) == y)
 200             {
 201               z = 0.5 * y;
 202               if (__floorl (z) == z)
 203                 yisint = 2;
 204               else
 205                 yisint = 1;
 206             }
 207         }
 208     }
 209
 210   /* special value of y */
 211   if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 212     {
 213       if (iy == 0x7ff00000 && q.parts32.w1 == 0)        /* y is +-inf */
 214         {
 215           if (((ix - 0x3ff00000) | p.parts32.w1
 216                | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
 217             return y - y;       /* inf**+-1 is NaN */
 218           else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
 219             /* (|x|>1)**+-inf = inf,0 */
 220             return (hy >= 0) ? y : zero;
 221           else
 222             /* (|x|<1)**-,+inf = inf,0 */
 223             return (hy < 0) ? -y : zero;
 224         }
 225       if (iy == 0x3ff00000)
 226         {                       /* y is  +-1 */
 227           if (hy < 0)
 228             return one / x;
 229           else
 230             return x;
 231         }
 232       if (hy == 0x40000000)
 233         return x * x;           /* y is  2 */
 234       if (hy == 0x3fe00000)
 235         {                       /* y is  0.5 */
 236           if (hx >= 0)          /* x >= +0 */
 237             return __ieee754_sqrtl (x);
 238         }
 239     }
 240
 241   ax = fabsl (x);
 242   /* special value of x */
 243   if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
 244     {
 245       if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
 246         {
 247           z = ax;               /*x is +-0,+-inf,+-1 */
 248           if (hy < 0)
 249             z = one / z;        /* z = (1/|x|) */
 250           if (hx < 0)
 251             {
 252               if (((ix - 0x3ff00000) | yisint) == 0)
 253                 {
 254                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 255                 }
 256               else if (yisint == 1)
 257                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 258             }
 259           return z;
 260         }
 261     }
 262
 263   /* (x<0)**(non-int) is NaN */
 264   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 265     return (x - x) / (x - x);
 266
 267   /* |y| is huge.
 268      2^-16495 = 1/2 of smallest representable value.
 269      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 270   if (iy > 0x41d654b0)
 271     {
 272       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 273       if (iy > 0x47d654b0)
 274         {
 275           if (ix <= 0x3fefffff)
 276             return (hy < 0) ? huge * huge : tiny * tiny;
 277           if (ix >= 0x3ff00000)
 278             return (hy > 0) ? huge * huge : tiny * tiny;
 279         }
 280       /* over/underflow if x is not close to one */
 281       if (ix < 0x3fefffff)
 282         return (hy < 0) ? huge * huge : tiny * tiny;
 283       if (ix > 0x3ff00000)
 284         return (hy > 0) ? huge * huge : tiny * tiny;
 285     }
 286
 287   n = 0;
 288   /* take care subnormal number */
 289   if (ix < 0x00100000)
 290     {
 291       ax *= two113;
 292       n -= 113;
 293       o.value = ax;
 294       ix = o.parts32.w0;
 295     }
 296   n += ((ix) >> 20) - 0x3ff;
 297   j = ix & 0x000fffff;
 298   /* determine interval */
 299   ix = j | 0x3ff00000;          /* normalize ix */
 300   if (j <= 0x39880)
 301     k = 0;                      /* |x|<sqrt(3/2) */
 302   else if (j < 0xbb670)
 303     k = 1;                      /* |x|<sqrt(3)   */
 304   else
 305     {
 306       k = 0;
 307       n += 1;
 308       ix -= 0x00100000;
 309     }
 310
 311   o.value = ax;
 312   o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
 313   ax = o.value;
 314
 315   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 316   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 317   v = one / (ax + bp[k]);
 318   s = u * v;
 319   s_h = s;
 320
 321   o.value = s_h;
 322   o.parts32.w3 = 0;
 323   o.parts32.w2 &= 0xffff8000;
 324   s_h = o.value;
 325   /* t_h=ax+bp[k] High */
 326   t_h = ax + bp[k];
 327   o.value = t_h;
 328   o.parts32.w3 = 0;
 329   o.parts32.w2 &= 0xffff8000;
 330   t_h = o.value;
 331   t_l = ax - (t_h - bp[k]);
 332   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 333   /* compute log(ax) */
 334   s2 = s * s;
 335   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 336   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 337   r = s2 * s2 * u / v;
 338   r += s_l * (s_h + s);
 339   s2 = s_h * s_h;
 340   t_h = 3.0 + s2 + r;
 341   o.value = t_h;
 342   o.parts32.w3 = 0;
 343   o.parts32.w2 &= 0xffff8000;
 344   t_h = o.value;
 345   t_l = r - ((t_h - 3.0) - s2);
 346   /* u+v = s*(1+...) */
 347   u = s_h * t_h;
 348   v = s_l * t_h + t_l * s;
 349   /* 2/(3log2)*(s+...) */
 350   p_h = u + v;
 351   o.value = p_h;
 352   o.parts32.w3 = 0;
 353   o.parts32.w2 &= 0xffff8000;
 354   p_h = o.value;
 355   p_l = v - (p_h - u);
 356   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 357   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 358   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 359   t = (long double) n;
 360   t1 = (((z_h + z_l) + dp_h[k]) + t);
 361   o.value = t1;
 362   o.parts32.w3 = 0;
 363   o.parts32.w2 &= 0xffff8000;
 364   t1 = o.value;
 365   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 366
 367   /* s (sign of result -ve**odd) = -1 else = 1 */
 368   s = one;
 369   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 370     s = -one;                   /* (-ve)**(odd int) */
 371
 372   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 373   y1 = y;
 374   o.value = y1;
 375   o.parts32.w3 = 0;
 376   o.parts32.w2 &= 0xffff8000;
 377   y1 = o.value;
 378   p_l = (y - y1) * t1 + y * t2;
 379   p_h = y1 * t1;
 380   z = p_l + p_h;
 381   o.value = z;
 382   j = o.parts32.w0;
 383   if (j >= 0x40d00000) /* z >= 16384 */
 384     {
 385       /* if z > 16384 */
 386       if (((j - 0x40d00000) | o.parts32.w1
 387         | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
 388         return s * huge * huge; /* overflow */
 389       else
 390         {
 391           if (p_l + ovt > z - p_h)
 392             return s * huge * huge;     /* overflow */
 393         }
 394     }
 395   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
 396     {
 397       /* z < -16495 */
 398       if (((j - 0xc0d01bc0) | o.parts32.w1
 399          | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
 400         return s * tiny * tiny; /* underflow */
 401       else
 402         {
 403           if (p_l <= z - p_h)
 404             return s * tiny * tiny;     /* underflow */
 405         }
 406     }
 407   /* compute 2**(p_h+p_l) */
 408   i = j & 0x7fffffff;
 409   k = (i >> 20) - 0x3ff;
 410   n = 0;
 411   if (i > 0x3fe00000)
 412     {                           /* if |z| > 0.5, set n = [z+0.5] */
 413       n = __floorl (z + 0.5L);
 414       t = n;
 415       p_h -= t;
 416     }
 417   t = p_l + p_h;
 418   o.value = t;
 419   o.parts32.w3 = 0;
 420   o.parts32.w2 &= 0xffff8000;
 421   t = o.value;
 422   u = t * lg2_h;
 423   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 424   z = u + v;
 425   w = v - (z - u);
 426   /*  exp(z) */
 427   t = z * z;
 428   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 429   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 430   t1 = z - t * u / v;
 431   r = (z * t1) / (t1 - two) - (w + z * w);
 432   z = one - (r - z);
 433   z = __scalbnl (z, n);
 434   return s * z;
 435 }
 436 strong_alias (__ieee754_powl, __powl_finite)