sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <https://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math_private.h>
  69 #include <math-underflow.h>
  70
  71 static const long double bp[] = {
  72   1.0L,
  73   1.5L,
  74 };
  75
  76 /* log_2(1.5) */
  77 static const long double dp_h[] = {
  78   0.0,
  79   5.8496250072115607565592654282227158546448E-1L
  80 };
  81
  82 /* Low part of log_2(1.5) */
  83 static const long double dp_l[] = {
  84   0.0,
  85   1.0579781240112554492329533686862998106046E-16L
  86 };
  87
  88 static const long double zero = 0.0L,
  89   one = 1.0L,
  90   two = 2.0L,
  91   two113 = 1.0384593717069655257060992658440192E34L,
  92   huge = 1.0e300L,
  93   tiny = 1.0e-300L;
  94
  95 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  96    z = (x-1)/(x+1)
  97    1 <= x <= 1.25
  98    Peak relative error 2.3e-37 */
  99 static const long double LN[] =
 100 {
 101  -3.0779177200290054398792536829702930623200E1L,
 102   6.5135778082209159921251824580292116201640E1L,
 103  -4.6312921812152436921591152809994014413540E1L,
 104   1.2510208195629420304615674658258363295208E1L,
 105  -9.9266909031921425609179910128531667336670E-1L
 106 };
 107 static const long double LD[] =
 108 {
 109  -5.129862866715009066465422805058933131960E1L,
 110   1.452015077564081884387441590064272782044E2L,
 111  -1.524043275549860505277434040464085593165E2L,
 112   7.236063513651544224319663428634139768808E1L,
 113  -1.494198912340228235853027849917095580053E1L
 114   /* 1.0E0 */
 115 };
 116
 117 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 118    0 <= x <= 0.5
 119    Peak relative error 5.7e-38  */
 120 static const long double PN[] =
 121 {
 122   5.081801691915377692446852383385968225675E8L,
 123   9.360895299872484512023336636427675327355E6L,
 124   4.213701282274196030811629773097579432957E4L,
 125   5.201006511142748908655720086041570288182E1L,
 126   9.088368420359444263703202925095675982530E-3L,
 127 };
 128 static const long double PD[] =
 129 {
 130   3.049081015149226615468111430031590411682E9L,
 131   1.069833887183886839966085436512368982758E8L,
 132   8.259257717868875207333991924545445705394E5L,
 133   1.872583833284143212651746812884298360922E3L,
 134   /* 1.0E0 */
 135 };
 136
 137 static const long double
 138   /* ln 2 */
 139   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 140   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 141   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 142   ovt = 8.0085662595372944372e-0017L,
 143   /* 2/(3*log(2)) */
 144   cp = 9.6179669392597560490661645400126142495110E-1L,
 145   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 146   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 147
 148 long double
 149 __ieee754_powl (long double x, long double y)
 150 {
 151   long double z, ax, z_h, z_l, p_h, p_l;
 152   long double y1, t1, t2, r, s, sgn, t, u, v, w;
 153   long double s2, s_h, s_l, t_h, t_l, ay;
 154   int32_t i, j, k, yisint, n;
 155   uint32_t ix, iy;
 156   int32_t hx, hy, hax;
 157   double ohi, xhi, xlo, yhi, ylo;
 158   uint32_t lx, ly, lj;
 159
 160   ldbl_unpack (x, &xhi, &xlo);
 161   EXTRACT_WORDS (hx, lx, xhi);
 162   ix = hx & 0x7fffffff;
 163
 164   ldbl_unpack (y, &yhi, &ylo);
 165   EXTRACT_WORDS (hy, ly, yhi);
 166   iy = hy & 0x7fffffff;
 167
 168   /* y==zero: x**0 = 1 */
 169   if ((iy | ly) == 0 && !issignaling (x))
 170     return one;
 171
 172   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 173   if (x == one && !issignaling (y))
 174     return one;
 175   if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
 176     return one;
 177
 178   /* +-NaN return x+y */
 179   if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
 180       || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
 181     return x + y;
 182
 183   /* determine if y is an odd int when x < 0
 184    * yisint = 0       ... y is not an integer
 185    * yisint = 1       ... y is an odd int
 186    * yisint = 2       ... y is an even int
 187    */
 188   yisint = 0;
 189   if (hx < 0)
 190     {
 191       uint32_t low_ye;
 192
 193       GET_HIGH_WORD (low_ye, ylo);
 194       if ((low_ye & 0x7fffffff) >= 0x43400000)  /* Low part >= 2^53 */
 195         yisint = 2;             /* even integer y */
 196       else if (iy >= 0x3ff00000)        /* 1.0 */
 197         {
 198           if (floorl (y) == y)
 199             {
 200               z = 0.5 * y;
 201               if (floorl (z) == z)
 202                 yisint = 2;
 203               else
 204                 yisint = 1;
 205             }
 206         }
 207     }
 208
 209   ax = fabsl (x);
 210
 211   /* special value of y */
 212   if (ly == 0)
 213     {
 214       if (iy == 0x7ff00000)     /* y is +-inf */
 215         {
 216           if (ax > one)
 217             /* (|x|>1)**+-inf = inf,0 */
 218             return (hy >= 0) ? y : zero;
 219           else
 220             /* (|x|<1)**-,+inf = inf,0 */
 221             return (hy < 0) ? -y : zero;
 222         }
 223       if (ylo == 0.0)
 224         {
 225           if (iy == 0x3ff00000)
 226             {                   /* y is  +-1 */
 227               if (hy < 0)
 228                 return one / x;
 229               else
 230                 return x;
 231             }
 232           if (hy == 0x40000000)
 233             return x * x;               /* y is  2 */
 234           if (hy == 0x3fe00000)
 235             {                   /* y is  0.5 */
 236               if (hx >= 0)              /* x >= +0 */
 237                 return sqrtl (x);
 238             }
 239         }
 240     }
 241
 242   /* special value of x */
 243   if (lx == 0)
 244     {
 245       if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
 246         {
 247           z = ax;               /*x is +-0,+-inf,+-1 */
 248           if (hy < 0)
 249             z = one / z;        /* z = (1/|x|) */
 250           if (hx < 0)
 251             {
 252               if (((ix - 0x3ff00000) | yisint) == 0)
 253                 {
 254                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 255                 }
 256               else if (yisint == 1)
 257                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 258             }
 259           return z;
 260         }
 261     }
 262
 263   /* (x<0)**(non-int) is NaN */
 264   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
 265     return (x - x) / (x - x);
 266
 267   /* sgn (sign of result -ve**odd) = -1 else = 1 */
 268   sgn = one;
 269   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 270     sgn = -one;                 /* (-ve)**(odd int) */
 271
 272   /* |y| is huge.
 273      2^-16495 = 1/2 of smallest representable value.
 274      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 275   if (iy > 0x41d654b0)
 276     {
 277       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 278       if (iy > 0x47d654b0)
 279         {
 280           if (ix <= 0x3fefffff)
 281             return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 282           if (ix >= 0x3ff00000)
 283             return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 284         }
 285       /* over/underflow if x is not close to one */
 286       if (ix < 0x3fefffff)
 287         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 288       if (ix > 0x3ff00000)
 289         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 290     }
 291
 292   ay = y > 0 ? y : -y;
 293   if (ay < 0x1p-117)
 294     y = y < 0 ? -0x1p-117 : 0x1p-117;
 295
 296   n = 0;
 297   /* take care subnormal number */
 298   if (ix < 0x00100000)
 299     {
 300       ax *= two113;
 301       n -= 113;
 302       ohi = ldbl_high (ax);
 303       GET_HIGH_WORD (ix, ohi);
 304     }
 305   n += ((ix) >> 20) - 0x3ff;
 306   j = ix & 0x000fffff;
 307   /* determine interval */
 308   ix = j | 0x3ff00000;          /* normalize ix */
 309   if (j <= 0x39880)
 310     k = 0;                      /* |x|<sqrt(3/2) */
 311   else if (j < 0xbb670)
 312     k = 1;                      /* |x|<sqrt(3)   */
 313   else
 314     {
 315       k = 0;
 316       n += 1;
 317       ix -= 0x00100000;
 318     }
 319
 320   ohi = ldbl_high (ax);
 321   GET_HIGH_WORD (hax, ohi);
 322   ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
 323
 324   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 325   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 326   v = one / (ax + bp[k]);
 327   s = u * v;
 328   s_h = ldbl_high (s);
 329
 330   /* t_h=ax+bp[k] High */
 331   t_h = ax + bp[k];
 332   t_h = ldbl_high (t_h);
 333   t_l = ax - (t_h - bp[k]);
 334   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 335   /* compute log(ax) */
 336   s2 = s * s;
 337   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 338   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 339   r = s2 * s2 * u / v;
 340   r += s_l * (s_h + s);
 341   s2 = s_h * s_h;
 342   t_h = 3.0 + s2 + r;
 343   t_h = ldbl_high (t_h);
 344   t_l = r - ((t_h - 3.0) - s2);
 345   /* u+v = s*(1+...) */
 346   u = s_h * t_h;
 347   v = s_l * t_h + t_l * s;
 348   /* 2/(3log2)*(s+...) */
 349   p_h = u + v;
 350   p_h = ldbl_high (p_h);
 351   p_l = v - (p_h - u);
 352   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 353   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 354   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 355   t = (long double) n;
 356   t1 = (((z_h + z_l) + dp_h[k]) + t);
 357   t1 = ldbl_high (t1);
 358   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 359
 360   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 361   y1 = ldbl_high (y);
 362   p_l = (y - y1) * t1 + y * t2;
 363   p_h = y1 * t1;
 364   z = p_l + p_h;
 365   ohi = ldbl_high (z);
 366   EXTRACT_WORDS (j, lj, ohi);
 367   if (j >= 0x40d00000) /* z >= 16384 */
 368     {
 369       /* if z > 16384 */
 370       if (((j - 0x40d00000) | lj) != 0)
 371         return sgn * huge * huge;       /* overflow */
 372       else
 373         {
 374           if (p_l + ovt > z - p_h)
 375             return sgn * huge * huge;   /* overflow */
 376         }
 377     }
 378   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
 379     {
 380       /* z < -16495 */
 381       if (((j - 0xc0d01bc0) | lj) != 0)
 382         return sgn * tiny * tiny;       /* underflow */
 383       else
 384         {
 385           if (p_l <= z - p_h)
 386             return sgn * tiny * tiny;   /* underflow */
 387         }
 388     }
 389   /* compute 2**(p_h+p_l) */
 390   i = j & 0x7fffffff;
 391   k = (i >> 20) - 0x3ff;
 392   n = 0;
 393   if (i > 0x3fe00000)
 394     {                           /* if |z| > 0.5, set n = [z+0.5] */
 395       n = floorl (z + 0.5L);
 396       t = n;
 397       p_h -= t;
 398     }
 399   t = p_l + p_h;
 400   t = ldbl_high (t);
 401   u = t * lg2_h;
 402   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 403   z = u + v;
 404   w = v - (z - u);
 405   /*  exp(z) */
 406   t = z * z;
 407   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 408   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 409   t1 = z - t * u / v;
 410   r = (z * t1) / (t1 - two) - (w + z * w);
 411   z = one - (r - z);
 412   z = __scalbnl (sgn * z, n);
 413   math_check_force_underflow (z);
 414   return z;
 415 }
 416 strong_alias (__ieee754_powl, __powl_finite)