sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <https://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating multi-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything except 1) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is 1
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math_private.h>
  69 #include <math-underflow.h>
  70 #include <libm-alias-finite.h>
  71
  72 static const long double bp[] = {
     /* Reference points for the logarithm step: __ieee754_powl selects
        k = 0 or 1 and works with (ax - bp[k]) / (ax + bp[k]).  */
  73   1.0L,
  74   1.5L,
  75 };
  76
  77 /* High part of log_2 of the reference point selected by k:
        index 0 (reference point 1.0) is zero, index 1 holds the leading
        digits of log_2(1.5).  __ieee754_powl adds dp_h[k] when it
        assembles log2 of |x|.  */
  78 static const long double dp_h[] = {
  79   0.0,
  80   5.8496250072115607565592654282227158546448E-1L
  81 };
  82
  83 /* Low part of log_2(1.5), indexed by k like dp_h: index 0
        (reference point 1.0) is zero, index 1 is the small correction
        that complements dp_h[1].  __ieee754_powl adds dp_l[k] to the
        low-order term z_l.  */
  84 static const long double dp_l[] = {
  85   0.0,
  86   1.0579781240112554492329533686862998106046E-16L
  87 };
  88
  89 static const long double zero = 0.0L,
  90   one = 1.0L,
  91   two = 2.0L,
  92   two113 = 1.0384593717069655257060992658440192E34L,   /* 2^113: scale factor applied to subnormal x */
  93   huge = 1.0e300L,   /* huge * huge is returned as the overflow result */
  94   tiny = 1.0e-300L;   /* tiny * tiny is returned as the underflow result */
  95
  96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  97    z = (x-1)/(x+1)
  98    1 <= x <= 1.25
  99    Peak relative error 2.3e-37

         R is a rational function of w = z^2.  LN holds its numerator
         coefficients in ascending powers of w; __ieee754_powl evaluates it
         as LN[0] + w * (LN[1] + w * (LN[2] + w * (LN[3] + w * LN[4]))).  */
  100 static const long double LN[] =
  101 {
  102  -3.0779177200290054398792536829702930623200E1L,
  103   6.5135778082209159921251824580292116201640E1L,
  104  -4.6312921812152436921591152809994014413540E1L,
  105   1.2510208195629420304615674658258363295208E1L,
  106  -9.9266909031921425609179910128531667336670E-1L
  107 };
  108 static const long double LD[] =
  109 {
       /* Denominator coefficients, in ascending powers of s2, of the
          rational function used in the logarithm step of __ieee754_powl.
          It is evaluated as
          LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2)))),
          so the leading coefficient 1.0 is implicit and not stored.  */
  110  -5.129862866715009066465422805058933131960E1L,
  111   1.452015077564081884387441590064272782044E2L,
  112  -1.524043275549860505277434040464085593165E2L,
  113   7.236063513651544224319663428634139768808E1L,
  114  -1.494198912340228235853027849917095580053E1L
  115   /* 1.0E0 */
  116 };
 117
  118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
  119    0 <= x <= 0.5
  120    Peak relative error 5.7e-38

         R is a rational function of t = x^2.  PN holds its numerator
         coefficients in ascending powers of t; __ieee754_powl evaluates it
         as PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4]))).  */
  121 static const long double PN[] =
  122 {
  123   5.081801691915377692446852383385968225675E8L,
  124   9.360895299872484512023336636427675327355E6L,
  125   4.213701282274196030811629773097579432957E4L,
  126   5.201006511142748908655720086041570288182E1L,
  127   9.088368420359444263703202925095675982530E-3L,
  128 };
  129 static const long double PD[] =
  130 {
       /* Denominator coefficients, in ascending powers of t, of the
          rational function used in the exponential step of __ieee754_powl.
          It is evaluated as
          PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t))),
          so the leading coefficient 1.0 is implicit and not stored.  */
  131   3.049081015149226615468111430031590411682E9L,
  132   1.069833887183886839966085436512368982758E8L,
  133   8.259257717868875207333991924545445705394E5L,
  134   1.872583833284143212651746812884298360922E3L,
  135   /* 1.0E0 */
  136 };
 137
  138 static const long double
  139   /* ln 2.  lg2_h and lg2_l are used together as a high/low pair when
           __ieee754_powl multiplies by ln 2 in split precision.  */
  140   lg2 = 6.9314718055994530941723212145817656807550E-1L,
  141   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
  142   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
       /* Margin added to p_l in the overflow test that __ieee754_powl
          performs when the leading double of z is exactly 16384.  */
  143   ovt = 8.0085662595372944372e-0017L,
  144   /* 2/(3*log(2)).  cp_h + cp_l is the same constant split into a high
           and a low part.  */
  145   cp = 9.6179669392597560490661645400126142495110E-1L,
  146   cp_h = 9.6179669392597555432899980587535537779331E-1L,
  147   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 148
  149 long double
  150 __ieee754_powl (long double x, long double y)
  151 {
        /* Return x raised to the power y.

           The body first classifies x and y from the bit pattern of their
           leading doubles and returns early for the special cases (y == 0,
           x == 1, NaNs, y == +-Inf, y == +-1, 2 or 0.5, x == +-0, +-Inf or
           +-1, negative x with non-integer y, and huge |y|).  Otherwise it
           computes log2(|x|) as t1 + t2, forms z = y * log2(|x|) as
           p_h + p_l, removes the nearest integer n from z (n stays 0 when
           |z| <= 0.5), evaluates 2**(z - n) through a rational approximation
           of exp, and finally scales by 2**n and applies the sign sgn.

           NOTE(review): ldbl_unpack, ldbl_high, EXTRACT_WORDS and
           GET_HIGH_WORD are defined outside this file.  The comments below
           assume ldbl_unpack/ldbl_high yield the leading (and trailing)
           double of a long double, and that the *_WORD(S) macros read the
           32-bit high/low words of a double -- confirm against their
           definitions.  */
  152   long double z, ax, z_h, z_l, p_h, p_l;
  153   long double y1, t1, t2, r, s, sgn, t, u, v, w;
  154   long double s2, s_h, s_l, t_h, t_l, ay;
  155   int32_t i, j, k, yisint, n;
  156   uint32_t ix, iy;
  157   int32_t hx, hy, hax;
  158   double ohi, xhi, xlo, yhi, ylo;
  159   uint32_t lx, ly, lj;
  160
        /* hx/lx are the high and low words of x's leading double; ix is hx
           with the sign bit cleared.  */
  161   ldbl_unpack (x, &xhi, &xlo);
  162   EXTRACT_WORDS (hx, lx, xhi);
  163   ix = hx & 0x7fffffff;
  164
        /* Same decomposition for y.  */
  165   ldbl_unpack (y, &yhi, &ylo);
  166   EXTRACT_WORDS (hy, ly, yhi);
  167   iy = hy & 0x7fffffff;
  168
  169   /* y==zero: x**0 = 1 */
        /* (iy | ly) == 0 means the leading double of y is +-0.  A signaling
           NaN x is excluded here and reaches the NaN test further down.  */
  170   if ((iy | ly) == 0 && !issignaling (x))
  171     return one;
  172
  173   /* 1.0**y = 1; -1.0**+-Inf = 1 */
        /* The first test also accepts a quiet NaN y; only a signaling NaN y
           is excluded.  ((iy - 0x7ff00000) | ly) == 0 holds exactly when
           iy == 0x7ff00000 and ly == 0, i.e. y is +-Inf.  */
  174   if (x == one && !issignaling (y))
  175     return one;
  176   if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
  177     return one;
  178
  179   /* +-NaN return x+y */
        /* NaN test: exponent field all ones (high word >= 0x7ff00000) and at
           least one mantissa bit set.  */
  180   if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
  181       || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
  182     return x + y;
  183
  184   /* determine if y is an odd int when x < 0
  185    * yisint = 0       ... y is not an integer
  186    * yisint = 1       ... y is an odd int
  187    * yisint = 2       ... y is an even int
  188    */
  189   yisint = 0;
  190   if (hx < 0)
  191     {
  192       uint32_t low_ye;
  193
            /* low_ye is the high word of y's trailing double.  When that
               trailing part alone has magnitude >= 2^53 the code treats y as
               an even integer (every double of magnitude >= 2^53 is an even
               integer).  */
  194       GET_HIGH_WORD (low_ye, ylo);
  195       if ((low_ye & 0x7fffffff) >= 0x43400000)  /* Low part >= 2^53 */
  196         yisint = 2;             /* even integer y */
  197       else if (iy >= 0x3ff00000)        /* 1.0 */
  198         {
                /* y is an integer when floorl (y) == y, and an even one when
                   y / 2 is an integer as well.  */
  199           if (floorl (y) == y)
  200             {
  201               z = 0.5 * y;
  202               if (floorl (z) == z)
  203                 yisint = 2;
  204               else
  205                 yisint = 1;
  206             }
  207         }
  208     }
  209
  210   ax = fabsl (x);
  211
  212   /* special value of y */
        /* All special values tested in this block have a zero low word in
           the leading double of y.  */
  213   if (ly == 0)
  214     {
  215       if (iy == 0x7ff00000)     /* y is +-inf */
  216         {
                /* x == 1 and x == -1 were already returned above, so the
                   else branch is only reached with |x| < 1.  */
  217           if (ax > one)
  218             /* (|x|>1)**+-inf = inf,0 */
  219             return (hy >= 0) ? y : zero;
  220           else
  221             /* (|x|<1)**-,+inf = inf,0 */
  222             return (hy < 0) ? -y : zero;
  223         }
            /* The remaining shortcuts require the trailing double of y to be
               zero as well.  */
  224       if (ylo == 0.0)
  225         {
  226           if (iy == 0x3ff00000)
  227             {                   /* y is  +-1 */
  228               if (hy < 0)
  229                 return one / x;
  230               else
  231                 return x;
  232             }
  233           if (hy == 0x40000000)
  234             return x * x;               /* y is  2 */
  235           if (hy == 0x3fe00000)
  236             {                   /* y is  0.5 */
                    /* An x with the sign bit set is not handled here and
                       continues with the code below.  */
  237               if (hx >= 0)              /* x >= +0 */
  238                 return sqrtl (x);
  239             }
  240         }
  241     }
  242
  243   /* special value of x */
  244   if (lx == 0)
  245     {
  246       if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
  247         {
  248           z = ax;               /*x is +-0,+-inf,+-1 */
  249           if (hy < 0)
  250             z = one / z;        /* z = (1/|x|) */
  251           if (hx < 0)
  252             {
                    /* The OR is zero only when ix == 0x3ff00000 (|x| == 1)
                       and yisint == 0.  */
  253               if (((ix - 0x3ff00000) | yisint) == 0)
  254                 {
  255                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
  256                 }
  257               else if (yisint == 1)
  258                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
  259             }
  260           return z;
  261         }
  262     }
  263
  264   /* (x<0)**(non-int) is NaN */
        /* ((uint32_t) hx >> 31) - 1 is 0 when the sign bit of x is set and
           0xffffffff otherwise, so the OR below is zero only for negative x
           with yisint == 0.  */
  265   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
  266     return (x - x) / (x - x);
  267
  268   /* sgn (sign of result -ve**odd) = -1 else = 1 */
  269   sgn = one;
        /* Same sign-bit test; zero only for negative x with yisint == 1.  */
  270   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
  271     sgn = -one;                 /* (-ve)**(odd int) */
  272
  273   /* |y| is huge.
  274      2^-16495 = 1/2 of smallest representable value.
  275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
        /* NOTE(review): the 2^-16495 bound here, and the 16384 / -16495
           limits applied to z further down, look inherited from a format
           with a wider exponent range than a double-based one -- confirm
           they are intended for this implementation.  */
  276   if (iy > 0x41d654b0)
  277     {
  278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
  279       if (iy > 0x47d654b0)
  280         {
                /* ix <= 0x3fefffff: leading double of |x| below 1.0;
                   ix >= 0x3ff00000: leading double of |x| at least 1.0.
                   NOTE(review): ix reflects the leading double only, so an
                   x whose leading double is exactly 1.0 but whose trailing
                   double is negative is classified as |x| >= 1 here --
                   verify this is intended.  */
  281           if (ix <= 0x3fefffff)
  282             return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
  283           if (ix >= 0x3ff00000)
  284             return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
  285         }
  286       /* over/underflow if x is not close to one */
  287       if (ix < 0x3fefffff)
  288         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
  289       if (ix > 0x3ff00000)
  290         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
  291     }
  292
        /* Replace a y of magnitude below 2^-117 by +-2^-117, keeping its
           sign.  NOTE(review): presumably this keeps the products formed
           with y below away from underflow without changing the rounded
           result -- confirm.  */
  293   ay = y > 0 ? y : -y;
  294   if (ay < 0x1p-117)
  295     y = y < 0 ? -0x1p-117 : 0x1p-117;
  296
  297   n = 0;
  298   /* take care subnormal number */
        /* ix < 0x00100000 means a zero exponent field.  Scale ax by two113,
           compensate with n -= 113, and reload ix from the scaled value.  */
  299   if (ix < 0x00100000)
  300     {
  301       ax *= two113;
  302       n -= 113;
  303       ohi = ldbl_high (ax);
  304       GET_HIGH_WORD (ix, ohi);
  305     }
        /* n accumulates the unbiased exponent; j keeps the 20 mantissa bits
           stored in the high word.  */
  306   n += ((ix) >> 20) - 0x3ff;
  307   j = ix & 0x000fffff;
  308   /* determine interval */
  309   ix = j | 0x3ff00000;          /* normalize ix */
  310   if (j <= 0x39880)
  311     k = 0;                      /* |x|<sqrt(3/2) */
  312   else if (j < 0xbb670)
  313     k = 1;                      /* |x|<sqrt(3)   */
  314   else
  315     {
            /* Mantissa at or above the second threshold: use bp[0] again,
               with the exponent n raised by one and ix lowered by one
               exponent step.  */
  316       k = 0;
  317       n += 1;
  318       ix -= 0x00100000;
  319     }
  320
        /* Rescale ax by the power of two that separates the exponent field
           of the normalized ix from that of ax's current high word hax.  */
  321   ohi = ldbl_high (ax);
  322   GET_HIGH_WORD (hax, ohi);
  323   ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
  324
  325   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
  326   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
  327   v = one / (ax + bp[k]);
  328   s = u * v;
  329   s_h = ldbl_high (s);
  330
  331   /* t_h=ax+bp[k] High */
  332   t_h = ax + bp[k];
  333   t_h = ldbl_high (t_h);
  334   t_l = ax - (t_h - bp[k]);
  335   s_l = v * ((u - s_h * t_h) - s_h * t_l);
  336   /* compute log(ax) */
  337   s2 = s * s;
        /* u / v is the rational function LN(s2) / LD(s2).  LD has an
           implicit leading coefficient of 1, hence the bare s2 in the
           innermost factor of v.  */
  338   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
  339   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
  340   r = s2 * s2 * u / v;
  341   r += s_l * (s_h + s);
  342   s2 = s_h * s_h;
  343   t_h = 3.0 + s2 + r;
  344   t_h = ldbl_high (t_h);
  345   t_l = r - ((t_h - 3.0) - s2);
  346   /* u+v = s*(1+...) */
  347   u = s_h * t_h;
  348   v = s_l * t_h + t_l * s;
  349   /* 2/(3log2)*(s+...) */
  350   p_h = u + v;
  351   p_h = ldbl_high (p_h);
  352   p_l = v - (p_h - u);
  353   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
  354   z_l = cp_l * p_h + p_l * cp + dp_l[k];
  355   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
  356   t = (long double) n;
  357   t1 = (((z_h + z_l) + dp_h[k]) + t);
  358   t1 = ldbl_high (t1);
  359   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
  360
  361   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  362   y1 = ldbl_high (y);
  363   p_l = (y - y1) * t1 + y * t2;
  364   p_h = y1 * t1;
  365   z = p_l + p_h;
        /* Range-check z = y * log2(|x|) using the words of its leading
           double: j is the signed high word, lj the low word.  */
  366   ohi = ldbl_high (z);
  367   EXTRACT_WORDS (j, lj, ohi);
  368   if (j >= 0x40d00000) /* z >= 16384 */
  369     {
  370       /* if z > 16384 */
  371       if (((j - 0x40d00000) | lj) != 0)
  372         return sgn * huge * huge;       /* overflow */
  373       else
  374         {
                /* Leading double exactly 16384: decide with the low-order
                   part, allowing the margin ovt.  */
  375           if (p_l + ovt > z - p_h)
  376             return sgn * huge * huge;   /* overflow */
  377         }
  378     }
        /* j is signed, so a positive z that failed the test above cannot
           satisfy the next one; it only fires for sufficiently negative z.  */
  379   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
  380     {
  381       /* z < -16495 */
  382       if (((j - 0xc0d01bc0) | lj) != 0)
  383         return sgn * tiny * tiny;       /* underflow */
  384       else
  385         {
  386           if (p_l <= z - p_h)
  387             return sgn * tiny * tiny;   /* underflow */
  388         }
  389     }
  390   /* compute 2**(p_h+p_l) */
  391   i = j & 0x7fffffff;
        /* NOTE(review): k is assigned here but not read again in the rest of
           this function.  */
  392   k = (i >> 20) - 0x3ff;
  393   n = 0;
  394   if (i > 0x3fe00000)
  395     {                           /* if |z| > 0.5, set n = [z+0.5] */
  396       n = floorl (z + 0.5L);
  397       t = n;
  398       p_h -= t;
  399     }
        /* Multiply the remaining exponent p_h + p_l by ln 2 in split
           precision (lg2_h / lg2_l), giving z with correction term w.  */
  400   t = p_l + p_h;
  401   t = ldbl_high (t);
  402   u = t * lg2_h;
  403   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
  404   z = u + v;
  405   w = v - (z - u);
  406   /*  exp(z) */
        /* Rational approximation in t = z * z: u = PN(t), v = PD(t) with an
           implicit leading coefficient of 1 in PD.  */
  407   t = z * z;
  408   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
  409   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
  410   t1 = z - t * u / v;
  411   r = (z * t1) / (t1 - two) - (w + z * w);
  412   z = one - (r - z);
        /* Apply the sign and the power of two that was split off as n.  */
  413   z = __scalbnl (sgn * z, n);
        /* NOTE(review): math_check_force_underflow is defined outside this
           file; presumably it raises the underflow exception for tiny
           results -- confirm.  */
  414   math_check_force_underflow (z);
  415   return z;
  416 }
  417 libm_alias_finite (__ieee754_powl, __powl)
      /* NOTE(review): libm_alias_finite is defined outside this file
         (this file includes <libm-alias-finite.h>); presumably it publishes
         __ieee754_powl under a finite-variant alias derived from __powl --
         confirm against that header.  */