sysdeps/ieee754/ldbl-128/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <https://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math-barriers.h>
  69 #include <math_private.h>
  70
  71 static const _Float128 bp[] = {
  72   1,
  73   L(1.5),
  74 };
  75
  76 /* log_2(1.5) */
  77 static const _Float128 dp_h[] = {
  78   0.0,
  79   L(5.8496250072115607565592654282227158546448E-1)
  80 };
  81
  82 /* Low part of log_2(1.5) */
  83 static const _Float128 dp_l[] = {
  84   0.0,
  85   L(1.0579781240112554492329533686862998106046E-16)
  86 };
  87
  88 static const _Float128 zero = 0,
  89   one = 1,
  90   two = 2,
  91   two113 = L(1.0384593717069655257060992658440192E34),
  92   huge = L(1.0e3000),
  93   tiny = L(1.0e-3000);
  94
  95 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  96    z = (x-1)/(x+1)
  97    1 <= x <= 1.25
  98    Peak relative error 2.3e-37 */
  99 static const _Float128 LN[] =
 100 {
 101  L(-3.0779177200290054398792536829702930623200E1),
 102   L(6.5135778082209159921251824580292116201640E1),
 103  L(-4.6312921812152436921591152809994014413540E1),
 104   L(1.2510208195629420304615674658258363295208E1),
 105  L(-9.9266909031921425609179910128531667336670E-1)
 106 };
 107 static const _Float128 LD[] =
 108 {
 109  L(-5.129862866715009066465422805058933131960E1),
 110   L(1.452015077564081884387441590064272782044E2),
 111  L(-1.524043275549860505277434040464085593165E2),
 112   L(7.236063513651544224319663428634139768808E1),
 113  L(-1.494198912340228235853027849917095580053E1)
 114   /* 1.0E0 */
 115 };
 116
 117 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 118    0 <= x <= 0.5
 119    Peak relative error 5.7e-38  */
 120 static const _Float128 PN[] =
 121 {
 122   L(5.081801691915377692446852383385968225675E8),
 123   L(9.360895299872484512023336636427675327355E6),
 124   L(4.213701282274196030811629773097579432957E4),
 125   L(5.201006511142748908655720086041570288182E1),
 126   L(9.088368420359444263703202925095675982530E-3),
 127 };
 128 static const _Float128 PD[] =
 129 {
 130   L(3.049081015149226615468111430031590411682E9),
 131   L(1.069833887183886839966085436512368982758E8),
 132   L(8.259257717868875207333991924545445705394E5),
 133   L(1.872583833284143212651746812884298360922E3),
 134   /* 1.0E0 */
 135 };
 136
 137 static const _Float128
 138   /* ln 2 */
 139   lg2 = L(6.9314718055994530941723212145817656807550E-1),
 140   lg2_h = L(6.9314718055994528622676398299518041312695E-1),
 141   lg2_l = L(2.3190468138462996154948554638754786504121E-17),
 142   ovt = L(8.0085662595372944372e-0017),
 143   /* 2/(3*log(2)) */
 144   cp = L(9.6179669392597560490661645400126142495110E-1),
 145   cp_h = L(9.6179669392597555432899980587535537779331E-1),
 146   cp_l = L(5.0577616648125906047157785230014751039424E-17);
 147
 148 _Float128
 149 __ieee754_powl (_Float128 x, _Float128 y)
 150 {
 151   _Float128 z, ax, z_h, z_l, p_h, p_l;
 152   _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
 153   _Float128 s2, s_h, s_l, t_h, t_l, ay;
 154   int32_t i, j, k, yisint, n;
 155   uint32_t ix, iy;
 156   int32_t hx, hy;
 157   ieee854_long_double_shape_type o, p, q;
 158
 159   p.value = x;
 160   hx = p.parts32.w0;
 161   ix = hx & 0x7fffffff;
 162
 163   q.value = y;
 164   hy = q.parts32.w0;
 165   iy = hy & 0x7fffffff;
 166
 167
 168   /* y==zero: x**0 = 1 */
 169   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
 170       && !issignaling (x))
 171     return one;
 172
 173   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 174   if (x == one && !issignaling (y))
 175     return one;
 176   if (x == -1 && iy == 0x7fff0000
 177       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 178     return one;
 179
 180   /* +-NaN return x+y */
 181   if ((ix > 0x7fff0000)
 182       || ((ix == 0x7fff0000)
 183           && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
 184       || (iy > 0x7fff0000)
 185       || ((iy == 0x7fff0000)
 186           && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
 187     return x + y;
 188
 189   /* determine if y is an odd int when x < 0
 190    * yisint = 0       ... y is not an integer
 191    * yisint = 1       ... y is an odd int
 192    * yisint = 2       ... y is an even int
 193    */
 194   yisint = 0;
 195   if (hx < 0)
 196     {
 197       if (iy >= 0x40700000)     /* 2^113 */
 198         yisint = 2;             /* even integer y */
 199       else if (iy >= 0x3fff0000)        /* 1.0 */
 200         {
 201           if (floorl (y) == y)
 202             {
 203               z = 0.5 * y;
 204               if (floorl (z) == z)
 205                 yisint = 2;
 206               else
 207                 yisint = 1;
 208             }
 209         }
 210     }
 211
 212   /* special value of y */
 213   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 214     {
 215       if (iy == 0x7fff0000)     /* y is +-inf */
 216         {
 217           if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
 218               == 0)
 219             return y - y;       /* +-1**inf is NaN */
 220           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
 221             return (hy >= 0) ? y : zero;
 222           else                  /* (|x|<1)**-,+inf = inf,0 */
 223             return (hy < 0) ? -y : zero;
 224         }
 225       if (iy == 0x3fff0000)
 226         {                       /* y is  +-1 */
 227           if (hy < 0)
 228             return one / x;
 229           else
 230             return x;
 231         }
 232       if (hy == 0x40000000)
 233         return x * x;           /* y is  2 */
 234       if (hy == 0x3ffe0000)
 235         {                       /* y is  0.5 */
 236           if (hx >= 0)          /* x >= +0 */
 237             return sqrtl (x);
 238         }
 239     }
 240
 241   ax = fabsl (x);
 242   /* special value of x */
 243   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
 244     {
 245       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
 246         {
 247           z = ax;               /*x is +-0,+-inf,+-1 */
 248           if (hy < 0)
 249             z = one / z;        /* z = (1/|x|) */
 250           if (hx < 0)
 251             {
 252               if (((ix - 0x3fff0000) | yisint) == 0)
 253                 {
 254                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 255                 }
 256               else if (yisint == 1)
 257                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 258             }
 259           return z;
 260         }
 261     }
 262
 263   /* (x<0)**(non-int) is NaN */
 264   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
 265     return (x - x) / (x - x);
 266
 267   /* sgn (sign of result -ve**odd) = -1 else = 1 */
 268   sgn = one;
 269   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 270     sgn = -one;                 /* (-ve)**(odd int) */
 271
 272   /* |y| is huge.
 273      2^-16495 = 1/2 of smallest representable value.
 274      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 275   if (iy > 0x401d654b)
 276     {
 277       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 278       if (iy > 0x407d654b)
 279         {
 280           if (ix <= 0x3ffeffff)
 281             return (hy < 0) ? huge * huge : tiny * tiny;
 282           if (ix >= 0x3fff0000)
 283             return (hy > 0) ? huge * huge : tiny * tiny;
 284         }
 285       /* over/underflow if x is not close to one */
 286       if (ix < 0x3ffeffff)
 287         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 288       if (ix > 0x3fff0000)
 289         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 290     }
 291
 292   ay = y > 0 ? y : -y;
 293   if (ay < 0x1p-128)
 294     y = y < 0 ? -0x1p-128 : 0x1p-128;
 295
 296   n = 0;
 297   /* take care subnormal number */
 298   if (ix < 0x00010000)
 299     {
 300       ax *= two113;
 301       n -= 113;
 302       o.value = ax;
 303       ix = o.parts32.w0;
 304     }
 305   n += ((ix) >> 16) - 0x3fff;
 306   j = ix & 0x0000ffff;
 307   /* determine interval */
 308   ix = j | 0x3fff0000;          /* normalize ix */
 309   if (j <= 0x3988)
 310     k = 0;                      /* |x|<sqrt(3/2) */
 311   else if (j < 0xbb67)
 312     k = 1;                      /* |x|<sqrt(3)   */
 313   else
 314     {
 315       k = 0;
 316       n += 1;
 317       ix -= 0x00010000;
 318     }
 319
 320   o.value = ax;
 321   o.parts32.w0 = ix;
 322   ax = o.value;
 323
 324   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 325   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 326   v = one / (ax + bp[k]);
 327   s = u * v;
 328   s_h = s;
 329
 330   o.value = s_h;
 331   o.parts32.w3 = 0;
 332   o.parts32.w2 &= 0xf8000000;
 333   s_h = o.value;
 334   /* t_h=ax+bp[k] High */
 335   t_h = ax + bp[k];
 336   o.value = t_h;
 337   o.parts32.w3 = 0;
 338   o.parts32.w2 &= 0xf8000000;
 339   t_h = o.value;
 340   t_l = ax - (t_h - bp[k]);
 341   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 342   /* compute log(ax) */
 343   s2 = s * s;
 344   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 345   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 346   r = s2 * s2 * u / v;
 347   r += s_l * (s_h + s);
 348   s2 = s_h * s_h;
 349   t_h = 3.0 + s2 + r;
 350   o.value = t_h;
 351   o.parts32.w3 = 0;
 352   o.parts32.w2 &= 0xf8000000;
 353   t_h = o.value;
 354   t_l = r - ((t_h - 3.0) - s2);
 355   /* u+v = s*(1+...) */
 356   u = s_h * t_h;
 357   v = s_l * t_h + t_l * s;
 358   /* 2/(3log2)*(s+...) */
 359   p_h = u + v;
 360   o.value = p_h;
 361   o.parts32.w3 = 0;
 362   o.parts32.w2 &= 0xf8000000;
 363   p_h = o.value;
 364   p_l = v - (p_h - u);
 365   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 366   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 367   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 368   t = (_Float128) n;
 369   t1 = (((z_h + z_l) + dp_h[k]) + t);
 370   o.value = t1;
 371   o.parts32.w3 = 0;
 372   o.parts32.w2 &= 0xf8000000;
 373   t1 = o.value;
 374   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 375
 376   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 377   y1 = y;
 378   o.value = y1;
 379   o.parts32.w3 = 0;
 380   o.parts32.w2 &= 0xf8000000;
 381   y1 = o.value;
 382   p_l = (y - y1) * t1 + y * t2;
 383   p_h = y1 * t1;
 384   z = p_l + p_h;
 385   o.value = z;
 386   j = o.parts32.w0;
 387   if (j >= 0x400d0000) /* z >= 16384 */
 388     {
 389       /* if z > 16384 */
 390       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
 391         return sgn * huge * huge;       /* overflow */
 392       else
 393         {
 394           if (p_l + ovt > z - p_h)
 395             return sgn * huge * huge;   /* overflow */
 396         }
 397     }
 398   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
 399     {
 400       /* z < -16495 */
 401       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
 402           != 0)
 403         return sgn * tiny * tiny;       /* underflow */
 404       else
 405         {
 406           if (p_l <= z - p_h)
 407             return sgn * tiny * tiny;   /* underflow */
 408         }
 409     }
 410   /* compute 2**(p_h+p_l) */
 411   i = j & 0x7fffffff;
 412   k = (i >> 16) - 0x3fff;
 413   n = 0;
 414   if (i > 0x3ffe0000)
 415     {                           /* if |z| > 0.5, set n = [z+0.5] */
 416       n = floorl (z + L(0.5));
 417       t = n;
 418       p_h -= t;
 419     }
 420   t = p_l + p_h;
 421   o.value = t;
 422   o.parts32.w3 = 0;
 423   o.parts32.w2 &= 0xf8000000;
 424   t = o.value;
 425   u = t * lg2_h;
 426   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 427   z = u + v;
 428   w = v - (z - u);
 429   /*  exp(z) */
 430   t = z * z;
 431   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 432   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 433   t1 = z - t * u / v;
 434   r = (z * t1) / (t1 - two) - (w + z * w);
 435   z = one - (r - z);
 436   o.value = z;
 437   j = o.parts32.w0;
 438   j += (n << 16);
 439   if ((j >> 16) <= 0)
 440     {
 441       z = __scalbnl (z, n);     /* subnormal output */
 442       _Float128 force_underflow = z * z;
 443       math_force_eval (force_underflow);
 444     }
 445   else
 446     {
 447       o.parts32.w0 = j;
 448       z = o.value;
 449     }
 450   return sgn * z;
 451 }
 452 strong_alias (__ieee754_powl, __powl_finite)