sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math_private.h>
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e300L,
  92   tiny = 1.0e-300L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 long double
 148 __ieee754_powl (long double x, long double y)
 149 {
 150   long double z, ax, z_h, z_l, p_h, p_l;
 151   long double y1, t1, t2, r, s, t, u, v, w;
 152   long double s2, s_h, s_l, t_h, t_l, ay;
 153   int32_t i, j, k, yisint, n;
 154   uint32_t ix, iy;
 155   int32_t hx, hy, hax;
 156   double ohi, xhi, xlo, yhi, ylo;
 157   uint32_t lx, ly, lj;
 158
 159   ldbl_unpack (x, &xhi, &xlo);
 160   EXTRACT_WORDS (hx, lx, xhi);
 161   ix = hx & 0x7fffffff;
 162
 163   ldbl_unpack (y, &yhi, &ylo);
 164   EXTRACT_WORDS (hy, ly, yhi);
 165   iy = hy & 0x7fffffff;
 166
 167   /* y==zero: x**0 = 1 */
 168   if ((iy | ly) == 0)
 169     return one;
 170
 171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 172   if (x == one)
 173     return one;
 174   if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
 175     return one;
 176
 177   /* +-NaN return x+y */
 178   if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
 179       || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
 180     return x + y;
 181
 182   /* determine if y is an odd int when x < 0
 183    * yisint = 0       ... y is not an integer
 184    * yisint = 1       ... y is an odd int
 185    * yisint = 2       ... y is an even int
 186    */
 187   yisint = 0;
 188   if (hx < 0)
 189     {
 190       uint32_t low_ye;
 191
 192       GET_HIGH_WORD (low_ye, ylo);
 193       if ((low_ye & 0x7fffffff) >= 0x43400000)  /* Low part >= 2^53 */
 194         yisint = 2;             /* even integer y */
 195       else if (iy >= 0x3ff00000)        /* 1.0 */
 196         {
 197           if (__floorl (y) == y)
 198             {
 199               z = 0.5 * y;
 200               if (__floorl (z) == z)
 201                 yisint = 2;
 202               else
 203                 yisint = 1;
 204             }
 205         }
 206     }
 207
 208   ax = fabsl (x);
 209
 210   /* special value of y */
 211   if (ly == 0)
 212     {
 213       if (iy == 0x7ff00000)     /* y is +-inf */
 214         {
 215           if (ax > one)
 216             /* (|x|>1)**+-inf = inf,0 */
 217             return (hy >= 0) ? y : zero;
 218           else
 219             /* (|x|<1)**-,+inf = inf,0 */
 220             return (hy < 0) ? -y : zero;
 221         }
 222       if (ylo == 0.0)
 223         {
 224           if (iy == 0x3ff00000)
 225             {                   /* y is  +-1 */
 226               if (hy < 0)
 227                 return one / x;
 228               else
 229                 return x;
 230             }
 231           if (hy == 0x40000000)
 232             return x * x;               /* y is  2 */
 233           if (hy == 0x3fe00000)
 234             {                   /* y is  0.5 */
 235               if (hx >= 0)              /* x >= +0 */
 236                 return __ieee754_sqrtl (x);
 237             }
 238         }
 239     }
 240
 241   /* special value of x */
 242   if (lx == 0)
 243     {
 244       if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
 245         {
 246           z = ax;               /*x is +-0,+-inf,+-1 */
 247           if (hy < 0)
 248             z = one / z;        /* z = (1/|x|) */
 249           if (hx < 0)
 250             {
 251               if (((ix - 0x3ff00000) | yisint) == 0)
 252                 {
 253                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 254                 }
 255               else if (yisint == 1)
 256                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 257             }
 258           return z;
 259         }
 260     }
 261
 262   /* (x<0)**(non-int) is NaN */
 263   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 264     return (x - x) / (x - x);
 265
 266   /* |y| is huge.
 267      2^-16495 = 1/2 of smallest representable value.
 268      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 269   if (iy > 0x41d654b0)
 270     {
 271       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 272       if (iy > 0x47d654b0)
 273         {
 274           if (ix <= 0x3fefffff)
 275             return (hy < 0) ? huge * huge : tiny * tiny;
 276           if (ix >= 0x3ff00000)
 277             return (hy > 0) ? huge * huge : tiny * tiny;
 278         }
 279       /* over/underflow if x is not close to one */
 280       if (ix < 0x3fefffff)
 281         return (hy < 0) ? huge * huge : tiny * tiny;
 282       if (ix > 0x3ff00000)
 283         return (hy > 0) ? huge * huge : tiny * tiny;
 284     }
 285
 286   ay = y > 0 ? y : -y;
 287   if (ay < 0x1p-117)
 288     y = y < 0 ? -0x1p-117 : 0x1p-117;
 289
 290   n = 0;
 291   /* take care subnormal number */
 292   if (ix < 0x00100000)
 293     {
 294       ax *= two113;
 295       n -= 113;
 296       ohi = ldbl_high (ax);
 297       GET_HIGH_WORD (ix, ohi);
 298     }
 299   n += ((ix) >> 20) - 0x3ff;
 300   j = ix & 0x000fffff;
 301   /* determine interval */
 302   ix = j | 0x3ff00000;          /* normalize ix */
 303   if (j <= 0x39880)
 304     k = 0;                      /* |x|<sqrt(3/2) */
 305   else if (j < 0xbb670)
 306     k = 1;                      /* |x|<sqrt(3)   */
 307   else
 308     {
 309       k = 0;
 310       n += 1;
 311       ix -= 0x00100000;
 312     }
 313
 314   ohi = ldbl_high (ax);
 315   GET_HIGH_WORD (hax, ohi);
 316   ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
 317
 318   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 319   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 320   v = one / (ax + bp[k]);
 321   s = u * v;
 322   s_h = ldbl_high (s);
 323
 324   /* t_h=ax+bp[k] High */
 325   t_h = ax + bp[k];
 326   t_h = ldbl_high (t_h);
 327   t_l = ax - (t_h - bp[k]);
 328   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 329   /* compute log(ax) */
 330   s2 = s * s;
 331   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 332   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 333   r = s2 * s2 * u / v;
 334   r += s_l * (s_h + s);
 335   s2 = s_h * s_h;
 336   t_h = 3.0 + s2 + r;
 337   t_h = ldbl_high (t_h);
 338   t_l = r - ((t_h - 3.0) - s2);
 339   /* u+v = s*(1+...) */
 340   u = s_h * t_h;
 341   v = s_l * t_h + t_l * s;
 342   /* 2/(3log2)*(s+...) */
 343   p_h = u + v;
 344   p_h = ldbl_high (p_h);
 345   p_l = v - (p_h - u);
 346   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 347   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 348   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 349   t = (long double) n;
 350   t1 = (((z_h + z_l) + dp_h[k]) + t);
 351   t1 = ldbl_high (t1);
 352   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 353
 354   /* s (sign of result -ve**odd) = -1 else = 1 */
 355   s = one;
 356   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 357     s = -one;                   /* (-ve)**(odd int) */
 358
 359   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 360   y1 = ldbl_high (y);
 361   p_l = (y - y1) * t1 + y * t2;
 362   p_h = y1 * t1;
 363   z = p_l + p_h;
 364   ohi = ldbl_high (z);
 365   EXTRACT_WORDS (j, lj, ohi);
 366   if (j >= 0x40d00000) /* z >= 16384 */
 367     {
 368       /* if z > 16384 */
 369       if (((j - 0x40d00000) | lj) != 0)
 370         return s * huge * huge; /* overflow */
 371       else
 372         {
 373           if (p_l + ovt > z - p_h)
 374             return s * huge * huge;     /* overflow */
 375         }
 376     }
 377   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
 378     {
 379       /* z < -16495 */
 380       if (((j - 0xc0d01bc0) | lj) != 0)
 381         return s * tiny * tiny; /* underflow */
 382       else
 383         {
 384           if (p_l <= z - p_h)
 385             return s * tiny * tiny;     /* underflow */
 386         }
 387     }
 388   /* compute 2**(p_h+p_l) */
 389   i = j & 0x7fffffff;
 390   k = (i >> 20) - 0x3ff;
 391   n = 0;
 392   if (i > 0x3fe00000)
 393     {                           /* if |z| > 0.5, set n = [z+0.5] */
 394       n = __floorl (z + 0.5L);
 395       t = n;
 396       p_h -= t;
 397     }
 398   t = p_l + p_h;
 399   t = ldbl_high (t);
 400   u = t * lg2_h;
 401   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 402   z = u + v;
 403   w = v - (z - u);
 404   /*  exp(z) */
 405   t = z * z;
 406   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 407   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 408   t1 = z - t * u / v;
 409   r = (z * t1) / (t1 - two) - (w + z * w);
 410   z = one - (r - z);
 411   z = __scalbnl (z, n);
 412   return s * z;
 413 }
 414 strong_alias (__ieee754_powl, __powl_finite)