libquadmath/math/powq.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* powq(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include "quadmath-imp.h"
  68
  69 static const __float128 bp[] = {
  70   1,
  71   1.5Q,
  72 };
  73
  74 /* log_2(1.5) */
  75 static const __float128 dp_h[] = {
  76   0.0,
  77   5.8496250072115607565592654282227158546448E-1Q
  78 };
  79
  80 /* Low part of log_2(1.5) */
  81 static const __float128 dp_l[] = {
  82   0.0,
  83   1.0579781240112554492329533686862998106046E-16Q
  84 };
  85
  86 static const __float128 zero = 0,
  87   one = 1,
  88   two = 2,
  89   two113 = 1.0384593717069655257060992658440192E34Q,
  90   huge = 1.0e3000Q,
  91   tiny = 1.0e-3000Q;
  92
  93 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  94    z = (x-1)/(x+1)
  95    1 <= x <= 1.25
  96    Peak relative error 2.3e-37 */
  97 static const __float128 LN[] =
  98 {
  99  -3.0779177200290054398792536829702930623200E1Q,
 100   6.5135778082209159921251824580292116201640E1Q,
 101  -4.6312921812152436921591152809994014413540E1Q,
 102   1.2510208195629420304615674658258363295208E1Q,
 103  -9.9266909031921425609179910128531667336670E-1Q
 104 };
 105 static const __float128 LD[] =
 106 {
 107  -5.129862866715009066465422805058933131960E1Q,
 108   1.452015077564081884387441590064272782044E2Q,
 109  -1.524043275549860505277434040464085593165E2Q,
 110   7.236063513651544224319663428634139768808E1Q,
 111  -1.494198912340228235853027849917095580053E1Q
 112   /* 1.0E0 */
 113 };
 114
 115 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 116    0 <= x <= 0.5
 117    Peak relative error 5.7e-38  */
 118 static const __float128 PN[] =
 119 {
 120   5.081801691915377692446852383385968225675E8Q,
 121   9.360895299872484512023336636427675327355E6Q,
 122   4.213701282274196030811629773097579432957E4Q,
 123   5.201006511142748908655720086041570288182E1Q,
 124   9.088368420359444263703202925095675982530E-3Q,
 125 };
 126 static const __float128 PD[] =
 127 {
 128   3.049081015149226615468111430031590411682E9Q,
 129   1.069833887183886839966085436512368982758E8Q,
 130   8.259257717868875207333991924545445705394E5Q,
 131   1.872583833284143212651746812884298360922E3Q,
 132   /* 1.0E0 */
 133 };
 134
 135 static const __float128
 136   /* ln 2 */
 137   lg2 = 6.9314718055994530941723212145817656807550E-1Q,
 138   lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
 139   lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
 140   ovt = 8.0085662595372944372e-0017Q,
 141   /* 2/(3*log(2)) */
 142   cp = 9.6179669392597560490661645400126142495110E-1Q,
 143   cp_h = 9.6179669392597555432899980587535537779331E-1Q,
 144   cp_l = 5.0577616648125906047157785230014751039424E-17Q;
 145
 146 __float128
 147 powq (__float128 x, __float128 y)
 148 {
 149   __float128 z, ax, z_h, z_l, p_h, p_l;
 150   __float128 y1, t1, t2, r, s, sgn, t, u, v, w;
 151   __float128 s2, s_h, s_l, t_h, t_l, ay;
 152   int32_t i, j, k, yisint, n;
 153   uint32_t ix, iy;
 154   int32_t hx, hy;
 155   ieee854_float128 o, p, q;
 156
 157   p.value = x;
 158   hx = p.words32.w0;
 159   ix = hx & 0x7fffffff;
 160
 161   q.value = y;
 162   hy = q.words32.w0;
 163   iy = hy & 0x7fffffff;
 164
 165
 166   /* y==zero: x**0 = 1 */
 167   if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0
 168       && !issignalingq (x))
 169     return one;
 170
 171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 172   if (x == one && !issignalingq (y))
 173     return one;
 174   if (x == -1 && iy == 0x7fff0000
 175       && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
 176     return one;
 177
 178   /* +-NaN return x+y */
 179   if ((ix > 0x7fff0000)
 180       || ((ix == 0x7fff0000)
 181           && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
 182       || (iy > 0x7fff0000)
 183       || ((iy == 0x7fff0000)
 184           && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
 185     return x + y;
 186
 187   /* determine if y is an odd int when x < 0
 188    * yisint = 0       ... y is not an integer
 189    * yisint = 1       ... y is an odd int
 190    * yisint = 2       ... y is an even int
 191    */
 192   yisint = 0;
 193   if (hx < 0)
 194     {
 195       if (iy >= 0x40700000)     /* 2^113 */
 196         yisint = 2;             /* even integer y */
 197       else if (iy >= 0x3fff0000)        /* 1.0 */
 198         {
 199           if (floorq (y) == y)
 200             {
 201               z = 0.5 * y;
 202               if (floorq (z) == z)
 203                 yisint = 2;
 204               else
 205                 yisint = 1;
 206             }
 207         }
 208     }
 209
 210   /* special value of y */
 211   if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
 212     {
 213       if (iy == 0x7fff0000)     /* y is +-inf */
 214         {
 215           if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
 216               == 0)
 217             return y - y;       /* +-1**inf is NaN */
 218           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
 219             return (hy >= 0) ? y : zero;
 220           else                  /* (|x|<1)**-,+inf = inf,0 */
 221             return (hy < 0) ? -y : zero;
 222         }
 223       if (iy == 0x3fff0000)
 224         {                       /* y is  +-1 */
 225           if (hy < 0)
 226             return one / x;
 227           else
 228             return x;
 229         }
 230       if (hy == 0x40000000)
 231         return x * x;           /* y is  2 */
 232       if (hy == 0x3ffe0000)
 233         {                       /* y is  0.5 */
 234           if (hx >= 0)          /* x >= +0 */
 235             return sqrtq (x);
 236         }
 237     }
 238
 239   ax = fabsq (x);
 240   /* special value of x */
 241   if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
 242     {
 243       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
 244         {
 245           z = ax;               /*x is +-0,+-inf,+-1 */
 246           if (hy < 0)
 247             z = one / z;        /* z = (1/|x|) */
 248           if (hx < 0)
 249             {
 250               if (((ix - 0x3fff0000) | yisint) == 0)
 251                 {
 252                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 253                 }
 254               else if (yisint == 1)
 255                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 256             }
 257           return z;
 258         }
 259     }
 260
 261   /* (x<0)**(non-int) is NaN */
 262   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
 263     return (x - x) / (x - x);
 264
 265   /* sgn (sign of result -ve**odd) = -1 else = 1 */
 266   sgn = one;
 267   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 268     sgn = -one;                 /* (-ve)**(odd int) */
 269
 270   /* |y| is huge.
 271      2^-16495 = 1/2 of smallest representable value.
 272      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 273   if (iy > 0x401d654b)
 274     {
 275       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 276       if (iy > 0x407d654b)
 277         {
 278           if (ix <= 0x3ffeffff)
 279             return (hy < 0) ? huge * huge : tiny * tiny;
 280           if (ix >= 0x3fff0000)
 281             return (hy > 0) ? huge * huge : tiny * tiny;
 282         }
 283       /* over/underflow if x is not close to one */
 284       if (ix < 0x3ffeffff)
 285         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 286       if (ix > 0x3fff0000)
 287         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 288     }
 289
 290   ay = y > 0 ? y : -y;
 291   if (ay < 0x1p-128)
 292     y = y < 0 ? -0x1p-128 : 0x1p-128;
 293
 294   n = 0;
 295   /* take care subnormal number */
 296   if (ix < 0x00010000)
 297     {
 298       ax *= two113;
 299       n -= 113;
 300       o.value = ax;
 301       ix = o.words32.w0;
 302     }
 303   n += ((ix) >> 16) - 0x3fff;
 304   j = ix & 0x0000ffff;
 305   /* determine interval */
 306   ix = j | 0x3fff0000;          /* normalize ix */
 307   if (j <= 0x3988)
 308     k = 0;                      /* |x|<sqrt(3/2) */
 309   else if (j < 0xbb67)
 310     k = 1;                      /* |x|<sqrt(3)   */
 311   else
 312     {
 313       k = 0;
 314       n += 1;
 315       ix -= 0x00010000;
 316     }
 317
 318   o.value = ax;
 319   o.words32.w0 = ix;
 320   ax = o.value;
 321
 322   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 323   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 324   v = one / (ax + bp[k]);
 325   s = u * v;
 326   s_h = s;
 327
 328   o.value = s_h;
 329   o.words32.w3 = 0;
 330   o.words32.w2 &= 0xf8000000;
 331   s_h = o.value;
 332   /* t_h=ax+bp[k] High */
 333   t_h = ax + bp[k];
 334   o.value = t_h;
 335   o.words32.w3 = 0;
 336   o.words32.w2 &= 0xf8000000;
 337   t_h = o.value;
 338   t_l = ax - (t_h - bp[k]);
 339   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 340   /* compute log(ax) */
 341   s2 = s * s;
 342   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 343   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 344   r = s2 * s2 * u / v;
 345   r += s_l * (s_h + s);
 346   s2 = s_h * s_h;
 347   t_h = 3.0 + s2 + r;
 348   o.value = t_h;
 349   o.words32.w3 = 0;
 350   o.words32.w2 &= 0xf8000000;
 351   t_h = o.value;
 352   t_l = r - ((t_h - 3.0) - s2);
 353   /* u+v = s*(1+...) */
 354   u = s_h * t_h;
 355   v = s_l * t_h + t_l * s;
 356   /* 2/(3log2)*(s+...) */
 357   p_h = u + v;
 358   o.value = p_h;
 359   o.words32.w3 = 0;
 360   o.words32.w2 &= 0xf8000000;
 361   p_h = o.value;
 362   p_l = v - (p_h - u);
 363   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 364   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 365   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 366   t = (__float128) n;
 367   t1 = (((z_h + z_l) + dp_h[k]) + t);
 368   o.value = t1;
 369   o.words32.w3 = 0;
 370   o.words32.w2 &= 0xf8000000;
 371   t1 = o.value;
 372   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 373
 374   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 375   y1 = y;
 376   o.value = y1;
 377   o.words32.w3 = 0;
 378   o.words32.w2 &= 0xf8000000;
 379   y1 = o.value;
 380   p_l = (y - y1) * t1 + y * t2;
 381   p_h = y1 * t1;
 382   z = p_l + p_h;
 383   o.value = z;
 384   j = o.words32.w0;
 385   if (j >= 0x400d0000) /* z >= 16384 */
 386     {
 387       /* if z > 16384 */
 388       if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
 389         return sgn * huge * huge;       /* overflow */
 390       else
 391         {
 392           if (p_l + ovt > z - p_h)
 393             return sgn * huge * huge;   /* overflow */
 394         }
 395     }
 396   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
 397     {
 398       /* z < -16495 */
 399       if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
 400           != 0)
 401         return sgn * tiny * tiny;       /* underflow */
 402       else
 403         {
 404           if (p_l <= z - p_h)
 405             return sgn * tiny * tiny;   /* underflow */
 406         }
 407     }
 408   /* compute 2**(p_h+p_l) */
 409   i = j & 0x7fffffff;
 410   k = (i >> 16) - 0x3fff;
 411   n = 0;
 412   if (i > 0x3ffe0000)
 413     {                           /* if |z| > 0.5, set n = [z+0.5] */
 414       n = floorq (z + 0.5Q);
 415       t = n;
 416       p_h -= t;
 417     }
 418   t = p_l + p_h;
 419   o.value = t;
 420   o.words32.w3 = 0;
 421   o.words32.w2 &= 0xf8000000;
 422   t = o.value;
 423   u = t * lg2_h;
 424   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 425   z = u + v;
 426   w = v - (z - u);
 427   /*  exp(z) */
 428   t = z * z;
 429   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 430   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 431   t1 = z - t * u / v;
 432   r = (z * t1) / (t1 - two) - (w + z * w);
 433   z = one - (r - z);
 434   o.value = z;
 435   j = o.words32.w0;
 436   j += (n << 16);
 437   if ((j >> 16) <= 0)
 438     {
 439       z = scalbnq (z, n);       /* subnormal output */
 440       __float128 force_underflow = z * z;
 441       math_force_eval (force_underflow);
 442     }
 443   else
 444     {
 445       o.words32.w0 = j;
 446       z = o.value;
 447     }
 448   return sgn * z;
 449 }