sysdeps/ieee754/ldbl-128/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double contributed by
  13    Stephen L. Moshier <moshier@na-net.ornl.gov>  */
  14
  15 /* __ieee754_powl(x,y) return x**y
  16  *
  17  *                    n
  18  * Method:  Let x =  2   * (1+f)
  19  *      1. Compute and return log2(x) in two pieces:
  20  *              log2(x) = w1 + w2,
  21  *         where w1 has 113-53 = 60 bit trailing zeros.
  22  *      2. Perform y*log2(x) = n+y' by simulating multi-precision
  23  *         arithmetic, where |y'|<=0.5.
  24  *      3. Return x**y = 2**n*exp(y'*log2)
  25  *
  26  * Special cases:
  27  *      1.  (anything) ** 0  is 1
  28  *      2.  (anything) ** 1  is itself
  29  *      3.  (anything) ** NAN is NAN
  30  *      4.  NAN ** (anything except 0) is NAN
  31  *      5.  +-(|x| > 1) **  +INF is +INF
  32  *      6.  +-(|x| > 1) **  -INF is +0
  33  *      7.  +-(|x| < 1) **  +INF is +0
  34  *      8.  +-(|x| < 1) **  -INF is +INF
  35  *      9.  +-1         ** +-INF is NAN
  36  *      10. +0 ** (+anything except 0, NAN)               is +0
  37  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  38  *      12. +0 ** (-anything except 0, NAN)               is +INF
  39  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  40  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  41  *      15. +INF ** (+anything except 0,NAN) is +INF
  42  *      16. +INF ** (-anything except 0,NAN) is +0
  43  *      17. -INF ** (anything)  = -0 ** (-anything)
  44  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  45  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  46  *
  47  */
  48
  49 #include "math.h"
  50 #include "math_private.h"
  51
  52 static const long double bp[] = {
  53   1.0L,
  54   1.5L,
  55 };
  56
  57 /* log_2(1.5) */
  58 static const long double dp_h[] = {
  59   0.0,
  60   5.8496250072115607565592654282227158546448E-1L
  61 };
  62
  63 /* Low part of log_2(1.5) */
  64 static const long double dp_l[] = {
  65   0.0,
  66   1.0579781240112554492329533686862998106046E-16L
  67 };
  68
  69 static const long double zero = 0.0L,
  70   one = 1.0L,
  71   two = 2.0L,
  72   two113 = 1.0384593717069655257060992658440192E34L,
  73   huge = 1.0e3000L,
  74   tiny = 1.0e-3000L;
  75
  76 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  77    z = (x-1)/(x+1)
  78    1 <= x <= 1.25
  79    Peak relative error 2.3e-37 */
  80 static const long double LN[] =
  81 {
  82  -3.0779177200290054398792536829702930623200E1L,
  83   6.5135778082209159921251824580292116201640E1L,
  84  -4.6312921812152436921591152809994014413540E1L,
  85   1.2510208195629420304615674658258363295208E1L,
  86  -9.9266909031921425609179910128531667336670E-1L
  87 };
  88 static const long double LD[] =
  89 {
  90  -5.129862866715009066465422805058933131960E1L,
  91   1.452015077564081884387441590064272782044E2L,
  92  -1.524043275549860505277434040464085593165E2L,
  93   7.236063513651544224319663428634139768808E1L,
  94  -1.494198912340228235853027849917095580053E1L
  95   /* 1.0E0 */
  96 };
  97
  98 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
  99    0 <= x <= 0.5
 100    Peak relative error 5.7e-38  */
 101 static const long double PN[] =
 102 {
 103   5.081801691915377692446852383385968225675E8L,
 104   9.360895299872484512023336636427675327355E6L,
 105   4.213701282274196030811629773097579432957E4L,
 106   5.201006511142748908655720086041570288182E1L,
 107   9.088368420359444263703202925095675982530E-3L,
 108 };
 109 static const long double PD[] =
 110 {
 111   3.049081015149226615468111430031590411682E9L,
 112   1.069833887183886839966085436512368982758E8L,
 113   8.259257717868875207333991924545445705394E5L,
 114   1.872583833284143212651746812884298360922E3L,
 115   /* 1.0E0 */
 116 };
 117
 118 static const long double
 119   /* ln 2 */
 120   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 121   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 122   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 123   ovt = 8.0085662595372944372e-0017L,
 124   /* 2/(3*log(2)) */
 125   cp = 9.6179669392597560490661645400126142495110E-1L,
 126   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 127   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 128
 129 #ifdef __STDC__
 130 long double
 131 __ieee754_powl (long double x, long double y)
 132 #else
 133 long double
 134 __ieee754_powl (x, y)
 135      long double x, y;
 136 #endif
 137 {
 138   long double z, ax, z_h, z_l, p_h, p_l;
 139   long double y1, t1, t2, r, s, t, u, v, w;
 140   long double s2, s_h, s_l, t_h, t_l;
 141   int32_t i, j, k, yisint, n;
 142   u_int32_t ix, iy;
 143   int32_t hx, hy;
 144   ieee854_long_double_shape_type o, p, q;
 145
 146   p.value = x;
 147   hx = p.parts32.w0;
 148   ix = hx & 0x7fffffff;
 149
 150   q.value = y;
 151   hy = q.parts32.w0;
 152   iy = hy & 0x7fffffff;
 153
 154
 155   /* y==zero: x**0 = 1 */
 156   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 157     return one;
 158
 159   /* +-NaN return x+y */
 160   if ((ix > 0x7fff0000)
 161       || ((ix == 0x7fff0000)
 162           && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
 163       || (iy > 0x7fff0000)
 164       || ((iy == 0x7fff0000)
 165           && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
 166     return x + y;
 167
 168   /* determine if y is an odd int when x < 0
 169    * yisint = 0       ... y is not an integer
 170    * yisint = 1       ... y is an odd int
 171    * yisint = 2       ... y is an even int
 172    */
 173   yisint = 0;
 174   if (hx < 0)
 175     {
 176       if (iy >= 0x40700000)     /* 2^113 */
 177         yisint = 2;             /* even integer y */
 178       else if (iy >= 0x3fff0000)        /* 1.0 */
 179         {
 180           if (__floorl (y) == y)
 181             {
 182               z = 0.5 * y;
 183               if (__floorl (z) == z)
 184                 yisint = 2;
 185               else
 186                 yisint = 1;
 187             }
 188         }
 189     }
 190
 191   /* special value of y */
 192   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 193     {
 194       if (iy == 0x7fff0000)     /* y is +-inf */
 195         {
 196           if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
 197               == 0)
 198             return y - y;       /* inf**+-1 is NaN */
 199           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
 200             return (hy >= 0) ? y : zero;
 201           else                  /* (|x|<1)**-,+inf = inf,0 */
 202             return (hy < 0) ? -y : zero;
 203         }
 204       if (iy == 0x3fff0000)
 205         {                       /* y is  +-1 */
 206           if (hy < 0)
 207             return one / x;
 208           else
 209             return x;
 210         }
 211       if (hy == 0x40000000)
 212         return x * x;           /* y is  2 */
 213       if (hy == 0x3ffe0000)
 214         {                       /* y is  0.5 */
 215           if (hx >= 0)          /* x >= +0 */
 216             return __ieee754_sqrtl (x);
 217         }
 218     }
 219
 220   ax = fabsl (x);
 221   /* special value of x */
 222   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
 223     {
 224       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
 225         {
 226           z = ax;               /*x is +-0,+-inf,+-1 */
 227           if (hy < 0)
 228             z = one / z;        /* z = (1/|x|) */
 229           if (hx < 0)
 230             {
 231               if (((ix - 0x3fff0000) | yisint) == 0)
 232                 {
 233                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 234                 }
 235               else if (yisint == 1)
 236                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 237             }
 238           return z;
 239         }
 240     }
 241
 242   /* (x<0)**(non-int) is NaN */
 243   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 244     return (x - x) / (x - x);
 245
 246   /* |y| is huge.
 247      2^-16495 = 1/2 of smallest representable value.
 248      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 249   if (iy > 0x401d654b)
 250     {
 251       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 252       if (iy > 0x407d654b)
 253         {
 254           if (ix <= 0x3ffeffff)
 255             return (hy < 0) ? huge * huge : tiny * tiny;
 256           if (ix >= 0x3fff0000)
 257             return (hy > 0) ? huge * huge : tiny * tiny;
 258         }
 259       /* over/underflow if x is not close to one */
 260       if (ix < 0x3ffeffff)
 261         return (hy < 0) ? huge * huge : tiny * tiny;
 262       if (ix > 0x3fff0000)
 263         return (hy > 0) ? huge * huge : tiny * tiny;
 264     }
 265
 266   n = 0;
 267   /* take care subnormal number */
 268   if (ix < 0x00010000)
 269     {
 270       ax *= two113;
 271       n -= 113;
 272       o.value = ax;
 273       ix = o.parts32.w0;
 274     }
 275   n += ((ix) >> 16) - 0x3fff;
 276   j = ix & 0x0000ffff;
 277   /* determine interval */
 278   ix = j | 0x3fff0000;          /* normalize ix */
 279   if (j <= 0x3988)
 280     k = 0;                      /* |x|<sqrt(3/2) */
 281   else if (j < 0xbb67)
 282     k = 1;                      /* |x|<sqrt(3)   */
 283   else
 284     {
 285       k = 0;
 286       n += 1;
 287       ix -= 0x00010000;
 288     }
 289
 290   o.value = ax;
 291   o.parts32.w0 = ix;
 292   ax = o.value;
 293
 294   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 295   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 296   v = one / (ax + bp[k]);
 297   s = u * v;
 298   s_h = s;
 299
 300   o.value = s_h;
 301   o.parts32.w3 = 0;
 302   o.parts32.w2 &= 0xf8000000;
 303   s_h = o.value;
 304   /* t_h=ax+bp[k] High */
 305   t_h = ax + bp[k];
 306   o.value = t_h;
 307   o.parts32.w3 = 0;
 308   o.parts32.w2 &= 0xf8000000;
 309   t_h = o.value;
 310   t_l = ax - (t_h - bp[k]);
 311   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 312   /* compute log(ax) */
 313   s2 = s * s;
 314   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 315   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 316   r = s2 * s2 * u / v;
 317   r += s_l * (s_h + s);
 318   s2 = s_h * s_h;
 319   t_h = 3.0 + s2 + r;
 320   o.value = t_h;
 321   o.parts32.w3 = 0;
 322   o.parts32.w2 &= 0xf8000000;
 323   t_h = o.value;
 324   t_l = r - ((t_h - 3.0) - s2);
 325   /* u+v = s*(1+...) */
 326   u = s_h * t_h;
 327   v = s_l * t_h + t_l * s;
 328   /* 2/(3log2)*(s+...) */
 329   p_h = u + v;
 330   o.value = p_h;
 331   o.parts32.w3 = 0;
 332   o.parts32.w2 &= 0xf8000000;
 333   p_h = o.value;
 334   p_l = v - (p_h - u);
 335   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 336   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 337   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 338   t = (long double) n;
 339   t1 = (((z_h + z_l) + dp_h[k]) + t);
 340   o.value = t1;
 341   o.parts32.w3 = 0;
 342   o.parts32.w2 &= 0xf8000000;
 343   t1 = o.value;
 344   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 345
 346   /* s (sign of result -ve**odd) = -1 else = 1 */
 347   s = one;
 348   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 349     s = -one;                   /* (-ve)**(odd int) */
 350
 351   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 352   y1 = y;
 353   o.value = y1;
 354   o.parts32.w3 = 0;
 355   o.parts32.w2 &= 0xf8000000;
 356   y1 = o.value;
 357   p_l = (y - y1) * t1 + y * t2;
 358   p_h = y1 * t1;
 359   z = p_l + p_h;
 360   o.value = z;
 361   j = o.parts32.w0;
 362   if (j >= 0x400d0000) /* z >= 16384 */
 363     {
 364       /* if z > 16384 */
 365       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
 366         return s * huge * huge; /* overflow */
 367       else
 368         {
 369           if (p_l + ovt > z - p_h)
 370             return s * huge * huge;     /* overflow */
 371         }
 372     }
 373   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
 374     {
 375       /* z < -16495 */
 376       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
 377           != 0)
 378         return s * tiny * tiny; /* underflow */
 379       else
 380         {
 381           if (p_l <= z - p_h)
 382             return s * tiny * tiny;     /* underflow */
 383         }
 384     }
 385   /* compute 2**(p_h+p_l) */
 386   i = j & 0x7fffffff;
 387   k = (i >> 16) - 0x3fff;
 388   n = 0;
 389   if (i > 0x3ffe0000)
 390     {                           /* if |z| > 0.5, set n = [z+0.5] */
 391       n = __floorl (z + 0.5L);
 392       t = n;
 393       p_h -= t;
 394     }
 395   t = p_l + p_h;
 396   o.value = t;
 397   o.parts32.w3 = 0;
 398   o.parts32.w2 &= 0xf8000000;
 399   t = o.value;
 400   u = t * lg2_h;
 401   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 402   z = u + v;
 403   w = v - (z - u);
 404   /*  exp(z) */
 405   t = z * z;
 406   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 407   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 408   t1 = z - t * u / v;
 409   r = (z * t1) / (t1 - two) - (w + z * w);
 410   z = one - (r - z);
 411   o.value = z;
 412   j = o.parts32.w0;
 413   j += (n << 16);
 414   if ((j >> 16) <= 0)
 415     z = __scalbnl (z, n);       /* subnormal output */
 416   else
 417     {
 418       o.parts32.w0 = j;
 419       z = o.value;
 420     }
 421   return s * z;
 422 }