sysdeps/ieee754/ldbl-128ibm/e_expl.c

   1 /* Quad-precision floating point e^x.
   2    Copyright (C) 1999-2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Jakub Jelinek <jj@ultra.linux.cz>
   5    Partly based on double-precision code
   6    by Geoffrey Keating <geoffk@ozemail.com.au>
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    The GNU C Library is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16    Lesser General Public License for more details.
  17
  18    You should have received a copy of the GNU Lesser General Public
  19    License along with the GNU C Library; if not, see
  20    <http://www.gnu.org/licenses/>.  */
  21
  22 /* The basic design here is from
  23    Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
  24    Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
  25    pp. 410-423.
  26
  27    We work with number pairs where the first number is the high part and
  28    the second one is the low part. Arithmetic with the high part numbers must
  29    be exact, without any roundoff errors.
  30
  31    The input value, X, is written as
  32    X = n * ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x
  33        - n * ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl
  34
  35    where:
  36    - n is an integer, 1024 >= n >= -1074;
  37    - ln(2)_0 is the first 93 bits of ln(2), and |ln(2)_0-ln(2)-ln(2)_1| < 2^-205
  38    - t1 is an integer, 89 >= t1 >= -89
  39    - t2 is an integer, 65 >= t2 >= -65
  40    - |arg1[t1]-t1/256.0| < 2^-53
  41    - |arg2[t2]-t2/32768.0| < 2^-53
  42    - x + xl is whatever is left, |x + xl| < 2^-16 + 2^-53
  43
  44    Then e^x is approximated as
  45
  46    e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
  47                + 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
  48                  * p (x + xl + n * ln(2)_1))
  49    where:
  50    - p(x) is a polynomial approximating e^x-1
  51    - e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
  52    - e^(arg2[t2]_0 + arg2[t2]_1) likewise
  53    - n_1 + n_0 = n, so that |n_0| < -LDBL_MIN_EXP-1.
  54
  55    If it happens that n_1 == 0 (this is the usual case), that multiplication
  56    is omitted.
  57    */
  58
  59 #ifndef _GNU_SOURCE
  60 #define _GNU_SOURCE
  61 #endif
  62 #include <float.h>
  63 #include <ieee754.h>
  64 #include <math.h>
  65 #include <fenv.h>
  66 #include <inttypes.h>
  67 #include <math_private.h>
  68 #include <fenv_private.h>
  69
  70
  71 #include "t_expl.h"
  72 /* Constant pool for __ieee754_expl.  Each #define below names the array
        element whose initializer follows it, so the macro indices and the
        initializer order must stay in step.  */
  73 static const long double C[] = {
  74 /* Exclusive upper bound of the usual-case range, about 1024*ln(2).  */
  75 #define himark C[0]
  76  709.78271289338399678773454114191496482L,
  77
  78 /* Exclusive lower bound of the usual-case range, about -1074*ln(2).  */
  79 #define lomark C[1]
  80 -744.44007192138126231410729844608163411L,
  81
  82 /* Named 3x2^96.  NOTE(review): the literal below equals 3x2^94.  */
  83 #define THREEp96 C[2]
  84  59421121885698253195157962752.0L,
  85
  86 /* 3x2^103 */
  87 #define THREEp103 C[3]
  88  30423614405477505635920876929024.0L,
  89
  90 /* 3x2^111 */
  91 #define THREEp111 C[4]
  92  7788445287802241442795744493830144.0L,
  93
  94 /* 1/ln(2) */
  95 #define M_1_LN2 C[5]
  96  1.44269504088896340735992468100189204L,
  97
  98 /* first 93 bits of ln(2) */
  99 #define M_LN2_0 C[6]
 100  0.693147180559945309417232121457981864L,
 101
 102 /* ln2_0 - ln(2); n times this is the low-order correction to x - n*ln2_0.  */
 103 #define M_LN2_1 C[7]
 104 -1.94704509238074995158795957333327386E-31L,
 105
 106 /* Very small number; TINY * TINY is returned on the underflow path.  */
 107 #define TINY C[8]
 108  1.0e-308L,
 109
 110 /* 2^1023; TWO1023 * x is returned for NaN, +Inf and the overflow path.  */
 111 #define TWO1023 C[9]
 112  8.988465674311579538646525953945123668E+307L,
 113
 114 /* 256 */
 115 #define TWO8 C[10]
 116  256.0L,
 117
 118 /* 32768 */
 119 #define TWO15 C[11]
 120  32768.0L,
 121
 122 /* Chebyshev polynomial coefficients for (exp(x)-1)/x; P1..P6 multiply
        x^1..x^6 and the constant term 1 is implicit in the caller.  */
 123 #define P1 C[12]
 124 #define P2 C[13]
 125 #define P3 C[14]
 126 #define P4 C[15]
 127 #define P5 C[16]
 128 #define P6 C[17]
 129  0.5L,
 130  1.66666666666666666666666666666666683E-01L,
 131  4.16666666666666666666654902320001674E-02L,
 132  8.33333333333333333333314659767198461E-03L,
 133  1.38888888889899438565058018857254025E-03L,
 134  1.98412698413981650382436541785404286E-04L,
 135 };
 136
 137 /* Bind lroundl to the assembler symbol __lroundl.  When long is 32 bits and
 138    roundl is not inlined, (int) roundl (...) may be converted to a call to
 139    lroundl; this keeps that call from going through a local PLT entry.  */
 140 long int lroundl (long double) asm ("__lroundl");
 141 /* Return e^x, where X is a long double held as a pair of doubles
        (union ibm_extended_long_double).
        For lomark < X < himark the argument is reduced by n*ln(2) and by two
        tabulated arguments; the matching table values of e^arg are multiplied,
        scaled by a power of two through direct exponent-field updates, and a
        short polynomial supplies the remaining factor.
        Otherwise the result is 0 for -Inf, TINY*TINY for other X at or below
        lomark, and TWO1023*X for everything else (NaN, +Inf, overflow).
        The strong_alias line after the function also exposes it under the
        name __expl_finite.  */
 142 long double
 143 __ieee754_expl (long double x)
 144 {
 145   long double result, x22;
 146   union ibm_extended_long_double ex2_u, scale_u;
 147   int unsafe;
 148
 149   /* Usual case: lomark < x < himark.  isless and isgreater are false for a
          NaN operand, so a NaN drops through to the final else below.  */
 150   if (isless (x, himark) && isgreater (x, lomark))
 151     {
 152       int tval1, tval2, n_i, exponent2;
 153       long double n, xl;
 154
 155       SET_RESTORE_ROUND (FE_TONEAREST);
 156
            /* Split off n, the nearest integer to x/ln(2).  x keeps the high
               part of the remainder; xl collects the low-order corrections.  */
 157       n = roundl (x*M_1_LN2);
 158       x = x-n*M_LN2_0;
 159       xl = n*M_LN2_1;
 160
            /* Remove the nearest tabulated argument on a 1/256 grid.  Table
               entries come in (high, low) pairs, hence the 2*tval1 indexing.  */
 161       tval1 = roundl (x*TWO8);
 162       x -= __expl_table[T_EXPL_ARG1+2*tval1];
 163       xl -= __expl_table[T_EXPL_ARG1+2*tval1+1];
 164
            /* Likewise on a 1/32768 grid.  */
 165       tval2 = roundl (x*TWO15);
 166       x -= __expl_table[T_EXPL_ARG2+2*tval2];
 167       xl -= __expl_table[T_EXPL_ARG2+2*tval2+1];
 168
            /* Fold the low part back in; x is now the small residual.  */
 169       x = x + xl;
 170
 171       /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]).  */
 172       ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
 173                   * __expl_table[T_EXPL_RES2 + tval2]);
 174       n_i = (int)n;
 175       /* 'unsafe' is 1 iff n_1 != 0.  In that case n is split in two:
               n_0 = n_i >> 1 is applied to the exponent fields here and
               n_1 = n_i - n_0 is applied through scale_u at the very end.  */
 176       unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
 177       ex2_u.d[0].ieee.exponent += n_i >> unsafe;
 178       /* Fortunately, there are no subnormal lowpart doubles in
 179          __expl_table, only normal values and zeros.
 180          But after scaling it can be subnormal.  */
 181       exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe);
 182       if (ex2_u.d[1].ieee.exponent == 0)
 183         /* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */;
 184       else if (exponent2 > 0)
 185         ex2_u.d[1].ieee.exponent = exponent2;
 186       else if (exponent2 <= -54)
 187         {
 188           ex2_u.d[1].ieee.exponent = 0;
 189           ex2_u.d[1].ieee.mantissa0 = 0;
 190           ex2_u.d[1].ieee.mantissa1 = 0;
 191         }
 192       else
 193         {
                /* Here -54 < exponent2 <= 0.  Scale up by 2^54 first so the
                   exponent field stays positive after the adjustment, then
                   scale back by 2^-54 and let the multiply produce the
                   subnormal low part.  */
 194           static const double
 195             two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
 196             twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
 197           ex2_u.d[1].d *= two54;
 198           ex2_u.d[1].ieee.exponent += n_i >> unsafe;
 199           ex2_u.d[1].d *= twom54;
 200         }
 201
 202       /* Compute scale = 2^n_1.  The exponent increment below is zero when
               unsafe is 0, leaving scale at 1.0.  */
 203       scale_u.ld = 1.0L;
 204       scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe);
 205
 206       /* Approximate e^x2 - 1, using a seventh-degree polynomial,
 207          with maximum error in [-2^-16-2^-53,2^-16+2^-53]
 208          less than 4.8e-39.  */
 209       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
 210
 211       /* Now we can test whether the result is ultimate or if we are unsure.
 212          In the latter case we should probably call a mpn based routine to give
 213          the ultimate result.
 214          Empirically, this routine is already ultimate in about 99.9986% of
 215          cases, the test below for the round to nearest case will be false
 216          in ~ 99.9963% of cases.
 217          Without proc2 routine maximum error which has been seen is
 218          0.5000262 ulp.
 219          The sketch below is not compiled; origx and oldenv are not declared here.
 220           union ieee854_long_double ex3_u;
 221
 222           #ifdef FE_TONEAREST
 223             fesetround (FE_TONEAREST);
 224           #endif
 225           ex3_u.d = (result - ex2_u.d) - x22 * ex2_u.d;
 226           ex2_u.d = result;
 227           ex3_u.ieee.exponent += LDBL_MANT_DIG + 15 + IEEE854_LONG_DOUBLE_BIAS
 228                                  - ex2_u.ieee.exponent;
 229           n_i = abs (ex3_u.d);
 230           n_i = (n_i + 1) / 2;
 231           fesetenv (&oldenv);
 232           #ifdef FE_TONEAREST
 233           if (fegetround () == FE_TONEAREST)
 234             n_i -= 0x4000;
 235           #endif
 236           if (!n_i) {
 237             return __ieee754_expl_proc2 (origx);
 238           }
 239        */
 240     }
 241   /* Exceptional cases:  */
 242   else if (isless (x, himark))
 243     {
 244       if (isinf (x))
 245         /* e^-inf == 0, with no error.  */
 246         return 0;
 247       else
 248         /* Underflow */
 249         return TINY * TINY;
 250     }
 251   else
 252     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
 253     return TWO1023*x;
 254
        /* Only the usual case reaches this point; the other branches return.
           result = ex2 * (1 + x22), then the deferred 2^n_1 factor if needed.
           NOTE(review): this runs after the block holding SET_RESTORE_ROUND has
           closed, so presumably the caller's rounding mode applies again here;
           confirm against the macro's definition.  */
 255   result = x22 * ex2_u.ld + ex2_u.ld;
 256   if (!unsafe)
 257     return result;
 258   return result * scale_u.ld;
 259 }
 260 strong_alias (__ieee754_expl, __expl_finite)