sysdeps/ieee754/ldbl-128/e_expl.c

   1 /* Quad-precision floating point e^x.
   2    Copyright (C) 1999-2023 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 /* The basic design here is from
  20    Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
  21    Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
  22    pp. 410-423.
  23
  24    We work with number pairs where the first number is the high part and
  25    the second one is the low part. Arithmetic with the high part numbers must
  26    be exact, without any roundoff errors.
  27
  28    The input value, X, is written as
  29    X = n * ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x
  30        - n * ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl
  31
  32    where:
  33    - n is an integer, 16384 >= n >= -16495;
  34    - ln(2)_0 is the first 93 bits of ln(2), and |ln(2)_0-ln(2)-ln(2)_1| < 2^-205
  35    - t1 is an integer, 89 >= t1 >= -89
  36    - t2 is an integer, 65 >= t2 >= -65
  37    - |arg1[t1]-t1/256.0| < 2^-53
  38    - |arg2[t2]-t2/32768.0| < 2^-53
  39    - x + xl is whatever is left, |x + xl| < 2^-16 + 2^-53
  40
  41    Then e^x is approximated as
  42
  43    e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
  44                + 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
  45                  * p (x + xl + n * ln(2)_1))
  46    where:
  47    - p(x) is a polynomial approximating e(x)-1
  48    - e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
  49    - e^(arg2[t2]_0 + arg2[t2]_1) likewise
  50    - n_1 + n_0 = n, so that |n_0| < -LDBL_MIN_EXP-1.
  51
  52    If it happens that n_1 == 0 (this is the usual case), that multiplication
  53    is omitted.
  54    */
  55
  56 #ifndef _GNU_SOURCE
  57 #define _GNU_SOURCE
  58 #endif
  59 #include <float.h>
  60 #include <ieee754.h>
  61 #include <math.h>
  62 #include <fenv.h>
  63 #include <inttypes.h>
  64 #include <math-barriers.h>
  65 #include <math_private.h>
  66 #include <math-underflow.h>
  67 #include <stdlib.h>
  68 #include "t_expl.h"
  69 #include <libm-alias-finite.h>
  70
  71 static const _Float128 C[] = {
  72 /* Smallest integer x for which e^x overflows.  */
  73 #define himark C[0]
  74  L(11356.523406294143949491931077970765),
  75
  76 /* Largest integer x for which e^x underflows.  */
  77 #define lomark C[1]
  78 L(-11433.4627433362978788372438434526231),
  79
  80 /* 3x2^96 */
  81 #define THREEp96 C[2]
  82  L(59421121885698253195157962752.0),
  83
  84 /* 3x2^103 */
  85 #define THREEp103 C[3]
  86  L(30423614405477505635920876929024.0),
  87
  88 /* 3x2^111 */
  89 #define THREEp111 C[4]
  90  L(7788445287802241442795744493830144.0),
  91
  92 /* 1/ln(2) */
  93 #define M_1_LN2 C[5]
  94  L(1.44269504088896340735992468100189204),
  95
  96 /* first 93 bits of ln(2) */
  97 #define M_LN2_0 C[6]
  98  L(0.693147180559945309417232121457981864),
  99
 100 /* ln2_0 - ln(2) */
 101 #define M_LN2_1 C[7]
 102 L(-1.94704509238074995158795957333327386E-31),
 103
 104 /* very small number */
 105 #define TINY C[8]
 106  L(1.0e-4900),
 107
 108 /* 2^16383 */
 109 #define TWO16383 C[9]
 110  L(5.94865747678615882542879663314003565E+4931),
 111
 112 /* 256 */
 113 #define TWO8 C[10]
 114  256,
 115
 116 /* 32768 */
 117 #define TWO15 C[11]
 118  32768,
 119
 120 /* Chebyshev polynom coefficients for (exp(x)-1)/x */
 121 #define P1 C[12]
 122 #define P2 C[13]
 123 #define P3 C[14]
 124 #define P4 C[15]
 125 #define P5 C[16]
 126 #define P6 C[17]
 127  L(0.5),
 128  L(1.66666666666666666666666666666666683E-01),
 129  L(4.16666666666666666666654902320001674E-02),
 130  L(8.33333333333333333333314659767198461E-03),
 131  L(1.38888888889899438565058018857254025E-03),
 132  L(1.98412698413981650382436541785404286E-04),
 133 };
 134
 135 _Float128
 136 __ieee754_expl (_Float128 x)
 137 {
 138   /* Check for usual case.  */
 139   if (isless (x, himark) && isgreater (x, lomark))
 140     {
 141       int tval1, tval2, unsafe, n_i;
 142       _Float128 x22, n, t, result, xl;
 143       union ieee854_long_double ex2_u, scale_u;
 144       fenv_t oldenv;
 145
 146       feholdexcept (&oldenv);
 147 #ifdef FE_TONEAREST
 148       fesetround (FE_TONEAREST);
 149 #endif
 150
 151       /* Calculate n.  */
 152       n = x * M_1_LN2 + THREEp111;
 153       n -= THREEp111;
 154       x = x - n * M_LN2_0;
 155       xl = n * M_LN2_1;
 156
 157       /* Calculate t/256.  */
 158       t = x + THREEp103;
 159       t -= THREEp103;
 160
 161       /* Compute tval1 = t.  */
 162       tval1 = (int) (t * TWO8);
 163
 164       x -= __expl_table[T_EXPL_ARG1+2*tval1];
 165       xl -= __expl_table[T_EXPL_ARG1+2*tval1+1];
 166
 167       /* Calculate t/32768.  */
 168       t = x + THREEp96;
 169       t -= THREEp96;
 170
 171       /* Compute tval2 = t.  */
 172       tval2 = (int) (t * TWO15);
 173
 174       x -= __expl_table[T_EXPL_ARG2+2*tval2];
 175       xl -= __expl_table[T_EXPL_ARG2+2*tval2+1];
 176
 177       x = x + xl;
 178
 179       /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]).  */
 180       ex2_u.d = __expl_table[T_EXPL_RES1 + tval1]
 181                 * __expl_table[T_EXPL_RES2 + tval2];
 182       n_i = (int)n;
 183       /* 'unsafe' is 1 iff n_1 != 0.  */
 184       unsafe = abs(n_i) >= 15000;
 185       ex2_u.ieee.exponent += n_i >> unsafe;
 186
 187       /* Compute scale = 2^n_1.  */
 188       scale_u.d = 1;
 189       scale_u.ieee.exponent += n_i - (n_i >> unsafe);
 190
 191       /* Approximate e^x2 - 1, using a seventh-degree polynomial,
 192          with maximum error in [-2^-16-2^-53,2^-16+2^-53]
 193          less than 4.8e-39.  */
 194       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
 195       math_force_eval (x22);
 196
 197       /* Return result.  */
 198       fesetenv (&oldenv);
 199
 200       result = x22 * ex2_u.d + ex2_u.d;
 201
 202       /* Now we can test whether the result is ultimate or if we are unsure.
 203          In the later case we should probably call a mpn based routine to give
 204          the ultimate result.
 205          Empirically, this routine is already ultimate in about 99.9986% of
 206          cases, the test below for the round to nearest case will be false
 207          in ~ 99.9963% of cases.
 208          Without proc2 routine maximum error which has been seen is
 209          0.5000262 ulp.
 210
 211           union ieee854_long_double ex3_u;
 212
 213           #ifdef FE_TONEAREST
 214             fesetround (FE_TONEAREST);
 215           #endif
 216           ex3_u.d = (result - ex2_u.d) - x22 * ex2_u.d;
 217           ex2_u.d = result;
 218           ex3_u.ieee.exponent += LDBL_MANT_DIG + 15 + IEEE854_LONG_DOUBLE_BIAS
 219                                  - ex2_u.ieee.exponent;
 220           n_i = abs (ex3_u.d);
 221           n_i = (n_i + 1) / 2;
 222           fesetenv (&oldenv);
 223           #ifdef FE_TONEAREST
 224           if (fegetround () == FE_TONEAREST)
 225             n_i -= 0x4000;
 226           #endif
 227           if (!n_i) {
 228             return __ieee754_expl_proc2 (origx);
 229           }
 230        */
 231       if (!unsafe)
 232         return result;
 233       else
 234         {
 235           result *= scale_u.d;
 236           math_check_force_underflow_nonneg (result);
 237           return result;
 238         }
 239     }
 240   /* Exceptional cases:  */
 241   else if (isless (x, himark))
 242     {
 243       if (isinf (x))
 244         /* e^-inf == 0, with no error.  */
 245         return 0;
 246       else
 247         /* Underflow */
 248         return TINY * TINY;
 249     }
 250   else
 251     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
 252     return TWO16383*x;
 253 }
 254 libm_alias_finite (__ieee754_expl, __expl)