sysdeps/ieee754/dbl-64/s_fma.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010-2019 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 #include <float.h>
  21 #include <math.h>
  22 #include <fenv.h>
  23 #include <ieee754.h>
  24 #include <math-barriers.h>
  25 #include <fenv_private.h>
  26 #include <libm-alias-double.h>
  27 #include <tininess.h>
  28
  29 /* This implementation uses rounding to odd to avoid problems with
  30    double rounding.  See a paper by Boldo and Melquiond:
  31    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
  32
  33 double
  34 __fma (double x, double y, double z)
  35 {
       /* Returns x * y + z.  Outline of the code below:
          - an unlikely slow path handles Inf/NaN/zero operands and
            products that are certain to overflow or lie far below
            DBL_TRUE_MIN, and otherwise rescales the exponent fields of
            x, y and z;
          - x * y is represented as m1 + m2 and z + m1 as a1 + a2, with
            round-to-nearest requested;
          - a2 + m2 is added with rounding toward zero requested, and the
            inexact flag is ORed into the lowest mantissa bit (round to
            odd);
          - a1 is added last, then any rescaling is undone.
          ADJUST records the rescaling: 0 means the sum is returned
          unscaled, 1 means it is multiplied by 0x1p53 at the end, and
          -1 means it is multiplied by 0x1p-108 at the end.  */
  36   union ieee754_double u, v, w;
  37   int adjust = 0;
       /* The unions give access to the sign, exponent and mantissa
          fields of x, y and z.  */
  38   u.d = x;
  39   v.d = y;
  40   w.d = z;
       /* Unlikely path: the sum of the biased exponents of x and y is
          close to the top or the bottom of the range, or one of the
          three operands has a biased exponent within DBL_MANT_DIG of
          0x7ff (which includes Inf and NaN).  */
  41   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  42                         >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
  43       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  44       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  45       || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  46       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  47                            <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
  48     {
  49       /* If z is Inf, but x and y are finite, the result should be
  50          z rather than NaN.  */
  51       if (w.ieee.exponent == 0x7ff
  52           && u.ieee.exponent != 0x7ff
  53           && v.ieee.exponent != 0x7ff)
  54         return (z + x) + y;
  55       /* If z is zero and x and y are nonzero, compute the result
  56          as x * y to avoid the wrong sign of a zero result if x * y
  57          underflows to 0.  */
  58       if (z == 0 && x != 0 && y != 0)
  59         return x * y;
  60       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
  61          x * y + z.  */
  62       if (u.ieee.exponent == 0x7ff
  63           || v.ieee.exponent == 0x7ff
  64           || w.ieee.exponent == 0x7ff
  65           || x == 0
  66           || y == 0)
  67         return x * y + z;
           /* From here on x, y and z are finite and x, y, z are all
              nonzero: every Inf/NaN or zero case has returned above.  */
  68       /* If fma will certainly overflow, compute as x * y.  */
  69       if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS)
  70         return x * y;
  71       /* If x * y is less than 1/4 of DBL_TRUE_MIN, neither the
  72          result nor whether there is underflow depends on its exact
  73          value, only on its sign.  */
  74       if (u.ieee.exponent + v.ieee.exponent
  75           < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
  76         {
               /* TINY stands in for x * y: the smallest-magnitude value
                  with the sign of the product.  */
  77           int neg = u.ieee.negative ^ v.ieee.negative;
  78           double tiny = neg ? -0x1p-1074 : 0x1p-1074;
  79           if (w.ieee.exponent >= 3)
  80             return tiny + z;
  81           /* Scaling up, adding TINY and scaling down produces the
  82              correct result, because in round-to-nearest mode adding
  83              TINY has no effect and in other modes double rounding is
  84              harmless.  But it may not produce required underflow
  85              exceptions.  */
  86           v.d = z * 0x1p54 + tiny;
               /* When the check below decides an underflow exception is
                  required, evaluate x * y purely for its side effect.  */
  87           if (TININESS_AFTER_ROUNDING
  88               ? v.ieee.exponent < 55
  89               : (w.ieee.exponent == 0
  90                  || (w.ieee.exponent == 1
  91                      && w.ieee.negative != neg
  92                      && w.ieee.mantissa1 == 0
  93                      && w.ieee.mantissa0 == 0)))
  94             {
  95               double force_underflow = x * y;
  96               math_force_eval (force_underflow);
  97             }
  98           return v.d * 0x1p-54;
  99         }
 100       if (u.ieee.exponent + v.ieee.exponent
 101           >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
 102         {
 103           /* Compute 1p-53 times smaller result and multiply
 104              at the end.  */
 105           if (u.ieee.exponent > v.ieee.exponent)
 106             u.ieee.exponent -= DBL_MANT_DIG;
 107           else
 108             v.ieee.exponent -= DBL_MANT_DIG;
 109           /* If x + y exponent is very large and z exponent is very small,
 110              it doesn't matter if we don't adjust it.  */
 111           if (w.ieee.exponent > DBL_MANT_DIG)
 112             w.ieee.exponent -= DBL_MANT_DIG;
 113           adjust = 1;
 114         }
 115       else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 116         {
 117           /* Similarly.
 118              If z exponent is very large and x and y exponents are
 119              very small, adjust them up to avoid spurious underflows,
 120              rather than down.  */
 121           if (u.ieee.exponent + v.ieee.exponent
 122               <= IEEE754_DOUBLE_BIAS + 2 * DBL_MANT_DIG)
 123             {
 124               if (u.ieee.exponent > v.ieee.exponent)
 125                 u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 126               else
 127                 v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 128             }
 129           else if (u.ieee.exponent > v.ieee.exponent)
 130             {
 131               if (u.ieee.exponent > DBL_MANT_DIG)
 132                 u.ieee.exponent -= DBL_MANT_DIG;
 133             }
 134           else if (v.ieee.exponent > DBL_MANT_DIG)
 135             v.ieee.exponent -= DBL_MANT_DIG;
 136           w.ieee.exponent -= DBL_MANT_DIG;
 137           adjust = 1;
 138         }
 139       else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 140         {
               /* Move DBL_MANT_DIG from the exponent of x to the exponent
                  of y; ADJUST stays 0 on this path.  A zero exponent
                  field in y is scaled by multiplication instead; y == 0
                  has already returned above, so y is subnormal here.  */
 141           u.ieee.exponent -= DBL_MANT_DIG;
 142           if (v.ieee.exponent)
 143             v.ieee.exponent += DBL_MANT_DIG;
 144           else
 145             v.d *= 0x1p53;
 146         }
 147       else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 148         {
               /* Mirror image of the previous branch, with the roles of
                  x and y exchanged.  */
 149           v.ieee.exponent -= DBL_MANT_DIG;
 150           if (u.ieee.exponent)
 151             u.ieee.exponent += DBL_MANT_DIG;
 152           else
 153             u.d *= 0x1p53;
 154         }
 155       else /* if (u.ieee.exponent + v.ieee.exponent
 156                   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
 157         {
               /* Raise the exponent of the larger factor by
                  2 * DBL_MANT_DIG + 2.  If z is small enough it is raised
                  by the same amount (0x1p108 when its exponent field is
                  zero) and ADJUST = -1 requests the matching 0x1p-108
                  scaling at the end.  */
 158           if (u.ieee.exponent > v.ieee.exponent)
 159             u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 160           else
 161             v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 162           if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
 163             {
 164               if (w.ieee.exponent)
 165                 w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 166               else
 167                 w.d *= 0x1p108;
 168               adjust = -1;
 169             }
 170           /* Otherwise x * y should just affect inexact
 171              and nothing else.  */
 172         }
           /* Continue with the (possibly rescaled) operands.  */
 173       x = u.d;
 174       y = v.d;
 175       z = w.d;
 176     }
 177
 178   /* Ensure correct sign of exact 0 + 0.  */
 179   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
 180     {
           /* NOTE(review): math_opt_barrier comes from <math-barriers.h>;
              presumably it keeps the compiler from simplifying or
              pre-evaluating x * y + z -- confirm.  */
 181       x = math_opt_barrier (x);
 182       return x * y + z;
 183     }
 184
       /* Save the floating-point environment in ENV; the name of the
          helper indicates that exceptions are held and rounding is set
          to nearest.  ENV is put back by one of the libc_feupdateenv*
          calls below.  */
 185   fenv_t env;
 186   libc_feholdexcept_setround (&env, FE_TONEAREST);
 187
 188   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
       /* C is the splitting constant 2^((DBL_MANT_DIG + 1) / 2) + 1.
          x1 and y1 end up as the high parts of x and y, and x2 and y2
          as the remaining low parts (x2 = x - x1, y2 = y - y1).  */
 189 #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
 190   double x1 = x * C;
 191   double y1 = y * C;
 192   double m1 = x * y;
 193   x1 = (x - x1) + x1;
 194   y1 = (y - y1) + y1;
 195   double x2 = x - x1;
 196   double y2 = y - y1;
 197   double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 198
 199   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
 200   double a1 = z + m1;
 201   double t1 = a1 - z;
 202   double t2 = a1 - t1;
 203   t1 = m1 - t1;
 204   t2 = z - t2;
 205   double a2 = t1 + t2;
 206   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
 207   math_force_eval (m2);
 208   math_force_eval (a2);
       /* Discard any inexact flag raised so far, so that the flag tested
          later reflects only the additions performed after this point.  */
 209   feclearexcept (FE_INEXACT);
 210
 211   /* If the result is an exact zero, ensure it has the correct sign.  */
 212   if (a1 == 0 && m2 == 0)
 213     {
 214       libc_feupdateenv (&env);
 215       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
 216       z = math_opt_barrier (z);
 217       return z + m1;
 218     }
 219
 220   libc_fesetround (FE_TOWARDZERO);
 221
 222   /* Perform m2 + a2 addition with round to odd.  */
 223   u.d = a2 + m2;
 224
 225   if (__glibc_unlikely (adjust < 0))
 226     {
           /* On the scaled-up path the odd bit is set here, before ENV
              is restored, so that v.d = a1 + u.d is still computed with
              rounding toward zero in effect.  */
 227       if ((u.ieee.mantissa1 & 1) == 0)
 228         u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
 229       v.d = a1 + u.d;
 230       /* Ensure the addition is not scheduled after fetestexcept call.  */
 231       math_force_eval (v.d);
 232     }
 233
 234   /* Reset rounding mode and test for inexact simultaneously.  */
 235   int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
 236
 237   if (__glibc_likely (adjust == 0))
 238     {
           /* Round to odd: set the lowest mantissa bit when the addition
              was inexact, unless u.d is Inf or NaN (exponent 0x7ff).  */
 239       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 240         u.ieee.mantissa1 |= j;
 241       /* Result is a1 + u.d.  */
 242       return a1 + u.d;
 243     }
 244   else if (__glibc_likely (adjust > 0))
 245     {
           /* Same round-to-odd step as in the unscaled case.  */
 246       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 247         u.ieee.mantissa1 |= j;
 248       /* Result is a1 + u.d, scaled up.  */
 249       return (a1 + u.d) * 0x1p53;
 250     }
 251   else
 252     {
           /* adjust < 0: the operands were scaled up, so the result must
              be scaled down by 0x1p-108.  Here v.d is a1 + u.d as
              computed above with rounding toward zero, and j tells
              whether FE_INEXACT was raised.  */
 253       /* If a1 + u.d is exact, the only rounding happens during
 254          scaling down.  */
 255       if (j == 0)
 256         return v.d * 0x1p-108;
 257       /* If result rounded to zero is not subnormal, no double
 258          rounding will occur.  */
 259       if (v.ieee.exponent > 108)
 260         return (a1 + u.d) * 0x1p-108;
 261       /* If v.d * 0x1p-108 with round to zero is a subnormal above
 262          or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
 263          down just by 1 bit, which means v.ieee.mantissa1 |= j would
 264          change the round bit, not sticky or guard bit.
 265          v.d * 0x1p-108 never normalizes by shifting up,
 266          so round bit plus sticky bit should be already enough
 267          for proper rounding.  */
 268       if (v.ieee.exponent == 108)
 269         {
 270           /* If the exponent would be in the normal range when
 271              rounding to normal precision with unbounded exponent
 272              range, the exact result is known and spurious underflows
 273              must be avoided on systems detecting tininess after
 274              rounding.  */
 275           if (TININESS_AFTER_ROUNDING)
 276             {
 277               w.d = a1 + u.d;
 278               if (w.ieee.exponent == 109)
 279                 return w.d * 0x1p-108;
 280             }
 281           /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 282              v.ieee.mantissa1 & 1 is the round bit and j is our sticky
 283              bit.  */
               /* W is built as a separate value carrying those three bits
                  with the sign of V; V has its two low mantissa bits
                  cleared.  Both are scaled down separately and then
                  summed.  */
 284           w.d = 0.0;
 285           w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
 286           w.ieee.negative = v.ieee.negative;
 287           v.ieee.mantissa1 &= ~3U;
 288           v.d *= 0x1p-108;
 289           w.d *= 0x1p-2;
 290           return v.d + w.d;
 291         }
           /* Remaining case (exponent below 108): OR the sticky bit into
              the lowest mantissa bit and scale down.  */
 292       v.ieee.mantissa1 |= j;
 293       return v.d * 0x1p-108;
 294     }
 295 }
 296 #ifndef __fma
     /* Skipped when __fma is defined as a macro.  NOTE(review):
        libm_alias_double is not defined in this file (see
        <libm-alias-double.h>); presumably it makes __fma available
        under the public name fma -- confirm against that header.  */
 297 libm_alias_double (__fma, fma)
 298 #endif