[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / s_fmal.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#define NO_MATH_REDIRECT
#include <float.h>
#define f64xfmaf128 __hide_f64xfmaf128
#include <math.h>
#undef f64xfmaf128
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <math_private.h>
#include <libm-alias-ldouble.h>
#include <math-narrow-alias.h>
#include <tininess.h>
#include <math-use-builtins.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

_Float128
__fmal (_Float128 x, _Float128 y, _Float128 z)
{
#if USE_FMAL_BUILTIN
  return __builtin_fmal (x, y, z);
#else
  union ieee854_long_double u, v, w;
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
			   - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
	 z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
	  && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
	return (z + x) + y;
      /* If z is zero and x are y are nonzero, compute the result
	 as x * y to avoid the wrong sign of a zero result if x * y
	 underflows to 0.  */
      if (z == 0 && x != 0 && y != 0)
	return x * y;
      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
	 x * y + z.  */
      if (u.ieee.exponent == 0x7fff
	  || v.ieee.exponent == 0x7fff
	  || w.ieee.exponent == 0x7fff
	  || x == 0
	  || y == 0)
	return x * y + z;
      /* If fma will certainly overflow, compute as x * y.  */
      if (u.ieee.exponent + v.ieee.exponent
	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
	return x * y;
      /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
	 result nor whether there is underflow depends on its exact
	 value, only on its sign.  */
      if (u.ieee.exponent + v.ieee.exponent
	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	{
	  int neg = u.ieee.negative ^ v.ieee.negative;
	  _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
	  if (w.ieee.exponent >= 3)
	    return tiny + z;
	  /* Scaling up, adding TINY and scaling down produces the
	     correct result, because in round-to-nearest mode adding
	     TINY has no effect and in other modes double rounding is
	     harmless.  But it may not produce required underflow
	     exceptions.  */
	  v.d = z * L(0x1p114) + tiny;
	  if (TININESS_AFTER_ROUNDING
	      ? v.ieee.exponent < 115
	      : (w.ieee.exponent == 0
		 || (w.ieee.exponent == 1
		     && w.ieee.negative != neg
		     && w.ieee.mantissa3 == 0
		     && w.ieee.mantissa2 == 0
		     && w.ieee.mantissa1 == 0
		     && w.ieee.mantissa0 == 0)))
	    {
	      _Float128 force_underflow = x * y;
	      math_force_eval (force_underflow);
	    }
	  return v.d * L(0x1p-114);
	}
      if (u.ieee.exponent + v.ieee.exponent
	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	{
	  /* Compute 1p-113 times smaller result and multiply
	     at the end.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent -= LDBL_MANT_DIG;
	  else
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  /* If x + y exponent is very large and z exponent is very small,
	     it doesn't matter if we don't adjust it.  */
	  if (w.ieee.exponent > LDBL_MANT_DIG)
	    w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  /* Similarly.
	     If z exponent is very large and x and y exponents are
	     very small, adjust them up to avoid spurious underflows,
	     rather than down.  */
	  if (u.ieee.exponent + v.ieee.exponent
	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
	    {
	      if (u.ieee.exponent > v.ieee.exponent)
		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	      else
		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	    }
	  else if (u.ieee.exponent > v.ieee.exponent)
	    {
	      if (u.ieee.exponent > LDBL_MANT_DIG)
		u.ieee.exponent -= LDBL_MANT_DIG;
	    }
	  else if (v.ieee.exponent > LDBL_MANT_DIG)
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  u.ieee.exponent -= LDBL_MANT_DIG;
	  if (v.ieee.exponent)
	    v.ieee.exponent += LDBL_MANT_DIG;
	  else
	    v.d *= L(0x1p113);
	}
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  v.ieee.exponent -= LDBL_MANT_DIG;
	  if (u.ieee.exponent)
	    u.ieee.exponent += LDBL_MANT_DIG;
	  else
	    u.d *= L(0x1p113);
	}
      else /* if (u.ieee.exponent + v.ieee.exponent
		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	{
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	  else
	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
	    {
	      if (w.ieee.exponent)
		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	      else
		w.d *= L(0x1p228);
	      adjust = -1;
	    }
	  /* Otherwise x * y should just affect inexact
	     and nothing else.  */
	}
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      x = math_opt_barrier (x);
      return x * y + z;
    }

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
  _Float128 x1 = x * C;
  _Float128 y1 = y * C;
  _Float128 m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  _Float128 x2 = x - x1;
  _Float128 y2 = y - y1;
  _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  _Float128 a1 = z + m1;
  _Float128 t1 = a1 - z;
  _Float128 t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  _Float128 a2 = t1 + t2;
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      feupdateenv (&env);
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__glibc_likely (adjust == 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__glibc_likely (adjust > 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * L(0x1p113);
    }
  else
    {
      if ((u.ieee.mantissa3 & 1) == 0)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      math_force_eval (v.d);
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Ensure the following computations are performed in default rounding
	 mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.d is exact, the only rounding happens during
	 scaling down.  */
      if (j == 0)
	return v.d * L(0x1p-228);
      /* If result rounded to zero is not subnormal, no double
	 rounding will occur.  */
      if (v.ieee.exponent > 228)
	return (a1 + u.d) * L(0x1p-228);
      /* If v.d * 0x1p-228L with round to zero is a subnormal above
	 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
	 down just by 1 bit, which means v.ieee.mantissa3 |= j would
	 change the round bit, not sticky or guard bit.
	 v.d * 0x1p-228L never normalizes by shifting up,
	 so round bit plus sticky bit should be already enough
	 for proper rounding.  */
      if (v.ieee.exponent == 228)
	{
	  /* If the exponent would be in the normal range when
	     rounding to normal precision with unbounded exponent
	     range, the exact result is known and spurious underflows
	     must be avoided on systems detecting tininess after
	     rounding.  */
	  if (TININESS_AFTER_ROUNDING)
	    {
	      w.d = a1 + u.d;
	      if (w.ieee.exponent == 229)
		return w.d * L(0x1p-228);
	    }
	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
	     bit.  */
	  w.d = 0;
	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
	  w.ieee.negative = v.ieee.negative;
	  v.ieee.mantissa3 &= ~3U;
	  v.d *= L(0x1p-228);
	  w.d *= L(0x1p-2);
	  return v.d + w.d;
	}
      v.ieee.mantissa3 |= j;
      return v.d * L(0x1p-228);
    }
#endif /* ! USE_FMAL_BUILTIN  */
}
libm_alias_ldouble (__fma, fma)
libm_alias_ldouble_narrow (__fma, fma)
Commit	Line	Data
3e692e05	1	/* Compute x * y + z as ternary operation.
dff8da6b	2	Copyright (C) 2010-2024 Free Software Foundation, Inc.
3e692e05	3	This file is part of the GNU C Library.
3e692e05 JJ	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
59ba27a6	16	License along with the GNU C Library; if not, see
5a82c748	17	<https://www.gnu.org/licenses/>. */
3e692e05	18
4b6574a6	19	#define NO_MATH_REDIRECT
3e692e05	20	#include <float.h>
b3f27d81	21	#define f64xfmaf128 __hide_f64xfmaf128
3e692e05	22	#include <math.h>
b3f27d81	23	#undef f64xfmaf128
3e692e05 JJ	24	#include <fenv.h>
3e692e05 JJ	25	#include <ieee754.h>
b4d5b8b0	26	#include <math-barriers.h>
4842e4fe	27	#include <math_private.h>
fd3b4e7c	28	#include <libm-alias-ldouble.h>
b3f27d81	29	#include <math-narrow-alias.h>
ef82f4da	30	#include <tininess.h>
628d90c5	31	#include <math-use-builtins.h>
3e692e05 JJ	32
	33	/* This implementation uses rounding to odd to avoid problems with
	34	double rounding. See a paper by Boldo and Melquiond:
	35	http://www.lri.fr/~melquion/doc/08-tc.pdf */
	36
15089e04 PM	37	_Float128
15089e04 PM	38	__fmal (_Float128 x, _Float128 y, _Float128 z)
3e692e05	39	{
628d90c5 VG	40	#if USE_FMAL_BUILTIN
	41	return __builtin_fmal (x, y, z);
	42	#else
3e692e05 JJ	43	union ieee854_long_double u, v, w;
	44	int adjust = 0;
	45	u.d = x;
	46	v.d = y;
	47	w.d = z;
	48	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
	49	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
	50	- LDBL_MANT_DIG, 0)
	51	\|\| __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	52	\|\| __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	53	\|\| __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	54	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
	55	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
	56	{
	57	/* If z is Inf, but x and y are finite, the result should be
	58	z rather than NaN. */
	59	if (w.ieee.exponent == 0x7fff
	60	&& u.ieee.exponent != 0x7fff
	61	&& v.ieee.exponent != 0x7fff)
	62	return (z + x) + y;
bec749fd JM	63	/* If z is zero and x are y are nonzero, compute the result
	64	as x * y to avoid the wrong sign of a zero result if x * y
	65	underflows to 0. */
	66	if (z == 0 && x != 0 && y != 0)
	67	return x * y;
a0c2940d JM	68	/* If x or y or z is Inf/NaN, or if x * y is zero, compute as
a0c2940d JM	69	x * y + z. */
3e692e05 JJ	70	if (u.ieee.exponent == 0x7fff
	71	\|\| v.ieee.exponent == 0x7fff
	72	\|\| w.ieee.exponent == 0x7fff
473611b2 JM	73	\|\| x == 0
473611b2 JM	74	\|\| y == 0)
3e692e05	75	return x * y + z;
a0c2940d JM	76	/* If fma will certainly overflow, compute as x * y. */
	77	if (u.ieee.exponent + v.ieee.exponent
	78	> 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
	79	return x * y;
1f4dafa3	80	/* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
473611b2 JM	81	result nor whether there is underflow depends on its exact
	82	value, only on its sign. */
	83	if (u.ieee.exponent + v.ieee.exponent
	84	< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	85	{
	86	int neg = u.ieee.negative ^ v.ieee.negative;
02bbfb41	87	_Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
473611b2 JM	88	if (w.ieee.exponent >= 3)
	89	return tiny + z;
	90	/* Scaling up, adding TINY and scaling down produces the
	91	correct result, because in round-to-nearest mode adding
	92	TINY has no effect and in other modes double rounding is
	93	harmless. But it may not produce required underflow
	94	exceptions. */
02bbfb41	95	v.d = z * L(0x1p114) + tiny;
473611b2 JM	96	if (TININESS_AFTER_ROUNDING
	97	? v.ieee.exponent < 115
	98	: (w.ieee.exponent == 0
	99	\|\| (w.ieee.exponent == 1
	100	&& w.ieee.negative != neg
	101	&& w.ieee.mantissa3 == 0
	102	&& w.ieee.mantissa2 == 0
	103	&& w.ieee.mantissa1 == 0
	104	&& w.ieee.mantissa0 == 0)))
	105	{
15089e04	106	_Float128 force_underflow = x * y;
d96164c3	107	math_force_eval (force_underflow);
473611b2	108	}
02bbfb41	109	return v.d * L(0x1p-114);
473611b2	110	}
3e692e05 JJ	111	if (u.ieee.exponent + v.ieee.exponent
	112	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	113	{
	114	/* Compute 1p-113 times smaller result and multiply
	115	at the end. */
	116	if (u.ieee.exponent > v.ieee.exponent)
	117	u.ieee.exponent -= LDBL_MANT_DIG;
	118	else
	119	v.ieee.exponent -= LDBL_MANT_DIG;
	120	/* If x + y exponent is very large and z exponent is very small,
	121	it doesn't matter if we don't adjust it. */
	122	if (w.ieee.exponent > LDBL_MANT_DIG)
	123	w.ieee.exponent -= LDBL_MANT_DIG;
	124	adjust = 1;
	125	}
	126	else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	127	{
	128	/* Similarly.
	129	If z exponent is very large and x and y exponents are
82477c28 JM	130	very small, adjust them up to avoid spurious underflows,
	131	rather than down. */
	132	if (u.ieee.exponent + v.ieee.exponent
739babd7	133	<= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
82477c28 JM	134	{
	135	if (u.ieee.exponent > v.ieee.exponent)
	136	u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	137	else
	138	v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	139	}
	140	else if (u.ieee.exponent > v.ieee.exponent)
3e692e05 JJ	141	{
	142	if (u.ieee.exponent > LDBL_MANT_DIG)
	143	u.ieee.exponent -= LDBL_MANT_DIG;
	144	}
	145	else if (v.ieee.exponent > LDBL_MANT_DIG)
	146	v.ieee.exponent -= LDBL_MANT_DIG;
	147	w.ieee.exponent -= LDBL_MANT_DIG;
	148	adjust = 1;
	149	}
	150	else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	151	{
	152	u.ieee.exponent -= LDBL_MANT_DIG;
	153	if (v.ieee.exponent)
	154	v.ieee.exponent += LDBL_MANT_DIG;
	155	else
02bbfb41	156	v.d *= L(0x1p113);
3e692e05 JJ	157	}
	158	else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	159	{
	160	v.ieee.exponent -= LDBL_MANT_DIG;
	161	if (u.ieee.exponent)
	162	u.ieee.exponent += LDBL_MANT_DIG;
	163	else
02bbfb41	164	u.d *= L(0x1p113);
3e692e05 JJ	165	}
	166	else /* if (u.ieee.exponent + v.ieee.exponent
	167	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	168	{
	169	if (u.ieee.exponent > v.ieee.exponent)
82477c28	170	u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05	171	else
82477c28 JM	172	v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
82477c28 JM	173	if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
3e692e05 JJ	174	{
3e692e05 JJ	175	if (w.ieee.exponent)
82477c28	176	w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05	177	else
02bbfb41	178	w.d *= L(0x1p228);
3e692e05 JJ	179	adjust = -1;
	180	}
	181	/* Otherwise x * y should just affect inexact
	182	and nothing else. */
	183	}
	184	x = u.d;
	185	y = v.d;
	186	z = w.d;
	187	}
8ec5b013 JM	188
8ec5b013 JM	189	/* Ensure correct sign of exact 0 + 0. */
a1ffb40e	190	if (__glibc_unlikely ((x == 0 \|\| y == 0) && z == 0))
09245377 L	191	{
	192	x = math_opt_barrier (x);
	193	return x * y + z;
	194	}
8ec5b013	195
5b5b04d6 JM	196	fenv_t env;
	197	feholdexcept (&env);
	198	fesetround (FE_TONEAREST);
	199
3e692e05 JJ	200	/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
3e692e05 JJ	201	#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
15089e04 PM	202	_Float128 x1 = x * C;
	203	_Float128 y1 = y * C;
	204	_Float128 m1 = x * y;
3e692e05 JJ	205	x1 = (x - x1) + x1;
3e692e05 JJ	206	y1 = (y - y1) + y1;
15089e04 PM	207	_Float128 x2 = x - x1;
	208	_Float128 y2 = y - y1;
	209	_Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
3e692e05 JJ	210
3e692e05 JJ	211	/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
15089e04 PM	212	_Float128 a1 = z + m1;
	213	_Float128 t1 = a1 - z;
	214	_Float128 t2 = a1 - t1;
3e692e05 JJ	215	t1 = m1 - t1;
3e692e05 JJ	216	t2 = z - t2;
15089e04	217	_Float128 a2 = t1 + t2;
4896f049 RH	218	/* Ensure the arithmetic is not scheduled after feclearexcept call. */
	219	math_force_eval (m2);
	220	math_force_eval (a2);
5b5b04d6 JM	221	feclearexcept (FE_INEXACT);
5b5b04d6 JM	222
4896f049	223	/* If the result is an exact zero, ensure it has the correct sign. */
5b5b04d6 JM	224	if (a1 == 0 && m2 == 0)
	225	{
	226	feupdateenv (&env);
4896f049 RH	227	/* Ensure that round-to-nearest value of z + m1 is not reused. */
4896f049 RH	228	z = math_opt_barrier (z);
5b5b04d6 JM	229	return z + m1;
5b5b04d6 JM	230	}
3e692e05	231
3e692e05 JJ	232	fesetround (FE_TOWARDZERO);
	233	/* Perform m2 + a2 addition with round to odd. */
	234	u.d = a2 + m2;
	235
a1ffb40e	236	if (__glibc_likely (adjust == 0))
3e692e05 JJ	237	{
	238	if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	239	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	240	feupdateenv (&env);
	241	/* Result is a1 + u.d. */
	242	return a1 + u.d;
	243	}
a1ffb40e	244	else if (__glibc_likely (adjust > 0))
3e692e05 JJ	245	{
	246	if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	247	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	248	feupdateenv (&env);
	249	/* Result is a1 + u.d, scaled up. */
02bbfb41	250	return (a1 + u.d) * L(0x1p113);
3e692e05 JJ	251	}
	252	else
	253	{
	254	if ((u.ieee.mantissa3 & 1) == 0)
	255	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	256	v.d = a1 + u.d;
7c08a05c	257	/* Ensure the addition is not scheduled after fetestexcept call. */
4842e4fe	258	math_force_eval (v.d);
3e692e05 JJ	259	int j = fetestexcept (FE_INEXACT) != 0;
	260	feupdateenv (&env);
	261	/* Ensure the following computations are performed in default rounding
	262	mode instead of just reusing the round to zero computation. */
	263	asm volatile ("" : "=m" (u) : "m" (u));
	264	/* If a1 + u.d is exact, the only rounding happens during
	265	scaling down. */
	266	if (j == 0)
02bbfb41	267	return v.d * L(0x1p-228);
3e692e05 JJ	268	/* If result rounded to zero is not subnormal, no double
3e692e05 JJ	269	rounding will occur. */
82477c28	270	if (v.ieee.exponent > 228)
02bbfb41	271	return (a1 + u.d) * L(0x1p-228);
82477c28 JM	272	/* If v.d * 0x1p-228L with round to zero is a subnormal above
82477c28 JM	273	or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
3e692e05 JJ	274	down just by 1 bit, which means v.ieee.mantissa3 \|= j would
3e692e05 JJ	275	change the round bit, not sticky or guard bit.
82477c28	276	v.d * 0x1p-228L never normalizes by shifting up,
3e692e05 JJ	277	so round bit plus sticky bit should be already enough
3e692e05 JJ	278	for proper rounding. */
82477c28	279	if (v.ieee.exponent == 228)
3e692e05	280	{
ef82f4da JM	281	/* If the exponent would be in the normal range when
	282	rounding to normal precision with unbounded exponent
	283	range, the exact result is known and spurious underflows
	284	must be avoided on systems detecting tininess after
	285	rounding. */
	286	if (TININESS_AFTER_ROUNDING)
	287	{
	288	w.d = a1 + u.d;
82477c28	289	if (w.ieee.exponent == 229)
02bbfb41	290	return w.d * L(0x1p-228);
ef82f4da	291	}
3e692e05 JJ	292	/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
3e692e05 JJ	293	v.ieee.mantissa3 & 1 is the round bit and j is our sticky
8627a232	294	bit. */
02bbfb41	295	w.d = 0;
8627a232 JM	296	w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) \| j;
	297	w.ieee.negative = v.ieee.negative;
	298	v.ieee.mantissa3 &= ~3U;
02bbfb41 PM	299	v.d *= L(0x1p-228);
02bbfb41 PM	300	w.d *= L(0x1p-2);
8627a232	301	return v.d + w.d;
3e692e05 JJ	302	}
3e692e05 JJ	303	v.ieee.mantissa3 \|= j;
02bbfb41	304	return v.d * L(0x1p-228);
3e692e05	305	}
628d90c5	306	#endif /* ! USE_FMAL_BUILTIN */
3e692e05	307	}
fd3b4e7c	308	libm_alias_ldouble (__fma, fma)
b3f27d81	309	libm_alias_ldouble_narrow (__fma, fma)