[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / s_fmal.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <math_private.h>
#include <libm-alias-ldouble.h>
#include <tininess.h>
#include <math-use-builtins.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

_Float128
__fmal (_Float128 x, _Float128 y, _Float128 z)
{
#if USE_FMAL_BUILTIN
  return __builtin_fmal (x, y, z);
#else
  union ieee854_long_double u, v, w;
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
			   - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
	 z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
	  && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
	return (z + x) + y;
      /* If z is zero and x are y are nonzero, compute the result
	 as x * y to avoid the wrong sign of a zero result if x * y
	 underflows to 0.  */
      if (z == 0 && x != 0 && y != 0)
	return x * y;
      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
	 x * y + z.  */
      if (u.ieee.exponent == 0x7fff
	  || v.ieee.exponent == 0x7fff
	  || w.ieee.exponent == 0x7fff
	  || x == 0
	  || y == 0)
	return x * y + z;
      /* If fma will certainly overflow, compute as x * y.  */
      if (u.ieee.exponent + v.ieee.exponent
	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
	return x * y;
      /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
	 result nor whether there is underflow depends on its exact
	 value, only on its sign.  */
      if (u.ieee.exponent + v.ieee.exponent
	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	{
	  int neg = u.ieee.negative ^ v.ieee.negative;
	  _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
	  if (w.ieee.exponent >= 3)
	    return tiny + z;
	  /* Scaling up, adding TINY and scaling down produces the
	     correct result, because in round-to-nearest mode adding
	     TINY has no effect and in other modes double rounding is
	     harmless.  But it may not produce required underflow
	     exceptions.  */
	  v.d = z * L(0x1p114) + tiny;
	  if (TININESS_AFTER_ROUNDING
	      ? v.ieee.exponent < 115
	      : (w.ieee.exponent == 0
		 || (w.ieee.exponent == 1
		     && w.ieee.negative != neg
		     && w.ieee.mantissa3 == 0
		     && w.ieee.mantissa2 == 0
		     && w.ieee.mantissa1 == 0
		     && w.ieee.mantissa0 == 0)))
	    {
	      _Float128 force_underflow = x * y;
	      math_force_eval (force_underflow);
	    }
	  return v.d * L(0x1p-114);
	}
      if (u.ieee.exponent + v.ieee.exponent
	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	{
	  /* Compute 1p-113 times smaller result and multiply
	     at the end.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent -= LDBL_MANT_DIG;
	  else
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  /* If x + y exponent is very large and z exponent is very small,
	     it doesn't matter if we don't adjust it.  */
	  if (w.ieee.exponent > LDBL_MANT_DIG)
	    w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  /* Similarly.
	     If z exponent is very large and x and y exponents are
	     very small, adjust them up to avoid spurious underflows,
	     rather than down.  */
	  if (u.ieee.exponent + v.ieee.exponent
	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
	    {
	      if (u.ieee.exponent > v.ieee.exponent)
		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	      else
		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	    }
	  else if (u.ieee.exponent > v.ieee.exponent)
	    {
	      if (u.ieee.exponent > LDBL_MANT_DIG)
		u.ieee.exponent -= LDBL_MANT_DIG;
	    }
	  else if (v.ieee.exponent > LDBL_MANT_DIG)
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  u.ieee.exponent -= LDBL_MANT_DIG;
	  if (v.ieee.exponent)
	    v.ieee.exponent += LDBL_MANT_DIG;
	  else
	    v.d *= L(0x1p113);
	}
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  v.ieee.exponent -= LDBL_MANT_DIG;
	  if (u.ieee.exponent)
	    u.ieee.exponent += LDBL_MANT_DIG;
	  else
	    u.d *= L(0x1p113);
	}
      else /* if (u.ieee.exponent + v.ieee.exponent
		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	{
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	  else
	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
	    {
	      if (w.ieee.exponent)
		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	      else
		w.d *= L(0x1p228);
	      adjust = -1;
	    }
	  /* Otherwise x * y should just affect inexact
	     and nothing else.  */
	}
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      x = math_opt_barrier (x);
      return x * y + z;
    }

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
  _Float128 x1 = x * C;
  _Float128 y1 = y * C;
  _Float128 m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  _Float128 x2 = x - x1;
  _Float128 y2 = y - y1;
  _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  _Float128 a1 = z + m1;
  _Float128 t1 = a1 - z;
  _Float128 t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  _Float128 a2 = t1 + t2;
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      feupdateenv (&env);
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__glibc_likely (adjust == 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__glibc_likely (adjust > 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * L(0x1p113);
    }
  else
    {
      if ((u.ieee.mantissa3 & 1) == 0)
	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      math_force_eval (v.d);
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Ensure the following computations are performed in default rounding
	 mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.d is exact, the only rounding happens during
	 scaling down.  */
      if (j == 0)
	return v.d * L(0x1p-228);
      /* If result rounded to zero is not subnormal, no double
	 rounding will occur.  */
      if (v.ieee.exponent > 228)
	return (a1 + u.d) * L(0x1p-228);
      /* If v.d * 0x1p-228L with round to zero is a subnormal above
	 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
	 down just by 1 bit, which means v.ieee.mantissa3 |= j would
	 change the round bit, not sticky or guard bit.
	 v.d * 0x1p-228L never normalizes by shifting up,
	 so round bit plus sticky bit should be already enough
	 for proper rounding.  */
      if (v.ieee.exponent == 228)
	{
	  /* If the exponent would be in the normal range when
	     rounding to normal precision with unbounded exponent
	     range, the exact result is known and spurious underflows
	     must be avoided on systems detecting tininess after
	     rounding.  */
	  if (TININESS_AFTER_ROUNDING)
	    {
	      w.d = a1 + u.d;
	      if (w.ieee.exponent == 229)
		return w.d * L(0x1p-228);
	    }
	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
	     bit.  */
	  w.d = 0;
	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
	  w.ieee.negative = v.ieee.negative;
	  v.ieee.mantissa3 &= ~3U;
	  v.d *= L(0x1p-228);
	  w.d *= L(0x1p-2);
	  return v.d + w.d;
	}
      v.ieee.mantissa3 |= j;
      return v.d * L(0x1p-228);
    }
#endif /* ! USE_FMAL_BUILTIN  */
}
libm_alias_ldouble (__fma, fma)
Commit	Line	Data
3e692e05	1	/* Compute x * y + z as ternary operation.
2b778ceb	2	Copyright (C) 2010-2021 Free Software Foundation, Inc.
3e692e05 JJ	3	This file is part of the GNU C Library.
	4	Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
59ba27a6	17	License along with the GNU C Library; if not, see
5a82c748	18	<https://www.gnu.org/licenses/>. */
3e692e05 JJ	19
	20	#include <float.h>
	21	#include <math.h>
	22	#include <fenv.h>
	23	#include <ieee754.h>
b4d5b8b0	24	#include <math-barriers.h>
4842e4fe	25	#include <math_private.h>
fd3b4e7c	26	#include <libm-alias-ldouble.h>
ef82f4da	27	#include <tininess.h>
628d90c5	28	#include <math-use-builtins.h>
3e692e05 JJ	29
	30	/* This implementation uses rounding to odd to avoid problems with
	31	double rounding. See a paper by Boldo and Melquiond:
	32	http://www.lri.fr/~melquion/doc/08-tc.pdf */
	33
15089e04 PM	34	_Float128
15089e04 PM	35	__fmal (_Float128 x, _Float128 y, _Float128 z)
3e692e05	36	{
628d90c5 VG	37	#if USE_FMAL_BUILTIN
	38	return __builtin_fmal (x, y, z);
	39	#else
3e692e05 JJ	40	union ieee854_long_double u, v, w;
	41	int adjust = 0;
	42	u.d = x;
	43	v.d = y;
	44	w.d = z;
	45	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
	46	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
	47	- LDBL_MANT_DIG, 0)
	48	\|\| __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	49	\|\| __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	50	\|\| __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	51	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
	52	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
	53	{
	54	/* If z is Inf, but x and y are finite, the result should be
	55	z rather than NaN. */
	56	if (w.ieee.exponent == 0x7fff
	57	&& u.ieee.exponent != 0x7fff
	58	&& v.ieee.exponent != 0x7fff)
	59	return (z + x) + y;
bec749fd JM	60	/* If z is zero and x are y are nonzero, compute the result
	61	as x * y to avoid the wrong sign of a zero result if x * y
	62	underflows to 0. */
	63	if (z == 0 && x != 0 && y != 0)
	64	return x * y;
a0c2940d JM	65	/* If x or y or z is Inf/NaN, or if x * y is zero, compute as
a0c2940d JM	66	x * y + z. */
3e692e05 JJ	67	if (u.ieee.exponent == 0x7fff
	68	\|\| v.ieee.exponent == 0x7fff
	69	\|\| w.ieee.exponent == 0x7fff
473611b2 JM	70	\|\| x == 0
473611b2 JM	71	\|\| y == 0)
3e692e05	72	return x * y + z;
a0c2940d JM	73	/* If fma will certainly overflow, compute as x * y. */
	74	if (u.ieee.exponent + v.ieee.exponent
	75	> 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
	76	return x * y;
1f4dafa3	77	/* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
473611b2 JM	78	result nor whether there is underflow depends on its exact
	79	value, only on its sign. */
	80	if (u.ieee.exponent + v.ieee.exponent
	81	< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	82	{
	83	int neg = u.ieee.negative ^ v.ieee.negative;
02bbfb41	84	_Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
473611b2 JM	85	if (w.ieee.exponent >= 3)
	86	return tiny + z;
	87	/* Scaling up, adding TINY and scaling down produces the
	88	correct result, because in round-to-nearest mode adding
	89	TINY has no effect and in other modes double rounding is
	90	harmless. But it may not produce required underflow
	91	exceptions. */
02bbfb41	92	v.d = z * L(0x1p114) + tiny;
473611b2 JM	93	if (TININESS_AFTER_ROUNDING
	94	? v.ieee.exponent < 115
	95	: (w.ieee.exponent == 0
	96	\|\| (w.ieee.exponent == 1
	97	&& w.ieee.negative != neg
	98	&& w.ieee.mantissa3 == 0
	99	&& w.ieee.mantissa2 == 0
	100	&& w.ieee.mantissa1 == 0
	101	&& w.ieee.mantissa0 == 0)))
	102	{
15089e04	103	_Float128 force_underflow = x * y;
d96164c3	104	math_force_eval (force_underflow);
473611b2	105	}
02bbfb41	106	return v.d * L(0x1p-114);
473611b2	107	}
3e692e05 JJ	108	if (u.ieee.exponent + v.ieee.exponent
	109	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	110	{
	111	/* Compute 1p-113 times smaller result and multiply
	112	at the end. */
	113	if (u.ieee.exponent > v.ieee.exponent)
	114	u.ieee.exponent -= LDBL_MANT_DIG;
	115	else
	116	v.ieee.exponent -= LDBL_MANT_DIG;
	117	/* If x + y exponent is very large and z exponent is very small,
	118	it doesn't matter if we don't adjust it. */
	119	if (w.ieee.exponent > LDBL_MANT_DIG)
	120	w.ieee.exponent -= LDBL_MANT_DIG;
	121	adjust = 1;
	122	}
	123	else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	124	{
	125	/* Similarly.
	126	If z exponent is very large and x and y exponents are
82477c28 JM	127	very small, adjust them up to avoid spurious underflows,
	128	rather than down. */
	129	if (u.ieee.exponent + v.ieee.exponent
739babd7	130	<= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
82477c28 JM	131	{
	132	if (u.ieee.exponent > v.ieee.exponent)
	133	u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	134	else
	135	v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
	136	}
	137	else if (u.ieee.exponent > v.ieee.exponent)
3e692e05 JJ	138	{
	139	if (u.ieee.exponent > LDBL_MANT_DIG)
	140	u.ieee.exponent -= LDBL_MANT_DIG;
	141	}
	142	else if (v.ieee.exponent > LDBL_MANT_DIG)
	143	v.ieee.exponent -= LDBL_MANT_DIG;
	144	w.ieee.exponent -= LDBL_MANT_DIG;
	145	adjust = 1;
	146	}
	147	else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	148	{
	149	u.ieee.exponent -= LDBL_MANT_DIG;
	150	if (v.ieee.exponent)
	151	v.ieee.exponent += LDBL_MANT_DIG;
	152	else
02bbfb41	153	v.d *= L(0x1p113);
3e692e05 JJ	154	}
	155	else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	156	{
	157	v.ieee.exponent -= LDBL_MANT_DIG;
	158	if (u.ieee.exponent)
	159	u.ieee.exponent += LDBL_MANT_DIG;
	160	else
02bbfb41	161	u.d *= L(0x1p113);
3e692e05 JJ	162	}
	163	else /* if (u.ieee.exponent + v.ieee.exponent
	164	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	165	{
	166	if (u.ieee.exponent > v.ieee.exponent)
82477c28	167	u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05	168	else
82477c28 JM	169	v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
82477c28 JM	170	if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
3e692e05 JJ	171	{
3e692e05 JJ	172	if (w.ieee.exponent)
82477c28	173	w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05	174	else
02bbfb41	175	w.d *= L(0x1p228);
3e692e05 JJ	176	adjust = -1;
	177	}
	178	/* Otherwise x * y should just affect inexact
	179	and nothing else. */
	180	}
	181	x = u.d;
	182	y = v.d;
	183	z = w.d;
	184	}
8ec5b013 JM	185
8ec5b013 JM	186	/* Ensure correct sign of exact 0 + 0. */
a1ffb40e	187	if (__glibc_unlikely ((x == 0 \|\| y == 0) && z == 0))
09245377 L	188	{
	189	x = math_opt_barrier (x);
	190	return x * y + z;
	191	}
8ec5b013	192
5b5b04d6 JM	193	fenv_t env;
	194	feholdexcept (&env);
	195	fesetround (FE_TONEAREST);
	196
3e692e05 JJ	197	/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
3e692e05 JJ	198	#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
15089e04 PM	199	_Float128 x1 = x * C;
	200	_Float128 y1 = y * C;
	201	_Float128 m1 = x * y;
3e692e05 JJ	202	x1 = (x - x1) + x1;
3e692e05 JJ	203	y1 = (y - y1) + y1;
15089e04 PM	204	_Float128 x2 = x - x1;
	205	_Float128 y2 = y - y1;
	206	_Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
3e692e05 JJ	207
3e692e05 JJ	208	/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
15089e04 PM	209	_Float128 a1 = z + m1;
	210	_Float128 t1 = a1 - z;
	211	_Float128 t2 = a1 - t1;
3e692e05 JJ	212	t1 = m1 - t1;
3e692e05 JJ	213	t2 = z - t2;
15089e04	214	_Float128 a2 = t1 + t2;
4896f049 RH	215	/* Ensure the arithmetic is not scheduled after feclearexcept call. */
	216	math_force_eval (m2);
	217	math_force_eval (a2);
5b5b04d6 JM	218	feclearexcept (FE_INEXACT);
5b5b04d6 JM	219
4896f049	220	/* If the result is an exact zero, ensure it has the correct sign. */
5b5b04d6 JM	221	if (a1 == 0 && m2 == 0)
	222	{
	223	feupdateenv (&env);
4896f049 RH	224	/* Ensure that round-to-nearest value of z + m1 is not reused. */
4896f049 RH	225	z = math_opt_barrier (z);
5b5b04d6 JM	226	return z + m1;
5b5b04d6 JM	227	}
3e692e05	228
3e692e05 JJ	229	fesetround (FE_TOWARDZERO);
	230	/* Perform m2 + a2 addition with round to odd. */
	231	u.d = a2 + m2;
	232
a1ffb40e	233	if (__glibc_likely (adjust == 0))
3e692e05 JJ	234	{
	235	if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	236	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	237	feupdateenv (&env);
	238	/* Result is a1 + u.d. */
	239	return a1 + u.d;
	240	}
a1ffb40e	241	else if (__glibc_likely (adjust > 0))
3e692e05 JJ	242	{
	243	if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
	244	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	245	feupdateenv (&env);
	246	/* Result is a1 + u.d, scaled up. */
02bbfb41	247	return (a1 + u.d) * L(0x1p113);
3e692e05 JJ	248	}
	249	else
	250	{
	251	if ((u.ieee.mantissa3 & 1) == 0)
	252	u.ieee.mantissa3 \|= fetestexcept (FE_INEXACT) != 0;
	253	v.d = a1 + u.d;
7c08a05c	254	/* Ensure the addition is not scheduled after fetestexcept call. */
4842e4fe	255	math_force_eval (v.d);
3e692e05 JJ	256	int j = fetestexcept (FE_INEXACT) != 0;
	257	feupdateenv (&env);
	258	/* Ensure the following computations are performed in default rounding
	259	mode instead of just reusing the round to zero computation. */
	260	asm volatile ("" : "=m" (u) : "m" (u));
	261	/* If a1 + u.d is exact, the only rounding happens during
	262	scaling down. */
	263	if (j == 0)
02bbfb41	264	return v.d * L(0x1p-228);
3e692e05 JJ	265	/* If result rounded to zero is not subnormal, no double
3e692e05 JJ	266	rounding will occur. */
82477c28	267	if (v.ieee.exponent > 228)
02bbfb41	268	return (a1 + u.d) * L(0x1p-228);
82477c28 JM	269	/* If v.d * 0x1p-228L with round to zero is a subnormal above
82477c28 JM	270	or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
3e692e05 JJ	271	down just by 1 bit, which means v.ieee.mantissa3 \|= j would
3e692e05 JJ	272	change the round bit, not sticky or guard bit.
82477c28	273	v.d * 0x1p-228L never normalizes by shifting up,
3e692e05 JJ	274	so round bit plus sticky bit should be already enough
3e692e05 JJ	275	for proper rounding. */
82477c28	276	if (v.ieee.exponent == 228)
3e692e05	277	{
ef82f4da JM	278	/* If the exponent would be in the normal range when
	279	rounding to normal precision with unbounded exponent
	280	range, the exact result is known and spurious underflows
	281	must be avoided on systems detecting tininess after
	282	rounding. */
	283	if (TININESS_AFTER_ROUNDING)
	284	{
	285	w.d = a1 + u.d;
82477c28	286	if (w.ieee.exponent == 229)
02bbfb41	287	return w.d * L(0x1p-228);
ef82f4da	288	}
3e692e05 JJ	289	/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
3e692e05 JJ	290	v.ieee.mantissa3 & 1 is the round bit and j is our sticky
8627a232	291	bit. */
02bbfb41	292	w.d = 0;
8627a232 JM	293	w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) \| j;
	294	w.ieee.negative = v.ieee.negative;
	295	v.ieee.mantissa3 &= ~3U;
02bbfb41 PM	296	v.d *= L(0x1p-228);
02bbfb41 PM	297	w.d *= L(0x1p-2);
8627a232	298	return v.d + w.d;
3e692e05 JJ	299	}
3e692e05 JJ	300	v.ieee.mantissa3 \|= j;
02bbfb41	301	return v.d * L(0x1p-228);
3e692e05	302	}
628d90c5	303	#endif /* ! USE_FMAL_BUILTIN */
3e692e05	304	}
fd3b4e7c	305	libm_alias_ldouble (__fma, fma)