[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / s_fma.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math_private.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

double
__fma (double x, double y, double z)
{
  union ieee754_double u, v, w;
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
	 z rather than NaN.  */
      if (w.ieee.exponent == 0x7ff
	  && u.ieee.exponent != 0x7ff
	  && v.ieee.exponent != 0x7ff)
	return (z + x) + y;
      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
	 or if x * y is less than half of DBL_DENORM_MIN,
	 compute as x * y + z.  */
      if (u.ieee.exponent == 0x7ff
	  || v.ieee.exponent == 0x7ff
	  || w.ieee.exponent == 0x7ff
	  || u.ieee.exponent + v.ieee.exponent
	     > 0x7ff + IEEE754_DOUBLE_BIAS
	  || u.ieee.exponent + v.ieee.exponent
	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
	return x * y + z;
      if (u.ieee.exponent + v.ieee.exponent
	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
	{
	  /* Compute 1p-53 times smaller result and multiply
	     at the end.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent -= DBL_MANT_DIG;
	  else
	    v.ieee.exponent -= DBL_MANT_DIG;
	  /* If x + y exponent is very large and z exponent is very small,
	     it doesn't matter if we don't adjust it.  */
	  if (w.ieee.exponent > DBL_MANT_DIG)
	    w.ieee.exponent -= DBL_MANT_DIG;
	  adjust = 1;
	}
      else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  /* Similarly.
	     If z exponent is very large and x and y exponents are
	     very small, it doesn't matter if we don't adjust it.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    {
	      if (u.ieee.exponent > DBL_MANT_DIG)
		u.ieee.exponent -= DBL_MANT_DIG;
	    }
	  else if (v.ieee.exponent > DBL_MANT_DIG)
	    v.ieee.exponent -= DBL_MANT_DIG;
	  w.ieee.exponent -= DBL_MANT_DIG;
	  adjust = 1;
	}
      else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  u.ieee.exponent -= DBL_MANT_DIG;
	  if (v.ieee.exponent)
	    v.ieee.exponent += DBL_MANT_DIG;
	  else
	    v.d *= 0x1p53;
	}
      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  v.ieee.exponent -= DBL_MANT_DIG;
	  if (u.ieee.exponent)
	    u.ieee.exponent += DBL_MANT_DIG;
	  else
	    u.d *= 0x1p53;
	}
      else /* if (u.ieee.exponent + v.ieee.exponent
		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
	{
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent += 2 * DBL_MANT_DIG;
	  else
	    v.ieee.exponent += 2 * DBL_MANT_DIG;
	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
	    {
	      if (w.ieee.exponent)
		w.ieee.exponent += 2 * DBL_MANT_DIG;
	      else
		w.d *= 0x1p106;
	      adjust = -1;
	    }
	  /* Otherwise x * y should just affect inexact
	     and nothing else.  */
	}
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
    return x * y + z;

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
  double x1 = x * C;
  double y1 = y * C;
  double m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  double x2 = x - x1;
  double y2 = y - y1;
  double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  double a1 = z + m1;
  double t1 = a1 - z;
  double t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  double a2 = t1 + t2;

  fenv_t env;
  libc_feholdexcept_setround (&env, FE_TOWARDZERO);

  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__builtin_expect (adjust < 0, 0))
    {
      if ((u.ieee.mantissa1 & 1) == 0)
	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      math_force_eval (v.d);
    }

  /* Reset rounding mode and test for inexact simultaneously.  */
  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;

  if (__builtin_expect (adjust == 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
	u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__builtin_expect (adjust > 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
	u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * 0x1p53;
    }
  else
    {
      /* If a1 + u.d is exact, the only rounding happens during
	 scaling down.  */
      if (j == 0)
	return v.d * 0x1p-106;
      /* If result rounded to zero is not subnormal, no double
	 rounding will occur.  */
      if (v.ieee.exponent > 106)
	return (a1 + u.d) * 0x1p-106;
      /* If v.d * 0x1p-106 with round to zero is a subnormal above
	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
	 change the round bit, not sticky or guard bit.
	 v.d * 0x1p-106 never normalizes by shifting up,
	 so round bit plus sticky bit should be already enough
	 for proper rounding.  */
      if (v.ieee.exponent == 106)
	{
	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
	     bit.  In round-to-nearest 001 rounds down like 00,
	     011 rounds up, even though 01 rounds down (thus we need
	     to adjust), 101 rounds down like 10 and 111 rounds up
	     like 11.  */
	  if ((v.ieee.mantissa1 & 3) == 1)
	    {
	      v.d *= 0x1p-106;
	      if (v.ieee.negative)
		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	      else
		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	    }
	  else
	    return v.d * 0x1p-106;
	}
      v.ieee.mantissa1 |= j;
      return v.d * 0x1p-106;
    }
}
#ifndef __fma
weak_alias (__fma, fma)
#endif

#ifdef NO_LONG_DOUBLE
strong_alias (__fma, __fmal)
weak_alias (__fmal, fmal)
#endif
Commit	Line	Data
5e908464	1	/* Compute x * y + z as ternary operation.
efb73488	2	Copyright (C) 2010-2012 Free Software Foundation, Inc.
5e908464 JJ	3	This file is part of the GNU C Library.
	4	Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	17	License along with the GNU C Library; if not, see
59ba27a6 PE	18	<http://www.gnu.org/licenses/>. */
5e908464 JJ	19
	20	#include <float.h>
	21	#include <math.h>
	22	#include <fenv.h>
	23	#include <ieee754.h>
d9a8d0ab	24	#include <math_private.h>
5e908464 JJ	25
	26	/* This implementation uses rounding to odd to avoid problems with
	27	double rounding. See a paper by Boldo and Melquiond:
	28	http://www.lri.fr/~melquion/doc/08-tc.pdf */
	29
	30	double
	31	__fma (double x, double y, double z)
	32	{
	33	union ieee754_double u, v, w;
	34	int adjust = 0;
	35	u.d = x;
	36	v.d = y;
	37	w.d = z;
	38	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
	39	>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
	40	\|\| __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
	41	\|\| __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
f3f7372d JJ	42	\|\| __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
	43	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
	44	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
5e908464	45	{
3e692e05 JJ	46	/* If z is Inf, but x and y are finite, the result should be
	47	z rather than NaN. */
	48	if (w.ieee.exponent == 0x7ff
	49	&& u.ieee.exponent != 0x7ff
d9a8d0ab	50	&& v.ieee.exponent != 0x7ff)
3e692e05	51	return (z + x) + y;
f3f7372d JJ	52	/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
f3f7372d JJ	53	or if x * y is less than half of DBL_DENORM_MIN,
5e908464 JJ	54	compute as x * y + z. */
	55	if (u.ieee.exponent == 0x7ff
	56	\|\| v.ieee.exponent == 0x7ff
	57	\|\| w.ieee.exponent == 0x7ff
	58	\|\| u.ieee.exponent + v.ieee.exponent
f3f7372d JJ	59	> 0x7ff + IEEE754_DOUBLE_BIAS
	60	\|\| u.ieee.exponent + v.ieee.exponent
	61	< IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
5e908464 JJ	62	return x * y + z;
	63	if (u.ieee.exponent + v.ieee.exponent
	64	>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
	65	{
	66	/* Compute 1p-53 times smaller result and multiply
	67	at the end. */
	68	if (u.ieee.exponent > v.ieee.exponent)
	69	u.ieee.exponent -= DBL_MANT_DIG;
	70	else
	71	v.ieee.exponent -= DBL_MANT_DIG;
	72	/* If x + y exponent is very large and z exponent is very small,
	73	it doesn't matter if we don't adjust it. */
	74	if (w.ieee.exponent > DBL_MANT_DIG)
	75	w.ieee.exponent -= DBL_MANT_DIG;
	76	adjust = 1;
	77	}
	78	else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	79	{
	80	/* Similarly.
	81	If z exponent is very large and x and y exponents are
	82	very small, it doesn't matter if we don't adjust it. */
	83	if (u.ieee.exponent > v.ieee.exponent)
	84	{
	85	if (u.ieee.exponent > DBL_MANT_DIG)
	86	u.ieee.exponent -= DBL_MANT_DIG;
	87	}
	88	else if (v.ieee.exponent > DBL_MANT_DIG)
	89	v.ieee.exponent -= DBL_MANT_DIG;
	90	w.ieee.exponent -= DBL_MANT_DIG;
	91	adjust = 1;
	92	}
	93	else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	94	{
	95	u.ieee.exponent -= DBL_MANT_DIG;
	96	if (v.ieee.exponent)
	97	v.ieee.exponent += DBL_MANT_DIG;
	98	else
	99	v.d *= 0x1p53;
	100	}
f3f7372d	101	else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
5e908464 JJ	102	{
	103	v.ieee.exponent -= DBL_MANT_DIG;
	104	if (u.ieee.exponent)
	105	u.ieee.exponent += DBL_MANT_DIG;
	106	else
	107	u.d *= 0x1p53;
	108	}
f3f7372d JJ	109	else /* if (u.ieee.exponent + v.ieee.exponent
	110	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
	111	{
	112	if (u.ieee.exponent > v.ieee.exponent)
	113	u.ieee.exponent += 2 * DBL_MANT_DIG;
	114	else
	115	v.ieee.exponent += 2 * DBL_MANT_DIG;
	116	if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
	117	{
	118	if (w.ieee.exponent)
	119	w.ieee.exponent += 2 * DBL_MANT_DIG;
	120	else
	121	w.d *= 0x1p106;
	122	adjust = -1;
	123	}
	124	/* Otherwise x * y should just affect inexact
	125	and nothing else. */
	126	}
5e908464 JJ	127	x = u.d;
	128	y = v.d;
	129	z = w.d;
	130	}
8ec5b013 JM	131
	132	/* Ensure correct sign of exact 0 + 0. */
	133	if (__builtin_expect ((x == 0 \|\| y == 0) && z == 0, 0))
	134	return x * y + z;
	135
5e908464 JJ	136	/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
	137	#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
	138	double x1 = x * C;
	139	double y1 = y * C;
	140	double m1 = x * y;
	141	x1 = (x - x1) + x1;
	142	y1 = (y - y1) + y1;
	143	double x2 = x - x1;
	144	double y2 = y - y1;
	145	double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
	146
	147	/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
	148	double a1 = z + m1;
	149	double t1 = a1 - z;
	150	double t2 = a1 - t1;
	151	t1 = m1 - t1;
	152	t2 = z - t2;
	153	double a2 = t1 + t2;
	154
	155	fenv_t env;
d9a8d0ab	156	libc_feholdexcept_setround (&env, FE_TOWARDZERO);
0fe0f1f8	157
5e908464 JJ	158	/* Perform m2 + a2 addition with round to odd. */
5e908464 JJ	159	u.d = a2 + m2;
5e908464	160
0fe0f1f8 RH	161	if (__builtin_expect (adjust < 0, 0))
	162	{
	163	if ((u.ieee.mantissa1 & 1) == 0)
	164	u.ieee.mantissa1 \|= libc_fetestexcept (FE_INEXACT) != 0;
	165	v.d = a1 + u.d;
efb73488 AJ	166	/* Ensure the addition is not scheduled after fetestexcept call. */
efb73488 AJ	167	math_force_eval (v.d);
0fe0f1f8 RH	168	}
	169
	170	/* Reset rounding mode and test for inexact simultaneously. */
	171	int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
	172
f3f7372d JJ	173	if (__builtin_expect (adjust == 0, 1))
	174	{
	175	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8	176	u.ieee.mantissa1 \|= j;
f3f7372d JJ	177	/* Result is a1 + u.d. */
	178	return a1 + u.d;
	179	}
	180	else if (__builtin_expect (adjust > 0, 1))
	181	{
	182	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8	183	u.ieee.mantissa1 \|= j;
f3f7372d JJ	184	/* Result is a1 + u.d, scaled up. */
	185	return (a1 + u.d) * 0x1p53;
	186	}
	187	else
	188	{
f3f7372d JJ	189	/* If a1 + u.d is exact, the only rounding happens during
	190	scaling down. */
	191	if (j == 0)
	192	return v.d * 0x1p-106;
	193	/* If result rounded to zero is not subnormal, no double
	194	rounding will occur. */
	195	if (v.ieee.exponent > 106)
	196	return (a1 + u.d) * 0x1p-106;
	197	/* If v.d * 0x1p-106 with round to zero is a subnormal above
	198	or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
	199	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
	200	change the round bit, not sticky or guard bit.
	201	v.d * 0x1p-106 never normalizes by shifting up,
	202	so round bit plus sticky bit should be already enough
	203	for proper rounding. */
	204	if (v.ieee.exponent == 106)
	205	{
	206	/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	207	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
	208	bit. In round-to-nearest 001 rounds down like 00,
	209	011 rounds up, even though 01 rounds down (thus we need
	210	to adjust), 101 rounds down like 10 and 111 rounds up
	211	like 11. */
	212	if ((v.ieee.mantissa1 & 3) == 1)
	213	{
	214	v.d *= 0x1p-106;
	215	if (v.ieee.negative)
	216	return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	217	else
	218	return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	219	}
	220	else
	221	return v.d * 0x1p-106;
	222	}
	223	v.ieee.mantissa1 \|= j;
	224	return v.d * 0x1p-106;
	225	}
5e908464 JJ	226	}
	227	#ifndef __fma
	228	weak_alias (__fma, fma)
	229	#endif
	230
	231	#ifdef NO_LONG_DOUBLE
	232	strong_alias (__fma, __fmal)
	233	weak_alias (__fmal, fmal)
	234	#endif