[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / s_fma.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math_private.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

double
__fma (double x, double y, double z)
{
  union ieee754_double u, v, w;
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
	 z rather than NaN.  */
      if (w.ieee.exponent == 0x7ff
	  && u.ieee.exponent != 0x7ff
	  && v.ieee.exponent != 0x7ff)
	return (z + x) + y;
      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
	 or if x * y is less than half of DBL_DENORM_MIN,
	 compute as x * y + z.  */
      if (u.ieee.exponent == 0x7ff
	  || v.ieee.exponent == 0x7ff
	  || w.ieee.exponent == 0x7ff
	  || u.ieee.exponent + v.ieee.exponent
	     > 0x7ff + IEEE754_DOUBLE_BIAS
	  || u.ieee.exponent + v.ieee.exponent
	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
	return x * y + z;
      if (u.ieee.exponent + v.ieee.exponent
	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
	{
	  /* Compute 1p-53 times smaller result and multiply
	     at the end.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent -= DBL_MANT_DIG;
	  else
	    v.ieee.exponent -= DBL_MANT_DIG;
	  /* If x + y exponent is very large and z exponent is very small,
	     it doesn't matter if we don't adjust it.  */
	  if (w.ieee.exponent > DBL_MANT_DIG)
	    w.ieee.exponent -= DBL_MANT_DIG;
	  adjust = 1;
	}
      else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  /* Similarly.
	     If z exponent is very large and x and y exponents are
	     very small, it doesn't matter if we don't adjust it.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    {
	      if (u.ieee.exponent > DBL_MANT_DIG)
		u.ieee.exponent -= DBL_MANT_DIG;
	    }
	  else if (v.ieee.exponent > DBL_MANT_DIG)
	    v.ieee.exponent -= DBL_MANT_DIG;
	  w.ieee.exponent -= DBL_MANT_DIG;
	  adjust = 1;
	}
      else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  u.ieee.exponent -= DBL_MANT_DIG;
	  if (v.ieee.exponent)
	    v.ieee.exponent += DBL_MANT_DIG;
	  else
	    v.d *= 0x1p53;
	}
      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	{
	  v.ieee.exponent -= DBL_MANT_DIG;
	  if (u.ieee.exponent)
	    u.ieee.exponent += DBL_MANT_DIG;
	  else
	    u.d *= 0x1p53;
	}
      else /* if (u.ieee.exponent + v.ieee.exponent
		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
	{
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent += 2 * DBL_MANT_DIG;
	  else
	    v.ieee.exponent += 2 * DBL_MANT_DIG;
	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
	    {
	      if (w.ieee.exponent)
		w.ieee.exponent += 2 * DBL_MANT_DIG;
	      else
		w.d *= 0x1p106;
	      adjust = -1;
	    }
	  /* Otherwise x * y should just affect inexact
	     and nothing else.  */
	}
      x = u.d;
      y = v.d;
      z = w.d;
    }
  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
  double x1 = x * C;
  double y1 = y * C;
  double m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  double x2 = x - x1;
  double y2 = y - y1;
  double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  double a1 = z + m1;
  double t1 = a1 - z;
  double t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  double a2 = t1 + t2;

  fenv_t env;
  libc_feholdexcept_setround (&env, FE_TOWARDZERO);

  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__builtin_expect (adjust < 0, 0))
    {
      if ((u.ieee.mantissa1 & 1) == 0)
	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
    }

  /* Reset rounding mode and test for inexact simultaneously.  */
  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;

  if (__builtin_expect (adjust == 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
	u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__builtin_expect (adjust > 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
	u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * 0x1p53;
    }
  else
    {
      /* If a1 + u.d is exact, the only rounding happens during
	 scaling down.  */
      if (j == 0)
	return v.d * 0x1p-106;
      /* If result rounded to zero is not subnormal, no double
	 rounding will occur.  */
      if (v.ieee.exponent > 106)
	return (a1 + u.d) * 0x1p-106;
      /* If v.d * 0x1p-106 with round to zero is a subnormal above
	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
	 change the round bit, not sticky or guard bit.
	 v.d * 0x1p-106 never normalizes by shifting up,
	 so round bit plus sticky bit should be already enough
	 for proper rounding.  */
      if (v.ieee.exponent == 106)
	{
	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
	     bit.  In round-to-nearest 001 rounds down like 00,
	     011 rounds up, even though 01 rounds down (thus we need
	     to adjust), 101 rounds down like 10 and 111 rounds up
	     like 11.  */
	  if ((v.ieee.mantissa1 & 3) == 1)
	    {
	      v.d *= 0x1p-106;
	      if (v.ieee.negative)
		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	      else
		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	    }
	  else
	    return v.d * 0x1p-106;
	}
      v.ieee.mantissa1 |= j;
      return v.d * 0x1p-106;
    }
}
#ifndef __fma
weak_alias (__fma, fma)
#endif

#ifdef NO_LONG_DOUBLE
strong_alias (__fma, __fmal)
weak_alias (__fmal, fmal)
#endif
Commit	Line	Data
5e908464	1	/* Compute x * y + z as ternary operation.
d9a8d0ab	2	Copyright (C) 2010, 2011 Free Software Foundation, Inc.
5e908464 JJ	3	This file is part of the GNU C Library.
	4	Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	17	License along with the GNU C Library; if not, see
59ba27a6 PE	18	<http://www.gnu.org/licenses/>. */
5e908464 JJ	19
	20	#include <float.h>
	21	#include <math.h>
	22	#include <fenv.h>
	23	#include <ieee754.h>
d9a8d0ab	24	#include <math_private.h>
5e908464 JJ	25
	26	/* This implementation uses rounding to odd to avoid problems with
	27	double rounding. See a paper by Boldo and Melquiond:
	28	http://www.lri.fr/~melquion/doc/08-tc.pdf */
	29
	30	double
	31	__fma (double x, double y, double z)
	32	{
	33	union ieee754_double u, v, w;
	34	int adjust = 0;
	35	u.d = x;
	36	v.d = y;
	37	w.d = z;
	38	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
	39	>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
	40	\|\| __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
	41	\|\| __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
f3f7372d JJ	42	\|\| __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
	43	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
	44	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
5e908464	45	{
3e692e05 JJ	46	/* If z is Inf, but x and y are finite, the result should be
	47	z rather than NaN. */
	48	if (w.ieee.exponent == 0x7ff
	49	&& u.ieee.exponent != 0x7ff
d9a8d0ab	50	&& v.ieee.exponent != 0x7ff)
3e692e05	51	return (z + x) + y;
f3f7372d JJ	52	/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
f3f7372d JJ	53	or if x * y is less than half of DBL_DENORM_MIN,
5e908464 JJ	54	compute as x * y + z. */
	55	if (u.ieee.exponent == 0x7ff
	56	\|\| v.ieee.exponent == 0x7ff
	57	\|\| w.ieee.exponent == 0x7ff
	58	\|\| u.ieee.exponent + v.ieee.exponent
f3f7372d JJ	59	> 0x7ff + IEEE754_DOUBLE_BIAS
	60	\|\| u.ieee.exponent + v.ieee.exponent
	61	< IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
5e908464 JJ	62	return x * y + z;
	63	if (u.ieee.exponent + v.ieee.exponent
	64	>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
	65	{
	66	/* Compute 1p-53 times smaller result and multiply
	67	at the end. */
	68	if (u.ieee.exponent > v.ieee.exponent)
	69	u.ieee.exponent -= DBL_MANT_DIG;
	70	else
	71	v.ieee.exponent -= DBL_MANT_DIG;
	72	/* If x + y exponent is very large and z exponent is very small,
	73	it doesn't matter if we don't adjust it. */
	74	if (w.ieee.exponent > DBL_MANT_DIG)
	75	w.ieee.exponent -= DBL_MANT_DIG;
	76	adjust = 1;
	77	}
	78	else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	79	{
	80	/* Similarly.
	81	If z exponent is very large and x and y exponents are
	82	very small, it doesn't matter if we don't adjust it. */
	83	if (u.ieee.exponent > v.ieee.exponent)
	84	{
	85	if (u.ieee.exponent > DBL_MANT_DIG)
	86	u.ieee.exponent -= DBL_MANT_DIG;
	87	}
	88	else if (v.ieee.exponent > DBL_MANT_DIG)
	89	v.ieee.exponent -= DBL_MANT_DIG;
	90	w.ieee.exponent -= DBL_MANT_DIG;
	91	adjust = 1;
	92	}
	93	else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
	94	{
	95	u.ieee.exponent -= DBL_MANT_DIG;
	96	if (v.ieee.exponent)
	97	v.ieee.exponent += DBL_MANT_DIG;
	98	else
	99	v.d *= 0x1p53;
	100	}
f3f7372d	101	else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
5e908464 JJ	102	{
	103	v.ieee.exponent -= DBL_MANT_DIG;
	104	if (u.ieee.exponent)
	105	u.ieee.exponent += DBL_MANT_DIG;
	106	else
	107	u.d *= 0x1p53;
	108	}
f3f7372d JJ	109	else /* if (u.ieee.exponent + v.ieee.exponent
	110	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
	111	{
	112	if (u.ieee.exponent > v.ieee.exponent)
	113	u.ieee.exponent += 2 * DBL_MANT_DIG;
	114	else
	115	v.ieee.exponent += 2 * DBL_MANT_DIG;
	116	if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
	117	{
	118	if (w.ieee.exponent)
	119	w.ieee.exponent += 2 * DBL_MANT_DIG;
	120	else
	121	w.d *= 0x1p106;
	122	adjust = -1;
	123	}
	124	/* Otherwise x * y should just affect inexact
	125	and nothing else. */
	126	}
5e908464 JJ	127	x = u.d;
	128	y = v.d;
	129	z = w.d;
	130	}
	131	/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
	132	#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
	133	double x1 = x * C;
	134	double y1 = y * C;
	135	double m1 = x * y;
	136	x1 = (x - x1) + x1;
	137	y1 = (y - y1) + y1;
	138	double x2 = x - x1;
	139	double y2 = y - y1;
	140	double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
	141
	142	/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
	143	double a1 = z + m1;
	144	double t1 = a1 - z;
	145	double t2 = a1 - t1;
	146	t1 = m1 - t1;
	147	t2 = z - t2;
	148	double a2 = t1 + t2;
	149
	150	fenv_t env;
d9a8d0ab	151	libc_feholdexcept_setround (&env, FE_TOWARDZERO);
0fe0f1f8	152
5e908464 JJ	153	/* Perform m2 + a2 addition with round to odd. */
5e908464 JJ	154	u.d = a2 + m2;
5e908464	155
0fe0f1f8 RH	156	if (__builtin_expect (adjust < 0, 0))
	157	{
	158	if ((u.ieee.mantissa1 & 1) == 0)
	159	u.ieee.mantissa1 \|= libc_fetestexcept (FE_INEXACT) != 0;
	160	v.d = a1 + u.d;
	161	}
	162
	163	/* Reset rounding mode and test for inexact simultaneously. */
	164	int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
	165
f3f7372d JJ	166	if (__builtin_expect (adjust == 0, 1))
	167	{
	168	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8	169	u.ieee.mantissa1 \|= j;
f3f7372d JJ	170	/* Result is a1 + u.d. */
	171	return a1 + u.d;
	172	}
	173	else if (__builtin_expect (adjust > 0, 1))
	174	{
	175	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8	176	u.ieee.mantissa1 \|= j;
f3f7372d JJ	177	/* Result is a1 + u.d, scaled up. */
	178	return (a1 + u.d) * 0x1p53;
	179	}
	180	else
	181	{
f3f7372d JJ	182	/* If a1 + u.d is exact, the only rounding happens during
	183	scaling down. */
	184	if (j == 0)
	185	return v.d * 0x1p-106;
	186	/* If result rounded to zero is not subnormal, no double
	187	rounding will occur. */
	188	if (v.ieee.exponent > 106)
	189	return (a1 + u.d) * 0x1p-106;
	190	/* If v.d * 0x1p-106 with round to zero is a subnormal above
	191	or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
	192	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
	193	change the round bit, not sticky or guard bit.
	194	v.d * 0x1p-106 never normalizes by shifting up,
	195	so round bit plus sticky bit should be already enough
	196	for proper rounding. */
	197	if (v.ieee.exponent == 106)
	198	{
	199	/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	200	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
	201	bit. In round-to-nearest 001 rounds down like 00,
	202	011 rounds up, even though 01 rounds down (thus we need
	203	to adjust), 101 rounds down like 10 and 111 rounds up
	204	like 11. */
	205	if ((v.ieee.mantissa1 & 3) == 1)
	206	{
	207	v.d *= 0x1p-106;
	208	if (v.ieee.negative)
	209	return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	210	else
	211	return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
	212	}
	213	else
	214	return v.d * 0x1p-106;
	215	}
	216	v.ieee.mantissa1 \|= j;
	217	return v.d * 0x1p-106;
	218	}
5e908464 JJ	219	}
	220	#ifndef __fma
	221	weak_alias (__fma, fma)
	222	#endif
	223
	224	#ifdef NO_LONG_DOUBLE
	225	strong_alias (__fma, __fmal)
	226	weak_alias (__fmal, fmal)
	227	#endif