[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-96 / s_fmal.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2012 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math_private.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

long double
__fmal (long double x, long double y, long double z)
{
  union ieee854_long_double u, v, w;
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
			   - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
	 z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
	  && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
	return (z + x) + y;
      /* If z is zero and x are y are nonzero, compute the result
	 as x * y to avoid the wrong sign of a zero result if x * y
	 underflows to 0.  */
      if (z == 0 && x != 0 && y != 0)
	return x * y;
      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
	 or if x * y is less than half of LDBL_DENORM_MIN,
	 compute as x * y + z.  */
      if (u.ieee.exponent == 0x7fff
	  || v.ieee.exponent == 0x7fff
	  || w.ieee.exponent == 0x7fff
	  || u.ieee.exponent + v.ieee.exponent
	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
	  || u.ieee.exponent + v.ieee.exponent
	     < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	return x * y + z;
      if (u.ieee.exponent + v.ieee.exponent
	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	{
	  /* Compute 1p-64 times smaller result and multiply
	     at the end.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent -= LDBL_MANT_DIG;
	  else
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  /* If x + y exponent is very large and z exponent is very small,
	     it doesn't matter if we don't adjust it.  */
	  if (w.ieee.exponent > LDBL_MANT_DIG)
	    w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  /* Similarly.
	     If z exponent is very large and x and y exponents are
	     very small, it doesn't matter if we don't adjust it.  */
	  if (u.ieee.exponent > v.ieee.exponent)
	    {
	      if (u.ieee.exponent > LDBL_MANT_DIG)
		u.ieee.exponent -= LDBL_MANT_DIG;
	    }
	  else if (v.ieee.exponent > LDBL_MANT_DIG)
	    v.ieee.exponent -= LDBL_MANT_DIG;
	  w.ieee.exponent -= LDBL_MANT_DIG;
	  adjust = 1;
	}
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  u.ieee.exponent -= LDBL_MANT_DIG;
	  if (v.ieee.exponent)
	    v.ieee.exponent += LDBL_MANT_DIG;
	  else
	    v.d *= 0x1p64L;
	}
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	{
	  v.ieee.exponent -= LDBL_MANT_DIG;
	  if (u.ieee.exponent)
	    u.ieee.exponent += LDBL_MANT_DIG;
	  else
	    u.d *= 0x1p64L;
	}
      else /* if (u.ieee.exponent + v.ieee.exponent
		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	{
	  if (u.ieee.exponent > v.ieee.exponent)
	    u.ieee.exponent += 2 * LDBL_MANT_DIG;
	  else
	    v.ieee.exponent += 2 * LDBL_MANT_DIG;
	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
	    {
	      if (w.ieee.exponent)
		w.ieee.exponent += 2 * LDBL_MANT_DIG;
	      else
		w.d *= 0x1p128L;
	      adjust = -1;
	    }
	  /* Otherwise x * y should just affect inexact
	     and nothing else.  */
	}
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
    return x * y + z;

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
  long double x1 = x * C;
  long double y1 = y * C;
  long double m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  long double x2 = x - x1;
  long double y2 = y - y1;
  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  long double a1 = z + m1;
  long double t1 = a1 - z;
  long double t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  long double a2 = t1 + t2;

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__builtin_expect (adjust == 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__builtin_expect (adjust > 0, 1))
    {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * 0x1p64L;
    }
  else
    {
      if ((u.ieee.mantissa1 & 1) == 0)
	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      math_force_eval (v.d);
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Ensure the following computations are performed in default rounding
	 mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.d is exact, the only rounding happens during
	 scaling down.  */
      if (j == 0)
	return v.d * 0x1p-128L;
      /* If result rounded to zero is not subnormal, no double
	 rounding will occur.  */
      if (v.ieee.exponent > 128)
	return (a1 + u.d) * 0x1p-128L;
      /* If v.d * 0x1p-128L with round to zero is a subnormal above
	 or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
	 change the round bit, not sticky or guard bit.
	 v.d * 0x1p-128L never normalizes by shifting up,
	 so round bit plus sticky bit should be already enough
	 for proper rounding.  */
      if (v.ieee.exponent == 128)
	{
	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
	     bit.  */
	  w.d = 0.0L;
	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
	  w.ieee.negative = v.ieee.negative;
	  v.ieee.mantissa1 &= ~3U;
	  v.d *= 0x1p-128L;
	  w.d *= 0x1p-2L;
	  return v.d + w.d;
	}
      v.ieee.mantissa1 |= j;
      return v.d * 0x1p-128L;
    }
}
weak_alias (__fmal, fmal)
Commit	Line	Data
3e692e05	1	/* Compute x * y + z as ternary operation.
4842e4fe	2	Copyright (C) 2010-2012 Free Software Foundation, Inc.
3e692e05 JJ	3	This file is part of the GNU C Library.
	4	Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	17	License along with the GNU C Library; if not, see
59ba27a6 PE	18	<http://www.gnu.org/licenses/>. */
3e692e05 JJ	19
	20	#include <float.h>
	21	#include <math.h>
	22	#include <fenv.h>
	23	#include <ieee754.h>
4842e4fe	24	#include <math_private.h>
3e692e05 JJ	25
	26	/* This implementation uses rounding to odd to avoid problems with
	27	double rounding. See a paper by Boldo and Melquiond:
	28	http://www.lri.fr/~melquion/doc/08-tc.pdf */
	29
	30	long double
	31	__fmal (long double x, long double y, long double z)
	32	{
	33	union ieee854_long_double u, v, w;
	34	int adjust = 0;
	35	u.d = x;
	36	v.d = y;
	37	w.d = z;
	38	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
	39	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
	40	- LDBL_MANT_DIG, 0)
	41	\|\| __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	42	\|\| __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	43	\|\| __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
	44	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
	45	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
	46	{
	47	/* If z is Inf, but x and y are finite, the result should be
	48	z rather than NaN. */
	49	if (w.ieee.exponent == 0x7fff
	50	&& u.ieee.exponent != 0x7fff
	51	&& v.ieee.exponent != 0x7fff)
	52	return (z + x) + y;
bec749fd JM	53	/* If z is zero and x are y are nonzero, compute the result
	54	as x * y to avoid the wrong sign of a zero result if x * y
	55	underflows to 0. */
	56	if (z == 0 && x != 0 && y != 0)
	57	return x * y;
3e692e05 JJ	58	/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
	59	or if x * y is less than half of LDBL_DENORM_MIN,
	60	compute as x * y + z. */
	61	if (u.ieee.exponent == 0x7fff
	62	\|\| v.ieee.exponent == 0x7fff
	63	\|\| w.ieee.exponent == 0x7fff
	64	\|\| u.ieee.exponent + v.ieee.exponent
	65	> 0x7fff + IEEE854_LONG_DOUBLE_BIAS
	66	\|\| u.ieee.exponent + v.ieee.exponent
	67	< IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
	68	return x * y + z;
	69	if (u.ieee.exponent + v.ieee.exponent
	70	>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
	71	{
	72	/* Compute 1p-64 times smaller result and multiply
	73	at the end. */
	74	if (u.ieee.exponent > v.ieee.exponent)
	75	u.ieee.exponent -= LDBL_MANT_DIG;
	76	else
	77	v.ieee.exponent -= LDBL_MANT_DIG;
	78	/* If x + y exponent is very large and z exponent is very small,
	79	it doesn't matter if we don't adjust it. */
	80	if (w.ieee.exponent > LDBL_MANT_DIG)
	81	w.ieee.exponent -= LDBL_MANT_DIG;
	82	adjust = 1;
	83	}
	84	else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	85	{
	86	/* Similarly.
	87	If z exponent is very large and x and y exponents are
	88	very small, it doesn't matter if we don't adjust it. */
	89	if (u.ieee.exponent > v.ieee.exponent)
	90	{
	91	if (u.ieee.exponent > LDBL_MANT_DIG)
	92	u.ieee.exponent -= LDBL_MANT_DIG;
	93	}
	94	else if (v.ieee.exponent > LDBL_MANT_DIG)
	95	v.ieee.exponent -= LDBL_MANT_DIG;
	96	w.ieee.exponent -= LDBL_MANT_DIG;
	97	adjust = 1;
	98	}
	99	else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	100	{
	101	u.ieee.exponent -= LDBL_MANT_DIG;
	102	if (v.ieee.exponent)
	103	v.ieee.exponent += LDBL_MANT_DIG;
	104	else
	105	v.d *= 0x1p64L;
	106	}
	107	else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
	108	{
	109	v.ieee.exponent -= LDBL_MANT_DIG;
	110	if (u.ieee.exponent)
	111	u.ieee.exponent += LDBL_MANT_DIG;
	112	else
	113	u.d *= 0x1p64L;
	114	}
	115	else /* if (u.ieee.exponent + v.ieee.exponent
	116	<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
	117	{
	118	if (u.ieee.exponent > v.ieee.exponent)
	119	u.ieee.exponent += 2 * LDBL_MANT_DIG;
	120	else
	121	v.ieee.exponent += 2 * LDBL_MANT_DIG;
122	if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
123	{
124	if (w.ieee.exponent)
125	w.ieee.exponent += 2 * LDBL_MANT_DIG;
126	else
127	w.d *= 0x1p128L;
128	adjust = -1;
129	}
130	/* Otherwise x * y should just affect inexact
131	and nothing else. */
132	}
133	x = u.d;
134	y = v.d;
135	z = w.d;
136	}
8ec5b013 JM	137
	138	/* Ensure correct sign of exact 0 + 0. */
	139	if (__builtin_expect ((x == 0 \|\| y == 0) && z == 0, 0))
	140	return x * y + z;
	141
3e692e05 JJ	142	/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
	143	#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
	144	long double x1 = x * C;
	145	long double y1 = y * C;
	146	long double m1 = x * y;
	147	x1 = (x - x1) + x1;
	148	y1 = (y - y1) + y1;
	149	long double x2 = x - x1;
	150	long double y2 = y - y1;
	151	long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
	152
	153	/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
	154	long double a1 = z + m1;
	155	long double t1 = a1 - z;
	156	long double t2 = a1 - t1;
	157	t1 = m1 - t1;
	158	t2 = z - t2;
	159	long double a2 = t1 + t2;
	160
	161	fenv_t env;
	162	feholdexcept (&env);
	163	fesetround (FE_TOWARDZERO);
	164	/* Perform m2 + a2 addition with round to odd. */
	165	u.d = a2 + m2;
	166
	167	if (__builtin_expect (adjust == 0, 1))
	168	{
	169	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
	170	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;
	171	feupdateenv (&env);
	172	/* Result is a1 + u.d. */
	173	return a1 + u.d;
	174	}
	175	else if (__builtin_expect (adjust > 0, 1))
	176	{
	177	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
	178	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;
	179	feupdateenv (&env);
	180	/* Result is a1 + u.d, scaled up. */
	181	return (a1 + u.d) * 0x1p64L;
	182	}
	183	else
	184	{
	185	if ((u.ieee.mantissa1 & 1) == 0)
	186	u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;
	187	v.d = a1 + u.d;
4842e4fe JM	188	/* Ensure the addition is not scheduled after fetestexcept call. */
4842e4fe JM	189	math_force_eval (v.d);
3e692e05 JJ	190	int j = fetestexcept (FE_INEXACT) != 0;
	191	feupdateenv (&env);
	192	/* Ensure the following computations are performed in default rounding
	193	mode instead of just reusing the round to zero computation. */
	194	asm volatile ("" : "=m" (u) : "m" (u));
	195	/* If a1 + u.d is exact, the only rounding happens during
	196	scaling down. */
	197	if (j == 0)
	198	return v.d * 0x1p-128L;
	199	/* If result rounded to zero is not subnormal, no double
	200	rounding will occur. */
	201	if (v.ieee.exponent > 128)
	202	return (a1 + u.d) * 0x1p-128L;
	203	/* If v.d * 0x1p-128L with round to zero is a subnormal above
	204	or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
	205	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
	206	change the round bit, not sticky or guard bit.
	207	v.d * 0x1p-128L never normalizes by shifting up,
	208	so round bit plus sticky bit should be already enough
	209	for proper rounding. */
	210	if (v.ieee.exponent == 128)
	211	{
	212	/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
	213	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
8627a232 JM	214	bit. */
	215	w.d = 0.0L;
	216	w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) \| j;
	217	w.ieee.negative = v.ieee.negative;
	218	v.ieee.mantissa1 &= ~3U;
	219	v.d *= 0x1p-128L;
	220	w.d *= 0x1p-2L;
	221	return v.d + w.d;
3e692e05 JJ	222	}
	223	v.ieee.mantissa1 \|= j;
	224	return v.d * 0x1p-128L;
	225	}
	226	}
	227	weak_alias (__fmal, fmal)