[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-96 / x2y2m1l.c

/* Compute x^2 + y^2 - 1, without large cancellation error.
   Copyright (C) 2012-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <math.h>
#include <math_private.h>
#include <float.h>
#include <stdlib.h>

/* Compute X + Y as an unevaluated pair: *HI receives the rounded sum
   and *LO the rounding error, so that *HI + *LO is the exact sum.
   The caller guarantees |X| >= |Y| and that the values are small
   enough that no overflow occurs.  */

static inline void
add_split (long double *hi, long double *lo, long double x, long double y)
{
  /* Dekker's algorithm: subtracting the rounded sum from the larger
     operand and then adding the smaller operand yields the part of
     the sum that was lost to rounding.  */
  const long double sum = x + y;
  const long double err = (x - sum) + y;

  *hi = sum;
  *lo = err;
}

/* Compute X * Y as an unevaluated pair: *HI receives the rounded
   product and *LO the rounding error, so that *HI + *LO is the exact
   product.  The caller guarantees that the values are small enough
   that no overflow occurs and large enough (or zero) that no
   underflow occurs.  */

static inline void
mul_split (long double *hi, long double *lo, long double x, long double y)
{
  /* Every variant starts from the rounded product; they differ only
     in how the rounding error is recovered.  */
  const long double prod = x * y;
  *hi = prod;
#ifdef __FP_FAST_FMAL
  /* Fast built-in fused multiply-add.  */
  *lo = __builtin_fmal (x, y, -prod);
#elif defined FP_FAST_FMAL
  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
  *lo = __fmal (x, y, -prod);
#else
  /* Dekker's algorithm.  Multiplying by SPLITTER and cancelling
     separates each operand into a head and a tail; the partial
     products of those pieces are then accumulated against the
     rounded product, from the largest term to the smallest.  */
  const long double splitter = (1LL << (LDBL_MANT_DIG + 1) / 2) + 1;
  const long double x_scaled = x * splitter;
  const long double y_scaled = y * splitter;
  const long double x_head = (x - x_scaled) + x_scaled;
  const long double y_head = (y - y_scaled) + y_scaled;
  const long double x_tail = x - x_head;
  const long double y_tail = y - y_head;
  *lo = (((x_head * y_head - prod) + x_head * y_tail)
	 + x_tail * y_head) + x_tail * y_tail;
#endif
}

/* Comparison callback for qsort: order the long double values
   pointed to by P and Q by increasing absolute value.  Returns -1
   when |*P| < |*Q|, 0 when the magnitudes are equal, and 1 in every
   other case.  */

static int
compare (const void *p, const void *q)
{
  const long double *lhs = p;
  const long double *rhs = q;
  const long double lhs_mag = fabsl (*lhs);
  const long double rhs_mag = fabsl (*rhs);

  if (lhs_mag < rhs_mag)
    return -1;
  return lhs_mag == rhs_mag ? 0 : 1;
}

/* Return X^2 + Y^2 - 1, computed without large cancellation error.
   The caller guarantees 1 > X >= Y >= epsilon / 2 and
   X^2 + Y^2 >= 0.5.  */

long double
__x2y2m1l (long double x, long double y)
{
  /* Two exact squares, each held as a high/low pair, plus the
     constant -1.  */
  enum { NTERMS = 5 };
  long double terms[NTERMS];
  SET_RESTORE_ROUNDL (FE_TONEAREST);

  mul_split (&terms[1], &terms[0], x, x);
  mul_split (&terms[3], &terms[2], y, y);
  terms[NTERMS - 1] = -1.0L;

  /* Arrange the terms by increasing magnitude.  */
  qsort (terms, NTERMS, sizeof terms[0], compare);

  /* Fold each term into its successor, leaving the rounding error
     behind in the lower slot, then re-sort the remaining tail.
     Afterwards each element of TERMS has absolute value at most
     equal to the last set bit of the next nonzero element.  */
  for (size_t k = 1; k < NTERMS; k++)
    {
      add_split (&terms[k], &terms[k - 1], terms[k], terms[k - 1]);
      qsort (&terms[k], NTERMS - k, sizeof terms[0], compare);
    }

  /* Sum from the largest term downwards; any error from this
     addition will be small.  */
  long double result = terms[NTERMS - 1];
  for (int k = NTERMS - 2; k >= 0; k--)
    result += terms[k];
  return result;
}
Commit	Line	Data
d032e0d2	1	/* Compute x^2 + y^2 - 1, without large cancellation error.
f7a9f785	2	Copyright (C) 2012-2016 Free Software Foundation, Inc.
d032e0d2 JM	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, see
	17	<http://www.gnu.org/licenses/>. */
	18
	19	#include <math.h>
	20	#include <math_private.h>
	21	#include <float.h>
	22	#include <stdlib.h>
	23
	24	/* Calculate X + Y exactly and store the result in HI + LO. It is
	25	given that \|X\| >= \|Y\| and the values are small enough that no
	26	overflow occurs. */
	27
	28	static inline void
	29	add_split (long double hi, long double lo, long double x, long double y)
	30	{
	31	/* Apply Dekker's algorithm. */
	32	*hi = x + y;
	33	lo = (x - hi) + y;
	34	}
	35
	36	/* Calculate X * Y exactly and store the result in HI + LO. It is
	37	given that the values are small enough that no overflow occurs and
	38	large enough (or zero) that no underflow occurs. */
	39
	40	static inline void
	41	mul_split (long double hi, long double lo, long double x, long double y)
	42	{
	43	#ifdef __FP_FAST_FMAL
	44	/* Fast built-in fused multiply-add. */
	45	hi = x y;
	46	lo = __builtin_fmal (x, y, -hi);
	47	#elif defined FP_FAST_FMAL
	48	/* Fast library fused multiply-add, compiler before GCC 4.6. */
	49	hi = x y;
	50	lo = __fmal (x, y, -hi);
	51	#else
	52	/* Apply Dekker's algorithm. */
	53	hi = x y;
	54	# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
	55	long double x1 = x * C;
	56	long double y1 = y * C;
	57	# undef C
	58	x1 = (x - x1) + x1;
	59	y1 = (y - y1) + y1;
	60	long double x2 = x - x1;
	61	long double y2 = y - y1;
	62	lo = (((x1 y1 - hi) + x1 y2) + x2 * y1) + x2 * y2;
	63	#endif
	64	}
	65
	66	/* Compare absolute values of floating-point values pointed to by P
67	and Q for qsort. */
68
69	static int
70	compare (const void p, const void q)
71	{
72	long double pld = fabsl ((const long double ) p);
73	long double qld = fabsl ((const long double ) q);
74	if (pld < qld)
75	return -1;
76	else if (pld == qld)
77	return 0;
78	else
79	return 1;
80	}
81
82	/* Return X^2 + Y^2 - 1, computed without large cancellation error.
a5721ebc JM	83	It is given that 1 > X >= Y >= epsilon / 2, and that X^2 + Y^2 >=
a5721ebc JM	84	0.5. */
d032e0d2 JM	85
	86	long double
	87	__x2y2m1l (long double x, long double y)
	88	{
a5721ebc	89	long double vals[5];
d032e0d2 JM	90	SET_RESTORE_ROUNDL (FE_TONEAREST);
	91	mul_split (&vals[1], &vals[0], x, x);
	92	mul_split (&vals[3], &vals[2], y, y);
a5721ebc JM	93	vals[4] = -1.0L;
a5721ebc JM	94	qsort (vals, 5, sizeof (long double), compare);
d032e0d2 JM	95	/* Add up the values so that each element of VALS has absolute value
	96	at most equal to the last set bit of the next nonzero
	97	element. */
a5721ebc	98	for (size_t i = 0; i <= 3; i++)
d032e0d2 JM	99	{
d032e0d2 JM	100	add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]);
a5721ebc	101	qsort (vals + i + 1, 4 - i, sizeof (long double), compare);
d032e0d2 JM	102	}
d032e0d2 JM	103	/* Now any error from this addition will be small. */
a5721ebc	104	return vals[4] + vals[3] + vals[2] + vals[1] + vals[0];
d032e0d2	105	}