[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128ibm / x2y2m1l.c

/* Compute x^2 + y^2 - 1, without large cancellation error.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <math.h>
#include <math_private.h>
#include <float.h>
#include <stdlib.h>

/* Compute the sum X + Y exactly as the unevaluated pair *HI + *LO,
   where *HI is the rounded sum and *LO is the rounding error.  The
   caller must ensure |X| >= |Y| and that the sum does not
   overflow.  */

static inline void
add_split (double *hi, double *lo, double x, double y)
{
  /* Dekker's algorithm: with |X| >= |Y|, subtracting the rounded sum
     from X and adding Y back recovers what the rounding dropped.  */
  const double sum = x + y;
  const double err = (x - sum) + y;
  *hi = sum;
  *lo = err;
}

/* Compute the product X * Y exactly as the unevaluated pair
   *HI + *LO, where *HI is the rounded product and *LO is the
   rounding error.  The caller must ensure that nothing overflows and
   that the operands are either zero or large enough that nothing
   underflows.  */

static inline void
mul_split (double *hi, double *lo, double x, double y)
{
  /* The rounded product is the high part on every code path.  */
  const double prod = x * y;
  *hi = prod;
#ifdef __FP_FAST_FMA
  /* Fast built-in fused multiply-add: X * Y - PROD is evaluated with
     a single rounding, which yields the error term directly.  */
  *lo = __builtin_fma (x, y, -prod);
#elif defined FP_FAST_FMA
  /* Fast library fused multiply-add, for compilers before GCC 4.6.  */
  *lo = __fma (x, y, -prod);
#else
  /* Dekker's algorithm: split each operand into a head and a tail
     using the constant 2^((DBL_MANT_DIG + 1) / 2) + 1, then
     accumulate the partial products against the rounded product.  */
  const double splitter = (1 << (DBL_MANT_DIG + 1) / 2) + 1;
  double x_head = x * splitter;
  double y_head = y * splitter;
  x_head = (x - x_head) + x_head;
  y_head = (y - y_head) + y_head;
  const double x_tail = x - x_head;
  const double y_tail = y - y_head;
  /* Kept as one expression so the evaluation order is exactly
     head*head, head*tail, tail*head, tail*tail.  */
  *lo = (((x_head * y_head - prod) + x_head * y_tail)
         + x_tail * y_head) + x_tail * y_tail;
#endif
}

/* qsort comparison callback: order the doubles pointed to by P and Q
   by increasing absolute value.  Returns -1 if |*P| < |*Q|, 0 if the
   magnitudes are equal, and 1 otherwise (which includes the case
   where either value is a NaN).  */

static int
compare (const void *p, const void *q)
{
  const double lhs = fabs (*(const double *) p);
  const double rhs = fabs (*(const double *) q);

  if (lhs < rhs)
    return -1;
  return lhs == rhs ? 0 : 1;
}

/* Return X^2 + Y^2 - 1, computed without large cancellation error.
   The caller guarantees 1 > X >= Y >= epsilon / 2, and that either
   X >= 0.75 or Y >= 0.5.

   Each long double argument is treated as a pair of doubles (high
   and low part).  The squares are expanded into twelve double terms
   via exact products, the constant 1 is subtracted from the leading
   term(s), and the terms are then summed from smallest to largest
   magnitude.  */

long double
__x2y2m1l (long double x, long double y)
{
  enum { NTERMS = 12 };
  double vals[NTERMS];
  /* NOTE(review): presumably switches to round-to-nearest for the
     rest of the function and restores the caller's mode on exit;
     confirm against the macro's definition in math_private.h.  */
  SET_RESTORE_ROUND (FE_TONEAREST);

  union ibm_extended_long_double xu, yu;
  xu.ld = x;
  yu.ld = y;
  const double xhi = xu.d[0].d;
  const double yhi = yu.d[0].d;
  /* mul_split requires operands that are zero or large enough not to
     underflow, so tiny low parts are flushed to zero.  */
  const double xlo = fabs (xu.d[1].d) < 0x1p-500 ? 0.0 : xu.d[1].d;
  const double ylo = fabs (yu.d[1].d) < 0x1p-500 ? 0.0 : yu.d[1].d;

  /* (hi + lo)^2 = hi*hi + 2*hi*lo + lo*lo, each product split into
     an exact high/low pair.  */
  mul_split (&vals[1], &vals[0], xhi, xhi);
  mul_split (&vals[3], &vals[2], xhi, xlo);
  mul_split (&vals[5], &vals[4], xlo, xlo);
  mul_split (&vals[7], &vals[6], yhi, yhi);
  mul_split (&vals[9], &vals[8], yhi, ylo);
  mul_split (&vals[11], &vals[10], ylo, ylo);
  /* The cross terms occur twice in each square.  */
  vals[2] *= 2.0;
  vals[3] *= 2.0;
  vals[8] *= 2.0;
  vals[9] *= 2.0;

  /* Subtract 1: either all of it from the leading X term, or half
     from each of the leading X and Y terms.  */
  if (xhi >= 0.75)
    vals[1] -= 1.0;
  else
    {
      vals[1] -= 0.5;
      vals[7] -= 0.5;
    }

  qsort (vals, NTERMS, sizeof vals[0], compare);
  /* Add up the values so that each element of VALS has absolute value
     at most equal to the last set bit of the next nonzero element.
     After each step the remaining tail is re-sorted so add_split
     always receives its larger-magnitude operand first.  */
  for (size_t i = 1; i < NTERMS; i++)
    {
      add_split (&vals[i], &vals[i - 1], vals[i], vals[i - 1]);
      qsort (&vals[i], NTERMS - i, sizeof vals[0], compare);
    }

  /* Now any error from this addition will be small.  Sum from the
     last element down to the first.  */
  long double retval = (long double) vals[NTERMS - 1];
  for (size_t i = NTERMS - 1; i-- > 0;)
    retval += (long double) vals[i];
  return retval;
}
Commit	Line	Data
d032e0d2	1	/* Compute x^2 + y^2 - 1, without large cancellation error.
568035b7	2	Copyright (C) 2012-2013 Free Software Foundation, Inc.
d032e0d2 JM	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, see
	17	<http://www.gnu.org/licenses/>. */
	18
	19	#include <math.h>
	20	#include <math_private.h>
	21	#include <float.h>
66ca5a5b	22	#include <stdlib.h>
d032e0d2 JM	23
	24	/* Calculate X + Y exactly and store the result in HI + LO. It is
	25	given that \|X\| >= \|Y\| and the values are small enough that no
	26	overflow occurs. */
	27
	28	static inline void
	29	add_split (double hi, double lo, double x, double y)
	30	{
	31	/* Apply Dekker's algorithm. */
	32	*hi = x + y;
	33	lo = (x - hi) + y;
	34	}
	35
	36	/* Calculate X * Y exactly and store the result in HI + LO. It is
	37	given that the values are small enough that no overflow occurs and
	38	large enough (or zero) that no underflow occurs. */
	39
	40	static inline void
	41	mul_split (double hi, double lo, double x, double y)
	42	{
	43	#ifdef __FP_FAST_FMA
	44	/* Fast built-in fused multiply-add. */
	45	hi = x y;
	46	lo = __builtin_fma (x, y, -hi);
	47	#elif defined FP_FAST_FMA
	48	/* Fast library fused multiply-add, compiler before GCC 4.6. */
	49	hi = x y;
	50	lo = __fma (x, y, -hi);
	51	#else
	52	/* Apply Dekker's algorithm. */
	53	hi = x y;
	54	# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
	55	double x1 = x * C;
	56	double y1 = y * C;
	57	# undef C
	58	x1 = (x - x1) + x1;
	59	y1 = (y - y1) + y1;
	60	double x2 = x - x1;
	61	double y2 = y - y1;
	62	lo = (((x1 y1 - hi) + x1 y2) + x2 * y1) + x2 * y2;
	63	#endif
	64	}
	65
	66	/* Compare absolute values of floating-point values pointed to by P
	67	and Q for qsort. */
	68
	69	static int
	70	compare (const void p, const void q)
	71	{
	72	double pd = fabs ((const double ) p);
	73	double qd = fabs ((const double ) q);
	74	if (pd < qd)
	75	return -1;
	76	else if (pd == qd)
	77	return 0;
	78	else
	79	return 1;
	80	}
	81
	82	/* Return X^2 + Y^2 - 1, computed without large cancellation error.
	83	It is given that 1 > X >= Y >= epsilon / 2, and that either X >=
	84	0.75 or Y >= 0.5. */
	85
	86	long double
87	__x2y2m1l (long double x, long double y)
88	{
89	double vals[12];
90	SET_RESTORE_ROUND (FE_TONEAREST);
91	union ibm_extended_long_double xu, yu;
9605ca6c AM	92	xu.ld = x;
	93	yu.ld = y;
	94	if (fabs (xu.d[1].d) < 0x1p-500)
	95	xu.d[1].d = 0.0;
	96	if (fabs (yu.d[1].d) < 0x1p-500)
	97	yu.d[1].d = 0.0;
	98	mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d);
	99	mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d);
d032e0d2 JM	100	vals[2] *= 2.0;
d032e0d2 JM	101	vals[3] *= 2.0;
9605ca6c AM	102	mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d);
	103	mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d);
	104	mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d);
d032e0d2 JM	105	vals[8] *= 2.0;
d032e0d2 JM	106	vals[9] *= 2.0;
9605ca6c AM	107	mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d);
9605ca6c AM	108	if (xu.d[0].d >= 0.75)
d032e0d2 JM	109	vals[1] -= 1.0;
	110	else
	111	{
	112	vals[1] -= 0.5;
	113	vals[7] -= 0.5;
	114	}
	115	qsort (vals, 12, sizeof (double), compare);
	116	/* Add up the values so that each element of VALS has absolute value
	117	at most equal to the last set bit of the next nonzero
	118	element. */
	119	for (size_t i = 0; i <= 10; i++)
	120	{
	121	add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]);
	122	qsort (vals + i + 1, 11 - i, sizeof (double), compare);
	123	}
	124	/* Now any error from this addition will be small. */
	125	long double retval = (long double) vals[11];
	126	for (size_t i = 10; i != (size_t) -1; i--)
	127	retval += (long double) vals[i];
	128	return retval;
	129	}