[thirdparty/gcc.git] / libgcc / config / libbid / bid64_add.c

/* Copyright (C) 2007-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*****************************************************************************
 *    BID64 add
 *****************************************************************************
 *
 *  Algorithm description:
 *
 *   if(exponent_a < exponent_b)
 *       switch a, b
 *   diff_expon = exponent_a - exponent_b
 *   if(diff_expon > 16)
 *      return normalize(a)
 *   if(coefficient_a*10^diff_expon guaranteed below 2^62)
 *       S = sign_a*coefficient_a*10^diff_expon + sign_b*coefficient_b
 *       if(|S|<10^16)
 *           return get_BID64(sign(S),exponent_b,|S|)
 *       else
 *          determine number of extra digits in S (1, 2, or 3)
 *            return rounded result
 *   else // large exponent difference
 *       if(number_digits(coefficient_a*10^diff_expon) +/- 10^16)
 *          guaranteed the same as
 *          number_digits(coefficient_a*10^diff_expon) )
 *           S = normalize(coefficient_a + (sign_a^sign_b)*10^(16-diff_expon))
 *           corr = 10^16 + (sign_a^sign_b)*coefficient_b
 *           corr*10^exponent_b is rounded so it aligns with S*10^exponent_S
 *           return get_BID64(sign_a,exponent(S),S+rounded(corr))
 *       else
 *         add sign_a*coefficient_a*10^diff_expon, sign_b*coefficient_b
 *             in 128-bit integer arithmetic, then round to 16 decimal digits
 *           
 *
 ****************************************************************************/

#include "bid_internal.h"

#if DECIMAL_CALL_BY_REFERENCE
void bid64_add (UINT64 * pres, UINT64 * px,
		UINT64 *
		py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		_EXC_INFO_PARAM);
#else
UINT64 bid64_add (UINT64 x,
		  UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
		  _EXC_MASKS_PARAM _EXC_INFO_PARAM);
#endif

#if DECIMAL_CALL_BY_REFERENCE

void
bid64_sub (UINT64 * pres, UINT64 * px,
	   UINT64 *
	   py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	   _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  _IDEC_round rnd_mode = *prnd_mode;
#endif
  // check if y is not NaN
  if (((y & NAN_MASK64) != NAN_MASK64))
    y ^= 0x8000000000000000ull;
  bid64_add (pres, px,
	     &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	     _EXC_INFO_ARG);
}
#else

UINT64
bid64_sub (UINT64 x,
	   UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	   _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  // check if y is not NaN
  if (((y & NAN_MASK64) != NAN_MASK64))
    y ^= 0x8000000000000000ull;

  return bid64_add (x,
		    y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
}
#endif


#if DECIMAL_CALL_BY_REFERENCE

void
bid64_add (UINT64 * pres, UINT64 * px,
	   UINT64 *
	   py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	   _EXC_INFO_PARAM) {
  UINT64 x, y;
#else

UINT64
bid64_add (UINT64 x,
	   UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	   _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif

  UINT128 CA, CT, CT_new;
  UINT64 sign_x, sign_y, coefficient_x, coefficient_y, C64_new;
  UINT64 valid_x, valid_y;
  UINT64 res;
  UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
    rem_a;
  UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp;
  int_double tempx;
  int exponent_x, exponent_y, exponent_a, exponent_b, diff_dec_expon;
  int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
  unsigned rmode, status;

#if DECIMAL_CALL_BY_REFERENCE
#if !DECIMAL_GLOBAL_ROUNDING
  _IDEC_round rnd_mode = *prnd_mode;
#endif
  x = *px;
  y = *py;
#endif

  valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
  valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);

  // unpack arguments, check for NaN or Infinity
  if (!valid_x) {
    // x is Inf. or NaN

    // test if x is NaN
    if ((x & NAN_MASK64) == NAN_MASK64) {
#ifdef SET_STATUS_FLAGS
      if (((x & SNAN_MASK64) == SNAN_MASK64)	// sNaN
	  || ((y & SNAN_MASK64) == SNAN_MASK64))
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = coefficient_x & QUIET_MASK64;
      BID_RETURN (res);
    }
    // x is Infinity?
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      // check if y is Inf
      if (((y & NAN_MASK64) == INFINITY_MASK64)) {
	if (sign_x == (y & 0x8000000000000000ull)) {
	  res = coefficient_x;
	  BID_RETURN (res);
	}
	// return NaN
	{
#ifdef SET_STATUS_FLAGS
	  __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
	  res = NAN_MASK64;
	  BID_RETURN (res);
	}
      }
      // check if y is NaN
      if (((y & NAN_MASK64) == NAN_MASK64)) {
	res = coefficient_y & QUIET_MASK64;
#ifdef SET_STATUS_FLAGS
	if (((y & SNAN_MASK64) == SNAN_MASK64))
	  __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
	BID_RETURN (res);
      }
      // otherwise return +/-Inf
      {
	res = coefficient_x;
	BID_RETURN (res);
      }
    }
    // x is 0
    {
      if (((y & INFINITY_MASK64) != INFINITY_MASK64) && coefficient_y) {
	if (exponent_y <= exponent_x) {
	  res = y;
	  BID_RETURN (res);
	}
      }
    }

  }
  if (!valid_y) {
    // y is Inf. or NaN?
    if (((y & INFINITY_MASK64) == INFINITY_MASK64)) {
#ifdef SET_STATUS_FLAGS
      if ((y & SNAN_MASK64) == SNAN_MASK64)	// sNaN
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = coefficient_y & QUIET_MASK64;
      BID_RETURN (res);
    }
    // y is 0
    if (!coefficient_x) {	// x==0
      if (exponent_x <= exponent_y)
	res = ((UINT64) exponent_x) << 53;
      else
	res = ((UINT64) exponent_y) << 53;
      if (sign_x == sign_y)
	res |= sign_x;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rnd_mode == ROUNDING_DOWN && sign_x != sign_y)
	res |= 0x8000000000000000ull;
#endif
#endif
      BID_RETURN (res);
    } else if (exponent_y >= exponent_x) {
      res = x;
      BID_RETURN (res);
    }
  }
  // sort arguments by exponent
  if (exponent_x < exponent_y) {
    sign_a = sign_y;
    exponent_a = exponent_y;
    coefficient_a = coefficient_y;
    sign_b = sign_x;
    exponent_b = exponent_x;
    coefficient_b = coefficient_x;
  } else {
    sign_a = sign_x;
    exponent_a = exponent_x;
    coefficient_a = coefficient_x;
    sign_b = sign_y;
    exponent_b = exponent_y;
    coefficient_b = coefficient_y;
  }

  // exponent difference
  diff_dec_expon = exponent_a - exponent_b;

  /* get binary coefficients of x and y */

  //--- get number of bits in the coefficients of x and y ---

  // version 2 (original)
  tempx.d = (double) coefficient_a;
  bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;

  if (diff_dec_expon > MAX_FORMAT_DIGITS) {
    // normalize a to a 16-digit coefficient

    scale_ca = estimate_decimal_digits[bin_expon_ca];
    if (coefficient_a >= power10_table_128[scale_ca].w[0])
      scale_ca++;

    scale_k = 16 - scale_ca;

    coefficient_a *= power10_table_128[scale_k].w[0];

    diff_dec_expon -= scale_k;
    exponent_a -= scale_k;

    /* get binary coefficients of x and y */

    //--- get number of bits in the coefficients of x and y ---
    tempx.d = (double) coefficient_a;
    bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;

    if (diff_dec_expon > MAX_FORMAT_DIGITS) {
#ifdef SET_STATUS_FLAGS
      if (coefficient_b) {
	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
      }
#endif

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (((rnd_mode) & 3) && coefficient_b)	// not ROUNDING_TO_NEAREST
      {
	switch (rnd_mode) {
	case ROUNDING_DOWN:
	  if (sign_b) {
	    coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
	    if (coefficient_a < 1000000000000000ull) {
	      exponent_a--;
	      coefficient_a = 9999999999999999ull;
	    } else if (coefficient_a >= 10000000000000000ull) {
	      exponent_a++;
	      coefficient_a = 1000000000000000ull;
	    }
	  }
	  break;
	case ROUNDING_UP:
	  if (!sign_b) {
	    coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
	    if (coefficient_a < 1000000000000000ull) {
	      exponent_a--;
	      coefficient_a = 9999999999999999ull;
	    } else if (coefficient_a >= 10000000000000000ull) {
	      exponent_a++;
	      coefficient_a = 1000000000000000ull;
	    }
	  }
	  break;
	default:	// RZ
	  if (sign_a != sign_b) {
	    coefficient_a--;
	    if (coefficient_a < 1000000000000000ull) {
	      exponent_a--;
	      coefficient_a = 9999999999999999ull;
	    }
	  }
	  break;
	}
      } else
#endif
#endif
	// check special case here
	if ((coefficient_a == 1000000000000000ull)
	    && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
	    && (sign_a ^ sign_b)
	    && (coefficient_b > 5000000000000000ull)) {
	coefficient_a = 9999999999999999ull;
	exponent_a--;
      }

      res =
	fast_get_BID64_check_OF (sign_a, exponent_a, coefficient_a,
				 rnd_mode, pfpsf);
      BID_RETURN (res);
    }
  }
  // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
  if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
    // coefficient_a*10^(exponent_a-exponent_b)<2^63

    // multiply by 10^(exponent_a-exponent_b)
    coefficient_a *= power10_table_128[diff_dec_expon].w[0];

    // sign mask
    sign_b = ((SINT64) sign_b) >> 63;
    // apply sign to coeff. of b
    coefficient_b = (coefficient_b + sign_b) ^ sign_b;

    // apply sign to coefficient a
    sign_a = ((SINT64) sign_a) >> 63;
    coefficient_a = (coefficient_a + sign_a) ^ sign_a;

    coefficient_a += coefficient_b;
    // get sign
    sign_s = ((SINT64) coefficient_a) >> 63;
    coefficient_a = (coefficient_a + sign_s) ^ sign_s;
    sign_s &= 0x8000000000000000ull;

    // coefficient_a < 10^16 ?
    if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rnd_mode == ROUNDING_DOWN && (!coefficient_a)
	  && sign_a != sign_b)
	sign_s = 0x8000000000000000ull;
#endif
#endif
      res = very_fast_get_BID64 (sign_s, exponent_b, coefficient_a);
      BID_RETURN (res);
    }
    // otherwise rounding is necessary

    // already know coefficient_a<10^19
    // coefficient_a < 10^17 ?
    if (coefficient_a < power10_table_128[17].w[0])
      extra_digits = 1;
    else if (coefficient_a < power10_table_128[18].w[0])
      extra_digits = 2;
    else
      extra_digits = 3;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    rmode = rnd_mode;
    if (sign_s && (unsigned) (rmode - 1) < 2)
      rmode = 3 - rmode;
#else
    rmode = 0;
#endif
#else
    rmode = 0;
#endif
    coefficient_a += round_const_table[rmode][extra_digits];

    // get P*(2^M[extra_digits])/10^extra_digits
    __mul_64x64_to_128 (CT, coefficient_a,
			reciprocals10_64[extra_digits]);

    // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
    amount = short_recip_scale[extra_digits];
    C64 = CT.w[1] >> amount;

  } else {
    // coefficient_a*10^(exponent_a-exponent_b) is large
    sign_s = sign_a;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    rmode = rnd_mode;
    if (sign_s && (unsigned) (rmode - 1) < 2)
      rmode = 3 - rmode;
#else
    rmode = 0;
#endif
#else
    rmode = 0;
#endif

    // check whether we can take faster path
    scale_ca = estimate_decimal_digits[bin_expon_ca];

    sign_ab = sign_a ^ sign_b;
    sign_ab = ((SINT64) sign_ab) >> 63;

    // T1 = 10^(16-diff_dec_expon)
    T1 = power10_table_128[16 - diff_dec_expon].w[0];

    // get number of digits in coefficient_a
    if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
      scale_ca++;
    }

    scale_k = 16 - scale_ca;

    // addition
    saved_ca = coefficient_a - T1;
    coefficient_a =
      (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
    extra_digits = diff_dec_expon - scale_k;

    // apply sign
    saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
    // add 10^16 and rounding constant
    coefficient_b =
      saved_cb + 10000000000000000ull +
      round_const_table[rmode][extra_digits];

    // get P*(2^M[extra_digits])/10^extra_digits
    __mul_64x64_to_128 (CT, coefficient_b,
			reciprocals10_64[extra_digits]);

    // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
    amount = short_recip_scale[extra_digits];
    C0_64 = CT.w[1] >> amount;

    // result coefficient 
    C64 = C0_64 + coefficient_a;
    // filter out difficult (corner) cases
    // this test ensures the number of digits in coefficient_a does not change 
    // after adding (the appropriately scaled and rounded) coefficient_b
    if ((UINT64) (C64 - 1000000000000000ull - 1) >
	9000000000000000ull - 2) {
      if (C64 >= 10000000000000000ull) {
	// result has more than 16 digits
	if (!scale_k) {
	  // must divide coeff_a by 10
	  saved_ca = saved_ca + T1;
	  __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
	  //reciprocals10_64[1]);
	  coefficient_a = CA.w[1] >> 1;
	  rem_a =
	    saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
	  coefficient_a = coefficient_a - T1;

	  saved_cb += rem_a * power10_table_128[diff_dec_expon].w[0];
	} else
	  coefficient_a =
	    (SINT64) (saved_ca - T1 -
		      (T1 << 3)) * (SINT64) power10_table_128[scale_k -
							      1].w[0];

	extra_digits++;
	coefficient_b =
	  saved_cb + 100000000000000000ull +
	  round_const_table[rmode][extra_digits];

	// get P*(2^M[extra_digits])/10^extra_digits
	__mul_64x64_to_128 (CT, coefficient_b,
			    reciprocals10_64[extra_digits]);

	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
	amount = short_recip_scale[extra_digits];
	C0_64 = CT.w[1] >> amount;

	// result coefficient 
	C64 = C0_64 + coefficient_a;
      } else if (C64 <= 1000000000000000ull) {
	// less than 16 digits in result
	coefficient_a =
	  (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
							1].w[0];
	//extra_digits --;
	exponent_b--;
	coefficient_b =
	  (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
	  round_const_table[rmode][extra_digits];

	// get P*(2^M[extra_digits])/10^extra_digits
	__mul_64x64_to_128 (CT_new, coefficient_b,
			    reciprocals10_64[extra_digits]);

	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
	amount = short_recip_scale[extra_digits];
	C0_64 = CT_new.w[1] >> amount;

	// result coefficient 
	C64_new = C0_64 + coefficient_a;
	if (C64_new < 10000000000000000ull) {
	  C64 = C64_new;
#ifdef SET_STATUS_FLAGS
	  CT = CT_new;
#endif
	} else
	  exponent_b++;
      }

    }

  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is 
      // exactly .5
      // this is the same as fractional part of 
      //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero

      // get remainder
      remainder_h = CT.w[1] << (64 - amount);

      // test whether fractional part is 0
      if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS
  status = INEXACT_EXCEPTION;

  // get remainder
  remainder_h = CT.w[1] << (64 - amount);

  switch (rmode) {
  case ROUNDING_TO_NEAREST:
  case ROUNDING_TIES_AWAY:
    // test whether fractional part is 0
    if ((remainder_h == 0x8000000000000000ull)
	&& (CT.w[0] < reciprocals10_64[extra_digits]))
      status = EXACT_STATUS;
    break;
  case ROUNDING_DOWN:
  case ROUNDING_TO_ZERO:
    if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
      status = EXACT_STATUS;
    //if(!C64 && rmode==ROUNDING_DOWN) sign_s=sign_y;
    break;
  default:
    // round up
    __add_carry_out (tmp, carry, CT.w[0],
		     reciprocals10_64[extra_digits]);
    if ((remainder_h >> (64 - amount)) + carry >=
	(((UINT64) 1) << amount))
      status = EXACT_STATUS;
    break;
  }
  __set_status_flags (pfpsf, status);

#endif

  res =
    fast_get_BID64_check_OF (sign_s, exponent_b + extra_digits, C64,
			     rnd_mode, pfpsf);
  BID_RETURN (res);
}
Commit	Line	Data
8d9254fc	1	/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
200359e8 L	2
	3	This file is part of GCC.
	4
	5	GCC is free software; you can redistribute it and/or modify it under
	6	the terms of the GNU General Public License as published by the Free
748086b7	7	Software Foundation; either version 3, or (at your option) any later
200359e8 L	8	version.
200359e8 L	9
200359e8 L	10	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
	11	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	12	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	13	for more details.
	14
748086b7 JJ	15	Under Section 7 of GPL version 3, you are granted additional
	16	permissions described in the GCC Runtime Library Exception, version
	17	3.1, as published by the Free Software Foundation.
	18
	19	You should have received a copy of the GNU General Public License and
	20	a copy of the GCC Runtime Library Exception along with this program;
	21	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	22	<http://www.gnu.org/licenses/>. */
200359e8 L	23
	24	/*****************************************************************************
	25	* BID64 add
	26	*****************************************************************************
	27	*
	28	* Algorithm description:
	29	*
	30	* if(exponent_a < exponent_b)
	31	* switch a, b
	32	* diff_expon = exponent_a - exponent_b
	33	* if(diff_expon > 16)
	34	* return normalize(a)
	35	* if(coefficient_a*10^diff_expon guaranteed below 2^62)
	36	* S = sign_acoefficient_a10^diff_expon + sign_b*coefficient_b
	37	* if(\|S\|<10^16)
	38	* return get_BID64(sign(S),exponent_b,\|S\|)
	39	* else
	40	* determine number of extra digits in S (1, 2, or 3)
	41	* return rounded result
	42	* else // large exponent difference
	43	* if(number_digits(coefficient_a*10^diff_expon) +/- 10^16)
	44	* guaranteed the same as
	45	* number_digits(coefficient_a*10^diff_expon) )
	46	* S = normalize(coefficient_a + (sign_a^sign_b)*10^(16-diff_expon))
	47	* corr = 10^16 + (sign_a^sign_b)*coefficient_b
	48	* corr10^exponent_b is rounded so it aligns with S10^exponent_S
	49	* return get_BID64(sign_a,exponent(S),S+rounded(corr))
	50	* else
	51	* add sign_acoefficient_a10^diff_expon, sign_b*coefficient_b
	52	* in 128-bit integer arithmetic, then round to 16 decimal digits
	53	*
	54	*
	55	****************************************************************************/
	56
	57	#include "bid_internal.h"
	58
	59	#if DECIMAL_CALL_BY_REFERENCE
b2a00c89	60	void bid64_add (UINT64 * pres, UINT64 * px,
200359e8 L	61	UINT64 *
	62	py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	63	_EXC_INFO_PARAM);
	64	#else
b2a00c89 L	65	UINT64 bid64_add (UINT64 x,
	66	UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	67	_EXC_MASKS_PARAM _EXC_INFO_PARAM);
200359e8 L	68	#endif
	69
	70	#if DECIMAL_CALL_BY_REFERENCE
	71
	72	void
b2a00c89	73	bid64_sub (UINT64 * pres, UINT64 * px,
200359e8 L	74	UINT64 *
	75	py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	76	_EXC_INFO_PARAM) {
	77	UINT64 y = *py;
	78	#if !DECIMAL_GLOBAL_ROUNDING
	79	_IDEC_round rnd_mode = *prnd_mode;
	80	#endif
	81	// check if y is not NaN
	82	if (((y & NAN_MASK64) != NAN_MASK64))
	83	y ^= 0x8000000000000000ull;
b2a00c89	84	bid64_add (pres, px,
200359e8 L	85	&y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	86	_EXC_INFO_ARG);
	87	}
	88	#else
	89
	90	UINT64
b2a00c89	91	bid64_sub (UINT64 x,
200359e8 L	92	UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	93	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
	94	// check if y is not NaN
	95	if (((y & NAN_MASK64) != NAN_MASK64))
	96	y ^= 0x8000000000000000ull;
	97
b2a00c89	98	return bid64_add (x,
200359e8 L	99	y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	100	_EXC_INFO_ARG);
	101	}
	102	#endif
	103
	104
	105
	106	#if DECIMAL_CALL_BY_REFERENCE
	107
	108	void
b2a00c89	109	bid64_add (UINT64 * pres, UINT64 * px,
200359e8 L	110	UINT64 *
	111	py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	112	_EXC_INFO_PARAM) {
	113	UINT64 x, y;
	114	#else
	115
	116	UINT64
b2a00c89	117	bid64_add (UINT64 x,
200359e8 L	118	UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	119	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
	120	#endif
	121
	122	UINT128 CA, CT, CT_new;
	123	UINT64 sign_x, sign_y, coefficient_x, coefficient_y, C64_new;
	124	UINT64 valid_x, valid_y;
	125	UINT64 res;
	126	UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
	127	rem_a;
	128	UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp;
	129	int_double tempx;
b2a00c89	130	int exponent_x, exponent_y, exponent_a, exponent_b, diff_dec_expon;
200359e8 L	131	int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
	132	unsigned rmode, status;
	133
	134	#if DECIMAL_CALL_BY_REFERENCE
	135	#if !DECIMAL_GLOBAL_ROUNDING
	136	_IDEC_round rnd_mode = *prnd_mode;
	137	#endif
	138	x = *px;
	139	y = *py;
	140	#endif
	141
	142	valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
	143	valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
	144
	145	// unpack arguments, check for NaN or Infinity
	146	if (!valid_x) {
	147	// x is Inf. or NaN
	148
	149	// test if x is NaN
	150	if ((x & NAN_MASK64) == NAN_MASK64) {
	151	#ifdef SET_STATUS_FLAGS
	152	if (((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
	153	\|\| ((y & SNAN_MASK64) == SNAN_MASK64))
	154	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	155	#endif
b2a00c89	156	res = coefficient_x & QUIET_MASK64;
200359e8 L	157	BID_RETURN (res);
	158	}
	159	// x is Infinity?
	160	if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
	161	// check if y is Inf
	162	if (((y & NAN_MASK64) == INFINITY_MASK64)) {
	163	if (sign_x == (y & 0x8000000000000000ull)) {
b2a00c89	164	res = coefficient_x;
200359e8 L	165	BID_RETURN (res);
	166	}
	167	// return NaN
	168	{
	169	#ifdef SET_STATUS_FLAGS
	170	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	171	#endif
	172	res = NAN_MASK64;
	173	BID_RETURN (res);
	174	}
	175	}
	176	// check if y is NaN
	177	if (((y & NAN_MASK64) == NAN_MASK64)) {
b2a00c89	178	res = coefficient_y & QUIET_MASK64;
200359e8 L	179	#ifdef SET_STATUS_FLAGS
	180	if (((y & SNAN_MASK64) == SNAN_MASK64))
	181	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	182	#endif
	183	BID_RETURN (res);
	184	}
	185	// otherwise return +/-Inf
	186	{
b2a00c89	187	res = coefficient_x;
200359e8 L	188	BID_RETURN (res);
	189	}
	190	}
	191	// x is 0
	192	{
b2a00c89	193	if (((y & INFINITY_MASK64) != INFINITY_MASK64) && coefficient_y) {
200359e8 L	194	if (exponent_y <= exponent_x) {
	195	res = y;
	196	BID_RETURN (res);
	197	}
	198	}
	199	}
	200
	201	}
	202	if (!valid_y) {
	203	// y is Inf. or NaN?
	204	if (((y & INFINITY_MASK64) == INFINITY_MASK64)) {
	205	#ifdef SET_STATUS_FLAGS
	206	if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
	207	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	208	#endif
b2a00c89	209	res = coefficient_y & QUIET_MASK64;
200359e8 L	210	BID_RETURN (res);
	211	}
	212	// y is 0
b2a00c89	213	if (!coefficient_x) { // x==0
200359e8	214	if (exponent_x <= exponent_y)
b2a00c89	215	res = ((UINT64) exponent_x) << 53;
200359e8	216	else
b2a00c89	217	res = ((UINT64) exponent_y) << 53;
200359e8 L	218	if (sign_x == sign_y)
	219	res \|= sign_x;
	220	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	221	#ifndef IEEE_ROUND_NEAREST
	222	if (rnd_mode == ROUNDING_DOWN && sign_x != sign_y)
	223	res \|= 0x8000000000000000ull;
	224	#endif
	225	#endif
	226	BID_RETURN (res);
	227	} else if (exponent_y >= exponent_x) {
	228	res = x;
	229	BID_RETURN (res);
	230	}
	231	}
	232	// sort arguments by exponent
	233	if (exponent_x < exponent_y) {
	234	sign_a = sign_y;
	235	exponent_a = exponent_y;
	236	coefficient_a = coefficient_y;
	237	sign_b = sign_x;
	238	exponent_b = exponent_x;
	239	coefficient_b = coefficient_x;
	240	} else {
	241	sign_a = sign_x;
	242	exponent_a = exponent_x;
	243	coefficient_a = coefficient_x;
	244	sign_b = sign_y;
	245	exponent_b = exponent_y;
	246	coefficient_b = coefficient_y;
	247	}
	248
	249	// exponent difference
	250	diff_dec_expon = exponent_a - exponent_b;
	251
	252	/* get binary coefficients of x and y */
	253
	254	//--- get number of bits in the coefficients of x and y ---
	255
	256	// version 2 (original)
	257	tempx.d = (double) coefficient_a;
	258	bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
	259
	260	if (diff_dec_expon > MAX_FORMAT_DIGITS) {
	261	// normalize a to a 16-digit coefficient
	262
b2a00c89 L	263	scale_ca = estimate_decimal_digits[bin_expon_ca];
b2a00c89 L	264	if (coefficient_a >= power10_table_128[scale_ca].w[0])
200359e8 L	265	scale_ca++;
	266
	267	scale_k = 16 - scale_ca;
	268
b2a00c89	269	coefficient_a *= power10_table_128[scale_k].w[0];
200359e8 L	270
	271	diff_dec_expon -= scale_k;
	272	exponent_a -= scale_k;
	273
	274	/* get binary coefficients of x and y */
	275
	276	//--- get number of bits in the coefficients of x and y ---
	277	tempx.d = (double) coefficient_a;
	278	bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
	279
	280	if (diff_dec_expon > MAX_FORMAT_DIGITS) {
	281	#ifdef SET_STATUS_FLAGS
	282	if (coefficient_b) {
	283	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
	284	}
	285	#endif
	286
	287	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	288	#ifndef IEEE_ROUND_NEAREST
	289	if (((rnd_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
	290	{
	291	switch (rnd_mode) {
	292	case ROUNDING_DOWN:
	293	if (sign_b) {
	294	coefficient_a -= ((((SINT64) sign_a) >> 63) \| 1);
	295	if (coefficient_a < 1000000000000000ull) {
	296	exponent_a--;
	297	coefficient_a = 9999999999999999ull;
	298	} else if (coefficient_a >= 10000000000000000ull) {
	299	exponent_a++;
	300	coefficient_a = 1000000000000000ull;
	301	}
	302	}
	303	break;
	304	case ROUNDING_UP:
	305	if (!sign_b) {
	306	coefficient_a += ((((SINT64) sign_a) >> 63) \| 1);
	307	if (coefficient_a < 1000000000000000ull) {
	308	exponent_a--;
	309	coefficient_a = 9999999999999999ull;
	310	} else if (coefficient_a >= 10000000000000000ull) {
	311	exponent_a++;
	312	coefficient_a = 1000000000000000ull;
	313	}
	314	}
	315	break;
	316	default: // RZ
	317	if (sign_a != sign_b) {
	318	coefficient_a--;
	319	if (coefficient_a < 1000000000000000ull) {
	320	exponent_a--;
	321	coefficient_a = 9999999999999999ull;
	322	}
	323	}
	324	break;
	325	}
	326	} else
	327	#endif
	328	#endif
	329	// check special case here
	330	if ((coefficient_a == 1000000000000000ull)
	331	&& (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
b2a00c89 L	332	&& (sign_a ^ sign_b)
b2a00c89 L	333	&& (coefficient_b > 5000000000000000ull)) {
200359e8 L	334	coefficient_a = 9999999999999999ull;
	335	exponent_a--;
	336	}
	337
	338	res =
	339	fast_get_BID64_check_OF (sign_a, exponent_a, coefficient_a,
	340	rnd_mode, pfpsf);
	341	BID_RETURN (res);
	342	}
	343	}
	344	// test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
b2a00c89	345	if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
200359e8 L	346	// coefficient_a*10^(exponent_a-exponent_b)<2^63
	347
	348	// multiply by 10^(exponent_a-exponent_b)
b2a00c89	349	coefficient_a *= power10_table_128[diff_dec_expon].w[0];
200359e8 L	350
	351	// sign mask
	352	sign_b = ((SINT64) sign_b) >> 63;
	353	// apply sign to coeff. of b
	354	coefficient_b = (coefficient_b + sign_b) ^ sign_b;
	355
	356	// apply sign to coefficient a
	357	sign_a = ((SINT64) sign_a) >> 63;
	358	coefficient_a = (coefficient_a + sign_a) ^ sign_a;
	359
	360	coefficient_a += coefficient_b;
	361	// get sign
	362	sign_s = ((SINT64) coefficient_a) >> 63;
	363	coefficient_a = (coefficient_a + sign_s) ^ sign_s;
	364	sign_s &= 0x8000000000000000ull;
	365
	366	// coefficient_a < 10^16 ?
b2a00c89	367	if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
200359e8 L	368	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	369	#ifndef IEEE_ROUND_NEAREST
	370	if (rnd_mode == ROUNDING_DOWN && (!coefficient_a)
	371	&& sign_a != sign_b)
	372	sign_s = 0x8000000000000000ull;
	373	#endif
	374	#endif
	375	res = very_fast_get_BID64 (sign_s, exponent_b, coefficient_a);
	376	BID_RETURN (res);
	377	}
	378	// otherwise rounding is necessary
	379
	380	// already know coefficient_a<10^19
	381	// coefficient_a < 10^17 ?
b2a00c89	382	if (coefficient_a < power10_table_128[17].w[0])
200359e8	383	extra_digits = 1;
b2a00c89	384	else if (coefficient_a < power10_table_128[18].w[0])
200359e8 L	385	extra_digits = 2;
	386	else
	387	extra_digits = 3;
	388
	389	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	390	#ifndef IEEE_ROUND_NEAREST
	391	rmode = rnd_mode;
	392	if (sign_s && (unsigned) (rmode - 1) < 2)
	393	rmode = 3 - rmode;
	394	#else
	395	rmode = 0;
	396	#endif
	397	#else
	398	rmode = 0;
	399	#endif
b2a00c89	400	coefficient_a += round_const_table[rmode][extra_digits];
200359e8 L	401
	402	// get P*(2^M[extra_digits])/10^extra_digits
	403	__mul_64x64_to_128 (CT, coefficient_a,
b2a00c89	404	reciprocals10_64[extra_digits]);
200359e8 L	405
200359e8 L	406	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
b2a00c89	407	amount = short_recip_scale[extra_digits];
200359e8 L	408	C64 = CT.w[1] >> amount;
	409
	410	} else {
	411	// coefficient_a*10^(exponent_a-exponent_b) is large
	412	sign_s = sign_a;
	413
	414	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	415	#ifndef IEEE_ROUND_NEAREST
	416	rmode = rnd_mode;
	417	if (sign_s && (unsigned) (rmode - 1) < 2)
	418	rmode = 3 - rmode;
	419	#else
	420	rmode = 0;
	421	#endif
	422	#else
	423	rmode = 0;
	424	#endif
	425
	426	// check whether we can take faster path
b2a00c89	427	scale_ca = estimate_decimal_digits[bin_expon_ca];
200359e8 L	428
	429	sign_ab = sign_a ^ sign_b;
	430	sign_ab = ((SINT64) sign_ab) >> 63;
	431
	432	// T1 = 10^(16-diff_dec_expon)
b2a00c89	433	T1 = power10_table_128[16 - diff_dec_expon].w[0];
200359e8 L	434
200359e8 L	435	// get number of digits in coefficient_a
b2a00c89	436	if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
200359e8 L	437	scale_ca++;
	438	}
	439
	440	scale_k = 16 - scale_ca;
	441
	442	// addition
	443	saved_ca = coefficient_a - T1;
	444	coefficient_a =
b2a00c89	445	(SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
200359e8 L	446	extra_digits = diff_dec_expon - scale_k;
	447
	448	// apply sign
	449	saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
	450	// add 10^16 and rounding constant
	451	coefficient_b =
	452	saved_cb + 10000000000000000ull +
b2a00c89	453	round_const_table[rmode][extra_digits];
200359e8 L	454
	455	// get P*(2^M[extra_digits])/10^extra_digits
	456	__mul_64x64_to_128 (CT, coefficient_b,
b2a00c89	457	reciprocals10_64[extra_digits]);
200359e8 L	458
200359e8 L	459	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
b2a00c89	460	amount = short_recip_scale[extra_digits];
200359e8 L	461	C0_64 = CT.w[1] >> amount;
	462
	463	// result coefficient
	464	C64 = C0_64 + coefficient_a;
	465	// filter out difficult (corner) cases
	466	// this test ensures the number of digits in coefficient_a does not change
	467	// after adding (the appropriately scaled and rounded) coefficient_b
b2a00c89 L	468	if ((UINT64) (C64 - 1000000000000000ull - 1) >
b2a00c89 L	469	9000000000000000ull - 2) {
200359e8 L	470	if (C64 >= 10000000000000000ull) {
	471	// result has more than 16 digits
	472	if (!scale_k) {
	473	// must divide coeff_a by 10
	474	saved_ca = saved_ca + T1;
b2a00c89 L	475	__mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
b2a00c89 L	476	//reciprocals10_64[1]);
200359e8 L	477	coefficient_a = CA.w[1] >> 1;
	478	rem_a =
	479	saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
	480	coefficient_a = coefficient_a - T1;
	481
b2a00c89	482	saved_cb += rem_a * power10_table_128[diff_dec_expon].w[0];
200359e8 L	483	} else
	484	coefficient_a =
	485	(SINT64) (saved_ca - T1 -
b2a00c89	486	(T1 << 3)) * (SINT64) power10_table_128[scale_k -
200359e8 L	487	1].w[0];
	488
	489	extra_digits++;
	490	coefficient_b =
	491	saved_cb + 100000000000000000ull +
b2a00c89	492	round_const_table[rmode][extra_digits];
200359e8 L	493
	494	// get P*(2^M[extra_digits])/10^extra_digits
	495	__mul_64x64_to_128 (CT, coefficient_b,
b2a00c89	496	reciprocals10_64[extra_digits]);
200359e8 L	497
200359e8 L	498	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
b2a00c89	499	amount = short_recip_scale[extra_digits];
200359e8 L	500	C0_64 = CT.w[1] >> amount;
	501
	502	// result coefficient
	503	C64 = C0_64 + coefficient_a;
	504	} else if (C64 <= 1000000000000000ull) {
	505	// less than 16 digits in result
	506	coefficient_a =
b2a00c89	507	(SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
200359e8 L	508	1].w[0];
	509	//extra_digits --;
	510	exponent_b--;
	511	coefficient_b =
	512	(saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
b2a00c89	513	round_const_table[rmode][extra_digits];
200359e8 L	514
	515	// get P*(2^M[extra_digits])/10^extra_digits
	516	__mul_64x64_to_128 (CT_new, coefficient_b,
b2a00c89	517	reciprocals10_64[extra_digits]);
200359e8 L	518
200359e8 L	519	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
b2a00c89	520	amount = short_recip_scale[extra_digits];
200359e8 L	521	C0_64 = CT_new.w[1] >> amount;
	522
	523	// result coefficient
	524	C64_new = C0_64 + coefficient_a;
	525	if (C64_new < 10000000000000000ull) {
	526	C64 = C64_new;
	527	#ifdef SET_STATUS_FLAGS
	528	CT = CT_new;
	529	#endif
	530	} else
	531	exponent_b++;
	532	}
	533
	534	}
	535
	536	}
	537
	538	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
	539	#ifndef IEEE_ROUND_NEAREST
	540	if (rmode == 0) //ROUNDING_TO_NEAREST
	541	#endif
	542	if (C64 & 1) {
	543	// check whether fractional part of initial_P/10^extra_digits is
	544	// exactly .5
	545	// this is the same as fractional part of
	546	// (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
	547
	548	// get remainder
	549	remainder_h = CT.w[1] << (64 - amount);
	550
	551	// test whether fractional part is 0
b2a00c89	552	if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
200359e8 L	553	C64--;
	554	}
	555	}
	556	#endif
	557
	558	#ifdef SET_STATUS_FLAGS
	559	status = INEXACT_EXCEPTION;
	560
	561	// get remainder
	562	remainder_h = CT.w[1] << (64 - amount);
	563
	564	switch (rmode) {
	565	case ROUNDING_TO_NEAREST:
	566	case ROUNDING_TIES_AWAY:
	567	// test whether fractional part is 0
	568	if ((remainder_h == 0x8000000000000000ull)
b2a00c89	569	&& (CT.w[0] < reciprocals10_64[extra_digits]))
200359e8 L	570	status = EXACT_STATUS;
	571	break;
	572	case ROUNDING_DOWN:
	573	case ROUNDING_TO_ZERO:
b2a00c89	574	if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
200359e8 L	575	status = EXACT_STATUS;
	576	//if(!C64 && rmode==ROUNDING_DOWN) sign_s=sign_y;
	577	break;
	578	default:
	579	// round up
	580	__add_carry_out (tmp, carry, CT.w[0],
b2a00c89	581	reciprocals10_64[extra_digits]);
200359e8 L	582	if ((remainder_h >> (64 - amount)) + carry >=
	583	(((UINT64) 1) << amount))
	584	status = EXACT_STATUS;
	585	break;
	586	}
	587	__set_status_flags (pfpsf, status);
	588
	589	#endif
	590
	591	res =
	592	fast_get_BID64_check_OF (sign_s, exponent_b + extra_digits, C64,
	593	rnd_mode, pfpsf);
	594	BID_RETURN (res);
	595	}