[thirdparty/gcc.git] / libgcc / config / libbid / bid128_sqrt.c

/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#define BID_128RES
#include "sqrt_macros.h"

BID128_FUNCTION_ARG1(__bid128_sqrt, x)

  UINT256 M256, C256, C4, C8;
  UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
  UINT64 sign_x, Carry;
  SINT64 D;
  int_float fx, f64;
  int exponent_x = 0, bin_expon_cx;
  int digits, scale, exponent_q;

  // unpack arguments, check for NaN or Infinity
  if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
    res.w[1] = x.w[1];
    res.w[0] = x.w[0];
    // NaN ?
    if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
      if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)	// sNaN
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
	  res.w[1] &= QUIET_MASK64;
      BID_RETURN (res);
    }
    // x is Infinity?
    if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
      if (sign_x) {
	// -Inf, return NaN
	res.w[1] = 0x7c00000000000000ull;
#ifdef SET_STATUS_FLAGS
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      }
      BID_RETURN (res);
    }
    // x is 0 otherwise

    res.w[1] =
      sign_x |
      ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) <<
       49);
    BID_RETURN (res);
  }
  if (sign_x) {
    res.w[1] = 0x7c00000000000000ull;
    res.w[0] = 0;
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    BID_RETURN (res);
  }
  // 2^64
  f64.i = 0x5f800000;

  // fx ~ CX
  fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
  bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
  digits = __bid_estimate_decimal_digits[bin_expon_cx];

  A10 = CX;
  if (exponent_x & 1) {
    A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
    A10.w[0] = CX.w[0] << 3;
    CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
    CX2.w[0] = CX.w[0] << 1;
    __add_128_128 (A10, A10, CX2);
  }

  CS.w[0] = short_sqrt128 (A10);
  CS.w[1] = 0;
  // check for exact result
  if (CS.w[0] * CS.w[0] == A10.w[0]) {
    __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
    if (S2.w[1] == A10.w[1])	// && S2.w[0]==A10.w[0])
    {
      get_BID128_very_fast (&res, 0,
			    (exponent_x +
			     DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
      BID_RETURN (res);
    }
  }
  // get number of digits in CX
  D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1];
  if (D > 0
      || (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0]))
    digits++;

  // if exponent is odd, scale coefficient by 10
  scale = 67 - digits;
  exponent_q = exponent_x - scale;
  scale += (exponent_q & 1);	// exp. bias is even

  if (scale > 38) {
    T128 = __bid_power10_table_128[scale - 37];
    __mul_128x128_low (CX1, CX, T128);

    TP128 = __bid_power10_table_128[37];
    __mul_128x128_to_256 (C256, CX1, TP128);
  } else {
    T128 = __bid_power10_table_128[scale];
    __mul_128x128_to_256 (C256, CX, T128);
  }


  // 4*C256
  C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
  C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
  C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
  C4.w[0] = C256.w[0] << 2;

  long_sqrt128 (&CS, C256);

#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  if (!((rnd_mode) & 3)) {
#endif
#endif
    // compare to midpoints
    CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
    CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
    // CSM^2
    //__mul_128x128_to_256(M256, CSM, CSM);
    __sqr128_to_256 (M256, CSM);

    if (C4.w[3] > M256.w[3]
	|| (C4.w[3] == M256.w[3]
	    && (C4.w[2] > M256.w[2]
		|| (C4.w[2] == M256.w[2]
		    && (C4.w[1] > M256.w[1]
			|| (C4.w[1] == M256.w[1]
			    && C4.w[0] > M256.w[0])))))) {
      // round up
      CS.w[0]++;
      if (!CS.w[0])
	CS.w[1]++;
    } else {
      C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
      C8.w[0] = CS.w[0] << 3;
      // M256 - 8*CSM
      __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] - Carry;

      // if CSM' > C256, round up
      if (M256.w[3] > C4.w[3]
	  || (M256.w[3] == C4.w[3]
	      && (M256.w[2] > C4.w[2]
		  || (M256.w[2] == C4.w[2]
		      && (M256.w[1] > C4.w[1]
			  || (M256.w[1] == C4.w[1]
			      && M256.w[0] > C4.w[0])))))) {
	// round down
	if (!CS.w[0])
	  CS.w[1]--;
	CS.w[0]--;
      }
    }
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  } else {
    __sqr128_to_256 (M256, CS);
    C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
    C8.w[0] = CS.w[0] << 1;
    if (M256.w[3] > C256.w[3]
	|| (M256.w[3] == C256.w[3]
	    && (M256.w[2] > C256.w[2]
		|| (M256.w[2] == C256.w[2]
		    && (M256.w[1] > C256.w[1]
			|| (M256.w[1] == C256.w[1]
			    && M256.w[0] > C256.w[0])))))) {
      __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] - Carry;
      M256.w[0]++;
      if (!M256.w[0]) {
	M256.w[1]++;
	if (!M256.w[1]) {
	  M256.w[2]++;
	  if (!M256.w[2])
	    M256.w[3]++;
	}
      }

      if (!CS.w[0])
	CS.w[1]--;
      CS.w[0]--;

      if (M256.w[3] > C256.w[3]
	  || (M256.w[3] == C256.w[3]
	      && (M256.w[2] > C256.w[2]
		  || (M256.w[2] == C256.w[2]
		      && (M256.w[1] > C256.w[1]
			  || (M256.w[1] == C256.w[1]
			      && M256.w[0] > C256.w[0])))))) {

	if (!CS.w[0])
	  CS.w[1]--;
	CS.w[0]--;
      }
    }

    else {
      __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] + Carry;
      M256.w[0]++;
      if (!M256.w[0]) {
	M256.w[1]++;
	if (!M256.w[1]) {
	  M256.w[2]++;
	  if (!M256.w[2])
	    M256.w[3]++;
	}
      }
      if (M256.w[3] < C256.w[3]
	  || (M256.w[3] == C256.w[3]
	      && (M256.w[2] < C256.w[2]
		  || (M256.w[2] == C256.w[2]
		      && (M256.w[1] < C256.w[1]
			  || (M256.w[1] == C256.w[1]
			      && M256.w[0] <= C256.w[0])))))) {

	CS.w[0]++;
	if (!CS.w[0])
	  CS.w[1]++;
      }
    }
    // RU?
    if ((rnd_mode) == ROUNDING_UP) {
      CS.w[0]++;
      if (!CS.w[0])
	CS.w[1]++;
    }

  }
#endif
#endif

#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
  get_BID128_fast (&res, 0,
		   (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
  BID_RETURN (res);
}
Commit	Line	Data
200359e8 L	1	/* Copyright (C) 2007 Free Software Foundation, Inc.
	2
	3	This file is part of GCC.
	4
	5	GCC is free software; you can redistribute it and/or modify it under
	6	the terms of the GNU General Public License as published by the Free
	7	Software Foundation; either version 2, or (at your option) any later
	8	version.
	9
	10	In addition to the permissions in the GNU General Public License, the
	11	Free Software Foundation gives you unlimited permission to link the
	12	compiled version of this file into combinations with other programs,
	13	and to distribute those combinations without any restriction coming
	14	from the use of this file. (The General Public License restrictions
	15	do apply in other respects; for example, they cover modification of
	16	the file, and distribution when not linked into a combine
	17	executable.)
	18
	19	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
	20	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	21	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	22	for more details.
	23
	24	You should have received a copy of the GNU General Public License
	25	along with GCC; see the file COPYING. If not, write to the Free
	26	Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
	27	02110-1301, USA. */
	28
	29	#define BID_128RES
	30	#include "sqrt_macros.h"
	31
	32	BID128_FUNCTION_ARG1(__bid128_sqrt, x)
	33
	34	UINT256 M256, C256, C4, C8;
	35	UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
	36	UINT64 sign_x, Carry;
	37	SINT64 D;
	38	int_float fx, f64;
	39	int exponent_x = 0, bin_expon_cx;
	40	int digits, scale, exponent_q;
	41
	42	// unpack arguments, check for NaN or Infinity
	43	if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
	44	res.w[1] = x.w[1];
	45	res.w[0] = x.w[0];
	46	// NaN ?
	47	if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
	48	#ifdef SET_STATUS_FLAGS
	49	if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
	50	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	51	#endif
	52	res.w[1] &= QUIET_MASK64;
	53	BID_RETURN (res);
	54	}
	55	// x is Infinity?
	56	if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
	57	if (sign_x) {
	58	// -Inf, return NaN
	59	res.w[1] = 0x7c00000000000000ull;
	60	#ifdef SET_STATUS_FLAGS
	61	__set_status_flags (pfpsf, INVALID_EXCEPTION);
	62	#endif
	63	}
	64	BID_RETURN (res);
65	}
66	// x is 0 otherwise
67
68	res.w[1] =
69	sign_x \|
70	((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) <<
71	49);
72	BID_RETURN (res);
73	}
74	if (sign_x) {
75	res.w[1] = 0x7c00000000000000ull;
76	res.w[0] = 0;
77	#ifdef SET_STATUS_FLAGS
78	__set_status_flags (pfpsf, INVALID_EXCEPTION);
79	#endif
80	BID_RETURN (res);
81	}
82	// 2^64
83	f64.i = 0x5f800000;
84
85	// fx ~ CX
86	fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
87	bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
88	digits = __bid_estimate_decimal_digits[bin_expon_cx];
89
90	A10 = CX;
91	if (exponent_x & 1) {
92	A10.w[1] = (CX.w[1] << 3) \| (CX.w[0] >> 61);
93	A10.w[0] = CX.w[0] << 3;
94	CX2.w[1] = (CX.w[1] << 1) \| (CX.w[0] >> 63);
95	CX2.w[0] = CX.w[0] << 1;
96	__add_128_128 (A10, A10, CX2);
97	}
98
99	CS.w[0] = short_sqrt128 (A10);
100	CS.w[1] = 0;
101	// check for exact result
102	if (CS.w[0] * CS.w[0] == A10.w[0]) {
103	__mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
104	if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
105	{
106	get_BID128_very_fast (&res, 0,
107	(exponent_x +
108	DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
109	BID_RETURN (res);
110	}
111	}
112	// get number of digits in CX
113	D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1];
114	if (D > 0
115	\|\| (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0]))
116	digits++;
117
118	// if exponent is odd, scale coefficient by 10
119	scale = 67 - digits;
120	exponent_q = exponent_x - scale;
121	scale += (exponent_q & 1); // exp. bias is even
122
123	if (scale > 38) {
124	T128 = __bid_power10_table_128[scale - 37];
125	__mul_128x128_low (CX1, CX, T128);
126
127	TP128 = __bid_power10_table_128[37];
128	__mul_128x128_to_256 (C256, CX1, TP128);
129	} else {
130	T128 = __bid_power10_table_128[scale];
131	__mul_128x128_to_256 (C256, CX, T128);
132	}
133
134
135	// 4*C256
136	C4.w[3] = (C256.w[3] << 2) \| (C256.w[2] >> 62);
137	C4.w[2] = (C256.w[2] << 2) \| (C256.w[1] >> 62);
138	C4.w[1] = (C256.w[1] << 2) \| (C256.w[0] >> 62);
139	C4.w[0] = C256.w[0] << 2;
140
141	long_sqrt128 (&CS, C256);
142
143	#ifndef IEEE_ROUND_NEAREST
144	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
145	if (!((rnd_mode) & 3)) {
146	#endif
147	#endif
148	// compare to midpoints
149	CSM.w[1] = (CS.w[1] << 1) \| (CS.w[0] >> 63);
150	CSM.w[0] = (CS.w[0] + CS.w[0]) \| 1;
151	// CSM^2
152	//__mul_128x128_to_256(M256, CSM, CSM);
153	__sqr128_to_256 (M256, CSM);
154
155	if (C4.w[3] > M256.w[3]
156	\|\| (C4.w[3] == M256.w[3]
157	&& (C4.w[2] > M256.w[2]
158	\|\| (C4.w[2] == M256.w[2]
159	&& (C4.w[1] > M256.w[1]
160	\|\| (C4.w[1] == M256.w[1]
161	&& C4.w[0] > M256.w[0])))))) {
162	// round up
163	CS.w[0]++;
164	if (!CS.w[0])
165	CS.w[1]++;
166	} else {
167	C8.w[1] = (CS.w[1] << 3) \| (CS.w[0] >> 61);
168	C8.w[0] = CS.w[0] << 3;
169	// M256 - 8*CSM
170	__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
171	__sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
172	__sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
173	M256.w[3] = M256.w[3] - Carry;
174
175	// if CSM' > C256, round up
176	if (M256.w[3] > C4.w[3]
177	\|\| (M256.w[3] == C4.w[3]
178	&& (M256.w[2] > C4.w[2]
179	\|\| (M256.w[2] == C4.w[2]
180	&& (M256.w[1] > C4.w[1]
181	\|\| (M256.w[1] == C4.w[1]
182	&& M256.w[0] > C4.w[0])))))) {
183	// round down
184	if (!CS.w[0])
185	CS.w[1]--;
186	CS.w[0]--;
187	}
188	}
189	#ifndef IEEE_ROUND_NEAREST
190	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
191	} else {
192	__sqr128_to_256 (M256, CS);
193	C8.w[1] = (CS.w[1] << 1) \| (CS.w[0] >> 63);
194	C8.w[0] = CS.w[0] << 1;
195	if (M256.w[3] > C256.w[3]
196	\|\| (M256.w[3] == C256.w[3]
197	&& (M256.w[2] > C256.w[2]
198	\|\| (M256.w[2] == C256.w[2]
199	&& (M256.w[1] > C256.w[1]
200	\|\| (M256.w[1] == C256.w[1]
201	&& M256.w[0] > C256.w[0])))))) {
202	__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
203	__sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
204	__sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
205	M256.w[3] = M256.w[3] - Carry;
206	M256.w[0]++;
207	if (!M256.w[0]) {
208	M256.w[1]++;
209	if (!M256.w[1]) {
210	M256.w[2]++;
211	if (!M256.w[2])
212	M256.w[3]++;
213	}
214	}
215
216	if (!CS.w[0])
217	CS.w[1]--;
218	CS.w[0]--;
219
220	if (M256.w[3] > C256.w[3]
221	\|\| (M256.w[3] == C256.w[3]
222	&& (M256.w[2] > C256.w[2]
223	\|\| (M256.w[2] == C256.w[2]
224	&& (M256.w[1] > C256.w[1]
225	\|\| (M256.w[1] == C256.w[1]
226	&& M256.w[0] > C256.w[0])))))) {
227
228	if (!CS.w[0])
229	CS.w[1]--;
230	CS.w[0]--;
231	}
232	}
233
234	else {
235	__add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
236	__add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
237	__add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
238	M256.w[3] = M256.w[3] + Carry;
239	M256.w[0]++;
240	if (!M256.w[0]) {
241	M256.w[1]++;
242	if (!M256.w[1]) {
243	M256.w[2]++;
244	if (!M256.w[2])
245	M256.w[3]++;
246	}
247	}
248	if (M256.w[3] < C256.w[3]
249	\|\| (M256.w[3] == C256.w[3]
250	&& (M256.w[2] < C256.w[2]
251	\|\| (M256.w[2] == C256.w[2]
252	&& (M256.w[1] < C256.w[1]
253	\|\| (M256.w[1] == C256.w[1]
254	&& M256.w[0] <= C256.w[0])))))) {
255
256	CS.w[0]++;
257	if (!CS.w[0])
258	CS.w[1]++;
259	}
260	}
261	// RU?
262	if ((rnd_mode) == ROUNDING_UP) {
263	CS.w[0]++;
264	if (!CS.w[0])
265	CS.w[1]++;
266	}
267
268	}
269	#endif
270	#endif
271
272	#ifdef SET_STATUS_FLAGS
273	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
274	#endif
275	get_BID128_fast (&res, 0,
276	(exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
277	BID_RETURN (res);
278	}