]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/libbid/bid128_sqrt.c
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / libbid / bid128_sqrt.c
CommitLineData
99dee823 1/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
200359e8
L
2
3This file is part of GCC.
4
5GCC is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
748086b7 7Software Foundation; either version 3, or (at your option) any later
200359e8
L
8version.
9
200359e8
L
10GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13for more details.
14
748086b7
JJ
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22<http://www.gnu.org/licenses/>. */
200359e8
L
23
24#define BID_128RES
b2a00c89
L
25#include "bid_internal.h"
26#include "bid_sqrt_macros.h"
27#ifdef UNCHANGED_BINARY_STATUS_FLAGS
28#include <fenv.h>
200359e8 29
b2a00c89
L
30#define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
31#endif
32
33BID128_FUNCTION_ARG1 (bid128_sqrt, x)
200359e8 34
b2a00c89
L
35 UINT256 M256, C256, C4, C8;
36 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
37 UINT64 sign_x, Carry;
38 SINT64 D;
39 int_float fx, f64;
40 int exponent_x, bin_expon_cx;
41 int digits, scale, exponent_q;
42#ifdef UNCHANGED_BINARY_STATUS_FLAGS
43 fexcept_t binaryflags = 0;
44#endif
200359e8
L
45
46 // unpack arguments, check for NaN or Infinity
b2a00c89
L
47if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
48res.w[1] = CX.w[1];
49res.w[0] = CX.w[0];
200359e8 50 // NaN ?
b2a00c89 51if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
200359e8 52#ifdef SET_STATUS_FLAGS
b2a00c89
L
53 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
54 __set_status_flags (pfpsf, INVALID_EXCEPTION);
200359e8 55#endif
b2a00c89
L
56 res.w[1] = CX.w[1] & QUIET_MASK64;
57 BID_RETURN (res);
58}
200359e8 59 // x is Infinity?
b2a00c89
L
60if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
61 res.w[1] = CX.w[1];
200359e8 62 if (sign_x) {
b2a00c89 63 // -Inf, return NaN
200359e8 64 res.w[1] = 0x7c00000000000000ull;
200359e8
L
65#ifdef SET_STATUS_FLAGS
66 __set_status_flags (pfpsf, INVALID_EXCEPTION);
67#endif
200359e8 68 }
b2a00c89
L
69 BID_RETURN (res);
70}
71 // x is 0 otherwise
72
73res.w[1] =
74 sign_x |
75 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
76res.w[0] = 0;
77BID_RETURN (res);
78}
79if (sign_x) {
80 res.w[1] = 0x7c00000000000000ull;
81 res.w[0] = 0;
82#ifdef SET_STATUS_FLAGS
83 __set_status_flags (pfpsf, INVALID_EXCEPTION);
84#endif
85 BID_RETURN (res);
86}
87#ifdef UNCHANGED_BINARY_STATUS_FLAGS
88(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
89#endif
200359e8 90 // 2^64
b2a00c89 91f64.i = 0x5f800000;
200359e8
L
92
93 // fx ~ CX
b2a00c89
L
94fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
95bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
96digits = estimate_decimal_digits[bin_expon_cx];
200359e8 97
b2a00c89
L
98A10 = CX;
99if (exponent_x & 1) {
100 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
101 A10.w[0] = CX.w[0] << 3;
102 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
103 CX2.w[0] = CX.w[0] << 1;
104 __add_128_128 (A10, A10, CX2);
105}
106
107CS.w[0] = short_sqrt128 (A10);
108CS.w[1] = 0;
200359e8 109 // check for exact result
b2a00c89
L
110if (CS.w[0] * CS.w[0] == A10.w[0]) {
111 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
112 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
113 {
114 get_BID128_very_fast (&res, 0,
115 (exponent_x +
116 DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
117#ifdef UNCHANGED_BINARY_STATUS_FLAGS
118 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
119#endif
120 BID_RETURN (res);
200359e8 121 }
b2a00c89 122}
200359e8 123 // get number of digits in CX
b2a00c89
L
124D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
125if (D > 0
126 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
127 digits++;
200359e8
L
128
129 // if exponent is odd, scale coefficient by 10
b2a00c89
L
130scale = 67 - digits;
131exponent_q = exponent_x - scale;
132scale += (exponent_q & 1); // exp. bias is even
200359e8 133
b2a00c89
L
134if (scale > 38) {
135 T128 = power10_table_128[scale - 37];
136 __mul_128x128_low (CX1, CX, T128);
200359e8 137
b2a00c89
L
138 TP128 = power10_table_128[37];
139 __mul_128x128_to_256 (C256, CX1, TP128);
140} else {
141 T128 = power10_table_128[scale];
142 __mul_128x128_to_256 (C256, CX, T128);
143}
200359e8
L
144
145
146 // 4*C256
b2a00c89
L
147C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
148C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
149C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
150C4.w[0] = C256.w[0] << 2;
200359e8 151
b2a00c89 152long_sqrt128 (&CS, C256);
200359e8
L
153
154#ifndef IEEE_ROUND_NEAREST
155#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
b2a00c89 156if (!((rnd_mode) & 3)) {
200359e8
L
157#endif
158#endif
b2a00c89
L
159 // compare to midpoints
160 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
161 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
162 // CSM^2
163 //__mul_128x128_to_256(M256, CSM, CSM);
164 __sqr128_to_256 (M256, CSM);
165
166 if (C4.w[3] > M256.w[3]
167 || (C4.w[3] == M256.w[3]
168 && (C4.w[2] > M256.w[2]
169 || (C4.w[2] == M256.w[2]
170 && (C4.w[1] > M256.w[1]
171 || (C4.w[1] == M256.w[1]
172 && C4.w[0] > M256.w[0])))))) {
173 // round up
174 CS.w[0]++;
175 if (!CS.w[0])
176 CS.w[1]++;
177 } else {
178 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
179 C8.w[0] = CS.w[0] << 3;
180 // M256 - 8*CSM
181 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
182 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
183 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
184 M256.w[3] = M256.w[3] - Carry;
185
186 // if CSM' > C256, round up
187 if (M256.w[3] > C4.w[3]
188 || (M256.w[3] == C4.w[3]
189 && (M256.w[2] > C4.w[2]
190 || (M256.w[2] == C4.w[2]
191 && (M256.w[1] > C4.w[1]
192 || (M256.w[1] == C4.w[1]
193 && M256.w[0] > C4.w[0])))))) {
194 // round down
200359e8 195 if (!CS.w[0])
b2a00c89
L
196 CS.w[1]--;
197 CS.w[0]--;
200359e8 198 }
b2a00c89 199 }
200359e8
L
200#ifndef IEEE_ROUND_NEAREST
201#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
b2a00c89
L
202} else {
203 __sqr128_to_256 (M256, CS);
204 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
205 C8.w[0] = CS.w[0] << 1;
206 if (M256.w[3] > C256.w[3]
207 || (M256.w[3] == C256.w[3]
208 && (M256.w[2] > C256.w[2]
209 || (M256.w[2] == C256.w[2]
210 && (M256.w[1] > C256.w[1]
211 || (M256.w[1] == C256.w[1]
212 && M256.w[0] > C256.w[0])))))) {
213 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
214 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
215 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
216 M256.w[3] = M256.w[3] - Carry;
217 M256.w[0]++;
218 if (!M256.w[0]) {
219 M256.w[1]++;
220 if (!M256.w[1]) {
221 M256.w[2]++;
222 if (!M256.w[2])
223 M256.w[3]++;
224 }
225 }
226
227 if (!CS.w[0])
228 CS.w[1]--;
229 CS.w[0]--;
230
200359e8
L
231 if (M256.w[3] > C256.w[3]
232 || (M256.w[3] == C256.w[3]
233 && (M256.w[2] > C256.w[2]
234 || (M256.w[2] == C256.w[2]
235 && (M256.w[1] > C256.w[1]
236 || (M256.w[1] == C256.w[1]
237 && M256.w[0] > C256.w[0])))))) {
200359e8
L
238
239 if (!CS.w[0])
240 CS.w[1]--;
241 CS.w[0]--;
b2a00c89
L
242 }
243 }
200359e8 244
b2a00c89
L
245 else {
246 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
247 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
248 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
249 M256.w[3] = M256.w[3] + Carry;
250 M256.w[0]++;
251 if (!M256.w[0]) {
252 M256.w[1]++;
253 if (!M256.w[1]) {
254 M256.w[2]++;
255 if (!M256.w[2])
256 M256.w[3]++;
200359e8
L
257 }
258 }
b2a00c89
L
259 if (M256.w[3] < C256.w[3]
260 || (M256.w[3] == C256.w[3]
261 && (M256.w[2] < C256.w[2]
262 || (M256.w[2] == C256.w[2]
263 && (M256.w[1] < C256.w[1]
264 || (M256.w[1] == C256.w[1]
265 && M256.w[0] <= C256.w[0])))))) {
200359e8 266
b2a00c89
L
267 CS.w[0]++;
268 if (!CS.w[0])
269 CS.w[1]++;
270 }
271 }
272 // RU?
273 if ((rnd_mode) == ROUNDING_UP) {
274 CS.w[0]++;
275 if (!CS.w[0])
276 CS.w[1]++;
277 }
278
279}
280#endif
281#endif
282
283#ifdef SET_STATUS_FLAGS
284__set_status_flags (pfpsf, INEXACT_EXCEPTION);
285#endif
286get_BID128_fast (&res, 0,
287 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
288#ifdef UNCHANGED_BINARY_STATUS_FLAGS
289(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
290#endif
291BID_RETURN (res);
292}
293
294
295
296BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x)
297
298 UINT256 M256, C256, C4, C8;
299 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
300 UINT64 sign_x, Carry;
301 SINT64 D;
302 int_float fx, f64;
303 int exponent_x, bin_expon_cx;
304 int digits, scale, exponent_q;
305#ifdef UNCHANGED_BINARY_STATUS_FLAGS
306 fexcept_t binaryflags = 0;
307#endif
308
309 // unpack arguments, check for NaN or Infinity
310 // unpack arguments, check for NaN or Infinity
311CX.w[1] = 0;
312if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
313res.w[1] = CX.w[0];
314res.w[0] = 0;
315 // NaN ?
316if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
317#ifdef SET_STATUS_FLAGS
318 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
319 __set_status_flags (pfpsf, INVALID_EXCEPTION);
320#endif
321 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
322 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
323 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
324 BID_RETURN (res);
325}
326 // x is Infinity?
327if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
328 if (sign_x) {
329 // -Inf, return NaN
330 res.w[1] = 0x7c00000000000000ull;
331#ifdef SET_STATUS_FLAGS
332 __set_status_flags (pfpsf, INVALID_EXCEPTION);
333#endif
334 }
335 BID_RETURN (res);
336}
337 // x is 0 otherwise
338
339exponent_x =
340 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
341res.w[1] =
342 sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1)
343 << 49);
344res.w[0] = 0;
345BID_RETURN (res);
346}
347if (sign_x) {
348 res.w[1] = 0x7c00000000000000ull;
349 res.w[0] = 0;
350#ifdef SET_STATUS_FLAGS
351 __set_status_flags (pfpsf, INVALID_EXCEPTION);
352#endif
353 BID_RETURN (res);
354}
355#ifdef UNCHANGED_BINARY_STATUS_FLAGS
356(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
357#endif
358exponent_x =
359 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
360
361 // 2^64
362f64.i = 0x5f800000;
363
364 // fx ~ CX
365fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
366bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
367digits = estimate_decimal_digits[bin_expon_cx];
368
369A10 = CX;
370if (exponent_x & 1) {
371 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
372 A10.w[0] = CX.w[0] << 3;
373 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
374 CX2.w[0] = CX.w[0] << 1;
375 __add_128_128 (A10, A10, CX2);
376}
377
378CS.w[0] = short_sqrt128 (A10);
379CS.w[1] = 0;
380 // check for exact result
381if (CS.w[0] * CS.w[0] == A10.w[0]) {
382 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
383 if (S2.w[1] == A10.w[1]) {
384 get_BID128_very_fast (&res, 0,
385 (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
386 CS);
387#ifdef UNCHANGED_BINARY_STATUS_FLAGS
388 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
389#endif
390 BID_RETURN (res);
391 }
392}
393 // get number of digits in CX
394D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
395if (D > 0
396 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
397 digits++;
398
399 // if exponent is odd, scale coefficient by 10
400scale = 67 - digits;
401exponent_q = exponent_x - scale;
402scale += (exponent_q & 1); // exp. bias is even
403
404if (scale > 38) {
405 T128 = power10_table_128[scale - 37];
406 __mul_128x128_low (CX1, CX, T128);
407
408 TP128 = power10_table_128[37];
409 __mul_128x128_to_256 (C256, CX1, TP128);
410} else {
411 T128 = power10_table_128[scale];
412 __mul_128x128_to_256 (C256, CX, T128);
413}
414
415
416 // 4*C256
417C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
418C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
419C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
420C4.w[0] = C256.w[0] << 2;
421
422long_sqrt128 (&CS, C256);
423
424#ifndef IEEE_ROUND_NEAREST
425#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
426if (!((rnd_mode) & 3)) {
427#endif
428#endif
429 // compare to midpoints
430 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
431 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
432 // CSM^2
433 //__mul_128x128_to_256(M256, CSM, CSM);
434 __sqr128_to_256 (M256, CSM);
435
436 if (C4.w[3] > M256.w[3]
437 || (C4.w[3] == M256.w[3]
438 && (C4.w[2] > M256.w[2]
439 || (C4.w[2] == M256.w[2]
440 && (C4.w[1] > M256.w[1]
441 || (C4.w[1] == M256.w[1]
442 && C4.w[0] > M256.w[0])))))) {
443 // round up
444 CS.w[0]++;
445 if (!CS.w[0])
446 CS.w[1]++;
447 } else {
448 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
449 C8.w[0] = CS.w[0] << 3;
450 // M256 - 8*CSM
451 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
452 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
453 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
454 M256.w[3] = M256.w[3] - Carry;
455
456 // if CSM' > C256, round up
457 if (M256.w[3] > C4.w[3]
458 || (M256.w[3] == C4.w[3]
459 && (M256.w[2] > C4.w[2]
460 || (M256.w[2] == C4.w[2]
461 && (M256.w[1] > C4.w[1]
462 || (M256.w[1] == C4.w[1]
463 && M256.w[0] > C4.w[0])))))) {
464 // round down
465 if (!CS.w[0])
466 CS.w[1]--;
467 CS.w[0]--;
468 }
469 }
470#ifndef IEEE_ROUND_NEAREST
471#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
472} else {
473 __sqr128_to_256 (M256, CS);
474 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
475 C8.w[0] = CS.w[0] << 1;
476 if (M256.w[3] > C256.w[3]
477 || (M256.w[3] == C256.w[3]
478 && (M256.w[2] > C256.w[2]
479 || (M256.w[2] == C256.w[2]
480 && (M256.w[1] > C256.w[1]
481 || (M256.w[1] == C256.w[1]
482 && M256.w[0] > C256.w[0])))))) {
483 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
484 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
485 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
486 M256.w[3] = M256.w[3] - Carry;
487 M256.w[0]++;
488 if (!M256.w[0]) {
489 M256.w[1]++;
490 if (!M256.w[1]) {
491 M256.w[2]++;
492 if (!M256.w[2])
493 M256.w[3]++;
200359e8 494 }
b2a00c89
L
495 }
496
497 if (!CS.w[0])
498 CS.w[1]--;
499 CS.w[0]--;
500
501 if (M256.w[3] > C256.w[3]
502 || (M256.w[3] == C256.w[3]
503 && (M256.w[2] > C256.w[2]
504 || (M256.w[2] == C256.w[2]
505 && (M256.w[1] > C256.w[1]
506 || (M256.w[1] == C256.w[1]
507 && M256.w[0] > C256.w[0])))))) {
508
509 if (!CS.w[0])
510 CS.w[1]--;
511 CS.w[0]--;
512 }
513 }
514
515 else {
516 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
517 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
518 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
519 M256.w[3] = M256.w[3] + Carry;
520 M256.w[0]++;
521 if (!M256.w[0]) {
522 M256.w[1]++;
523 if (!M256.w[1]) {
524 M256.w[2]++;
525 if (!M256.w[2])
526 M256.w[3]++;
200359e8
L
527 }
528 }
b2a00c89
L
529 if (M256.w[3] < C256.w[3]
530 || (M256.w[3] == C256.w[3]
531 && (M256.w[2] < C256.w[2]
532 || (M256.w[2] == C256.w[2]
533 && (M256.w[1] < C256.w[1]
534 || (M256.w[1] == C256.w[1]
535 && M256.w[0] <= C256.w[0])))))) {
536
200359e8
L
537 CS.w[0]++;
538 if (!CS.w[0])
539 CS.w[1]++;
540 }
200359e8 541 }
b2a00c89
L
542 // RU?
543 if ((rnd_mode) == ROUNDING_UP) {
544 CS.w[0]++;
545 if (!CS.w[0])
546 CS.w[1]++;
547 }
548
549}
200359e8
L
550#endif
551#endif
552
553#ifdef SET_STATUS_FLAGS
b2a00c89 554__set_status_flags (pfpsf, INEXACT_EXCEPTION);
200359e8 555#endif
b2a00c89
L
556get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1,
557 CS);
558#ifdef UNCHANGED_BINARY_STATUS_FLAGS
559(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
560#endif
561BID_RETURN (res);
562
563
200359e8 564}