1 /* Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 /*****************************************************************************
26 * BID128 fma x * y + z
28 ****************************************************************************/
30 #include "bid_internal.h"
33 rounding_correction (unsigned int rnd_mode
,
34 unsigned int is_inexact_lt_midpoint
,
35 unsigned int is_inexact_gt_midpoint
,
36 unsigned int is_midpoint_lt_even
,
37 unsigned int is_midpoint_gt_even
,
39 UINT128
* ptrres
, _IDEC_flags
* ptrfpsf
) {
40 // unbiased true exponent unbexp may be larger than emax
42 UINT128 res
= *ptrres
; // expected to have the correct sign and coefficient
43 // (the exponent field is ignored, as unbexp is used instead)
47 // general correction from RN to RA, RM, RP, RZ
48 // Note: if the result is negative, then is_inexact_lt_midpoint,
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50 // have to be considered as if determined for the absolute value of the
51 // result (so they seem to be reversed)
53 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
54 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
55 *ptrfpsf
|= INEXACT_EXCEPTION
;
57 // apply correction to result calculated with unbounded exponent
58 sign
= res
.w
[1] & MASK_SIGN
;
59 exp
= (UINT64
) (unbexp
+ 6176) << 49; // valid only if expmin<=unbexp<=expmax
60 C_hi
= res
.w
[1] & MASK_COEFF
;
62 if ((!sign
&& ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
63 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
) &&
64 is_midpoint_gt_even
))) ||
65 (sign
&& ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
66 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_DOWN
) &&
67 is_midpoint_gt_even
)))) {
72 if (C_hi
== 0x0001ed09bead87c0ull
&& C_lo
== 0x378d8e6400000000ull
) {
73 // C = 10^34 => rounding overflow
74 C_hi
= 0x0000314dc6448d93ull
;
75 C_lo
= 0x38c15b0a00000000ull
; // 10^33
76 // exp = exp + EXP_P1;
78 exp
= (UINT64
) (unbexp
+ 6176) << 49;
80 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
81 ((sign
&& (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TO_ZERO
)) ||
82 (!sign
&& (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TO_ZERO
)))) {
85 if (C_lo
== 0xffffffffffffffffull
)
87 // check if we crossed into the lower decade
88 if (C_hi
== 0x0000314dc6448d93ull
&& C_lo
== 0x38c15b09ffffffffull
) {
91 C_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
92 C_lo
= 0x378d8e63ffffffffull
;
93 // exp = exp - EXP_P1;
95 exp
= (UINT64
) (unbexp
+ 6176) << 49;
96 } else { // if exp = 0 the result is tiny & inexact
97 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
101 ; // the result is already correct
103 if (unbexp
> expmax
) { // 6111
104 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
106 if (!sign
) { // result is positive
107 if (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // +inf
108 C_hi
= 0x7800000000000000ull
;
109 C_lo
= 0x0000000000000000ull
;
110 } else { // res = +MAXFP = (10^34-1) * 10^emax
111 C_hi
= 0x5fffed09bead87c0ull
;
112 C_lo
= 0x378d8e63ffffffffull
;
114 } else { // result is negative
115 if (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // -inf
116 C_hi
= 0xf800000000000000ull
;
117 C_lo
= 0x0000000000000000ull
;
118 } else { // res = -MAXFP = -(10^34-1) * 10^emax
119 C_hi
= 0xdfffed09bead87c0ull
;
120 C_lo
= 0x378d8e63ffffffffull
;
124 // assemble the result
125 res
.w
[1] = sign
| exp
| C_hi
;
131 add256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
132 // *z = x + yl assume the sum fits in 256 bits
134 z
.w
[0] = x
.w
[0] + y
.w
[0];
135 if (z
.w
[0] < x
.w
[0]) {
137 if (x
.w
[1] == 0x0000000000000000ull
) {
139 if (x
.w
[2] == 0x0000000000000000ull
) {
144 z
.w
[1] = x
.w
[1] + y
.w
[1];
145 if (z
.w
[1] < x
.w
[1]) {
147 if (x
.w
[2] == 0x0000000000000000ull
) {
151 z
.w
[2] = x
.w
[2] + y
.w
[2];
152 if (z
.w
[2] < x
.w
[2]) {
155 z
.w
[3] = x
.w
[3] + y
.w
[3]; // it was assumed that no carry is possible
160 sub256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
161 // *z = x - y; assume x >= y
163 z
.w
[0] = x
.w
[0] - y
.w
[0];
164 if (z
.w
[0] > x
.w
[0]) {
166 if (x
.w
[1] == 0xffffffffffffffffull
) {
168 if (x
.w
[2] == 0xffffffffffffffffull
) {
173 z
.w
[1] = x
.w
[1] - y
.w
[1];
174 if (z
.w
[1] > x
.w
[1]) {
176 if (x
.w
[2] == 0xffffffffffffffffull
) {
180 z
.w
[2] = x
.w
[2] - y
.w
[2];
181 if (z
.w
[2] > x
.w
[2]) {
184 z
.w
[3] = x
.w
[3] - y
.w
[3]; // no borrow possible, because x >= y
190 nr_digits256 (UINT256 R256
) {
192 // determine the number of decimal digits in R256
193 if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 && R256
.w
[1] == 0x0) {
194 // between 1 and 19 digits
195 for (ind
= 1; ind
<= 19; ind
++) {
196 if (R256
.w
[0] < ten2k64
[ind
]) {
201 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 &&
202 (R256
.w
[1] < ten2k128
[0].w
[1] ||
203 (R256
.w
[1] == ten2k128
[0].w
[1]
204 && R256
.w
[0] < ten2k128
[0].w
[0]))) {
207 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0) {
208 // between 21 and 38 digits
209 for (ind
= 1; ind
<= 18; ind
++) {
210 if (R256
.w
[1] < ten2k128
[ind
].w
[1] ||
211 (R256
.w
[1] == ten2k128
[ind
].w
[1] &&
212 R256
.w
[0] < ten2k128
[ind
].w
[0])) {
218 } else if (R256
.w
[3] == 0x0 &&
219 (R256
.w
[2] < ten2k256
[0].w
[2] ||
220 (R256
.w
[2] == ten2k256
[0].w
[2] &&
221 R256
.w
[1] < ten2k256
[0].w
[1]) ||
222 (R256
.w
[2] == ten2k256
[0].w
[2] &&
223 R256
.w
[1] == ten2k256
[0].w
[1] &&
224 R256
.w
[0] < ten2k256
[0].w
[0]))) {
228 // between 40 and 68 digits
229 for (ind
= 1; ind
<= 29; ind
++) {
230 if (R256
.w
[3] < ten2k256
[ind
].w
[3] ||
231 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
232 R256
.w
[2] < ten2k256
[ind
].w
[2]) ||
233 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
234 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
235 R256
.w
[1] < ten2k256
[ind
].w
[1]) ||
236 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
237 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
238 R256
.w
[1] == ten2k256
[ind
].w
[1] &&
239 R256
.w
[0] < ten2k256
[ind
].w
[0])) {
249 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
250 // use the rounding information from ptr_is_* to avoid a double rounding error
252 add_and_round (int q3
,
262 int *ptr_is_midpoint_lt_even
,
263 int *ptr_is_midpoint_gt_even
,
264 int *ptr_is_inexact_lt_midpoint
,
265 int *ptr_is_inexact_gt_midpoint
,
266 _IDEC_flags
* ptrfpsf
, UINT128
* ptrres
) {
275 int is_midpoint_lt_even
= 0;
276 int is_midpoint_gt_even
= 0;
277 int is_inexact_lt_midpoint
= 0;
278 int is_inexact_gt_midpoint
= 0;
279 int is_midpoint_lt_even0
= 0;
280 int is_midpoint_gt_even0
= 0;
281 int is_inexact_lt_midpoint0
= 0;
282 int is_inexact_gt_midpoint0
= 0;
287 // int gt_half_ulp = 0;
288 UINT128 res
= *ptrres
;
290 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
291 scale
= q4
- delta
- q3
; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
292 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
294 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
295 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
301 } else if (scale
<= 19) { // 10^scale fits in 64 bits
303 P128
.w
[0] = ten2k64
[scale
];
304 __mul_128x128_to_256 (R256
, P128
, C3
);
305 } else if (scale
<= 38) { // 10^scale fits in 128 bits
306 __mul_128x128_to_256 (R256
, ten2k128
[scale
- 20], C3
);
307 } else if (scale
<= 57) { // 39 <= scale <= 57
308 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
309 // (10^67 has 223 bits; 10^69 has 230 bits);
310 // must split the computation:
311 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
312 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
313 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
314 __mul_64x128_to_128 (R128
, ten2k64
[scale
- 38], C3
);
315 // now multiply R128 by 10^38
316 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
317 } else { // 58 <= scale <= 66
318 // 10^scale takes between 193 and 220 bits,
319 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
320 // must split the computation:
321 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
322 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
323 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
324 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
325 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
326 __mul_64x128_to_128 (R128
, C3
.w
[0], ten2k128
[scale
- 58]);
327 // now calculate 10*38 * 10^(scale-38) * C3
328 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
330 // C3 * 10^scale is now in R256
332 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
333 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
335 // add/subtract C4 and C3 * 10^scale; the exponent is e4
336 if (p_sign
== z_sign
) { // R256 = C4 + R256
337 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
338 // but may require rounding
339 add256 (C4
, R256
, &R256
);
340 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
341 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
342 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
343 // but may require rounding
345 // compare first R256 = C3 * 10^scale and C4
346 if (R256
.w
[3] > C4
.w
[3] || (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] > C4
.w
[2]) ||
347 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] > C4
.w
[1]) ||
348 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] == C4
.w
[1] &&
349 R256
.w
[0] >= C4
.w
[0])) { // C3 * 10^scale >= C4
350 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
351 // but may require rounding
352 sub256 (R256
, C4
, &R256
);
353 // flip p_sign too, because the result has the sign of z
355 } else { // if C4 > C3 * 10^scale
356 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
357 // but may require rounding
358 sub256 (C4
, R256
, &R256
);
360 // if the result is pure zero, the sign depends on the rounding mode
361 // (x*y and z had opposite signs)
362 if (R256
.w
[3] == 0x0ull
&& R256
.w
[2] == 0x0ull
&&
363 R256
.w
[1] == 0x0ull
&& R256
.w
[0] == 0x0ull
) {
364 if (rnd_mode
!= ROUNDING_DOWN
)
365 p_sign
= 0x0000000000000000ull
;
367 p_sign
= 0x8000000000000000ull
;
368 // the exponent is max (e4, expmin)
372 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49);
379 // determine the number of decimal digits in R256
380 ind
= nr_digits256 (R256
);
382 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
383 // round to the destination precision, with unbounded exponent
386 // result rounded to the destination precision with unbounded exponent
388 if (ind
+ e4
< p34
+ expmin
) {
389 is_tiny
= 1; // applies to all rounding modes
391 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R256
.w
[1];
392 res
.w
[0] = R256
.w
[0];
393 // Note: res is correct only if expmin <= e4 <= expmax
394 } else { // if (ind > p34)
395 // if more than P digits, round to nearest to P digits
396 // round R256 to p34 digits
397 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
399 P128
.w
[1] = R256
.w
[1];
400 P128
.w
[0] = R256
.w
[0];
401 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
402 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
403 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
404 } else if (ind
<= 57) {
405 P192
.w
[2] = R256
.w
[2];
406 P192
.w
[1] = R256
.w
[1];
407 P192
.w
[0] = R256
.w
[0];
408 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
409 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
410 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
411 R128
.w
[1] = R192
.w
[1];
412 R128
.w
[0] = R192
.w
[0];
413 } else { // if (ind <= 68)
414 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
415 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
416 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
417 R128
.w
[1] = R256
.w
[1];
418 R128
.w
[0] = R256
.w
[0];
420 // the rounded result has p34 = 34 digits
421 e4
= e4
+ x0
+ incr_exp
;
422 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
424 is_tiny
= 1; // for other rounding modes apply correction
427 // for RM, RP, RZ, RA apply correction in order to determine tininess
428 // but do not save the result; apply the correction to
429 // (-1)^p_sign * significand * 10^0
430 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| R128
.w
[1];
431 P128
.w
[0] = R128
.w
[0];
432 rounding_correction (rnd_mode
,
433 is_inexact_lt_midpoint
,
434 is_inexact_gt_midpoint
, is_midpoint_lt_even
,
435 is_midpoint_gt_even
, 0, &P128
, ptrfpsf
);
436 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
437 // the number of digits in the significand is p34 = 34
438 if (e4
+ scale
< expmin
) {
442 ind
= p34
; // the number of decimal digits in the signifcand of res
443 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R128
.w
[1]; // RN
444 res
.w
[0] = R128
.w
[0];
445 // Note: res is correct only if expmin <= e4 <= expmax
446 // set the inexact flag after rounding with bounded exponent, if any
448 // at this point we have the result rounded with unbounded exponent in
449 // res and we know its tininess:
450 // res = (-1)^p_sign * significand * 10^e4,
451 // where q (significand) = ind <= p34
452 // Note: res is correct only if expmin <= e4 <= expmax
454 // check for overflow if RN
455 if (rnd_mode
== ROUNDING_TO_NEAREST
&& (ind
+ e4
) > (p34
+ expmax
)) {
456 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
457 res
.w
[0] = 0x0000000000000000ull
;
459 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
460 return; // BID_RETURN (res)
461 } // else not overflow or not RN, so continue
463 // if (e4 >= expmin) we have the result rounded with bounded exponent
465 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
466 // where the result rounded [at most] once is
467 // (-1)^p_sign * significand_res * 10^e4
469 // avoid double rounding error
470 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
471 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
472 is_midpoint_lt_even0
= is_midpoint_lt_even
;
473 is_midpoint_gt_even0
= is_midpoint_gt_even
;
474 is_inexact_lt_midpoint
= 0;
475 is_inexact_gt_midpoint
= 0;
476 is_midpoint_lt_even
= 0;
477 is_midpoint_gt_even
= 0;
480 // nothing is left of res when moving the decimal point left x0 digits
481 is_inexact_lt_midpoint
= 1;
482 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
483 res
.w
[0] = 0x0000000000000000ull
;
485 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
486 // this is <, =, or > 1/2 ulp
487 // compare the ind-digit value in the significand of res with
488 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
489 // less than, equal to, or greater than 1/2 ulp (significand of res)
490 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
491 R128
.w
[0] = res
.w
[0];
493 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
495 is_inexact_lt_midpoint
= 1;
496 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
498 is_midpoint_gt_even
= 1;
499 } else { // > 1/2 ulp
501 is_inexact_gt_midpoint
= 1;
503 } else { // if (ind <= 38) {
504 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
505 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
506 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
508 is_inexact_lt_midpoint
= 1;
509 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
510 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
512 is_midpoint_gt_even
= 1;
513 } else { // > 1/2 ulp
515 is_inexact_gt_midpoint
= 1;
518 if (lt_half_ulp
|| eq_half_ulp
) {
519 // res = +0.0 * 10^expmin
520 res
.w
[1] = 0x0000000000000000ull
;
521 res
.w
[0] = 0x0000000000000000ull
;
522 } else { // if (gt_half_ulp)
523 // res = +1 * 10^expmin
524 res
.w
[1] = 0x0000000000000000ull
;
525 res
.w
[0] = 0x0000000000000001ull
;
527 res
.w
[1] = p_sign
| res
.w
[1];
529 } else { // if (1 <= x0 <= ind - 1 <= 33)
530 // round the ind-digit result to ind - x0 digits
532 if (ind
<= 18) { // 2 <= ind <= 18
533 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
534 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
535 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
538 } else if (ind
<= 38) {
539 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
540 P128
.w
[0] = res
.w
[0];
541 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
542 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
543 &is_inexact_lt_midpoint
,
544 &is_inexact_gt_midpoint
);
546 e4
= e4
+ x0
; // expmin
547 // we want the exponent to be expmin, so if incr_exp = 1 then
548 // multiply the rounded result by 10 - it will still fit in 113 bits
551 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
552 P128
.w
[0] = res
.w
[0];
553 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
556 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.w
[1] & MASK_COEFF
);
557 // avoid a double rounding error
558 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
559 is_midpoint_lt_even
) { // double rounding error upward
562 if (res
.w
[0] == 0xffffffffffffffffull
)
564 // Note: a double rounding error upward is not possible; for this
565 // the result after the first rounding would have to be 99...95
566 // (35 digits in all), possibly followed by a number of zeros; this
567 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
568 is_midpoint_lt_even
= 0;
569 is_inexact_lt_midpoint
= 1;
570 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
571 is_midpoint_gt_even
) { // double rounding error downward
576 is_midpoint_gt_even
= 0;
577 is_inexact_gt_midpoint
= 1;
578 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
579 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
580 // if this second rounding was exact the result may still be
581 // inexact because of the first rounding
582 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
583 is_inexact_gt_midpoint
= 1;
585 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
586 is_inexact_lt_midpoint
= 1;
588 } else if (is_midpoint_gt_even
&&
589 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
590 // pulled up to a midpoint
591 is_inexact_lt_midpoint
= 1;
592 is_inexact_gt_midpoint
= 0;
593 is_midpoint_lt_even
= 0;
594 is_midpoint_gt_even
= 0;
595 } else if (is_midpoint_lt_even
&&
596 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
597 // pulled down to a midpoint
598 is_inexact_lt_midpoint
= 0;
599 is_inexact_gt_midpoint
= 1;
600 is_midpoint_lt_even
= 0;
601 is_midpoint_gt_even
= 0;
607 // res contains the correct result
608 // apply correction if not rounding to nearest
609 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
610 rounding_correction (rnd_mode
,
611 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
,
612 is_midpoint_lt_even
, is_midpoint_gt_even
,
615 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
616 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
617 // set the inexact flag
618 *ptrfpsf
|= INEXACT_EXCEPTION
;
620 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
623 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
624 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
625 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
626 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
632 #if DECIMAL_CALL_BY_REFERENCE
634 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
635 int *ptr_is_midpoint_gt_even
,
636 int *ptr_is_inexact_lt_midpoint
,
637 int *ptr_is_inexact_gt_midpoint
, UINT128
* pres
,
638 UINT128
* px
, UINT128
* py
,
640 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
642 UINT128 x
= *px
, y
= *py
, z
= *pz
;
643 #if !DECIMAL_GLOBAL_ROUNDING
644 unsigned int rnd_mode
= *prnd_mode
;
648 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
649 int *ptr_is_midpoint_gt_even
,
650 int *ptr_is_inexact_lt_midpoint
,
651 int *ptr_is_inexact_gt_midpoint
, UINT128 x
, UINT128 y
,
652 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
653 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
656 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
657 UINT64 x_sign
, y_sign
, z_sign
, p_sign
, tmp_sign
;
658 UINT64 x_exp
= 0, y_exp
= 0, z_exp
= 0, p_exp
;
662 int q1
= 0, q2
= 0, q3
= 0, q4
;
664 int scale
, ind
, delta
, x0
;
665 int p34
= P34
; // used to modify the limit on the number of digits
667 int x_nr_bits
, y_nr_bits
, z_nr_bits
;
668 unsigned int save_fpsf
;
669 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0;
670 int is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
671 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0;
672 int is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
684 // the following are based on the table of special cases for fma; the NaN
685 // behavior is similar to that of the IA-64 Architecture fma
687 // identify cases where at least one operand is NaN
692 if ((y
.w
[1] & MASK_NAN
) == MASK_NAN
) { // y is NAN
693 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
694 // check first for non-canonical NaN payload
695 if (((y
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
696 (((y
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
697 (y
.w
[0] > 0x38c15b09ffffffffull
))) {
698 y
.w
[1] = y
.w
[1] & 0xffffc00000000000ull
;
701 if ((y
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // y is SNAN
703 *pfpsf
|= INVALID_EXCEPTION
;
705 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
707 } else { // y is QNaN
709 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
711 // if z = SNaN or x = SNaN signal invalid exception
712 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
||
713 (x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
715 *pfpsf
|= INVALID_EXCEPTION
;
718 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
719 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
720 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
721 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
724 } else if ((z
.w
[1] & MASK_NAN
) == MASK_NAN
) { // z is NAN
725 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
726 // check first for non-canonical NaN payload
727 if (((z
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
728 (((z
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
729 (z
.w
[0] > 0x38c15b09ffffffffull
))) {
730 z
.w
[1] = z
.w
[1] & 0xffffc00000000000ull
;
733 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // z is SNAN
735 *pfpsf
|= INVALID_EXCEPTION
;
737 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
739 } else { // z is QNaN
741 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
743 // if x = SNaN signal invalid exception
744 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
746 *pfpsf
|= INVALID_EXCEPTION
;
749 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
750 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
751 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
752 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
755 } else if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
756 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
757 // check first for non-canonical NaN payload
758 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
759 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
760 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
761 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
764 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
766 *pfpsf
|= INVALID_EXCEPTION
;
768 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
770 } else { // x is QNaN
772 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
775 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
776 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
777 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
778 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
782 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
783 // for non-canonical values
785 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
786 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
788 if ((x
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // x != inf
789 // if x is not infinity check for non-canonical values - treated as zero
790 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
792 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
793 C1
.w
[1] = 0; // significand high
794 C1
.w
[0] = 0; // significand low
795 } else { // G0_G1 != 11
796 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
797 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
798 (C1
.w
[1] == 0x0001ed09bead87c0ull
&&
799 C1
.w
[0] > 0x378d8e63ffffffffull
)) {
800 // x is non-canonical if coefficient is larger than 10^34 -1
803 } else { // canonical
808 y_sign
= y
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
809 C2
.w
[1] = y
.w
[1] & MASK_COEFF
;
811 if ((y
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // y != inf
812 // if y is not infinity check for non-canonical values - treated as zero
813 if ((y
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
815 y_exp
= (y
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
816 C2
.w
[1] = 0; // significand high
817 C2
.w
[0] = 0; // significand low
818 } else { // G0_G1 != 11
819 y_exp
= y
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
820 if (C2
.w
[1] > 0x0001ed09bead87c0ull
||
821 (C2
.w
[1] == 0x0001ed09bead87c0ull
&&
822 C2
.w
[0] > 0x378d8e63ffffffffull
)) {
823 // y is non-canonical if coefficient is larger than 10^34 -1
826 } else { // canonical
831 z_sign
= z
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
832 C3
.w
[1] = z
.w
[1] & MASK_COEFF
;
834 if ((z
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // z != inf
835 // if z is not infinity check for non-canonical values - treated as zero
836 if ((z
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
838 z_exp
= (z
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
839 C3
.w
[1] = 0; // significand high
840 C3
.w
[0] = 0; // significand low
841 } else { // G0_G1 != 11
842 z_exp
= z
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
843 if (C3
.w
[1] > 0x0001ed09bead87c0ull
||
844 (C3
.w
[1] == 0x0001ed09bead87c0ull
&&
845 C3
.w
[0] > 0x378d8e63ffffffffull
)) {
846 // z is non-canonical if coefficient is larger than 10^34 -1
849 } else { // canonical
855 p_sign
= x_sign
^ y_sign
; // sign of the product
857 // identify cases where at least one operand is infinity
859 if ((x
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // x = inf
860 if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
861 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
862 if (p_sign
== z_sign
) {
863 res
.w
[1] = z_sign
| MASK_INF
;
866 // return QNaN Indefinite
867 res
.w
[1] = 0x7c00000000000000ull
;
868 res
.w
[0] = 0x0000000000000000ull
;
870 *pfpsf
|= INVALID_EXCEPTION
;
872 } else { // z = 0 or z = f
873 res
.w
[1] = p_sign
| MASK_INF
;
876 } else if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f
877 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
878 if (p_sign
== z_sign
) {
879 res
.w
[1] = z_sign
| MASK_INF
;
882 // return QNaN Indefinite
883 res
.w
[1] = 0x7c00000000000000ull
;
884 res
.w
[0] = 0x0000000000000000ull
;
886 *pfpsf
|= INVALID_EXCEPTION
;
888 } else { // z = 0 or z = f
889 res
.w
[1] = p_sign
| MASK_INF
;
893 // return QNaN Indefinite
894 res
.w
[1] = 0x7c00000000000000ull
;
895 res
.w
[0] = 0x0000000000000000ull
;
897 *pfpsf
|= INVALID_EXCEPTION
;
899 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
900 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
901 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
902 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
905 } else if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
906 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
907 // x = f, necessarily
908 if ((p_sign
!= z_sign
)
909 || (C1
.w
[1] == 0x0ull
&& C1
.w
[0] == 0x0ull
)) {
910 // return QNaN Indefinite
911 res
.w
[1] = 0x7c00000000000000ull
;
912 res
.w
[0] = 0x0000000000000000ull
;
914 *pfpsf
|= INVALID_EXCEPTION
;
916 res
.w
[1] = z_sign
| MASK_INF
;
919 } else if (C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) { // x = 0
921 // return QNaN Indefinite
922 res
.w
[1] = 0x7c00000000000000ull
;
923 res
.w
[0] = 0x0000000000000000ull
;
925 *pfpsf
|= INVALID_EXCEPTION
;
927 // x = f and z = 0, f, necessarily
928 res
.w
[1] = p_sign
| MASK_INF
;
931 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
932 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
933 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
934 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
937 } else if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
938 // x = 0, f and y = 0, f, necessarily
939 res
.w
[1] = z_sign
| MASK_INF
;
941 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
942 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
943 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
944 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
949 true_p_exp
= (x_exp
>> 49) - 6176 + (y_exp
>> 49) - 6176;
950 if (true_p_exp
< -6176)
951 p_exp
= 0; // cannot be less than EXP_MIN
953 p_exp
= (UINT64
) (true_p_exp
+ 6176) << 49;
955 if (((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) || (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) && C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // (x = 0 or y = 0) and z = 0
958 res
.w
[1] = p_exp
; // preferred exponent
960 res
.w
[1] = z_exp
; // preferred exponent
961 if (p_sign
== z_sign
) {
964 } else { // x * y and z have opposite signs
965 if (rnd_mode
== ROUNDING_DOWN
) {
967 res
.w
[1] |= MASK_SIGN
;
975 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
976 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
977 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
978 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
982 // from this point on, we may need to know the number of decimal digits
983 // in the significands of x, y, z when x, y, z != 0
985 if (C1
.w
[1] != 0 || C1
.w
[0] != 0) { // x = f (non-zero finite)
986 // q1 = nr. of decimal digits in x
987 // determine first the nr. of bits in x
989 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
990 // split the 64-bit value in two 32-bit halves to avoid rounding errors
991 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
992 tmp
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
994 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
996 tmp
.d
= (double) (C1
.w
[0]); // exact conversion
998 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1000 } else { // if x < 2^53
1001 tmp
.d
= (double) C1
.w
[0]; // exact conversion
1003 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1005 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1006 tmp
.d
= (double) C1
.w
[1]; // exact conversion
1008 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1010 q1
= nr_digits
[x_nr_bits
- 1].digits
;
1012 q1
= nr_digits
[x_nr_bits
- 1].digits1
;
1013 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1014 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1015 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1020 if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f (non-zero finite)
1022 if (C2
.w
[0] >= 0x0020000000000000ull
) { // y >= 2^53
1023 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1024 if (C2
.w
[0] >= 0x0000000100000000ull
) { // y >= 2^32
1025 tmp
.d
= (double) (C2
.w
[0] >> 32); // exact conversion
1027 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1028 } else { // y < 2^32
1029 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1031 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1033 } else { // if y < 2^53
1034 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1036 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1038 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1039 tmp
.d
= (double) C2
.w
[1]; // exact conversion
1041 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1044 q2
= nr_digits
[y_nr_bits
].digits
;
1046 q2
= nr_digits
[y_nr_bits
].digits1
;
1047 if (C2
.w
[1] > nr_digits
[y_nr_bits
].threshold_hi
||
1048 (C2
.w
[1] == nr_digits
[y_nr_bits
].threshold_hi
&&
1049 C2
.w
[0] >= nr_digits
[y_nr_bits
].threshold_lo
))
1054 if (C3
.w
[1] != 0 || C3
.w
[0] != 0) { // z = f (non-zero finite)
1056 if (C3
.w
[0] >= 0x0020000000000000ull
) { // z >= 2^53
1057 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1058 if (C3
.w
[0] >= 0x0000000100000000ull
) { // z >= 2^32
1059 tmp
.d
= (double) (C3
.w
[0] >> 32); // exact conversion
1061 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1062 } else { // z < 2^32
1063 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1065 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1067 } else { // if z < 2^53
1068 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1070 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1072 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1073 tmp
.d
= (double) C3
.w
[1]; // exact conversion
1075 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1078 q3
= nr_digits
[z_nr_bits
].digits
;
1080 q3
= nr_digits
[z_nr_bits
].digits1
;
1081 if (C3
.w
[1] > nr_digits
[z_nr_bits
].threshold_hi
||
1082 (C3
.w
[1] == nr_digits
[z_nr_bits
].threshold_hi
&&
1083 C3
.w
[0] >= nr_digits
[z_nr_bits
].threshold_lo
))
1088 if ((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) ||
1089 (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) {
1091 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1092 // the result is z, but need to get the preferred exponent
1093 if (z_exp
<= p_exp
) { // the preferred exponent is z_exp
1094 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | C3
.w
[1];
1096 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1097 // return (C3 * 10^scale) * 10^(z_exp - scale)
1098 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1100 ind
= (z_exp
- p_exp
) >> 49;
1104 res
.w
[1] = z
.w
[1]; // & MASK_COEFF, which is redundant
1106 } else if (q3
<= 19) { // z fits in 64 bits
1107 if (scale
<= 19) { // 10^scale fits in 64 bits
1108 // 64 x 64 C3.w[0] * ten2k64[scale]
1109 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1110 } else { // 10^scale fits in 128 bits
1111 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1112 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1114 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1115 // 64 x 128 ten2k64[scale] * C3
1116 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1118 // subtract scale from the exponent
1119 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1120 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1122 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1123 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1124 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1125 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1129 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1132 e1
= (x_exp
>> 49) - 6176; // unbiased exponent of x
1133 e2
= (y_exp
>> 49) - 6176; // unbiased exponent of y
1134 e3
= (z_exp
>> 49) - 6176; // unbiased exponent of z
1135 e4
= e1
+ e2
; // unbiased exponent of the exact x * y
1137 // calculate C1 * C2 and its number of decimal digits, q4
1139 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1140 // where 2 <= q1 + q2 <= 68
1141 // calculate C4 = C1 * C2 and determine q
1142 C4
.w
[3] = C4
.w
[2] = C4
.w
[1] = C4
.w
[0] = 0;
1143 if (q1
+ q2
<= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1144 C4
.w
[0] = C1
.w
[0] * C2
.w
[0];
1145 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1146 if (C4
.w
[0] < ten2k64
[q1
+ q2
- 1])
1147 q4
= q1
+ q2
- 1; // q4 in [1, 18]
1149 q4
= q1
+ q2
; // q4 in [2, 19]
1150 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1151 } else if (q1
+ q2
== 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1152 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1153 __mul_64x64_to_128MACH (C4
, C1
.w
[0], C2
.w
[0]);
1154 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1155 if (C4
.w
[1] == 0 && C4
.w
[0] < ten2k64
[19]) { // 19 = q1+q2-1
1156 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1157 q4
= 19; // 19 = q1 + q2 - 1
1159 // if (C4.w[1] == 0)
1160 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1163 q4
= 20; // 20 = q1 + q2
1165 } else if (q1
+ q2
<= 38) { // 21 <= q1 + q2 <= 38
1166 // C4 = C1 * C2 fits in 64 or 128 bits
1167 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1168 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1170 __mul_128x64_to_128 (C4
, C1
.w
[0], C2
);
1171 } else { // q2 <= 19
1172 __mul_128x64_to_128 (C4
, C2
.w
[0], C1
);
1174 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1175 if (C4
.w
[1] < ten2k128
[q1
+ q2
- 21].w
[1] ||
1176 (C4
.w
[1] == ten2k128
[q1
+ q2
- 21].w
[1] &&
1177 C4
.w
[0] < ten2k128
[q1
+ q2
- 21].w
[0])) {
1178 // if (C4.w[1] == 0) // q4 = 20, necessarily
1179 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1182 q4
= q1
+ q2
- 1; // q4 in [20, 37]
1184 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1185 q4
= q1
+ q2
; // q4 in [21, 38]
1187 } else if (q1
+ q2
== 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1188 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1189 // may replace this by 128x128_to192
1190 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] is 0
1191 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1192 if (C4
.w
[2] == 0 && (C4
.w
[1] < ten2k128
[18].w
[1] ||
1193 (C4
.w
[1] == ten2k128
[18].w
[1]
1194 && C4
.w
[0] < ten2k128
[18].w
[0]))) {
1195 // 18 = 38 - 20 = q1+q2-1 - 20
1196 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1197 q4
= 38; // 38 = q1 + q2 - 1
1199 // if (C4.w[2] == 0)
1200 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1203 q4
= 39; // 39 = q1 + q2
1205 } else if (q1
+ q2
<= 57) { // 40 <= q1 + q2 <= 57
1206 // C4 = C1 * C2 fits in 128 or 192 bits
1207 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1208 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1209 // may fit in 64 bits
1210 if (C1
.w
[1] == 0) { // C1 fits in 64 bits
1211 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1212 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
);
1213 } else if (C2
.w
[1] == 0) { // C2 fits in 64 bits
1214 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1215 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
);
1216 } else { // both C1 and C2 require 128 bits
1217 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1218 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1220 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1221 if (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1222 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1223 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1224 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1225 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))) {
1226 // if (C4.w[2] == 0) // q4 = 39, necessarily
1227 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1230 q4
= q1
+ q2
- 1; // q4 in [39, 56]
1232 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1233 q4
= q1
+ q2
; // q4 in [40, 57]
1235 } else if (q1
+ q2
== 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1236 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1237 // may fit in 64 bits
1238 if (C1
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1239 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
); // may use 64x128_to_192
1240 } else if (C2
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1241 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
); // may use 64x128_to_192
1242 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1243 __mul_128x128_to_256 (C4
, C1
, C2
);
1245 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1246 if (C4
.w
[3] == 0 && (C4
.w
[2] < ten2k256
[18].w
[2] ||
1247 (C4
.w
[2] == ten2k256
[18].w
[2]
1248 && (C4
.w
[1] < ten2k256
[18].w
[1]
1249 || (C4
.w
[1] == ten2k256
[18].w
[1]
1250 && C4
.w
[0] < ten2k256
[18].w
[0]))))) {
1251 // 18 = 57 - 39 = q1+q2-1 - 39
1252 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1253 q4
= 57; // 57 = q1 + q2 - 1
1255 // if (C4.w[3] == 0)
1256 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1259 q4
= 58; // 58 = q1 + q2
1261 } else { // if 59 <= q1 + q2 <= 68
1262 // C4 = C1 * C2 fits in 192 or 256 bits
1263 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1264 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1266 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1267 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1268 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1269 if (C4
.w
[3] < ten2k256
[q1
+ q2
- 40].w
[3] ||
1270 (C4
.w
[3] == ten2k256
[q1
+ q2
- 40].w
[3] &&
1271 (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1272 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1273 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1274 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1275 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))))) {
1276 // if (C4.w[3] == 0) // q4 = 58, necessarily
1277 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1280 q4
= q1
+ q2
- 1; // q4 in [58, 67]
1282 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1283 q4
= q1
+ q2
; // q4 in [59, 68]
1287 if (C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // x = f, y = f, z = 0
1288 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
1293 // truncate C4 to p34 digits into res
1294 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1297 P128
.w
[1] = C4
.w
[1];
1298 P128
.w
[0] = C4
.w
[0];
1299 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1300 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1301 &is_inexact_lt_midpoint
,
1302 &is_inexact_gt_midpoint
);
1303 } else if (q4
<= 57) { // 35 <= q4 <= 57
1304 P192
.w
[2] = C4
.w
[2];
1305 P192
.w
[1] = C4
.w
[1];
1306 P192
.w
[0] = C4
.w
[0];
1307 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
1308 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1309 &is_inexact_lt_midpoint
,
1310 &is_inexact_gt_midpoint
);
1311 res
.w
[0] = R192
.w
[0];
1312 res
.w
[1] = R192
.w
[1];
1313 } else { // if (q4 <= 68)
1314 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
1315 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1316 &is_inexact_lt_midpoint
,
1317 &is_inexact_gt_midpoint
);
1318 res
.w
[0] = R256
.w
[0];
1319 res
.w
[1] = R256
.w
[1];
1326 // res is now the coefficient of the result rounded to the destination
1327 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1328 } else { // if (q4 <= p34)
1329 // C4 * 10^e4 is the result rounded to the destination precision, with
1330 // unbounded exponent (which is exact)
1332 if ((q4
+ e4
<= p34
+ expmax
) && (e4
> expmax
)) {
1333 // e4 is too large, but can be brought within range by scaling up C4
1334 scale
= e4
- expmax
; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1335 // res = (C4 * 10^scale) * 10^expmax
1336 if (q4
<= 19) { // C4 fits in 64 bits
1337 if (scale
<= 19) { // 10^scale fits in 64 bits
1338 // 64 x 64 C4.w[0] * ten2k64[scale]
1339 __mul_64x64_to_128MACH (res
, C4
.w
[0], ten2k64
[scale
]);
1340 } else { // 10^scale fits in 128 bits
1341 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1342 __mul_128x64_to_128 (res
, C4
.w
[0], ten2k128
[scale
- 20]);
1344 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1345 // 64 x 128 ten2k64[scale] * CC43
1346 __mul_128x64_to_128 (res
, ten2k64
[scale
], C4
);
1348 e4
= e4
- scale
; // expmax
1354 // res is the coefficient of the result rounded to the destination
1355 // precision, with unbounded exponent (it has q4 digits); the exponent
1356 // is e4 (exact result)
1359 // check for overflow
1360 if (q4
+ e4
> p34
+ expmax
) {
1361 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1362 res
.w
[1] = p_sign
| 0x7800000000000000ull
; // +/-inf
1363 res
.w
[0] = 0x0000000000000000ull
;
1364 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1366 res
.w
[1] = p_sign
| res
.w
[1];
1367 rounding_correction (rnd_mode
,
1368 is_inexact_lt_midpoint
,
1369 is_inexact_gt_midpoint
,
1370 is_midpoint_lt_even
, is_midpoint_gt_even
,
1373 *pfpsf
|= save_fpsf
;
1374 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1375 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1376 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1377 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1381 // check for underflow
1382 if (q4
+ e4
< expmin
+ P34
) {
1383 is_tiny
= 1; // the result is tiny
1385 // if e4 < expmin, we must truncate more of res
1386 x0
= expmin
- e4
; // x0 >= 1
1387 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
1388 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
1389 is_midpoint_lt_even0
= is_midpoint_lt_even
;
1390 is_midpoint_gt_even0
= is_midpoint_gt_even
;
1391 is_inexact_lt_midpoint
= 0;
1392 is_inexact_gt_midpoint
= 0;
1393 is_midpoint_lt_even
= 0;
1394 is_midpoint_gt_even
= 0;
1395 // the number of decimal digits in res is q4
1396 if (x0
< q4
) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1397 if (q4
<= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1398 round64_2_18 (q4
, x0
, res
.w
[0], &R64
, &incr_exp
,
1399 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1400 &is_inexact_lt_midpoint
,
1401 &is_inexact_gt_midpoint
);
1403 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1404 R64
= ten2k64
[q4
- x0
];
1406 // res.w[1] = 0; (from above)
1408 } else { // if (q4 <= 34)
1410 P128
.w
[1] = res
.w
[1];
1411 P128
.w
[0] = res
.w
[0];
1412 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1413 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1414 &is_inexact_lt_midpoint
,
1415 &is_inexact_gt_midpoint
);
1417 // increase coefficient by a factor of 10; this will be <= 10^33
1418 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1419 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
1421 res
.w
[0] = ten2k64
[q4
- x0
];
1422 } else { // 20 <= q4 - x0 <= 37
1423 res
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
1424 res
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
1428 e4
= e4
+ x0
; // expmin
1429 } else if (x0
== q4
) {
1430 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1431 // determine relationship with 1/2 ulp
1433 if (res
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1435 is_inexact_lt_midpoint
= 1;
1436 } else if (res
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1438 is_midpoint_gt_even
= 1;
1439 } else { // > 1/2 ulp
1441 is_inexact_gt_midpoint
= 1;
1443 } else { // if (q4 <= 34)
1444 if (res
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1445 (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1446 res
.w
[0] < midpoint128
[q4
- 20].w
[0])) { // < 1/2 ulp
1448 is_inexact_lt_midpoint
= 1;
1449 } else if (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1450 res
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1452 is_midpoint_gt_even
= 1;
1453 } else { // > 1/2 ulp
1455 is_inexact_gt_midpoint
= 1;
1458 if (lt_half_ulp
|| eq_half_ulp
) {
1459 // res = +0.0 * 10^expmin
1460 res
.w
[1] = 0x0000000000000000ull
;
1461 res
.w
[0] = 0x0000000000000000ull
;
1462 } else { // if (gt_half_ulp)
1463 // res = +1 * 10^expmin
1464 res
.w
[1] = 0x0000000000000000ull
;
1465 res
.w
[0] = 0x0000000000000001ull
;
1468 } else { // if (x0 > q4)
1469 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1473 is_inexact_lt_midpoint
= 1;
1475 // avoid a double rounding error
1476 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
1477 is_midpoint_lt_even
) { // double rounding error upward
1480 if (res
.w
[0] == 0xffffffffffffffffull
)
1482 // Note: a double rounding error upward is not possible; for this
1483 // the result after the first rounding would have to be 99...95
1484 // (35 digits in all), possibly followed by a number of zeros; this
1485 // not possible for f * f + 0
1486 is_midpoint_lt_even
= 0;
1487 is_inexact_lt_midpoint
= 1;
1488 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
1489 is_midpoint_gt_even
) { // double rounding error downward
1494 is_midpoint_gt_even
= 0;
1495 is_inexact_gt_midpoint
= 1;
1496 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
1497 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
1498 // if this second rounding was exact the result may still be
1499 // inexact because of the first rounding
1500 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
1501 is_inexact_gt_midpoint
= 1;
1503 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
1504 is_inexact_lt_midpoint
= 1;
1506 } else if (is_midpoint_gt_even
&&
1507 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
1508 // pulled up to a midpoint
1509 is_inexact_lt_midpoint
= 1;
1510 is_inexact_gt_midpoint
= 0;
1511 is_midpoint_lt_even
= 0;
1512 is_midpoint_gt_even
= 0;
1513 } else if (is_midpoint_lt_even
&&
1514 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
1515 // pulled down to a midpoint
1516 is_inexact_lt_midpoint
= 0;
1517 is_inexact_gt_midpoint
= 1;
1518 is_midpoint_lt_even
= 0;
1519 is_midpoint_gt_even
= 0;
1523 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1525 // if (e3 < e4) the preferred exponent is e3
1526 // return (C4 * 10^scale) * 10^(e4 - scale)
1527 // where scale = min (p34-q4, (e4 - e3))
1533 ; // res and e4 are unchanged
1534 } else if (q4
<= 19) { // C4 fits in 64 bits
1535 if (scale
<= 19) { // 10^scale fits in 64 bits
1536 // 64 x 64 res.w[0] * ten2k64[scale]
1537 __mul_64x64_to_128MACH (res
, res
.w
[0], ten2k64
[scale
]);
1538 } else { // 10^scale fits in 128 bits
1539 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1540 __mul_128x64_to_128 (res
, res
.w
[0], ten2k128
[scale
- 20]);
1542 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1543 // 64 x 128 ten2k64[scale] * C3
1544 __mul_128x64_to_128 (res
, ten2k64
[scale
], res
);
1546 // subtract scale from the exponent
1551 // check for inexact result
1552 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1553 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1554 // set the inexact flag and the underflow flag
1555 *pfpsf
|= INEXACT_EXCEPTION
;
1556 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1558 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1559 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1560 rounding_correction (rnd_mode
,
1561 is_inexact_lt_midpoint
,
1562 is_inexact_gt_midpoint
,
1563 is_midpoint_lt_even
, is_midpoint_gt_even
,
1566 *pfpsf
|= save_fpsf
;
1567 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1568 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1569 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1570 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1574 // no overflow, and no underflow for rounding to nearest
1575 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1577 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1578 rounding_correction (rnd_mode
,
1579 is_inexact_lt_midpoint
,
1580 is_inexact_gt_midpoint
,
1581 is_midpoint_lt_even
, is_midpoint_gt_even
,
1583 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1585 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
1586 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
1587 res
.w
[0] < 0x38c15b0a00000000ull
)) {
1593 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1594 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1595 // set the inexact flag
1596 *pfpsf
|= INEXACT_EXCEPTION
;
1598 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1601 if ((*pfpsf
& INEXACT_EXCEPTION
) == 0) { // x * y is exact
1602 // need to ensure that the result has the preferred exponent
1603 p_exp
= res
.w
[1] & MASK_EXP
;
1604 if (z_exp
< p_exp
) { // the preferred exponent is z_exp
1605 // signficand of res in C3
1606 C3
.w
[1] = res
.w
[1] & MASK_COEFF
;
1608 // the number of decimal digits of x * y is q4 <= 34
1609 // Note: the coefficient fits in 128 bits
1611 // return (C3 * 10^scale) * 10^(p_exp - scale)
1612 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1614 ind
= (p_exp
- z_exp
) >> 49;
1617 // subtract scale from the exponent
1618 p_exp
= p_exp
- ((UINT64
) scale
<< 49);
1620 ; // leave res unchanged
1621 } else if (q4
<= 19) { // x * y fits in 64 bits
1622 if (scale
<= 19) { // 10^scale fits in 64 bits
1623 // 64 x 64 C3.w[0] * ten2k64[scale]
1624 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1625 } else { // 10^scale fits in 128 bits
1626 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1627 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1629 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1630 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1631 // 64 x 128 ten2k64[scale] * C3
1632 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1633 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1635 } // else leave the result as it is, because p_exp <= z_exp
1637 *pfpsf
|= save_fpsf
;
1638 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1639 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1640 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1641 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1644 } // else we have f * f + f
1646 // continue with x = f, y = f, z = f
1648 delta
= q3
+ e3
- q4
- e4
;
1652 if (p34
<= delta
- 1 || // Case (1')
1653 (p34
== delta
&& e3
+ 6176 < p34
- q3
)) { // Case (1''A)
1654 // check for overflow, which can occur only in Case (1')
1655 if ((q3
+ e3
) > (p34
+ expmax
) && p34
<= delta
- 1) {
1656 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1657 // condition for (q3 + e3) > (p34 + expmax)
1658 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1659 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
1660 res
.w
[0] = 0x0000000000000000ull
;
1661 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1663 if (p_sign
== z_sign
) {
1664 is_inexact_lt_midpoint
= 1;
1666 is_inexact_gt_midpoint
= 1;
1668 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1671 res
.w
[1] = z_sign
| C3
.w
[1];
1674 if (q3
<= 19) { // C3 fits in 64 bits
1675 if (scale
<= 19) { // 10^scale fits in 64 bits
1676 // 64 x 64 C3.w[0] * ten2k64[scale]
1677 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1678 } else { // 10^scale fits in 128 bits
1679 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1680 __mul_128x64_to_128 (res
, C3
.w
[0],
1681 ten2k128
[scale
- 20]);
1683 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1684 // 64 x 128 ten2k64[scale] * C3
1685 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1687 // the coefficient in res has q3 + scale = p34 digits
1690 res
.w
[1] = z_sign
| res
.w
[1];
1691 rounding_correction (rnd_mode
,
1692 is_inexact_lt_midpoint
,
1693 is_inexact_gt_midpoint
,
1694 is_midpoint_lt_even
, is_midpoint_gt_even
,
1697 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1698 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1699 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1700 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1705 if (q3
< p34
) { // the preferred exponent is z_exp - (p34 - q3)
1706 // return (C3 * 10^scale) * 10^(z_exp - scale)
1707 // where scale = min (p34-q3, z_exp-EMIN)
1715 } else if (q3
<= 19) { // z fits in 64 bits
1716 if (scale
<= 19) { // 10^scale fits in 64 bits
1717 // 64 x 64 C3.w[0] * ten2k64[scale]
1718 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1719 } else { // 10^scale fits in 128 bits
1720 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1721 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1723 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1724 // 64 x 128 ten2k64[scale] * C3
1725 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1727 // the coefficient in res has q3 + scale digits
1728 // subtract scale from the exponent
1729 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1731 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1732 if (scale
+ q3
< p34
)
1733 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1736 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | C3
.w
[1];
1740 // use the following to avoid double rounding errors when operating on
1741 // mixed formats in rounding to nearest, and for correcting the result
1742 // if not rounding to nearest
1743 if ((p_sign
!= z_sign
) && (delta
== (q3
+ scale
+ 1))) {
1744 // there is a gap of exactly one digit between the scaled C3 and C4
1745 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1746 if ((q3
<= 19 && C3
.w
[0] != ten2k64
[q3
- 1]) ||
1747 (q3
== 20 && (C3
.w
[1] != 0 || C3
.w
[0] != ten2k64
[19])) ||
1748 (q3
>= 21 && (C3
.w
[1] != ten2k128
[q3
- 21].w
[1] ||
1749 C3
.w
[0] != ten2k128
[q3
- 21].w
[0]))) {
1750 // C3 * 10^ scale != 10^(q3-1)
1751 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1752 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1753 is_inexact_gt_midpoint
= 1; // if (z_sign), set as if for abs. value
1754 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1755 // ok from above e3 = (z_exp >> 49) - 6176;
1756 // the result is always inexact
1760 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1761 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1762 if (q4
<= 18) { // 2 <= q4 <= 18
1763 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
1764 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1765 &is_inexact_lt_midpoint
,
1766 &is_inexact_gt_midpoint
);
1767 } else if (q4
<= 38) {
1768 P128
.w
[1] = C4
.w
[1];
1769 P128
.w
[0] = C4
.w
[0];
1770 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
1771 &is_midpoint_lt_even
,
1772 &is_midpoint_gt_even
,
1773 &is_inexact_lt_midpoint
,
1774 &is_inexact_gt_midpoint
);
1775 R64
= R128
.w
[0]; // one decimal digit
1776 } else if (q4
<= 57) {
1777 P192
.w
[2] = C4
.w
[2];
1778 P192
.w
[1] = C4
.w
[1];
1779 P192
.w
[0] = C4
.w
[0];
1780 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
1781 &is_midpoint_lt_even
,
1782 &is_midpoint_gt_even
,
1783 &is_inexact_lt_midpoint
,
1784 &is_inexact_gt_midpoint
);
1785 R64
= R192
.w
[0]; // one decimal digit
1786 } else { // if (q4 <= 68)
1787 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
1788 &is_midpoint_lt_even
,
1789 &is_midpoint_gt_even
,
1790 &is_inexact_lt_midpoint
,
1791 &is_inexact_gt_midpoint
);
1792 R64
= R256
.w
[0]; // one decimal digit
1798 if (q4
== 1 && C4
.w
[0] == 5) {
1799 is_inexact_lt_midpoint
= 0;
1800 is_inexact_gt_midpoint
= 0;
1801 is_midpoint_lt_even
= 1;
1802 is_midpoint_gt_even
= 0;
1803 } else if ((e3
== expmin
) ||
1804 R64
< 5 || (R64
== 5 && is_inexact_gt_midpoint
)) {
1805 // result does not change
1806 is_inexact_lt_midpoint
= 0;
1807 is_inexact_gt_midpoint
= 1;
1808 is_midpoint_lt_even
= 0;
1809 is_midpoint_gt_even
= 0;
1811 is_inexact_lt_midpoint
= 1;
1812 is_inexact_gt_midpoint
= 0;
1813 is_midpoint_lt_even
= 0;
1814 is_midpoint_gt_even
= 0;
1815 // result decremented is 10^(q3+scale) - 1
1816 if ((q3
+ scale
) <= 19) {
1818 res
.w
[0] = ten2k64
[q3
+ scale
];
1819 } else { // if ((q3 + scale + 1) <= 35)
1820 res
.w
[1] = ten2k128
[q3
+ scale
- 20].w
[1];
1821 res
.w
[0] = ten2k128
[q3
+ scale
- 20].w
[0];
1823 res
.w
[0] = res
.w
[0] - 1; // borrow never occurs
1824 z_exp
= z_exp
- EXP_P1
;
1826 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
1829 if (R64
< 5 || (R64
== 5 && !is_inexact_lt_midpoint
)) {
1830 ; // result not tiny (in round-to-nearest mode)
1832 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1835 } // end 10^(q3+scale-1)
1836 // set the inexact flag
1837 *pfpsf
|= INEXACT_EXCEPTION
;
1839 if (p_sign
== z_sign
) {
1840 // if (z_sign), set as if for absolute value
1841 is_inexact_lt_midpoint
= 1;
1842 } else { // if (p_sign != z_sign)
1843 // if (z_sign), set as if for absolute value
1844 is_inexact_gt_midpoint
= 1;
1846 *pfpsf
|= INEXACT_EXCEPTION
;
1848 // the result is always inexact => set the inexact flag
1849 // Determine tininess:
1850 // if (exp > expmin)
1851 // the result is not tiny
1852 // else // if exp = emin
1853 // if (q3 + scale < p34)
1854 // the result is tiny
1855 // else // if (q3 + scale = p34)
1856 // if (C3 * 10^scale > 10^33)
1857 // the result is not tiny
1858 // else // if C3 * 10^scale = 10^33
1860 // the result is not tiny
1861 // else // if (xy * z < 0)
1863 // if rnd_mode != RP
1864 // the result is tiny
1866 // the result is not tiny
1867 // else // if (z < 0)
1868 // if rnd_mode != RM
1869 // the result is tiny
1871 // the result is not tiny
1878 if ((e3
== expmin
&& (q3
+ scale
) < p34
) ||
1879 (e3
== expmin
&& (q3
+ scale
) == p34
&&
1880 (res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&& // 10^33_high
1881 res
.w
[0] == 0x38c15b0a00000000ull
&& // 10^33_low
1882 z_sign
!= p_sign
&& ((!z_sign
&& rnd_mode
!= ROUNDING_UP
) ||
1883 (z_sign
&& rnd_mode
!= ROUNDING_DOWN
)))) {
1884 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1886 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1887 rounding_correction (rnd_mode
,
1888 is_inexact_lt_midpoint
,
1889 is_inexact_gt_midpoint
,
1890 is_midpoint_lt_even
, is_midpoint_gt_even
,
1893 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1894 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1895 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1896 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1900 } else if (p34
== delta
) { // Case (1''B)
1902 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1903 // and C3 can be scaled up to p34 digits if needed
1905 // scale C3 to p34 digits if needed
1906 scale
= p34
- q3
; // 0 <= scale <= p34 - 1
1910 } else if (q3
<= 19) { // z fits in 64 bits
1911 if (scale
<= 19) { // 10^scale fits in 64 bits
1912 // 64 x 64 C3.w[0] * ten2k64[scale]
1913 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1914 } else { // 10^scale fits in 128 bits
1915 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1916 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1918 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1919 // 64 x 128 ten2k64[scale] * C3
1920 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1922 // subtract scale from the exponent
1923 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1925 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1927 // determine whether x * y is less than, equal to, or greater than
1930 if (C4
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1932 } else if (C4
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1934 } else { // > 1/2 ulp
1937 } else if (q4
<= 38) {
1938 if (C4
.w
[2] == 0 && (C4
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1939 (C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1940 C4
.w
[0] < midpoint128
[q4
- 20].w
[0]))) { // < 1/2 ulp
1942 } else if (C4
.w
[2] == 0 && C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1943 C4
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1945 } else { // > 1/2 ulp
1948 } else if (q4
<= 58) {
1949 if (C4
.w
[3] == 0 && (C4
.w
[2] < midpoint192
[q4
- 39].w
[2] ||
1950 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1951 C4
.w
[1] < midpoint192
[q4
- 39].w
[1]) ||
1952 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1953 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1954 C4
.w
[0] < midpoint192
[q4
- 39].w
[0]))) { // < 1/2 ulp
1956 } else if (C4
.w
[3] == 0 && C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1957 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1958 C4
.w
[0] == midpoint192
[q4
- 39].w
[0]) { // = 1/2 ulp
1960 } else { // > 1/2 ulp
1964 if (C4
.w
[3] < midpoint256
[q4
- 59].w
[3] ||
1965 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1966 C4
.w
[2] < midpoint256
[q4
- 59].w
[2]) ||
1967 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1968 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1969 C4
.w
[1] < midpoint256
[q4
- 59].w
[1]) ||
1970 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1971 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1972 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1973 C4
.w
[0] < midpoint256
[q4
- 59].w
[0])) { // < 1/2 ulp
1975 } else if (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1976 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1977 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1978 C4
.w
[0] == midpoint256
[q4
- 59].w
[0]) { // = 1/2 ulp
1980 } else { // > 1/2 ulp
1985 if (p_sign
== z_sign
) {
1987 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1988 // use the following to avoid double rounding errors when operating on
1989 // mixed formats in rounding to nearest
1990 is_inexact_lt_midpoint
= 1; // if (z_sign), as if for absolute value
1991 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
1992 // add 1 ulp to the significand
1994 if (res
.w
[0] == 0x0ull
)
1996 // check for rounding overflow, when coeff == 10^34
1997 if ((res
.w
[1] & MASK_COEFF
) == 0x0001ed09bead87c0ull
&&
1998 res
.w
[0] == 0x378d8e6400000000ull
) { // coefficient = 10^34
2001 z_exp
= ((UINT64
) (e3
+ 6176) << 49) & MASK_EXP
;
2002 res
.w
[1] = 0x0000314dc6448d93ull
;
2003 res
.w
[0] = 0x38c15b0a00000000ull
;
2006 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2008 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2010 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2012 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2014 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2015 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2017 // the result is always inexact, and never tiny
2018 // set the inexact flag
2019 *pfpsf
|= INEXACT_EXCEPTION
;
2020 // check for overflow
2021 if (e3
> expmax
&& rnd_mode
== ROUNDING_TO_NEAREST
) {
2022 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2023 res
.w
[0] = 0x0000000000000000ull
;
2024 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2025 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2026 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2027 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2028 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2032 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2033 rounding_correction (rnd_mode
,
2034 is_inexact_lt_midpoint
,
2035 is_inexact_gt_midpoint
,
2036 is_midpoint_lt_even
, is_midpoint_gt_even
,
2038 z_exp
= res
.w
[1] & MASK_EXP
;
2040 } else { // if (p_sign != z_sign)
2041 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2042 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2043 res
.w
[0] != 0x38c15b0a00000000ull
) { // C3 * 10^scale != 10^33
2045 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2046 // use the following to avoid double rounding errors when operating
2047 // on mixed formats in rounding to nearest
2048 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2049 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
2050 // subtract 1 ulp from the significand
2052 if (res
.w
[0] == 0xffffffffffffffffull
)
2054 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2056 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2058 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2060 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2062 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2063 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2065 // the result is always inexact, and never tiny
2066 // check for overflow for RN
2068 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2069 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2070 res
.w
[0] = 0x0000000000000000ull
;
2071 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2073 rounding_correction (rnd_mode
,
2074 is_inexact_lt_midpoint
,
2075 is_inexact_gt_midpoint
,
2076 is_midpoint_lt_even
,
2077 is_midpoint_gt_even
, e3
, &res
,
2080 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2081 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2082 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2083 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2087 // set the inexact flag
2088 *pfpsf
|= INEXACT_EXCEPTION
;
2089 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2090 rounding_correction (rnd_mode
,
2091 is_inexact_lt_midpoint
,
2092 is_inexact_gt_midpoint
,
2093 is_midpoint_lt_even
,
2094 is_midpoint_gt_even
, e3
, &res
, pfpsf
);
2096 z_exp
= res
.w
[1] & MASK_EXP
;
2097 } else { // if C3 * 10^scale = 10^33
2098 e3
= (z_exp
>> 49) - 6176;
2100 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2101 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2103 // if q4 = 1 the result is exact
2104 // result coefficient = 10^34 - C4
2105 res
.w
[1] = 0x0001ed09bead87c0ull
;
2106 res
.w
[0] = 0x378d8e6400000000ull
- C4
.w
[0];
2107 z_exp
= z_exp
- EXP_P1
;
2109 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2111 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2112 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2113 if (q4
<= 18) { // 2 <= q4 <= 18
2114 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
2115 &is_midpoint_lt_even
,
2116 &is_midpoint_gt_even
,
2117 &is_inexact_lt_midpoint
,
2118 &is_inexact_gt_midpoint
);
2119 } else if (q4
<= 38) {
2120 P128
.w
[1] = C4
.w
[1];
2121 P128
.w
[0] = C4
.w
[0];
2122 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
2123 &is_midpoint_lt_even
,
2124 &is_midpoint_gt_even
,
2125 &is_inexact_lt_midpoint
,
2126 &is_inexact_gt_midpoint
);
2127 R64
= R128
.w
[0]; // one decimal digit
2128 } else if (q4
<= 57) {
2129 P192
.w
[2] = C4
.w
[2];
2130 P192
.w
[1] = C4
.w
[1];
2131 P192
.w
[0] = C4
.w
[0];
2132 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
2133 &is_midpoint_lt_even
,
2134 &is_midpoint_gt_even
,
2135 &is_inexact_lt_midpoint
,
2136 &is_inexact_gt_midpoint
);
2137 R64
= R192
.w
[0]; // one decimal digit
2138 } else { // if (q4 <= 68)
2139 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
2140 &is_midpoint_lt_even
,
2141 &is_midpoint_gt_even
,
2142 &is_inexact_lt_midpoint
,
2143 &is_inexact_gt_midpoint
);
2144 R64
= R256
.w
[0]; // one decimal digit
2146 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2147 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2148 // the result is exact: 10^34 - R64
2149 // incr_exp = 0 with certainty
2150 z_exp
= z_exp
- EXP_P1
;
2153 z_sign
| (z_exp
& MASK_EXP
) | 0x0001ed09bead87c0ull
;
2154 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2156 // We want R64 to be the top digit of C4, but we actually
2157 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2158 // because the top digit is (C4 * 10^(-q4+1))RZ
2159 // however, if incr_exp = 1 then R64 = 10 with certainty
2163 // the result is inexact as C4 has more than 1 significant digit
2164 // and C3 * 10^scale = 10^33
2165 // example of case that is treated here:
2166 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2167 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2168 // note that (e3 > expmin}
2169 // in order to round, subtract R64 from 10^34 and then compare
2170 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2171 // calculate 10^34 - R64
2172 res
.w
[1] = 0x0001ed09bead87c0ull
;
2173 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2174 z_exp
= z_exp
- EXP_P1
; // will be OR-ed with sign & significand
2175 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2176 // R64 is small, 1 <= R64 <= 9
2178 if (is_inexact_lt_midpoint
) {
2179 is_inexact_lt_midpoint
= 0;
2180 is_inexact_gt_midpoint
= 1;
2181 } else if (is_inexact_gt_midpoint
) {
2182 is_inexact_gt_midpoint
= 0;
2183 is_inexact_lt_midpoint
= 1;
2184 } else if (is_midpoint_lt_even
) {
2185 is_midpoint_lt_even
= 0;
2186 is_midpoint_gt_even
= 1;
2187 } else if (is_midpoint_gt_even
) {
2188 is_midpoint_gt_even
= 0;
2189 is_midpoint_lt_even
= 1;
2193 // the result is always inexact, and never tiny
2194 // check for overflow for RN
2196 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2197 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2198 res
.w
[0] = 0x0000000000000000ull
;
2199 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2201 rounding_correction (rnd_mode
,
2202 is_inexact_lt_midpoint
,
2203 is_inexact_gt_midpoint
,
2204 is_midpoint_lt_even
,
2205 is_midpoint_gt_even
, e3
, &res
,
2208 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2209 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2210 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2211 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2215 // set the inexact flag
2216 *pfpsf
|= INEXACT_EXCEPTION
;
2218 z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2219 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2220 rounding_correction (rnd_mode
,
2221 is_inexact_lt_midpoint
,
2222 is_inexact_gt_midpoint
,
2223 is_midpoint_lt_even
,
2224 is_midpoint_gt_even
, e3
, &res
,
2227 z_exp
= res
.w
[1] & MASK_EXP
;
2228 } // end result is inexact
2230 } else { // if (e3 = emin)
2231 // if e3 = expmin the result is also tiny (the condition for
2232 // tininess is C4 > 050...0 [q4 digits] which is met because
2233 // the msd of C4 is not zero)
2234 // the result is tiny and inexact in all rounding modes;
2235 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2236 // gt_half_ulp to calculate)
2237 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2239 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2240 if (gt_half_ulp
) { // res = 10^33 - 1
2241 res
.w
[1] = 0x0000314dc6448d93ull
;
2242 res
.w
[0] = 0x38c15b09ffffffffull
;
2244 res
.w
[1] = 0x0000314dc6448d93ull
;
2245 res
.w
[0] = 0x38c15b0a00000000ull
;
2247 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2248 *pfpsf
|= UNDERFLOW_EXCEPTION
; // inexact is set later
2251 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2252 } else if (lt_half_ulp
) {
2253 is_inexact_gt_midpoint
= 1; //if(z_sign), as if for absolute value
2254 } else { // if (gt_half_ulp)
2255 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2258 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2259 rounding_correction (rnd_mode
,
2260 is_inexact_lt_midpoint
,
2261 is_inexact_gt_midpoint
,
2262 is_midpoint_lt_even
,
2263 is_midpoint_gt_even
, e3
, &res
,
2265 z_exp
= res
.w
[1] & MASK_EXP
;
2268 // set the inexact flag (if the result was not exact)
2269 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2270 is_midpoint_lt_even
|| is_midpoint_gt_even
)
2271 *pfpsf
|= INEXACT_EXCEPTION
;
2273 } // end if (p_sign != z_sign)
2274 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2275 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2276 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2277 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2278 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2282 } else if (((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2283 (q3
<= delta
&& delta
+ q4
<= p34
) || // Case (3)
2284 (delta
< q3
&& p34
< delta
+ q4
) || // Case (4)
2285 (delta
< q3
&& q3
<= delta
+ q4
&& delta
+ q4
<= p34
) || // Case (5)
2286 (delta
+ q4
< q3
)) && // Case (6)
2287 !(delta
<= 1 && p_sign
!= z_sign
)) { // Case (2), (3), (4), (5) or (6)
2289 // the result has the sign of z
2291 if ((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2292 (delta
< q3
&& p34
< delta
+ q4
)) { // Case (4)
2293 // round first the sum x * y + z with unbounded exponent
2294 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2296 // calculate res = C3 * 10^scale
2298 x0
= delta
+ q4
- p34
;
2299 } else if (delta
+ q4
< q3
) { // Case (6)
2300 // make Case (6) look like Case (3) or Case (5) with scale = 0
2301 // by scaling up C4 by 10^(q3 - delta - q4)
2302 scale
= q3
- delta
- q4
; // 1 <= scale <= 33
2303 if (q4
<= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2304 if (scale
<= 19) { // 10^scale fits in 64 bits
2305 // 64 x 64 C4.w[0] * ten2k64[scale]
2306 __mul_64x64_to_128MACH (P128
, C4
.w
[0], ten2k64
[scale
]);
2307 } else { // 10^scale fits in 128 bits
2308 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2309 __mul_128x64_to_128 (P128
, C4
.w
[0], ten2k128
[scale
- 20]);
2311 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2312 // 64 x 128 ten2k64[scale] * C4
2313 __mul_128x64_to_128 (P128
, ten2k64
[scale
], C4
);
2315 C4
.w
[0] = P128
.w
[0];
2316 C4
.w
[1] = P128
.w
[1];
2317 // e4 does not need adjustment, as it is not used from this point on
2320 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2321 } else { // if Case (3) or Case (5)
2322 // Note: Case (3) is similar to Case (2), but scale differs and the
2323 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2324 // result with unbounded exponent)
2326 // calculate first the sum x * y + z with unbounded exponent (exact)
2327 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2329 // calculate res = C3 * 10^scale
2330 scale
= delta
+ q4
- q3
;
2332 // Note: the comments which follow refer [mainly] to Case (2)]
2336 if (scale
== 0) { // this could happen e.g. if we return to case2_repeat
2340 } else if (q3
<= 19) { // 1 <= scale <= 19; z fits in 64 bits
2341 if (scale
<= 19) { // 10^scale fits in 64 bits
2342 // 64 x 64 C3.w[0] * ten2k64[scale]
2343 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
2344 } else { // 10^scale fits in 128 bits
2345 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2346 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
2348 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2349 // 64 x 128 ten2k64[scale] * C3
2350 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
2352 // e3 is already calculated
2354 // now res = C3 * 10^scale and e3 = e3 - scale
2355 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2356 // because the result was too small
2358 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2359 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2360 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2361 // the rounding fits in 128 bits!)
2362 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2363 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2364 if (x0
== 0) { // this could happen only if we return to case2_repeat, or
2365 // for Case (3) or Case (6)
2366 R128
.w
[1] = C4
.w
[1];
2367 R128
.w
[0] = C4
.w
[0];
2368 } else if (q4
<= 18) {
2369 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2370 round64_2_18 (q4
, x0
, C4
.w
[0], &R64
, &incr_exp
,
2371 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2372 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
2374 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2375 R64
= ten2k64
[q4
- x0
];
2379 } else if (q4
<= 38) {
2380 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2381 P128
.w
[1] = C4
.w
[1];
2382 P128
.w
[0] = C4
.w
[0];
2383 round128_19_38 (q4
, x0
, P128
, &R128
, &incr_exp
,
2384 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2385 &is_inexact_lt_midpoint
,
2386 &is_inexact_gt_midpoint
);
2388 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2389 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2390 R128
.w
[0] = ten2k64
[q4
- x0
];
2391 // R128.w[1] stays 0
2392 } else { // 20 <= q4 - x0 <= 37
2393 R128
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2394 R128
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2397 } else if (q4
<= 57) {
2398 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2399 P192
.w
[2] = C4
.w
[2];
2400 P192
.w
[1] = C4
.w
[1];
2401 P192
.w
[0] = C4
.w
[0];
2402 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2403 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2404 &is_inexact_lt_midpoint
,
2405 &is_inexact_gt_midpoint
);
2406 // R192.w[2] is always 0
2408 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2409 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2410 R192
.w
[0] = ten2k64
[q4
- x0
];
2411 // R192.w[1] stays 0
2412 // R192.w[2] stays 0
2413 } else { // 20 <= q4 - x0 <= 33
2414 R192
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2415 R192
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2416 // R192.w[2] stays 0
2419 R128
.w
[1] = R192
.w
[1];
2420 R128
.w
[0] = R192
.w
[0];
2422 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2423 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2424 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2425 &is_inexact_lt_midpoint
,
2426 &is_inexact_gt_midpoint
);
2427 // R256.w[3] and R256.w[2] are always 0
2429 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2430 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2431 R256
.w
[0] = ten2k64
[q4
- x0
];
2432 // R256.w[1] stays 0
2433 // R256.w[2] stays 0
2434 // R256.w[3] stays 0
2435 } else { // 20 <= q4 - x0 <= 33
2436 R256
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2437 R256
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2438 // R256.w[2] stays 0
2439 // R256.w[3] stays 0
2442 R128
.w
[1] = R256
.w
[1];
2443 R128
.w
[0] = R256
.w
[0];
2445 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2446 // rounded to nearest, which were copied into R128
2447 if (z_sign
== p_sign
) {
2448 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale
2449 // the sum can result in [up to] p34 or p34 + 1 digits
2450 res
.w
[0] = res
.w
[0] + R128
.w
[0];
2451 res
.w
[1] = res
.w
[1] + R128
.w
[1];
2452 if (res
.w
[0] < R128
.w
[0])
2453 res
.w
[1]++; // carry
2454 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2455 if (res
.w
[1] > 0x0001ed09bead87c0ull
||
2456 (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2457 res
.w
[0] > 0x378d8e63ffffffffull
)) {
2458 // avoid double rounding error
2459 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2460 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2461 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2462 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2463 is_inexact_lt_midpoint
= 0;
2464 is_inexact_gt_midpoint
= 0;
2465 is_midpoint_lt_even
= 0;
2466 is_midpoint_gt_even
= 0;
2467 P128
.w
[1] = res
.w
[1];
2468 P128
.w
[0] = res
.w
[0];
2469 round128_19_38 (35, 1, P128
, &res
, &incr_exp
,
2470 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2471 &is_inexact_lt_midpoint
,
2472 &is_inexact_gt_midpoint
);
2473 // incr_exp is 0 with certainty in this case
2474 // avoid a double rounding error
2475 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2476 is_midpoint_lt_even
) { // double rounding error upward
2479 if (res
.w
[0] == 0xffffffffffffffffull
)
2481 // Note: a double rounding error upward is not possible; for this
2482 // the result after the first rounding would have to be 99...95
2483 // (35 digits in all), possibly followed by a number of zeros; this
2484 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2485 is_midpoint_lt_even
= 0;
2486 is_inexact_lt_midpoint
= 1;
2487 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2488 is_midpoint_gt_even
) { // double rounding error downward
2493 is_midpoint_gt_even
= 0;
2494 is_inexact_gt_midpoint
= 1;
2495 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2496 !is_inexact_lt_midpoint
2497 && !is_inexact_gt_midpoint
) {
2498 // if this second rounding was exact the result may still be
2499 // inexact because of the first rounding
2500 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2501 is_inexact_gt_midpoint
= 1;
2503 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2504 is_inexact_lt_midpoint
= 1;
2506 } else if (is_midpoint_gt_even
&&
2507 (is_inexact_gt_midpoint0
2508 || is_midpoint_lt_even0
)) {
2509 // pulled up to a midpoint
2510 is_inexact_lt_midpoint
= 1;
2511 is_inexact_gt_midpoint
= 0;
2512 is_midpoint_lt_even
= 0;
2513 is_midpoint_gt_even
= 0;
2514 } else if (is_midpoint_lt_even
&&
2515 (is_inexact_lt_midpoint0
2516 || is_midpoint_gt_even0
)) {
2517 // pulled down to a midpoint
2518 is_inexact_lt_midpoint
= 0;
2519 is_inexact_gt_midpoint
= 1;
2520 is_midpoint_lt_even
= 0;
2521 is_midpoint_gt_even
= 0;
2527 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2528 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2529 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2530 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2531 is_inexact_lt_midpoint
= 1;
2535 // this is the result rounded with unbounded exponent, unless a
2536 // correction is needed
2537 res
.w
[1] = res
.w
[1] & MASK_COEFF
;
2539 if (is_midpoint_gt_even
) {
2541 is_midpoint_gt_even
= 0;
2542 is_midpoint_lt_even
= 1;
2544 if (res
.w
[0] == 0x0)
2546 // check for rounding overflow
2547 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2548 res
.w
[0] == 0x378d8e6400000000ull
) {
2549 // res = 10^34 => rounding overflow
2550 res
.w
[1] = 0x0000314dc6448d93ull
;
2551 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2554 } else if (is_midpoint_lt_even
) {
2556 is_midpoint_lt_even
= 0;
2557 is_midpoint_gt_even
= 1;
2559 if (res
.w
[0] == 0xffffffffffffffffull
)
2561 // if the result is pure zero, the sign depends on the rounding
2562 // mode (x*y and z had opposite signs)
2563 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2564 if (rnd_mode
!= ROUNDING_DOWN
)
2565 z_sign
= 0x0000000000000000ull
;
2567 z_sign
= 0x8000000000000000ull
;
2568 // the exponent is max (e3, expmin)
2571 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2572 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2573 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2574 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2583 } else { // if (z_sign != p_sign)
2584 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2585 // used to swap rounding indicators if p_sign != z_sign
2586 // the sum can result in [up to] p34 or p34 - 1 digits
2588 res
.w
[0] = res
.w
[0] - R128
.w
[0];
2589 res
.w
[1] = res
.w
[1] - R128
.w
[1];
2590 if (res
.w
[0] > tmp64
)
2591 res
.w
[1]--; // borrow
2592 // if res < 10^33 and exp > expmin need to decrease x0 and
2593 // increase scale by 1
2594 if (e3
> expmin
&& ((res
.w
[1] < 0x0000314dc6448d93ull
||
2595 (res
.w
[1] == 0x0000314dc6448d93ull
&&
2596 res
.w
[0] < 0x38c15b0a00000000ull
)) ||
2597 (is_inexact_lt_midpoint
2598 && res
.w
[1] == 0x0000314dc6448d93ull
2599 && res
.w
[0] == 0x38c15b0a00000000ull
))
2602 // first restore e3, otherwise it will be too small
2605 is_inexact_lt_midpoint
= 0;
2606 is_inexact_gt_midpoint
= 0;
2607 is_midpoint_lt_even
= 0;
2608 is_midpoint_gt_even
= 0;
2612 // else this is the result rounded with unbounded exponent;
2613 // because the result has opposite sign to that of C4 which was
2614 // rounded, need to change the rounding indicators
2615 if (is_inexact_lt_midpoint
) {
2616 is_inexact_lt_midpoint
= 0;
2617 is_inexact_gt_midpoint
= 1;
2618 } else if (is_inexact_gt_midpoint
) {
2619 is_inexact_gt_midpoint
= 0;
2620 is_inexact_lt_midpoint
= 1;
2621 } else if (lsb
== 0) {
2622 if (is_midpoint_lt_even
) {
2623 is_midpoint_lt_even
= 0;
2624 is_midpoint_gt_even
= 1;
2625 } else if (is_midpoint_gt_even
) {
2626 is_midpoint_gt_even
= 0;
2627 is_midpoint_lt_even
= 1;
2631 } else if (lsb
== 1) {
2632 if (is_midpoint_lt_even
) {
2635 if (res
.w
[0] == 0x0)
2637 // check for rounding overflow
2638 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2639 res
.w
[0] == 0x378d8e6400000000ull
) {
2640 // res = 10^34 => rounding overflow
2641 res
.w
[1] = 0x0000314dc6448d93ull
;
2642 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2645 } else if (is_midpoint_gt_even
) {
2648 if (res
.w
[0] == 0xffffffffffffffffull
)
2650 // if the result is pure zero, the sign depends on the rounding
2651 // mode (x*y and z had opposite signs)
2652 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2653 if (rnd_mode
!= ROUNDING_DOWN
)
2654 z_sign
= 0x0000000000000000ull
;
2656 z_sign
= 0x8000000000000000ull
;
2657 // the exponent is max (e3, expmin)
2660 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2661 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2662 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2663 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2674 // check for underflow
2675 if (e3
== expmin
) { // and if significand < 10^33 => result is tiny
2676 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
2677 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
2678 res
.w
[0] < 0x38c15b0a00000000ull
)) {
2681 } else if (e3
< expmin
) {
2682 // the result is tiny, so we must truncate more of res
2685 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2686 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2687 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2688 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2689 is_inexact_lt_midpoint
= 0;
2690 is_inexact_gt_midpoint
= 0;
2691 is_midpoint_lt_even
= 0;
2692 is_midpoint_gt_even
= 0;
2693 // determine the number of decimal digits in res
2694 if (res
.w
[1] == 0x0) {
2695 // between 1 and 19 digits
2696 for (ind
= 1; ind
<= 19; ind
++) {
2697 if (res
.w
[0] < ten2k64
[ind
]) {
2702 } else if (res
.w
[1] < ten2k128
[0].w
[1] ||
2703 (res
.w
[1] == ten2k128
[0].w
[1]
2704 && res
.w
[0] < ten2k128
[0].w
[0])) {
2707 } else { // between 21 and 38 digits
2708 for (ind
= 1; ind
<= 18; ind
++) {
2709 if (res
.w
[1] < ten2k128
[ind
].w
[1] ||
2710 (res
.w
[1] == ten2k128
[ind
].w
[1] &&
2711 res
.w
[0] < ten2k128
[ind
].w
[0])) {
2719 // at this point ind >= x0; because delta >= 2 on this path, the case
2720 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2721 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2722 // the signs of x * y and z are opposite, and through cancellation
2723 // the most significant decimal digit in res has the weight
2724 // 10^(emin-1); however, it is clear that in this case the most
2725 // significant digit is 9, so the result before rounding is
2727 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2728 // result with weight of at least 10^emin, and correction for underflow
2729 // can be carried out using the round*_*_2_* () routines
2730 if (x0
== ind
) { // the result before rounding is 0.9... * 10^emin
2733 is_inexact_gt_midpoint
= 1;
2734 } else if (ind
<= 18) { // check that 2 <= ind
2735 // 2 <= ind <= 18, 1 <= x0 <= 17
2736 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
2737 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2738 &is_inexact_lt_midpoint
,
2739 &is_inexact_gt_midpoint
);
2741 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2742 R64
= ten2k64
[ind
- x0
];
2746 } else if (ind
<= 38) {
2748 P128
.w
[1] = res
.w
[1];
2749 P128
.w
[0] = res
.w
[0];
2750 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
2751 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2752 &is_inexact_lt_midpoint
,
2753 &is_inexact_gt_midpoint
);
2755 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2756 if (ind
- x0
<= 19) { // 1 <= ind - x0 <= 19
2757 res
.w
[0] = ten2k64
[ind
- x0
];
2759 } else { // 20 <= ind - x0 <= 37
2760 res
.w
[0] = ten2k128
[ind
- x0
- 20].w
[0];
2761 res
.w
[1] = ten2k128
[ind
- x0
- 20].w
[1];
2765 // avoid a double rounding error
2766 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2767 is_midpoint_lt_even
) { // double rounding error upward
2770 if (res
.w
[0] == 0xffffffffffffffffull
)
2772 // Note: a double rounding error upward is not possible; for this
2773 // the result after the first rounding would have to be 99...95
2774 // (35 digits in all), possibly followed by a number of zeros; this
2775 // not possible in Cases (2)-(6) which may get here
2776 is_midpoint_lt_even
= 0;
2777 is_inexact_lt_midpoint
= 1;
2778 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2779 is_midpoint_gt_even
) { // double rounding error downward
2784 is_midpoint_gt_even
= 0;
2785 is_inexact_gt_midpoint
= 1;
2786 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2787 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2788 // if this second rounding was exact the result may still be
2789 // inexact because of the first rounding
2790 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2791 is_inexact_gt_midpoint
= 1;
2793 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2794 is_inexact_lt_midpoint
= 1;
2796 } else if (is_midpoint_gt_even
&&
2797 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
2798 // pulled up to a midpoint
2799 is_inexact_lt_midpoint
= 1;
2800 is_inexact_gt_midpoint
= 0;
2801 is_midpoint_lt_even
= 0;
2802 is_midpoint_gt_even
= 0;
2803 } else if (is_midpoint_lt_even
&&
2804 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
2805 // pulled down to a midpoint
2806 is_inexact_lt_midpoint
= 0;
2807 is_inexact_gt_midpoint
= 1;
2808 is_midpoint_lt_even
= 0;
2809 is_midpoint_gt_even
= 0;
2815 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2816 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2817 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2818 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2819 is_inexact_lt_midpoint
= 1;
2825 // check for inexact result
2826 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2827 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
2828 // set the inexact flag
2829 *pfpsf
|= INEXACT_EXCEPTION
;
2831 *pfpsf
|= UNDERFLOW_EXCEPTION
;
2833 // now check for significand = 10^34 (may have resulted from going
2834 // back to case2_repeat)
2835 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2836 res
.w
[0] == 0x378d8e6400000000ull
) { // if res = 10^34
2837 res
.w
[1] = 0x0000314dc6448d93ull
; // res = 10^33
2838 res
.w
[0] = 0x38c15b0a00000000ull
;
2841 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2842 // check for overflow
2843 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e3
> expmax
) {
2844 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2845 res
.w
[0] = 0x0000000000000000ull
;
2846 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2848 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2849 rounding_correction (rnd_mode
,
2850 is_inexact_lt_midpoint
,
2851 is_inexact_gt_midpoint
,
2852 is_midpoint_lt_even
, is_midpoint_gt_even
,
2855 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2856 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2857 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2858 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2864 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2865 // the signs of x*y and z are opposite; in these cases massive
2866 // cancellation can occur, so it is better to scale either C3 or C4 and
2867 // to perform the subtraction before rounding; rounding is performed
2868 // next, depending on the number of decimal digits in the result and on
2869 // the exponent value
2870 // Note: overlow is not possible in this case
2871 // this is similar to Cases (15), (16), and (17)
2873 if (delta
+ q4
< q3
) { // from Case (6)
2874 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2875 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2876 // and call add_and_round; delta stays positive
2877 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2878 P128
.w
[1] = C3
.w
[1];
2879 P128
.w
[0] = C3
.w
[0];
2882 C4
.w
[1] = P128
.w
[1];
2883 C4
.w
[0] = P128
.w
[0];
2893 } else { // from Cases (2), (3), (4), (5)
2894 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2895 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2896 // (16), and (17) if we just change the sign of delta
2899 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
2900 rnd_mode
, &is_midpoint_lt_even
,
2901 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
2902 &is_inexact_gt_midpoint
, pfpsf
, &res
);
2903 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2904 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2905 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2906 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2912 } else { // if delta < 0
2916 if (p34
< q4
&& q4
<= delta
) { // Case (7)
2918 // truncate C4 to p34 digits into res
2919 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2922 P128
.w
[1] = C4
.w
[1];
2923 P128
.w
[0] = C4
.w
[0];
2924 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
2925 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2926 &is_inexact_lt_midpoint
,
2927 &is_inexact_gt_midpoint
);
2928 } else if (q4
<= 57) { // 35 <= q4 <= 57
2929 P192
.w
[2] = C4
.w
[2];
2930 P192
.w
[1] = C4
.w
[1];
2931 P192
.w
[0] = C4
.w
[0];
2932 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2933 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2934 &is_inexact_lt_midpoint
,
2935 &is_inexact_gt_midpoint
);
2936 res
.w
[0] = R192
.w
[0];
2937 res
.w
[1] = R192
.w
[1];
2938 } else { // if (q4 <= 68)
2939 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2940 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2941 &is_inexact_lt_midpoint
,
2942 &is_inexact_gt_midpoint
);
2943 res
.w
[0] = R256
.w
[0];
2944 res
.w
[1] = R256
.w
[1];
2950 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2951 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2952 // if C4 rounded to p34 digits is exact then the result is inexact,
2953 // in a way that depends on the signs of x * y and z
2954 if (p_sign
== z_sign
) {
2955 is_inexact_lt_midpoint
= 1;
2956 } else { // if (p_sign != z_sign)
2957 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2958 res
.w
[0] != 0x38c15b0a00000000ull
) { // res != 10^33
2959 is_inexact_gt_midpoint
= 1;
2960 } else { // res = 10^33 and exact is a special case
2961 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2962 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2963 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2964 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2965 if (delta
> p34
+ 1) { // C3 < 1/2
2966 // res = 10^33, unchanged
2967 is_inexact_gt_midpoint
= 1;
2968 } else { // if (delta == p34 + 1)
2970 if (C3
.w
[0] < midpoint64
[q3
- 1]) { // C3 < 1/2 ulp
2971 // res = 10^33, unchanged
2972 is_inexact_gt_midpoint
= 1;
2973 } else if (C3
.w
[0] == midpoint64
[q3
- 1]) { // C3 = 1/2 ulp
2974 // res = 10^33, unchanged
2975 is_midpoint_lt_even
= 1;
2976 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2977 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
2978 res
.w
[0] = 0x378d8e63ffffffffull
;
2980 is_inexact_lt_midpoint
= 1;
2982 } else { // if (20 <= q3 <=34)
2983 if (C3
.w
[1] < midpoint128
[q3
- 20].w
[1] ||
2984 (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2985 C3
.w
[0] < midpoint128
[q3
- 20].w
[0])) { // C3 < 1/2 ulp
2986 // res = 10^33, unchanged
2987 is_inexact_gt_midpoint
= 1;
2988 } else if (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2989 C3
.w
[0] == midpoint128
[q3
- 20].w
[0]) { // C3 = 1/2 ulp
2990 // res = 10^33, unchanged
2991 is_midpoint_lt_even
= 1;
2992 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2993 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
2994 res
.w
[0] = 0x378d8e63ffffffffull
;
2996 is_inexact_lt_midpoint
= 1;
3002 } else if (is_midpoint_lt_even
) {
3003 if (z_sign
!= p_sign
) {
3004 // needs correction: res = res - 1
3005 res
.w
[0] = res
.w
[0] - 1;
3006 if (res
.w
[0] == 0xffffffffffffffffull
)
3008 // if it is (10^33-1)*10^e4 then the corect result is
3009 // (10^34-1)*10(e4-1)
3010 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3011 res
.w
[0] == 0x38c15b09ffffffffull
) {
3012 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3013 res
.w
[0] = 0x378d8e63ffffffffull
;
3016 is_midpoint_lt_even
= 0;
3017 is_inexact_lt_midpoint
= 1;
3018 } else { // if (z_sign == p_sign)
3019 is_midpoint_lt_even
= 0;
3020 is_inexact_gt_midpoint
= 1;
3022 } else if (is_midpoint_gt_even
) {
3023 if (z_sign
== p_sign
) {
3024 // needs correction: res = res + 1 (cannot cross in the next binade)
3025 res
.w
[0] = res
.w
[0] + 1;
3026 if (res
.w
[0] == 0x0000000000000000ull
)
3028 is_midpoint_gt_even
= 0;
3029 is_inexact_gt_midpoint
= 1;
3030 } else { // if (z_sign != p_sign)
3031 is_midpoint_gt_even
= 0;
3032 is_inexact_lt_midpoint
= 1;
3035 ; // the rounded result is already correct
3037 // check for overflow
3038 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e4
> expmax
) {
3039 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3040 res
.w
[0] = 0x0000000000000000ull
;
3041 *pfpsf
|= (OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
3042 } else { // no overflow or not RN
3043 p_exp
= ((UINT64
) (e4
+ 6176) << 49);
3044 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
3046 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3047 rounding_correction (rnd_mode
,
3048 is_inexact_lt_midpoint
,
3049 is_inexact_gt_midpoint
,
3050 is_midpoint_lt_even
, is_midpoint_gt_even
,
3053 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
3054 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
3055 // set the inexact flag
3056 *pfpsf
|= INEXACT_EXCEPTION
;
3058 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3059 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3060 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3061 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3065 } else if ((q4
<= p34
&& p34
<= delta
) || // Case (8)
3066 (q4
<= delta
&& delta
< p34
&& p34
< delta
+ q3
) || // Case (9)
3067 (q4
<= delta
&& delta
+ q3
<= p34
) || // Case (10)
3068 (delta
< q4
&& q4
<= p34
&& p34
< delta
+ q3
) || // Case (13)
3069 (delta
< q4
&& q4
<= delta
+ q3
&& delta
+ q3
<= p34
) || // Case (14)
3070 (delta
+ q3
< q4
&& q4
<= p34
)) { // Case (18)
3072 // Case (8) is similar to Case (1), with C3 and C4 swapped
3073 // Case (9) is similar to Case (2), with C3 and C4 swapped
3074 // Case (10) is similar to Case (3), with C3 and C4 swapped
3075 // Case (13) is similar to Case (4), with C3 and C4 swapped
3076 // Case (14) is similar to Case (5), with C3 and C4 swapped
3077 // Case (18) is similar to Case (6), with C3 and C4 swapped
3079 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3080 // and go back to delta_ge_zero
3081 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3082 P128
.w
[1] = C3
.w
[1];
3083 P128
.w
[0] = C3
.w
[0];
3086 C4
.w
[1] = P128
.w
[1];
3087 C4
.w
[0] = P128
.w
[0];
3102 } else if ((p34
<= delta
&& delta
< q4
&& q4
< delta
+ q3
) || // Case (11)
3103 (delta
< p34
&& p34
< q4
&& q4
< delta
+ q3
)) { // Case (12)
3105 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3106 // 1 <= x0 <= q3 - 1 <= p34 - 1
3107 x0
= e4
- e3
; // or x0 = delta + q3 - q4
3108 if (q3
<= 18) { // 2 <= q3 <= 18
3109 round64_2_18 (q3
, x0
, C3
.w
[0], &R64
, &incr_exp
,
3110 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3111 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
3114 } else if (q3
<= 38) {
3115 round128_19_38 (q3
, x0
, C3
, &R128
, &incr_exp
,
3116 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3117 &is_inexact_lt_midpoint
,
3118 &is_inexact_gt_midpoint
);
3119 C3
.w
[1] = R128
.w
[1];
3120 C3
.w
[0] = R128
.w
[0];
3122 // the rounded result has q3 - x0 digits
3123 // we want the exponent to be e4, so if incr_exp = 1 then
3124 // multiply the rounded result by 10 - it will still fit in 113 bits
3127 P128
.w
[1] = C3
.w
[1];
3128 P128
.w
[0] = C3
.w
[0];
3129 __mul_64x128_to_128 (C3
, ten2k64
[1], P128
);
3131 e3
= e3
+ x0
; // this is e4
3132 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3133 // the result will have the sign of x * y; the exponent is e4
3136 R256
.w
[1] = C3
.w
[1];
3137 R256
.w
[0] = C3
.w
[0];
3138 if (p_sign
== z_sign
) { // R256 = C4 + R256
3139 add256 (C4
, R256
, &R256
);
3140 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3141 sub256 (C4
, R256
, &R256
); // the result cannot be pure zero
3142 // because the result has opposite sign to that of R256 which was
3143 // rounded, need to change the rounding indicators
3144 lsb
= C4
.w
[0] & 0x01;
3145 if (is_inexact_lt_midpoint
) {
3146 is_inexact_lt_midpoint
= 0;
3147 is_inexact_gt_midpoint
= 1;
3148 } else if (is_inexact_gt_midpoint
) {
3149 is_inexact_gt_midpoint
= 0;
3150 is_inexact_lt_midpoint
= 1;
3151 } else if (lsb
== 0) {
3152 if (is_midpoint_lt_even
) {
3153 is_midpoint_lt_even
= 0;
3154 is_midpoint_gt_even
= 1;
3155 } else if (is_midpoint_gt_even
) {
3156 is_midpoint_gt_even
= 0;
3157 is_midpoint_lt_even
= 1;
3161 } else if (lsb
== 1) {
3162 if (is_midpoint_lt_even
) {
3165 if (R256
.w
[0] == 0x0) {
3167 if (R256
.w
[1] == 0x0) {
3169 if (R256
.w
[2] == 0x0) {
3174 // no check for rounding overflow - R256 was a difference
3175 } else if (is_midpoint_gt_even
) {
3178 if (R256
.w
[0] == 0xffffffffffffffffull
) {
3180 if (R256
.w
[1] == 0xffffffffffffffffull
) {
3182 if (R256
.w
[2] == 0xffffffffffffffffull
) {
3194 // determine the number of decimal digits in R256
3195 ind
= nr_digits256 (R256
); // ind >= p34
3196 // if R256 is sum, then ind > p34; if R256 is a difference, then
3197 // ind >= p34; this means that we can calculate the result rounded to
3198 // the destination precision, with unbounded exponent, starting from R256
3199 // and using the indicators from the rounding of C3 to avoid a double
3204 } else if (ind
== p34
) {
3205 // the result rounded to the destination precision with
3206 // unbounded exponent
3207 // is (-1)^p_sign * R256 * 10^e4
3208 res
.w
[1] = R256
.w
[1];
3209 res
.w
[0] = R256
.w
[0];
3210 } else { // if (ind > p34)
3211 // if more than P digits, round to nearest to P digits
3212 // round R256 to p34 digits
3213 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
3214 // save C3 rounding indicators to help avoid double rounding error
3215 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3216 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3217 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3218 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3219 // initialize rounding indicators
3220 is_inexact_lt_midpoint
= 0;
3221 is_inexact_gt_midpoint
= 0;
3222 is_midpoint_lt_even
= 0;
3223 is_midpoint_gt_even
= 0;
3224 // round to p34 digits; the result fits in 113 bits
3226 P128
.w
[1] = R256
.w
[1];
3227 P128
.w
[0] = R256
.w
[0];
3228 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
3229 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3230 &is_inexact_lt_midpoint
,
3231 &is_inexact_gt_midpoint
);
3232 } else if (ind
<= 57) {
3233 P192
.w
[2] = R256
.w
[2];
3234 P192
.w
[1] = R256
.w
[1];
3235 P192
.w
[0] = R256
.w
[0];
3236 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
3237 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3238 &is_inexact_lt_midpoint
,
3239 &is_inexact_gt_midpoint
);
3240 R128
.w
[1] = R192
.w
[1];
3241 R128
.w
[0] = R192
.w
[0];
3242 } else { // if (ind <= 68)
3243 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
3244 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3245 &is_inexact_lt_midpoint
,
3246 &is_inexact_gt_midpoint
);
3247 R128
.w
[1] = R256
.w
[1];
3248 R128
.w
[0] = R256
.w
[0];
3250 // the rounded result has p34 = 34 digits
3251 e4
= e4
+ x0
+ incr_exp
;
3253 res
.w
[1] = R128
.w
[1];
3254 res
.w
[0] = R128
.w
[0];
3256 // avoid a double rounding error
3257 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3258 is_midpoint_lt_even
) { // double rounding error upward
3261 if (res
.w
[0] == 0xffffffffffffffffull
)
3263 is_midpoint_lt_even
= 0;
3264 is_inexact_lt_midpoint
= 1;
3265 // Note: a double rounding error upward is not possible; for this
3266 // the result after the first rounding would have to be 99...95
3267 // (35 digits in all), possibly followed by a number of zeros; this
3268 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3269 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3270 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3271 res
.w
[0] == 0x38c15b09ffffffffull
) { // 10^33 - 1
3272 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3273 res
.w
[0] = 0x378d8e63ffffffffull
;
3276 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3277 is_midpoint_gt_even
) { // double rounding error downward
3282 is_midpoint_gt_even
= 0;
3283 is_inexact_gt_midpoint
= 1;
3284 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3285 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
3286 // if this second rounding was exact the result may still be
3287 // inexact because of the first rounding
3288 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3289 is_inexact_gt_midpoint
= 1;
3291 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3292 is_inexact_lt_midpoint
= 1;
3294 } else if (is_midpoint_gt_even
&&
3295 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
3296 // pulled up to a midpoint
3297 is_inexact_lt_midpoint
= 1;
3298 is_inexact_gt_midpoint
= 0;
3299 is_midpoint_lt_even
= 0;
3300 is_midpoint_gt_even
= 0;
3301 } else if (is_midpoint_lt_even
&&
3302 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
3303 // pulled down to a midpoint
3304 is_inexact_lt_midpoint
= 0;
3305 is_inexact_gt_midpoint
= 1;
3306 is_midpoint_lt_even
= 0;
3307 is_midpoint_gt_even
= 0;
3313 // determine tininess
3314 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
3316 is_tiny
= 1; // for other rounding modes apply correction
3319 // for RM, RP, RZ, RA apply correction in order to determine tininess
3320 // but do not save the result; apply the correction to
3321 // (-1)^p_sign * res * 10^0
3322 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| res
.w
[1];
3323 P128
.w
[0] = res
.w
[0];
3324 rounding_correction (rnd_mode
,
3325 is_inexact_lt_midpoint
,
3326 is_inexact_gt_midpoint
,
3327 is_midpoint_lt_even
, is_midpoint_gt_even
,
3329 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
3330 // the number of digits in the significand is p34 = 34
3331 if (e4
+ scale
< expmin
) {
3336 // the result rounded to the destination precision with unbounded exponent
3337 // is (-1)^p_sign * res * 10^e4
3338 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1]; // RN
3339 // res.w[0] unchanged;
3340 // Note: res is correct only if expmin <= e4 <= expmax
3341 ind
= p34
; // the number of decimal digits in the signifcand of res
3343 // at this point we have the result rounded with unbounded exponent in
3344 // res and we know its tininess:
3345 // res = (-1)^p_sign * significand * 10^e4,
3346 // where q (significand) = ind = p34
3347 // Note: res is correct only if expmin <= e4 <= expmax
3349 // check for overflow if RN
3350 if (rnd_mode
== ROUNDING_TO_NEAREST
3351 && (ind
+ e4
) > (p34
+ expmax
)) {
3352 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3353 res
.w
[0] = 0x0000000000000000ull
;
3354 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
3355 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3356 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3357 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3358 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3361 } // else not overflow or not RN, so continue
3363 // from this point on this is similar to the last part of the computation
3364 // for Cases (15), (16), (17)
3366 // if (e4 >= expmin) we have the result rounded with bounded exponent
3368 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
3369 // where the result rounded [at most] once is
3370 // (-1)^p_sign * significand_res * 10^e4
3372 // avoid double rounding error
3373 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3374 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3375 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3376 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3377 is_inexact_lt_midpoint
= 0;
3378 is_inexact_gt_midpoint
= 0;
3379 is_midpoint_lt_even
= 0;
3380 is_midpoint_gt_even
= 0;
3383 // nothing is left of res when moving the decimal point left x0 digits
3384 is_inexact_lt_midpoint
= 1;
3385 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
3386 res
.w
[0] = 0x0000000000000000ull
;
3388 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
3389 // this is <, =, or > 1/2 ulp
3390 // compare the ind-digit value in the significand of res with
3391 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3392 // less than, equal to, or greater than 1/2 ulp (significand of res)
3393 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3394 R128
.w
[0] = res
.w
[0];
3396 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
3398 is_inexact_lt_midpoint
= 1;
3399 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
3401 is_midpoint_gt_even
= 1;
3402 } else { // > 1/2 ulp
3404 is_inexact_gt_midpoint
= 1;
3406 } else { // if (ind <= 38)
3407 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
3408 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3409 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
3411 is_inexact_lt_midpoint
= 1;
3412 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3413 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
3415 is_midpoint_gt_even
= 1;
3416 } else { // > 1/2 ulp
3418 is_inexact_gt_midpoint
= 1;
3421 if (lt_half_ulp
|| eq_half_ulp
) {
3422 // res = +0.0 * 10^expmin
3423 res
.w
[1] = 0x0000000000000000ull
;
3424 res
.w
[0] = 0x0000000000000000ull
;
3425 } else { // if (gt_half_ulp)
3426 // res = +1 * 10^expmin
3427 res
.w
[1] = 0x0000000000000000ull
;
3428 res
.w
[0] = 0x0000000000000001ull
;
3430 res
.w
[1] = p_sign
| res
.w
[1];
3432 } else { // if (1 <= x0 <= ind - 1 <= 33)
3433 // round the ind-digit result to ind - x0 digits
3435 if (ind
<= 18) { // 2 <= ind <= 18
3436 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
3437 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3438 &is_inexact_lt_midpoint
,
3439 &is_inexact_gt_midpoint
);
3442 } else if (ind
<= 38) {
3443 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3444 P128
.w
[0] = res
.w
[0];
3445 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
3446 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3447 &is_inexact_lt_midpoint
,
3448 &is_inexact_gt_midpoint
);
3450 e4
= e4
+ x0
; // expmin
3451 // we want the exponent to be expmin, so if incr_exp = 1 then
3452 // multiply the rounded result by 10 - it will still fit in 113 bits
3455 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3456 P128
.w
[0] = res
.w
[0];
3457 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
3460 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.
3462 // avoid a double rounding error
3463 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3464 is_midpoint_lt_even
) { // double rounding error upward
3467 if (res
.w
[0] == 0xffffffffffffffffull
)
3469 // Note: a double rounding error upward is not possible; for this
3470 // the result after the first rounding would have to be 99...95
3471 // (35 digits in all), possibly followed by a number of zeros; this
3472 // not possible in this underflow case
3473 is_midpoint_lt_even
= 0;
3474 is_inexact_lt_midpoint
= 1;
3475 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3476 is_midpoint_gt_even
) { // double rounding error downward
3481 is_midpoint_gt_even
= 0;
3482 is_inexact_gt_midpoint
= 1;
3483 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3484 !is_inexact_lt_midpoint
3485 && !is_inexact_gt_midpoint
) {
3486 // if this second rounding was exact the result may still be
3487 // inexact because of the first rounding
3488 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3489 is_inexact_gt_midpoint
= 1;
3491 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3492 is_inexact_lt_midpoint
= 1;
3494 } else if (is_midpoint_gt_even
&&
3495 (is_inexact_gt_midpoint0
3496 || is_midpoint_lt_even0
)) {
3497 // pulled up to a midpoint
3498 is_inexact_lt_midpoint
= 1;
3499 is_inexact_gt_midpoint
= 0;
3500 is_midpoint_lt_even
= 0;
3501 is_midpoint_gt_even
= 0;
3502 } else if (is_midpoint_lt_even
&&
3503 (is_inexact_lt_midpoint0
3504 || is_midpoint_gt_even0
)) {
3505 // pulled down to a midpoint
3506 is_inexact_lt_midpoint
= 0;
3507 is_inexact_gt_midpoint
= 1;
3508 is_midpoint_lt_even
= 0;
3509 is_midpoint_gt_even
= 0;
3515 // res contains the correct result
3516 // apply correction if not rounding to nearest
3517 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3518 rounding_correction (rnd_mode
,
3519 is_inexact_lt_midpoint
,
3520 is_inexact_gt_midpoint
,
3521 is_midpoint_lt_even
, is_midpoint_gt_even
,
3524 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
3525 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
3526 // set the inexact flag
3527 *pfpsf
|= INEXACT_EXCEPTION
;
3529 *pfpsf
|= UNDERFLOW_EXCEPTION
;
3531 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3532 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3533 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3534 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3538 } else if ((p34
<= delta
&& delta
+ q3
<= q4
) || // Case (15)
3539 (delta
< p34
&& p34
< delta
+ q3
&& delta
+ q3
<= q4
) || //Case (16)
3540 (delta
+ q3
<= p34
&& p34
< q4
)) { // Case (17)
3542 // calculate first the result rounded to the destination precision, with
3543 // unbounded exponent
3545 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
3546 rnd_mode
, &is_midpoint_lt_even
,
3547 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
3548 &is_inexact_gt_midpoint
, pfpsf
, &res
);
3549 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3550 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3551 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3552 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3560 } // end if delta < 0
3562 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3563 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3564 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3565 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3572 #if DECIMAL_CALL_BY_REFERENCE
3574 bid128_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
3575 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3577 UINT128 x
= *px
, y
= *py
, z
= *pz
;
3578 #if !DECIMAL_GLOBAL_ROUNDING
3579 unsigned int rnd_mode
= *prnd_mode
;
3583 bid128_fma (UINT128 x
, UINT128 y
, UINT128 z
3584 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3587 int is_midpoint_lt_even
, is_midpoint_gt_even
,
3588 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
;
3589 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3591 #if DECIMAL_CALL_BY_REFERENCE
3592 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3593 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3595 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3598 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3599 &is_inexact_lt_midpoint
,
3600 &is_inexact_gt_midpoint
, x
, y
,
3601 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3608 #if DECIMAL_CALL_BY_REFERENCE
3610 bid128ddd_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT64
* pz
3611 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3613 UINT64 x
= *px
, y
= *py
, z
= *pz
;
3614 #if !DECIMAL_GLOBAL_ROUNDING
3615 unsigned int rnd_mode
= *prnd_mode
;
3619 bid128ddd_fma (UINT64 x
, UINT64 y
, UINT64 z
3620 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3623 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3624 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3625 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3628 #if DECIMAL_CALL_BY_REFERENCE
3629 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3630 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3631 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3632 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3633 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3635 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3638 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3639 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3640 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3641 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3642 &is_inexact_lt_midpoint
,
3643 &is_inexact_gt_midpoint
, x1
, y1
,
3644 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3651 #if DECIMAL_CALL_BY_REFERENCE
3653 bid128ddq_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3654 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3656 UINT64 x
= *px
, y
= *py
;
3658 #if !DECIMAL_GLOBAL_ROUNDING
3659 unsigned int rnd_mode
= *prnd_mode
;
3663 bid128ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3664 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3667 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3668 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3669 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3672 #if DECIMAL_CALL_BY_REFERENCE
3673 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3674 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3675 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3676 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3678 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3681 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3682 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3683 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3684 &is_inexact_lt_midpoint
,
3685 &is_inexact_gt_midpoint
, x1
, y1
,
3686 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3693 #if DECIMAL_CALL_BY_REFERENCE
3695 bid128dqd_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3696 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3698 UINT64 x
= *px
, z
= *pz
;
3699 #if !DECIMAL_GLOBAL_ROUNDING
3700 unsigned int rnd_mode
= *prnd_mode
;
3704 bid128dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3705 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3708 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3709 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3710 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3713 #if DECIMAL_CALL_BY_REFERENCE
3714 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3715 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3716 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3717 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3719 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3722 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3723 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3724 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3725 &is_inexact_lt_midpoint
,
3726 &is_inexact_gt_midpoint
, x1
, y
,
3727 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3734 #if DECIMAL_CALL_BY_REFERENCE
3736 bid128dqq_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3737 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3740 #if !DECIMAL_GLOBAL_ROUNDING
3741 unsigned int rnd_mode
= *prnd_mode
;
3745 bid128dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3746 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3749 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3750 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3751 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3754 #if DECIMAL_CALL_BY_REFERENCE
3755 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3756 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3757 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3759 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3762 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3763 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3764 &is_inexact_lt_midpoint
,
3765 &is_inexact_gt_midpoint
, x1
, y
,
3766 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3773 #if DECIMAL_CALL_BY_REFERENCE
3775 bid128qdd_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
3776 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3778 UINT64 y
= *py
, z
= *pz
;
3779 #if !DECIMAL_GLOBAL_ROUNDING
3780 unsigned int rnd_mode
= *prnd_mode
;
3784 bid128qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
3785 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3788 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3789 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3790 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3793 #if DECIMAL_CALL_BY_REFERENCE
3794 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3795 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3796 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3797 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3799 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3802 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3803 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3804 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3805 &is_inexact_lt_midpoint
,
3806 &is_inexact_gt_midpoint
, x
, y1
,
3807 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3814 #if DECIMAL_CALL_BY_REFERENCE
3816 bid128qdq_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
3817 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3820 #if !DECIMAL_GLOBAL_ROUNDING
3821 unsigned int rnd_mode
= *prnd_mode
;
3825 bid128qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
3826 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3829 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3830 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3831 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3834 #if DECIMAL_CALL_BY_REFERENCE
3835 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3836 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3837 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3839 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3842 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3843 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3844 &is_inexact_lt_midpoint
,
3845 &is_inexact_gt_midpoint
, x
, y1
,
3846 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3853 #if DECIMAL_CALL_BY_REFERENCE
3855 bid128qqd_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
3856 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3859 #if !DECIMAL_GLOBAL_ROUNDING
3860 unsigned int rnd_mode
= *prnd_mode
;
3864 bid128qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
3865 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3868 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3869 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3870 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3873 #if DECIMAL_CALL_BY_REFERENCE
3874 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3875 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3876 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3878 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3881 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3882 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3883 &is_inexact_lt_midpoint
,
3884 &is_inexact_gt_midpoint
, x
, y
,
3885 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3891 // Note: bid128qqq_fma is represented by bid128_fma
3893 // Note: bid64ddd_fma is represented by bid64_fma
3895 #if DECIMAL_CALL_BY_REFERENCE
3897 bid64ddq_fma (UINT64
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3898 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3900 UINT64 x
= *px
, y
= *py
;
3901 #if !DECIMAL_GLOBAL_ROUNDING
3902 unsigned int rnd_mode
= *prnd_mode
;
3906 bid64ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3907 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3910 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3913 #if DECIMAL_CALL_BY_REFERENCE
3914 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3915 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3916 bid64qqq_fma (&res1
, &x1
, &y1
, pz
3917 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3920 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3921 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3922 res1
= bid64qqq_fma (x1
, y1
, z
3923 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3930 #if DECIMAL_CALL_BY_REFERENCE
3932 bid64dqd_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3933 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3935 UINT64 x
= *px
, z
= *pz
;
3936 #if !DECIMAL_GLOBAL_ROUNDING
3937 unsigned int rnd_mode
= *prnd_mode
;
3941 bid64dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3942 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3945 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3948 #if DECIMAL_CALL_BY_REFERENCE
3949 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3950 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3951 bid64qqq_fma (&res1
, &x1
, py
, &z1
3952 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3955 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3956 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3957 res1
= bid64qqq_fma (x1
, y
, z1
3958 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3965 #if DECIMAL_CALL_BY_REFERENCE
3967 bid64dqq_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3968 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3971 #if !DECIMAL_GLOBAL_ROUNDING
3972 unsigned int rnd_mode
= *prnd_mode
;
3976 bid64dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3977 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3980 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3983 #if DECIMAL_CALL_BY_REFERENCE
3984 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3985 bid64qqq_fma (&res1
, &x1
, py
, pz
3986 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3989 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3990 res1
= bid64qqq_fma (x1
, y
, z
3991 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3998 #if DECIMAL_CALL_BY_REFERENCE
4000 bid64qdd_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
4001 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4003 UINT64 y
= *py
, z
= *pz
;
4004 #if !DECIMAL_GLOBAL_ROUNDING
4005 unsigned int rnd_mode
= *prnd_mode
;
4009 bid64qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
4010 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4013 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4016 #if DECIMAL_CALL_BY_REFERENCE
4017 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4018 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4019 bid64qqq_fma (&res1
, px
, &y1
, &z1
4020 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4023 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4024 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4025 res1
= bid64qqq_fma (x
, y1
, z1
4026 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4033 #if DECIMAL_CALL_BY_REFERENCE
4035 bid64qdq_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
4036 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4039 #if !DECIMAL_GLOBAL_ROUNDING
4040 unsigned int rnd_mode
= *prnd_mode
;
4044 bid64qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
4045 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4048 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4051 #if DECIMAL_CALL_BY_REFERENCE
4052 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4053 bid64qqq_fma (&res1
, px
, &y1
, pz
4054 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4057 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4058 res1
= bid64qqq_fma (x
, y1
, z
4059 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4066 #if DECIMAL_CALL_BY_REFERENCE
4068 bid64qqd_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
4069 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4072 #if !DECIMAL_GLOBAL_ROUNDING
4073 unsigned int rnd_mode
= *prnd_mode
;
4077 bid64qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
4078 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4081 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4084 #if DECIMAL_CALL_BY_REFERENCE
4085 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4086 bid64qqq_fma (&res1
, px
, py
, &z1
4087 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4090 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4091 res1
= bid64qqq_fma (x
, y
, z1
4092 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4099 #if DECIMAL_CALL_BY_REFERENCE
4101 bid64qqq_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
4102 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4104 UINT128 x
= *px
, y
= *py
, z
= *pz
;
4105 #if !DECIMAL_GLOBAL_ROUNDING
4106 unsigned int rnd_mode
= *prnd_mode
;
4110 bid64qqq_fma (UINT128 x
, UINT128 y
, UINT128 z
4111 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4114 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0,
4115 is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
4116 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
4117 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
4119 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4120 UINT128 res128
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4121 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4122 unsigned int save_fpsf
; // needed because of the call to bid128_ext_fma
4131 int lt_half_ulp
= 0, eq_half_ulp
= 0;
4133 // Note: for rounding modes other than RN or RA, the result can be obtained
4134 // by rounding first to BID128 and then to BID64
4136 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
4139 #if DECIMAL_CALL_BY_REFERENCE
4140 bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4141 &is_inexact_lt_midpoint0
, &is_inexact_gt_midpoint0
,
4143 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4146 res
= bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4147 &is_inexact_lt_midpoint0
,
4148 &is_inexact_gt_midpoint0
, x
, y
,
4149 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4153 if ((rnd_mode
== ROUNDING_DOWN
) || (rnd_mode
== ROUNDING_UP
) ||
4154 (rnd_mode
== ROUNDING_TO_ZERO
) || // no double rounding error is possible
4155 ((res
.w
[HIGH_128W
] & MASK_NAN
) == MASK_NAN
) || //res=QNaN (cannot be SNaN)
4156 ((res
.w
[HIGH_128W
] & MASK_ANY_INF
) == MASK_INF
)) { // result is infinity
4157 #if DECIMAL_CALL_BY_REFERENCE
4158 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4160 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4162 // determine the unbiased exponent of the result
4163 unbexp
= ((res1
>> 53) & 0x3ff) - 398;
4165 // if subnormal, res1 must have exp = -398
4166 // if tiny and inexact set underflow and inexact status flags
4167 if (!((res1
& MASK_NAN
) == MASK_NAN
) && // res1 not NaN
4169 && ((res1
& MASK_BINARY_SIG1
) < 1000000000000000ull)
4170 && (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
4171 || is_midpoint_lt_even0
|| is_midpoint_gt_even0
)) {
4172 // set the inexact flag and the underflow flag
4173 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4174 } else if (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4175 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4176 // set the inexact flag and the underflow flag
4177 *pfpsf
|= INEXACT_EXCEPTION
;
4180 *pfpsf
|= save_fpsf
;
4182 } // else continue, and use rounding to nearest to round to 16 digits
4184 // at this point the result is rounded to nearest (even or away) to 34 digits
4185 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4186 sign
= res
.w
[HIGH_128W
] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
4187 exp
= res
.w
[HIGH_128W
] & MASK_EXP
; // biased and shifted left 49 bits
4188 unbexp
= (exp
>> 49) - 6176;
4189 C
.w
[1] = res
.w
[HIGH_128W
] & MASK_COEFF
;
4190 C
.w
[0] = res
.w
[LOW_128W
];
4192 if ((C
.w
[1] == 0x0 && C
.w
[0] == 0x0) || // result is zero
4193 (unbexp
<= (-398 - 35)) || (unbexp
>= (369 + 16))) {
4194 // clear under/overflow
4195 #if DECIMAL_CALL_BY_REFERENCE
4196 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4198 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4200 *pfpsf
|= save_fpsf
;
4204 // -398 - 34 <= unbexp <= 369 + 15
4205 if (rnd_mode
== ROUNDING_TIES_AWAY
) {
4206 // apply correction, if needed, to make the result rounded to nearest-even
4207 if (is_midpoint_gt_even
) {
4209 res1
--; // res1 is now even
4210 } // else the result is already correctly rounded to nearest-even
4212 // at this point the result is finite, non-zero canonical normal or subnormal,
4213 // and in most cases overflow or underflow will not occur
4215 // determine the number of digits q in the result
4216 // q = nr. of decimal digits in x
4217 // determine first the nr. of bits in x
4219 if (C
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
4220 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4221 if (C
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
4222 tmp
.d
= (double) (C
.w
[0] >> 32); // exact conversion
4224 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4225 } else { // x < 2^32
4226 tmp
.d
= (double) (C
.w
[0]); // exact conversion
4228 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4230 } else { // if x < 2^53
4231 tmp
.d
= (double) C
.w
[0]; // exact conversion
4233 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4235 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4236 tmp
.d
= (double) C
.w
[1]; // exact conversion
4238 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4240 q
= nr_digits
[nr_bits
- 1].digits
;
4242 q
= nr_digits
[nr_bits
- 1].digits1
;
4243 if (C
.w
[1] > nr_digits
[nr_bits
- 1].threshold_hi
||
4244 (C
.w
[1] == nr_digits
[nr_bits
- 1].threshold_hi
&&
4245 C
.w
[0] >= nr_digits
[nr_bits
- 1].threshold_lo
))
4248 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4249 // have to be truncated even more)
4253 round64_2_18 (q
, x0
, C
.w
[0], &res1
, &incr_exp
,
4254 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4255 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4256 } else { // 19 <= q <= 34
4257 round128_19_38 (q
, x0
, C
, &res128
, &incr_exp
,
4258 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4259 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4260 res1
= res128
.w
[0]; // the result fits in 64 bits
4262 unbexp
= unbexp
+ x0
;
4265 q
= 16; // need to set in case denormalization is necessary
4267 // the result does not require a second rounding (and it must have
4268 // been exact in the first rounding, since q <= 16)
4272 // avoid a double rounding error
4273 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4274 is_midpoint_lt_even
) { // double rounding error upward
4276 res1
--; // res1 becomes odd
4277 is_midpoint_lt_even
= 0;
4278 is_inexact_lt_midpoint
= 1;
4279 if (res1
== 0x00038d7ea4c67fffull
) { // 10^15 - 1
4280 res1
= 0x002386f26fc0ffffull
; // 10^16 - 1
4283 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4284 is_midpoint_gt_even
) { // double rounding error downward
4286 res1
++; // res1 becomes odd (so it cannot be 10^16)
4287 is_midpoint_gt_even
= 0;
4288 is_inexact_gt_midpoint
= 1;
4289 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4290 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4291 // if this second rounding was exact the result may still be
4292 // inexact because of the first rounding
4293 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4294 is_inexact_gt_midpoint
= 1;
4296 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4297 is_inexact_lt_midpoint
= 1;
4299 } else if (is_midpoint_gt_even
&&
4300 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4301 // pulled up to a midpoint
4302 is_inexact_lt_midpoint
= 1;
4303 is_inexact_gt_midpoint
= 0;
4304 is_midpoint_lt_even
= 0;
4305 is_midpoint_gt_even
= 0;
4306 } else if (is_midpoint_lt_even
&&
4307 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4308 // pulled down to a midpoint
4309 is_inexact_lt_midpoint
= 0;
4310 is_inexact_gt_midpoint
= 1;
4311 is_midpoint_lt_even
= 0;
4312 is_midpoint_gt_even
= 0;
4316 // this is the result rounded correctly to nearest even, with unbounded exp.
4318 // check for overflow
4319 if (q
+ unbexp
> P16
+ expmax16
) {
4320 res1
= sign
| 0x7800000000000000ull
;
4321 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
4322 *pfpsf
|= save_fpsf
;
4324 } else if (unbexp
> expmax16
) { // q + unbexp <= P16 + expmax16
4325 // not overflow; the result must be exact, and we can multiply res1 by
4326 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4327 scale
= unbexp
- expmax16
;
4328 res1
= res1
* ten2k64
[scale
]; // res1 * 10^scale
4329 unbexp
= expmax16
; // unbexp - scale
4334 // check for underflow
4335 if (q
+ unbexp
< P16
+ expmin16
) {
4336 if (unbexp
< expmin16
) {
4337 // we must truncate more of res
4338 x0
= expmin16
- unbexp
; // x0 >= 1
4339 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
4340 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
4341 is_midpoint_lt_even0
= is_midpoint_lt_even
;
4342 is_midpoint_gt_even0
= is_midpoint_gt_even
;
4343 is_inexact_lt_midpoint
= 0;
4344 is_inexact_gt_midpoint
= 0;
4345 is_midpoint_lt_even
= 0;
4346 is_midpoint_gt_even
= 0;
4347 // the number of decimal digits in res1 is q
4348 if (x0
< q
) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4349 // 2 <= q <= 16, 1 <= x0 <= 15
4350 round64_2_18 (q
, x0
, res1
, &res1
, &incr_exp
,
4351 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4352 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4354 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4355 res1
= ten2k64
[q
- x0
];
4357 unbexp
= unbexp
+ x0
; // expmin16
4358 } else if (x0
== q
) {
4359 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4360 // determine relationship with 1/2 ulp
4362 if (res1
< midpoint64
[q
- 1]) { // < 1/2 ulp
4364 is_inexact_lt_midpoint
= 1;
4365 } else if (res1
== midpoint64
[q
- 1]) { // = 1/2 ulp
4367 is_midpoint_gt_even
= 1;
4368 } else { // > 1/2 ulp
4370 is_inexact_gt_midpoint
= 1;
4372 if (lt_half_ulp
|| eq_half_ulp
) {
4373 // res = +0.0 * 10^expmin16
4374 res1
= 0x0000000000000000ull
;
4375 } else { // if (gt_half_ulp)
4376 // res = +1 * 10^expmin16
4377 res1
= 0x0000000000000001ull
;
4380 } else { // if (x0 > q)
4381 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4382 res1
= 0x0000000000000000ull
;
4384 is_inexact_lt_midpoint
= 1;
4386 // avoid a double rounding error
4387 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4388 is_midpoint_lt_even
) { // double rounding error upward
4390 res1
--; // res1 becomes odd
4391 is_midpoint_lt_even
= 0;
4392 is_inexact_lt_midpoint
= 1;
4393 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4394 is_midpoint_gt_even
) { // double rounding error downward
4396 res1
++; // res1 becomes odd
4397 is_midpoint_gt_even
= 0;
4398 is_inexact_gt_midpoint
= 1;
4399 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4400 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4401 // if this rounding was exact the result may still be
4402 // inexact because of the previous roundings
4403 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4404 is_inexact_gt_midpoint
= 1;
4406 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4407 is_inexact_lt_midpoint
= 1;
4409 } else if (is_midpoint_gt_even
&&
4410 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4411 // pulled up to a midpoint
4412 is_inexact_lt_midpoint
= 1;
4413 is_inexact_gt_midpoint
= 0;
4414 is_midpoint_lt_even
= 0;
4415 is_midpoint_gt_even
= 0;
4416 } else if (is_midpoint_lt_even
&&
4417 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4418 // pulled down to a midpoint
4419 is_inexact_lt_midpoint
= 0;
4420 is_inexact_gt_midpoint
= 1;
4421 is_midpoint_lt_even
= 0;
4422 is_midpoint_gt_even
= 0;
4427 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4428 // and the result is tiny and exact
4430 // check for inexact result
4431 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4432 is_midpoint_lt_even
|| is_midpoint_gt_even
||
4433 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4434 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4435 // set the inexact flag and the underflow flag
4436 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4438 } else if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4439 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
4440 *pfpsf
|= INEXACT_EXCEPTION
;
4442 // this is the result rounded correctly to nearest, with bounded exponent
4444 if (rnd_mode
== ROUNDING_TIES_AWAY
&& is_midpoint_gt_even
) { // correction
4446 res1
++; // res1 is now odd
4447 } // else the result is already correct
4449 // assemble the result
4450 if (res1
< 0x0020000000000000ull
) { // res < 2^53
4451 res1
= sign
| ((UINT64
) (unbexp
+ 398) << 53) | res1
;
4452 } else { // res1 >= 2^53
4453 res1
= sign
| MASK_STEERING_BITS
|
4454 ((UINT64
) (unbexp
+ 398) << 51) | (res1
& MASK_BINARY_SIG2
);
4456 *pfpsf
|= save_fpsf
;