/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
/*****************************************************************************
 *    BID128 fma   x * y + z
 ****************************************************************************/
30 #include "bid_internal.h"
33 rounding_correction (unsigned int rnd_mode
,
34 unsigned int is_inexact_lt_midpoint
,
35 unsigned int is_inexact_gt_midpoint
,
36 unsigned int is_midpoint_lt_even
,
37 unsigned int is_midpoint_gt_even
,
39 UINT128
* ptrres
, _IDEC_flags
* ptrfpsf
) {
40 // unbiased true exponent unbexp may be larger than emax
42 UINT128 res
= *ptrres
; // expected to have the correct sign and coefficient
43 // (the exponent field is ignored, as unbexp is used instead)
47 // general correction from RN to RA, RM, RP, RZ
48 // Note: if the result is negative, then is_inexact_lt_midpoint,
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50 // have to be considered as if determined for the absolute value of the
51 // result (so they seem to be reversed)
53 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
54 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
55 *ptrfpsf
|= INEXACT_EXCEPTION
;
57 // apply correction to result calculated with unbounded exponent
58 sign
= res
.w
[1] & MASK_SIGN
;
59 exp
= (UINT64
) (unbexp
+ 6176) << 49; // valid only if expmin<=unbexp<=expmax
60 C_hi
= res
.w
[1] & MASK_COEFF
;
62 if ((!sign
&& ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
63 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
) &&
64 is_midpoint_gt_even
))) ||
65 (sign
&& ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
66 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_DOWN
) &&
67 is_midpoint_gt_even
)))) {
72 if (C_hi
== 0x0001ed09bead87c0ull
&& C_lo
== 0x378d8e6400000000ull
) {
73 // C = 10^34 => rounding overflow
74 C_hi
= 0x0000314dc6448d93ull
;
75 C_lo
= 0x38c15b0a00000000ull
; // 10^33
76 // exp = exp + EXP_P1;
78 exp
= (UINT64
) (unbexp
+ 6176) << 49;
80 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
81 ((sign
&& (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TO_ZERO
)) ||
82 (!sign
&& (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TO_ZERO
)))) {
85 if (C_lo
== 0xffffffffffffffffull
)
87 // check if we crossed into the lower decade
88 if (C_hi
== 0x0000314dc6448d93ull
&& C_lo
== 0x38c15b09ffffffffull
) {
91 C_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
92 C_lo
= 0x378d8e63ffffffffull
;
93 // exp = exp - EXP_P1;
95 exp
= (UINT64
) (unbexp
+ 6176) << 49;
96 } else { // if exp = 0
97 if (is_midpoint_lt_even
|| is_midpoint_lt_even
||
98 is_inexact_gt_midpoint
|| is_inexact_gt_midpoint
) // tiny & inexact
99 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
103 ; // the result is already correct
105 if (unbexp
> expmax
) { // 6111
106 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
108 if (!sign
) { // result is positive
109 if (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // +inf
110 C_hi
= 0x7800000000000000ull
;
111 C_lo
= 0x0000000000000000ull
;
112 } else { // res = +MAXFP = (10^34-1) * 10^emax
113 C_hi
= 0x5fffed09bead87c0ull
;
114 C_lo
= 0x378d8e63ffffffffull
;
116 } else { // result is negative
117 if (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // -inf
118 C_hi
= 0xf800000000000000ull
;
119 C_lo
= 0x0000000000000000ull
;
120 } else { // res = -MAXFP = -(10^34-1) * 10^emax
121 C_hi
= 0xdfffed09bead87c0ull
;
122 C_lo
= 0x378d8e63ffffffffull
;
126 // assemble the result
127 res
.w
[1] = sign
| exp
| C_hi
;
133 add256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
134 // *z = x + yl assume the sum fits in 256 bits
136 z
.w
[0] = x
.w
[0] + y
.w
[0];
137 if (z
.w
[0] < x
.w
[0]) {
139 if (x
.w
[1] == 0x0000000000000000ull
) {
141 if (x
.w
[2] == 0x0000000000000000ull
) {
146 z
.w
[1] = x
.w
[1] + y
.w
[1];
147 if (z
.w
[1] < x
.w
[1]) {
149 if (x
.w
[2] == 0x0000000000000000ull
) {
153 z
.w
[2] = x
.w
[2] + y
.w
[2];
154 if (z
.w
[2] < x
.w
[2]) {
157 z
.w
[3] = x
.w
[3] + y
.w
[3]; // it was assumed that no carry is possible
162 sub256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
163 // *z = x - y; assume x >= y
165 z
.w
[0] = x
.w
[0] - y
.w
[0];
166 if (z
.w
[0] > x
.w
[0]) {
168 if (x
.w
[1] == 0xffffffffffffffffull
) {
170 if (x
.w
[2] == 0xffffffffffffffffull
) {
175 z
.w
[1] = x
.w
[1] - y
.w
[1];
176 if (z
.w
[1] > x
.w
[1]) {
178 if (x
.w
[2] == 0xffffffffffffffffull
) {
182 z
.w
[2] = x
.w
[2] - y
.w
[2];
183 if (z
.w
[2] > x
.w
[2]) {
186 z
.w
[3] = x
.w
[3] - y
.w
[3]; // no borrow possible, because x >= y
192 nr_digits256 (UINT256 R256
) {
194 // determine the number of decimal digits in R256
195 if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 && R256
.w
[1] == 0x0) {
196 // between 1 and 19 digits
197 for (ind
= 1; ind
<= 19; ind
++) {
198 if (R256
.w
[0] < ten2k64
[ind
]) {
203 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 &&
204 (R256
.w
[1] < ten2k128
[0].w
[1] ||
205 (R256
.w
[1] == ten2k128
[0].w
[1]
206 && R256
.w
[0] < ten2k128
[0].w
[0]))) {
209 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0) {
210 // between 21 and 38 digits
211 for (ind
= 1; ind
<= 18; ind
++) {
212 if (R256
.w
[1] < ten2k128
[ind
].w
[1] ||
213 (R256
.w
[1] == ten2k128
[ind
].w
[1] &&
214 R256
.w
[0] < ten2k128
[ind
].w
[0])) {
220 } else if (R256
.w
[3] == 0x0 &&
221 (R256
.w
[2] < ten2k256
[0].w
[2] ||
222 (R256
.w
[2] == ten2k256
[0].w
[2] &&
223 R256
.w
[1] < ten2k256
[0].w
[1]) ||
224 (R256
.w
[2] == ten2k256
[0].w
[2] &&
225 R256
.w
[1] == ten2k256
[0].w
[1] &&
226 R256
.w
[0] < ten2k256
[0].w
[0]))) {
230 // between 40 and 68 digits
231 for (ind
= 1; ind
<= 29; ind
++) {
232 if (R256
.w
[3] < ten2k256
[ind
].w
[3] ||
233 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
234 R256
.w
[2] < ten2k256
[ind
].w
[2]) ||
235 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
236 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
237 R256
.w
[1] < ten2k256
[ind
].w
[1]) ||
238 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
239 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
240 R256
.w
[1] == ten2k256
[ind
].w
[1] &&
241 R256
.w
[0] < ten2k256
[ind
].w
[0])) {
251 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
252 // use the rounding information from ptr_is_* to avoid a double rounding error
254 add_and_round (int q3
,
264 int *ptr_is_midpoint_lt_even
,
265 int *ptr_is_midpoint_gt_even
,
266 int *ptr_is_inexact_lt_midpoint
,
267 int *ptr_is_inexact_gt_midpoint
,
268 _IDEC_flags
* ptrfpsf
, UINT128
* ptrres
) {
277 int is_midpoint_lt_even
= 0;
278 int is_midpoint_gt_even
= 0;
279 int is_inexact_lt_midpoint
= 0;
280 int is_inexact_gt_midpoint
= 0;
281 int is_midpoint_lt_even0
= 0;
282 int is_midpoint_gt_even0
= 0;
283 int is_inexact_lt_midpoint0
= 0;
284 int is_inexact_gt_midpoint0
= 0;
289 // int gt_half_ulp = 0;
290 UINT128 res
= *ptrres
;
292 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
293 scale
= q4
- delta
- q3
; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
294 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
296 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
297 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
303 } else if (scale
<= 19) { // 10^scale fits in 64 bits
305 P128
.w
[0] = ten2k64
[scale
];
306 __mul_128x128_to_256 (R256
, P128
, C3
);
307 } else if (scale
<= 38) { // 10^scale fits in 128 bits
308 __mul_128x128_to_256 (R256
, ten2k128
[scale
- 20], C3
);
309 } else if (scale
<= 57) { // 39 <= scale <= 57
310 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
311 // (10^67 has 223 bits; 10^69 has 230 bits);
312 // must split the computation:
313 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
314 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
315 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
316 __mul_64x128_to_128 (R128
, ten2k64
[scale
- 38], C3
);
317 // now multiply R128 by 10^38
318 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
319 } else { // 58 <= scale <= 66
320 // 10^scale takes between 193 and 220 bits,
321 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
322 // must split the computation:
323 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
324 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
325 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
326 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
327 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
328 __mul_64x128_to_128 (R128
, C3
.w
[0], ten2k128
[scale
- 58]);
329 // now calculate 10*38 * 10^(scale-38) * C3
330 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
332 // C3 * 10^scale is now in R256
334 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
335 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
337 // add/subtract C4 and C3 * 10^scale; the exponent is e4
338 if (p_sign
== z_sign
) { // R256 = C4 + R256
339 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
340 // but may require rounding
341 add256 (C4
, R256
, &R256
);
342 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
343 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
344 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
345 // but may require rounding
347 // compare first R256 = C3 * 10^scale and C4
348 if (R256
.w
[3] > C4
.w
[3] || (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] > C4
.w
[2]) ||
349 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] > C4
.w
[1]) ||
350 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] == C4
.w
[1] &&
351 R256
.w
[0] >= C4
.w
[0])) { // C3 * 10^scale >= C4
352 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
353 // but may require rounding
354 sub256 (R256
, C4
, &R256
);
355 // flip p_sign too, because the result has the sign of z
357 } else { // if C4 > C3 * 10^scale
358 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
359 // but may require rounding
360 sub256 (C4
, R256
, &R256
);
362 // if the result is pure zero, the sign depends on the rounding mode
363 // (x*y and z had opposite signs)
364 if (R256
.w
[3] == 0x0ull
&& R256
.w
[2] == 0x0ull
&&
365 R256
.w
[1] == 0x0ull
&& R256
.w
[0] == 0x0ull
) {
366 if (rnd_mode
!= ROUNDING_DOWN
)
367 p_sign
= 0x0000000000000000ull
;
369 p_sign
= 0x8000000000000000ull
;
370 // the exponent is max (e4, expmin)
374 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49);
381 // determine the number of decimal digits in R256
382 ind
= nr_digits256 (R256
);
384 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
385 // round to the destination precision, with unbounded exponent
388 // result rounded to the destination precision with unbounded exponent
390 if (ind
+ e4
< p34
+ expmin
) {
391 is_tiny
= 1; // applies to all rounding modes
393 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R256
.w
[1];
394 res
.w
[0] = R256
.w
[0];
395 // Note: res is correct only if expmin <= e4 <= expmax
396 } else { // if (ind > p34)
397 // if more than P digits, round to nearest to P digits
398 // round R256 to p34 digits
399 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
401 P128
.w
[1] = R256
.w
[1];
402 P128
.w
[0] = R256
.w
[0];
403 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
404 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
405 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
406 } else if (ind
<= 57) {
407 P192
.w
[2] = R256
.w
[2];
408 P192
.w
[1] = R256
.w
[1];
409 P192
.w
[0] = R256
.w
[0];
410 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
411 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
412 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
413 R128
.w
[1] = R192
.w
[1];
414 R128
.w
[0] = R192
.w
[0];
415 } else { // if (ind <= 68)
416 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
417 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
418 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
419 R128
.w
[1] = R256
.w
[1];
420 R128
.w
[0] = R256
.w
[0];
422 // the rounded result has p34 = 34 digits
423 e4
= e4
+ x0
+ incr_exp
;
424 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
426 is_tiny
= 1; // for other rounding modes apply correction
429 // for RM, RP, RZ, RA apply correction in order to determine tininess
430 // but do not save the result; apply the correction to
431 // (-1)^p_sign * significand * 10^0
432 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| R128
.w
[1];
433 P128
.w
[0] = R128
.w
[0];
434 rounding_correction (rnd_mode
,
435 is_inexact_lt_midpoint
,
436 is_inexact_gt_midpoint
, is_midpoint_lt_even
,
437 is_midpoint_gt_even
, 0, &P128
, ptrfpsf
);
438 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
439 // the number of digits in the significand is p34 = 34
440 if (e4
+ scale
< expmin
) {
444 ind
= p34
; // the number of decimal digits in the signifcand of res
445 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R128
.w
[1]; // RN
446 res
.w
[0] = R128
.w
[0];
447 // Note: res is correct only if expmin <= e4 <= expmax
448 // set the inexact flag after rounding with bounded exponent, if any
450 // at this point we have the result rounded with unbounded exponent in
451 // res and we know its tininess:
452 // res = (-1)^p_sign * significand * 10^e4,
453 // where q (significand) = ind <= p34
454 // Note: res is correct only if expmin <= e4 <= expmax
456 // check for overflow if RN
457 if (rnd_mode
== ROUNDING_TO_NEAREST
&& (ind
+ e4
) > (p34
+ expmax
)) {
458 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
459 res
.w
[0] = 0x0000000000000000ull
;
461 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
462 return; // BID_RETURN (res)
463 } // else not overflow or not RN, so continue
465 // if (e4 >= expmin) we have the result rounded with bounded exponent
467 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
468 // where the result rounded [at most] once is
469 // (-1)^p_sign * significand_res * 10^e4
471 // avoid double rounding error
472 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
473 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
474 is_midpoint_lt_even0
= is_midpoint_lt_even
;
475 is_midpoint_gt_even0
= is_midpoint_gt_even
;
476 is_inexact_lt_midpoint
= 0;
477 is_inexact_gt_midpoint
= 0;
478 is_midpoint_lt_even
= 0;
479 is_midpoint_gt_even
= 0;
482 // nothing is left of res when moving the decimal point left x0 digits
483 is_inexact_lt_midpoint
= 1;
484 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
485 res
.w
[0] = 0x0000000000000000ull
;
487 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
488 // this is <, =, or > 1/2 ulp
489 // compare the ind-digit value in the significand of res with
490 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
491 // less than, equal to, or greater than 1/2 ulp (significand of res)
492 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
493 R128
.w
[0] = res
.w
[0];
495 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
497 is_inexact_lt_midpoint
= 1;
498 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
500 is_midpoint_gt_even
= 1;
501 } else { // > 1/2 ulp
503 is_inexact_gt_midpoint
= 1;
505 } else { // if (ind <= 38) {
506 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
507 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
508 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
510 is_inexact_lt_midpoint
= 1;
511 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
512 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
514 is_midpoint_gt_even
= 1;
515 } else { // > 1/2 ulp
517 is_inexact_gt_midpoint
= 1;
520 if (lt_half_ulp
|| eq_half_ulp
) {
521 // res = +0.0 * 10^expmin
522 res
.w
[1] = 0x0000000000000000ull
;
523 res
.w
[0] = 0x0000000000000000ull
;
524 } else { // if (gt_half_ulp)
525 // res = +1 * 10^expmin
526 res
.w
[1] = 0x0000000000000000ull
;
527 res
.w
[0] = 0x0000000000000001ull
;
529 res
.w
[1] = p_sign
| res
.w
[1];
531 } else { // if (1 <= x0 <= ind - 1 <= 33)
532 // round the ind-digit result to ind - x0 digits
534 if (ind
<= 18) { // 2 <= ind <= 18
535 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
536 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
537 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
540 } else if (ind
<= 38) {
541 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
542 P128
.w
[0] = res
.w
[0];
543 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
544 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
545 &is_inexact_lt_midpoint
,
546 &is_inexact_gt_midpoint
);
548 e4
= e4
+ x0
; // expmin
549 // we want the exponent to be expmin, so if incr_exp = 1 then
550 // multiply the rounded result by 10 - it will still fit in 113 bits
553 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
554 P128
.w
[0] = res
.w
[0];
555 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
558 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.w
[1] & MASK_COEFF
);
559 // avoid a double rounding error
560 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
561 is_midpoint_lt_even
) { // double rounding error upward
564 if (res
.w
[0] == 0xffffffffffffffffull
)
566 // Note: a double rounding error upward is not possible; for this
567 // the result after the first rounding would have to be 99...95
568 // (35 digits in all), possibly followed by a number of zeros; this
569 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
570 is_midpoint_lt_even
= 0;
571 is_inexact_lt_midpoint
= 1;
572 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
573 is_midpoint_gt_even
) { // double rounding error downward
578 is_midpoint_gt_even
= 0;
579 is_inexact_gt_midpoint
= 1;
580 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
581 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
582 // if this second rounding was exact the result may still be
583 // inexact because of the first rounding
584 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
585 is_inexact_gt_midpoint
= 1;
587 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
588 is_inexact_lt_midpoint
= 1;
590 } else if (is_midpoint_gt_even
&&
591 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
592 // pulled up to a midpoint
593 is_inexact_lt_midpoint
= 1;
594 is_inexact_gt_midpoint
= 0;
595 is_midpoint_lt_even
= 0;
596 is_midpoint_gt_even
= 0;
597 } else if (is_midpoint_lt_even
&&
598 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
599 // pulled down to a midpoint
600 is_inexact_lt_midpoint
= 0;
601 is_inexact_gt_midpoint
= 1;
602 is_midpoint_lt_even
= 0;
603 is_midpoint_gt_even
= 0;
609 // res contains the correct result
610 // apply correction if not rounding to nearest
611 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
612 rounding_correction (rnd_mode
,
613 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
,
614 is_midpoint_lt_even
, is_midpoint_gt_even
,
617 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
618 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
619 // set the inexact flag
620 *ptrfpsf
|= INEXACT_EXCEPTION
;
622 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
625 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
626 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
627 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
628 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
634 #if DECIMAL_CALL_BY_REFERENCE
636 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
637 int *ptr_is_midpoint_gt_even
,
638 int *ptr_is_inexact_lt_midpoint
,
639 int *ptr_is_inexact_gt_midpoint
, UINT128
* pres
,
640 UINT128
* px
, UINT128
* py
,
642 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
644 UINT128 x
= *px
, y
= *py
, z
= *pz
;
645 #if !DECIMAL_GLOBAL_ROUNDING
646 unsigned int rnd_mode
= *prnd_mode
;
650 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
651 int *ptr_is_midpoint_gt_even
,
652 int *ptr_is_inexact_lt_midpoint
,
653 int *ptr_is_inexact_gt_midpoint
, UINT128 x
, UINT128 y
,
654 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
655 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
658 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
659 UINT64 x_sign
, y_sign
, z_sign
, p_sign
, tmp_sign
;
660 UINT64 x_exp
= 0, y_exp
= 0, z_exp
= 0, p_exp
;
664 int q1
= 0, q2
= 0, q3
= 0, q4
;
666 int scale
, ind
, delta
, x0
;
667 int p34
= P34
; // used to modify the limit on the number of digits
669 int x_nr_bits
, y_nr_bits
, z_nr_bits
;
670 unsigned int save_fpsf
;
671 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0;
672 int is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
673 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0;
674 int is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
686 // the following are based on the table of special cases for fma; the NaN
687 // behavior is similar to that of the IA-64 Architecture fma
689 // identify cases where at least one operand is NaN
694 if ((y
.w
[1] & MASK_NAN
) == MASK_NAN
) { // y is NAN
695 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
696 // check first for non-canonical NaN payload
697 if (((y
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
698 (((y
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
699 (y
.w
[0] > 0x38c15b09ffffffffull
))) {
700 y
.w
[1] = y
.w
[1] & 0xffffc00000000000ull
;
703 if ((y
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // y is SNAN
705 *pfpsf
|= INVALID_EXCEPTION
;
707 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
709 } else { // y is QNaN
711 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
713 // if z = SNaN or x = SNaN signal invalid exception
714 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
||
715 (x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
717 *pfpsf
|= INVALID_EXCEPTION
;
720 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
721 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
722 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
723 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
726 } else if ((z
.w
[1] & MASK_NAN
) == MASK_NAN
) { // z is NAN
727 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
728 // check first for non-canonical NaN payload
729 if (((z
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
730 (((z
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
731 (z
.w
[0] > 0x38c15b09ffffffffull
))) {
732 z
.w
[1] = z
.w
[1] & 0xffffc00000000000ull
;
735 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // z is SNAN
737 *pfpsf
|= INVALID_EXCEPTION
;
739 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
741 } else { // z is QNaN
743 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
745 // if x = SNaN signal invalid exception
746 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
748 *pfpsf
|= INVALID_EXCEPTION
;
751 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
752 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
753 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
754 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
757 } else if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
758 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
759 // check first for non-canonical NaN payload
760 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
761 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
762 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
763 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
766 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
768 *pfpsf
|= INVALID_EXCEPTION
;
770 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
772 } else { // x is QNaN
774 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
777 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
778 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
779 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
780 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
784 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
785 // for non-canonical values
787 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
788 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
790 if ((x
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // x != inf
791 // if x is not infinity check for non-canonical values - treated as zero
792 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
794 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
795 C1
.w
[1] = 0; // significand high
796 C1
.w
[0] = 0; // significand low
797 } else { // G0_G1 != 11
798 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
799 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
800 (C1
.w
[1] == 0x0001ed09bead87c0ull
&&
801 C1
.w
[0] > 0x378d8e63ffffffffull
)) {
802 // x is non-canonical if coefficient is larger than 10^34 -1
805 } else { // canonical
810 y_sign
= y
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
811 C2
.w
[1] = y
.w
[1] & MASK_COEFF
;
813 if ((y
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // y != inf
814 // if y is not infinity check for non-canonical values - treated as zero
815 if ((y
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
817 y_exp
= (y
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
818 C2
.w
[1] = 0; // significand high
819 C2
.w
[0] = 0; // significand low
820 } else { // G0_G1 != 11
821 y_exp
= y
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
822 if (C2
.w
[1] > 0x0001ed09bead87c0ull
||
823 (C2
.w
[1] == 0x0001ed09bead87c0ull
&&
824 C2
.w
[0] > 0x378d8e63ffffffffull
)) {
825 // y is non-canonical if coefficient is larger than 10^34 -1
828 } else { // canonical
833 z_sign
= z
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
834 C3
.w
[1] = z
.w
[1] & MASK_COEFF
;
836 if ((z
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // z != inf
837 // if z is not infinity check for non-canonical values - treated as zero
838 if ((z
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
840 z_exp
= (z
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
841 C3
.w
[1] = 0; // significand high
842 C3
.w
[0] = 0; // significand low
843 } else { // G0_G1 != 11
844 z_exp
= z
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
845 if (C3
.w
[1] > 0x0001ed09bead87c0ull
||
846 (C3
.w
[1] == 0x0001ed09bead87c0ull
&&
847 C3
.w
[0] > 0x378d8e63ffffffffull
)) {
848 // z is non-canonical if coefficient is larger than 10^34 -1
851 } else { // canonical
857 p_sign
= x_sign
^ y_sign
; // sign of the product
859 // identify cases where at least one operand is infinity
861 if ((x
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // x = inf
862 if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
863 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
864 if (p_sign
== z_sign
) {
865 res
.w
[1] = z_sign
| MASK_INF
;
868 // return QNaN Indefinite
869 res
.w
[1] = 0x7c00000000000000ull
;
870 res
.w
[0] = 0x0000000000000000ull
;
872 *pfpsf
|= INVALID_EXCEPTION
;
874 } else { // z = 0 or z = f
875 res
.w
[1] = p_sign
| MASK_INF
;
878 } else if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f
879 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
880 if (p_sign
== z_sign
) {
881 res
.w
[1] = z_sign
| MASK_INF
;
884 // return QNaN Indefinite
885 res
.w
[1] = 0x7c00000000000000ull
;
886 res
.w
[0] = 0x0000000000000000ull
;
888 *pfpsf
|= INVALID_EXCEPTION
;
890 } else { // z = 0 or z = f
891 res
.w
[1] = p_sign
| MASK_INF
;
895 // return QNaN Indefinite
896 res
.w
[1] = 0x7c00000000000000ull
;
897 res
.w
[0] = 0x0000000000000000ull
;
899 *pfpsf
|= INVALID_EXCEPTION
;
901 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
902 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
903 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
904 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
907 } else if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
908 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
909 // x = f, necessarily
910 if ((p_sign
!= z_sign
)
911 || (C1
.w
[1] == 0x0ull
&& C1
.w
[0] == 0x0ull
)) {
912 // return QNaN Indefinite
913 res
.w
[1] = 0x7c00000000000000ull
;
914 res
.w
[0] = 0x0000000000000000ull
;
916 *pfpsf
|= INVALID_EXCEPTION
;
918 res
.w
[1] = z_sign
| MASK_INF
;
921 } else if (C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) { // x = 0
923 // return QNaN Indefinite
924 res
.w
[1] = 0x7c00000000000000ull
;
925 res
.w
[0] = 0x0000000000000000ull
;
927 *pfpsf
|= INVALID_EXCEPTION
;
929 // x = f and z = 0, f, necessarily
930 res
.w
[1] = p_sign
| MASK_INF
;
933 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
934 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
935 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
936 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
939 } else if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
940 // x = 0, f and y = 0, f, necessarily
941 res
.w
[1] = z_sign
| MASK_INF
;
943 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
944 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
945 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
946 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
951 true_p_exp
= (x_exp
>> 49) - 6176 + (y_exp
>> 49) - 6176;
952 if (true_p_exp
< -6176)
953 p_exp
= 0; // cannot be less than EXP_MIN
955 p_exp
= (UINT64
) (true_p_exp
+ 6176) << 49;
957 if (((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) || (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) && C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // (x = 0 or y = 0) and z = 0
960 res
.w
[1] = p_exp
; // preferred exponent
962 res
.w
[1] = z_exp
; // preferred exponent
963 if (p_sign
== z_sign
) {
966 } else { // x * y and z have opposite signs
967 if (rnd_mode
== ROUNDING_DOWN
) {
969 res
.w
[1] |= MASK_SIGN
;
977 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
978 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
979 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
980 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
984 // from this point on, we may need to know the number of decimal digits
985 // in the significands of x, y, z when x, y, z != 0
987 if (C1
.w
[1] != 0 || C1
.w
[0] != 0) { // x = f (non-zero finite)
988 // q1 = nr. of decimal digits in x
989 // determine first the nr. of bits in x
991 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
992 // split the 64-bit value in two 32-bit halves to avoid rounding errors
993 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
994 tmp
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
996 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
998 tmp
.d
= (double) (C1
.w
[0]); // exact conversion
1000 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1002 } else { // if x < 2^53
1003 tmp
.d
= (double) C1
.w
[0]; // exact conversion
1005 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1007 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1008 tmp
.d
= (double) C1
.w
[1]; // exact conversion
1010 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1012 q1
= nr_digits
[x_nr_bits
- 1].digits
;
1014 q1
= nr_digits
[x_nr_bits
- 1].digits1
;
1015 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1016 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1017 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1022 if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f (non-zero finite)
1024 if (C2
.w
[0] >= 0x0020000000000000ull
) { // y >= 2^53
1025 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1026 if (C2
.w
[0] >= 0x0000000100000000ull
) { // y >= 2^32
1027 tmp
.d
= (double) (C2
.w
[0] >> 32); // exact conversion
1029 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1030 } else { // y < 2^32
1031 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1033 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1035 } else { // if y < 2^53
1036 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1038 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1040 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1041 tmp
.d
= (double) C2
.w
[1]; // exact conversion
1043 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1046 q2
= nr_digits
[y_nr_bits
].digits
;
1048 q2
= nr_digits
[y_nr_bits
].digits1
;
1049 if (C2
.w
[1] > nr_digits
[y_nr_bits
].threshold_hi
||
1050 (C2
.w
[1] == nr_digits
[y_nr_bits
].threshold_hi
&&
1051 C2
.w
[0] >= nr_digits
[y_nr_bits
].threshold_lo
))
1056 if (C3
.w
[1] != 0 || C3
.w
[0] != 0) { // z = f (non-zero finite)
1058 if (C3
.w
[0] >= 0x0020000000000000ull
) { // z >= 2^53
1059 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1060 if (C3
.w
[0] >= 0x0000000100000000ull
) { // z >= 2^32
1061 tmp
.d
= (double) (C3
.w
[0] >> 32); // exact conversion
1063 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1064 } else { // z < 2^32
1065 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1067 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1069 } else { // if z < 2^53
1070 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1072 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1074 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1075 tmp
.d
= (double) C3
.w
[1]; // exact conversion
1077 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1080 q3
= nr_digits
[z_nr_bits
].digits
;
1082 q3
= nr_digits
[z_nr_bits
].digits1
;
1083 if (C3
.w
[1] > nr_digits
[z_nr_bits
].threshold_hi
||
1084 (C3
.w
[1] == nr_digits
[z_nr_bits
].threshold_hi
&&
1085 C3
.w
[0] >= nr_digits
[z_nr_bits
].threshold_lo
))
1090 if ((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) ||
1091 (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) {
1093 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1094 // the result is z, but need to get the preferred exponent
1095 if (z_exp
<= p_exp
) { // the preferred exponent is z_exp
1096 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | C3
.w
[1];
1098 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1099 // return (C3 * 10^scale) * 10^(z_exp - scale)
1100 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1102 ind
= (z_exp
- p_exp
) >> 49;
1106 res
.w
[1] = z
.w
[1]; // & MASK_COEFF, which is redundant
1108 } else if (q3
<= 19) { // z fits in 64 bits
1109 if (scale
<= 19) { // 10^scale fits in 64 bits
1110 // 64 x 64 C3.w[0] * ten2k64[scale]
1111 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1112 } else { // 10^scale fits in 128 bits
1113 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1114 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1116 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1117 // 64 x 128 ten2k64[scale] * C3
1118 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1120 // subtract scale from the exponent
1121 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1122 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1124 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1125 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1126 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1127 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1131 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1134 e1
= (x_exp
>> 49) - 6176; // unbiased exponent of x
1135 e2
= (y_exp
>> 49) - 6176; // unbiased exponent of y
1136 e3
= (z_exp
>> 49) - 6176; // unbiased exponent of z
1137 e4
= e1
+ e2
; // unbiased exponent of the exact x * y
1139 // calculate C1 * C2 and its number of decimal digits, q4
1141 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1142 // where 2 <= q1 + q2 <= 68
1143 // calculate C4 = C1 * C2 and determine q
1144 C4
.w
[3] = C4
.w
[2] = C4
.w
[1] = C4
.w
[0] = 0;
1145 if (q1
+ q2
<= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1146 C4
.w
[0] = C1
.w
[0] * C2
.w
[0];
1147 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1148 if (C4
.w
[0] < ten2k64
[q1
+ q2
- 1])
1149 q4
= q1
+ q2
- 1; // q4 in [1, 18]
1151 q4
= q1
+ q2
; // q4 in [2, 19]
1152 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1153 } else if (q1
+ q2
== 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1154 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1155 __mul_64x64_to_128MACH (C4
, C1
.w
[0], C2
.w
[0]);
1156 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1157 if (C4
.w
[1] == 0 && C4
.w
[0] < ten2k64
[19]) { // 19 = q1+q2-1
1158 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1159 q4
= 19; // 19 = q1 + q2 - 1
1161 // if (C4.w[1] == 0)
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1164 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1165 q4
= 20; // 20 = q1 + q2
1167 } else if (q1
+ q2
<= 38) { // 21 <= q1 + q2 <= 38
1168 // C4 = C1 * C2 fits in 64 or 128 bits
1169 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1170 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1172 __mul_128x64_to_128 (C4
, C1
.w
[0], C2
);
1173 } else { // q2 <= 19
1174 __mul_128x64_to_128 (C4
, C2
.w
[0], C1
);
1176 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1177 if (C4
.w
[1] < ten2k128
[q1
+ q2
- 21].w
[1] ||
1178 (C4
.w
[1] == ten2k128
[q1
+ q2
- 21].w
[1] &&
1179 C4
.w
[0] < ten2k128
[q1
+ q2
- 21].w
[0])) {
1180 // if (C4.w[1] == 0) // q4 = 20, necessarily
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1183 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1184 q4
= q1
+ q2
- 1; // q4 in [20, 37]
1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1187 q4
= q1
+ q2
; // q4 in [21, 38]
1189 } else if (q1
+ q2
== 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1190 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1191 // may replace this by 128x128_to192
1192 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] is 0
1193 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1194 if (C4
.w
[2] == 0 && (C4
.w
[1] < ten2k128
[18].w
[1] ||
1195 (C4
.w
[1] == ten2k128
[18].w
[1]
1196 && C4
.w
[0] < ten2k128
[18].w
[0]))) {
1197 // 18 = 38 - 20 = q1+q2-1 - 20
1198 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1199 q4
= 38; // 38 = q1 + q2 - 1
1201 // if (C4.w[2] == 0)
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1204 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1205 q4
= 39; // 39 = q1 + q2
1207 } else if (q1
+ q2
<= 57) { // 40 <= q1 + q2 <= 57
1208 // C4 = C1 * C2 fits in 128 or 192 bits
1209 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1210 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1211 // may fit in 64 bits
1212 if (C1
.w
[1] == 0) { // C1 fits in 64 bits
1213 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1214 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
);
1215 } else if (C2
.w
[1] == 0) { // C2 fits in 64 bits
1216 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1217 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
);
1218 } else { // both C1 and C2 require 128 bits
1219 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1220 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1222 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1223 if (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1224 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1225 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1226 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1227 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))) {
1228 // if (C4.w[2] == 0) // q4 = 39, necessarily
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1231 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1232 q4
= q1
+ q2
- 1; // q4 in [39, 56]
1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1235 q4
= q1
+ q2
; // q4 in [40, 57]
1237 } else if (q1
+ q2
== 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1238 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1239 // may fit in 64 bits
1240 if (C1
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1241 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
); // may use 64x128_to_192
1242 } else if (C2
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1243 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
); // may use 64x128_to_192
1244 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1245 __mul_128x128_to_256 (C4
, C1
, C2
);
1247 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1248 if (C4
.w
[3] == 0 && (C4
.w
[2] < ten2k256
[18].w
[2] ||
1249 (C4
.w
[2] == ten2k256
[18].w
[2]
1250 && (C4
.w
[1] < ten2k256
[18].w
[1]
1251 || (C4
.w
[1] == ten2k256
[18].w
[1]
1252 && C4
.w
[0] < ten2k256
[18].w
[0]))))) {
1253 // 18 = 57 - 39 = q1+q2-1 - 39
1254 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1255 q4
= 57; // 57 = q1 + q2 - 1
1257 // if (C4.w[3] == 0)
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1260 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1261 q4
= 58; // 58 = q1 + q2
1263 } else { // if 59 <= q1 + q2 <= 68
1264 // C4 = C1 * C2 fits in 192 or 256 bits
1265 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1266 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1268 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1269 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1270 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1271 if (C4
.w
[3] < ten2k256
[q1
+ q2
- 40].w
[3] ||
1272 (C4
.w
[3] == ten2k256
[q1
+ q2
- 40].w
[3] &&
1273 (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1274 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1275 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1276 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1277 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))))) {
1278 // if (C4.w[3] == 0) // q4 = 58, necessarily
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1282 q4
= q1
+ q2
- 1; // q4 in [58, 67]
1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1285 q4
= q1
+ q2
; // q4 in [59, 68]
1289 if (C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // x = f, y = f, z = 0
1290 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
1295 // truncate C4 to p34 digits into res
1296 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1299 P128
.w
[1] = C4
.w
[1];
1300 P128
.w
[0] = C4
.w
[0];
1301 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1302 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1303 &is_inexact_lt_midpoint
,
1304 &is_inexact_gt_midpoint
);
1305 } else if (q4
<= 57) { // 35 <= q4 <= 57
1306 P192
.w
[2] = C4
.w
[2];
1307 P192
.w
[1] = C4
.w
[1];
1308 P192
.w
[0] = C4
.w
[0];
1309 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
1310 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1311 &is_inexact_lt_midpoint
,
1312 &is_inexact_gt_midpoint
);
1313 res
.w
[0] = R192
.w
[0];
1314 res
.w
[1] = R192
.w
[1];
1315 } else { // if (q4 <= 68)
1316 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
1317 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1318 &is_inexact_lt_midpoint
,
1319 &is_inexact_gt_midpoint
);
1320 res
.w
[0] = R256
.w
[0];
1321 res
.w
[1] = R256
.w
[1];
1328 // res is now the coefficient of the result rounded to the destination
1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1330 } else { // if (q4 <= p34)
1331 // C4 * 10^e4 is the result rounded to the destination precision, with
1332 // unbounded exponent (which is exact)
1334 if ((q4
+ e4
<= p34
+ expmax
) && (e4
> expmax
)) {
1335 // e4 is too large, but can be brought within range by scaling up C4
1336 scale
= e4
- expmax
; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1337 // res = (C4 * 10^scale) * 10^expmax
1338 if (q4
<= 19) { // C4 fits in 64 bits
1339 if (scale
<= 19) { // 10^scale fits in 64 bits
1340 // 64 x 64 C4.w[0] * ten2k64[scale]
1341 __mul_64x64_to_128MACH (res
, C4
.w
[0], ten2k64
[scale
]);
1342 } else { // 10^scale fits in 128 bits
1343 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1344 __mul_128x64_to_128 (res
, C4
.w
[0], ten2k128
[scale
- 20]);
1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1347 // 64 x 128 ten2k64[scale] * CC43
1348 __mul_128x64_to_128 (res
, ten2k64
[scale
], C4
);
1350 e4
= e4
- scale
; // expmax
1356 // res is the coefficient of the result rounded to the destination
1357 // precision, with unbounded exponent (it has q4 digits); the exponent
1358 // is e4 (exact result)
1361 // check for overflow
1362 if (q4
+ e4
> p34
+ expmax
) {
1363 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1364 res
.w
[1] = p_sign
| 0x7800000000000000ull
; // +/-inf
1365 res
.w
[0] = 0x0000000000000000ull
;
1366 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1368 res
.w
[1] = p_sign
| res
.w
[1];
1369 rounding_correction (rnd_mode
,
1370 is_inexact_lt_midpoint
,
1371 is_inexact_gt_midpoint
,
1372 is_midpoint_lt_even
, is_midpoint_gt_even
,
1375 *pfpsf
|= save_fpsf
;
1376 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1377 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1378 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1379 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1383 // check for underflow
1384 if (q4
+ e4
< expmin
+ P34
) {
1385 is_tiny
= 1; // the result is tiny
1387 // if e4 < expmin, we must truncate more of res
1388 x0
= expmin
- e4
; // x0 >= 1
1389 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
1390 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
1391 is_midpoint_lt_even0
= is_midpoint_lt_even
;
1392 is_midpoint_gt_even0
= is_midpoint_gt_even
;
1393 is_inexact_lt_midpoint
= 0;
1394 is_inexact_gt_midpoint
= 0;
1395 is_midpoint_lt_even
= 0;
1396 is_midpoint_gt_even
= 0;
1397 // the number of decimal digits in res is q4
1398 if (x0
< q4
) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1399 if (q4
<= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1400 round64_2_18 (q4
, x0
, res
.w
[0], &R64
, &incr_exp
,
1401 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1402 &is_inexact_lt_midpoint
,
1403 &is_inexact_gt_midpoint
);
1405 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1406 R64
= ten2k64
[q4
- x0
];
1408 // res.w[1] = 0; (from above)
1410 } else { // if (q4 <= 34)
1412 P128
.w
[1] = res
.w
[1];
1413 P128
.w
[0] = res
.w
[0];
1414 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1415 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1416 &is_inexact_lt_midpoint
,
1417 &is_inexact_gt_midpoint
);
1419 // increase coefficient by a factor of 10; this will be <= 10^33
1420 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1421 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
1423 res
.w
[0] = ten2k64
[q4
- x0
];
1424 } else { // 20 <= q4 - x0 <= 37
1425 res
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
1426 res
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
1430 e4
= e4
+ x0
; // expmin
1431 } else if (x0
== q4
) {
1432 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1433 // determine relationship with 1/2 ulp
1435 if (res
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1437 is_inexact_lt_midpoint
= 1;
1438 } else if (res
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1440 is_midpoint_gt_even
= 1;
1441 } else { // > 1/2 ulp
1443 is_inexact_gt_midpoint
= 1;
1445 } else { // if (q4 <= 34)
1446 if (res
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1447 (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1448 res
.w
[0] < midpoint128
[q4
- 20].w
[0])) { // < 1/2 ulp
1450 is_inexact_lt_midpoint
= 1;
1451 } else if (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1452 res
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1454 is_midpoint_gt_even
= 1;
1455 } else { // > 1/2 ulp
1457 is_inexact_gt_midpoint
= 1;
1460 if (lt_half_ulp
|| eq_half_ulp
) {
1461 // res = +0.0 * 10^expmin
1462 res
.w
[1] = 0x0000000000000000ull
;
1463 res
.w
[0] = 0x0000000000000000ull
;
1464 } else { // if (gt_half_ulp)
1465 // res = +1 * 10^expmin
1466 res
.w
[1] = 0x0000000000000000ull
;
1467 res
.w
[0] = 0x0000000000000001ull
;
1470 } else { // if (x0 > q4)
1471 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1475 is_inexact_lt_midpoint
= 1;
1477 // avoid a double rounding error
1478 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
1479 is_midpoint_lt_even
) { // double rounding error upward
1482 if (res
.w
[0] == 0xffffffffffffffffull
)
1484 // Note: a double rounding error upward is not possible; for this
1485 // the result after the first rounding would have to be 99...95
1486 // (35 digits in all), possibly followed by a number of zeros; this
1487 // not possible for f * f + 0
1488 is_midpoint_lt_even
= 0;
1489 is_inexact_lt_midpoint
= 1;
1490 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
1491 is_midpoint_gt_even
) { // double rounding error downward
1496 is_midpoint_gt_even
= 0;
1497 is_inexact_gt_midpoint
= 1;
1498 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
1499 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
1500 // if this second rounding was exact the result may still be
1501 // inexact because of the first rounding
1502 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
1503 is_inexact_gt_midpoint
= 1;
1505 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
1506 is_inexact_lt_midpoint
= 1;
1508 } else if (is_midpoint_gt_even
&&
1509 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
1510 // pulled up to a midpoint
1511 is_inexact_lt_midpoint
= 1;
1512 is_inexact_gt_midpoint
= 0;
1513 is_midpoint_lt_even
= 0;
1514 is_midpoint_gt_even
= 0;
1515 } else if (is_midpoint_lt_even
&&
1516 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
1517 // pulled down to a midpoint
1518 is_inexact_lt_midpoint
= 0;
1519 is_inexact_gt_midpoint
= 1;
1520 is_midpoint_lt_even
= 0;
1521 is_midpoint_gt_even
= 0;
1525 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1527 // if (e3 < e4) the preferred exponent is e3
1528 // return (C4 * 10^scale) * 10^(e4 - scale)
1529 // where scale = min (p34-q4, (e4 - e3))
1535 ; // res and e4 are unchanged
1536 } else if (q4
<= 19) { // C4 fits in 64 bits
1537 if (scale
<= 19) { // 10^scale fits in 64 bits
1538 // 64 x 64 res.w[0] * ten2k64[scale]
1539 __mul_64x64_to_128MACH (res
, res
.w
[0], ten2k64
[scale
]);
1540 } else { // 10^scale fits in 128 bits
1541 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1542 __mul_128x64_to_128 (res
, res
.w
[0], ten2k128
[scale
- 20]);
1544 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1545 // 64 x 128 ten2k64[scale] * C3
1546 __mul_128x64_to_128 (res
, ten2k64
[scale
], res
);
1548 // subtract scale from the exponent
1553 // check for inexact result
1554 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1555 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1556 // set the inexact flag and the underflow flag
1557 *pfpsf
|= INEXACT_EXCEPTION
;
1558 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1560 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1561 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1562 rounding_correction (rnd_mode
,
1563 is_inexact_lt_midpoint
,
1564 is_inexact_gt_midpoint
,
1565 is_midpoint_lt_even
, is_midpoint_gt_even
,
1568 *pfpsf
|= save_fpsf
;
1569 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1570 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1571 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1572 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1576 // no overflow, and no underflow for rounding to nearest
1577 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1579 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1580 rounding_correction (rnd_mode
,
1581 is_inexact_lt_midpoint
,
1582 is_inexact_gt_midpoint
,
1583 is_midpoint_lt_even
, is_midpoint_gt_even
,
1585 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1587 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
1588 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
1589 res
.w
[0] < 0x38c15b0a00000000ull
)) {
1595 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1596 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1597 // set the inexact flag
1598 *pfpsf
|= INEXACT_EXCEPTION
;
1600 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1603 if ((*pfpsf
& INEXACT_EXCEPTION
) == 0) { // x * y is exact
1604 // need to ensure that the result has the preferred exponent
1605 p_exp
= res
.w
[1] & MASK_EXP
;
1606 if (z_exp
< p_exp
) { // the preferred exponent is z_exp
1607 // signficand of res in C3
1608 C3
.w
[1] = res
.w
[1] & MASK_COEFF
;
1610 // the number of decimal digits of x * y is q4 <= 34
1611 // Note: the coefficient fits in 128 bits
1613 // return (C3 * 10^scale) * 10^(p_exp - scale)
1614 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1616 ind
= (p_exp
- z_exp
) >> 49;
1619 // subtract scale from the exponent
1620 p_exp
= p_exp
- ((UINT64
) scale
<< 49);
1622 ; // leave res unchanged
1623 } else if (q4
<= 19) { // x * y fits in 64 bits
1624 if (scale
<= 19) { // 10^scale fits in 64 bits
1625 // 64 x 64 C3.w[0] * ten2k64[scale]
1626 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1627 } else { // 10^scale fits in 128 bits
1628 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1629 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1631 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1632 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1633 // 64 x 128 ten2k64[scale] * C3
1634 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1635 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1637 } // else leave the result as it is, because p_exp <= z_exp
1639 *pfpsf
|= save_fpsf
;
1640 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1641 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1642 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1643 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1646 } // else we have f * f + f
1648 // continue with x = f, y = f, z = f
1650 delta
= q3
+ e3
- q4
- e4
;
1654 if (p34
<= delta
- 1 || // Case (1')
1655 (p34
== delta
&& e3
+ 6176 < p34
- q3
)) { // Case (1''A)
1656 // check for overflow, which can occur only in Case (1')
1657 if ((q3
+ e3
) > (p34
+ expmax
) && p34
<= delta
- 1) {
1658 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1659 // condition for (q3 + e3) > (p34 + expmax)
1660 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1661 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
1662 res
.w
[0] = 0x0000000000000000ull
;
1663 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1665 if (p_sign
== z_sign
) {
1666 is_inexact_lt_midpoint
= 1;
1668 is_inexact_gt_midpoint
= 1;
1670 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1673 res
.w
[1] = z_sign
| C3
.w
[1];
1676 if (q3
<= 19) { // C3 fits in 64 bits
1677 if (scale
<= 19) { // 10^scale fits in 64 bits
1678 // 64 x 64 C3.w[0] * ten2k64[scale]
1679 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1680 } else { // 10^scale fits in 128 bits
1681 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1682 __mul_128x64_to_128 (res
, C3
.w
[0],
1683 ten2k128
[scale
- 20]);
1685 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1686 // 64 x 128 ten2k64[scale] * C3
1687 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1689 // the coefficient in res has q3 + scale = p34 digits
1692 res
.w
[1] = z_sign
| res
.w
[1];
1693 rounding_correction (rnd_mode
,
1694 is_inexact_lt_midpoint
,
1695 is_inexact_gt_midpoint
,
1696 is_midpoint_lt_even
, is_midpoint_gt_even
,
1699 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1700 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1701 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1702 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1707 if (q3
< p34
) { // the preferred exponent is z_exp - (p34 - q3)
1708 // return (C3 * 10^scale) * 10^(z_exp - scale)
1709 // where scale = min (p34-q3, z_exp-EMIN)
1717 } else if (q3
<= 19) { // z fits in 64 bits
1718 if (scale
<= 19) { // 10^scale fits in 64 bits
1719 // 64 x 64 C3.w[0] * ten2k64[scale]
1720 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1721 } else { // 10^scale fits in 128 bits
1722 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1723 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1725 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1726 // 64 x 128 ten2k64[scale] * C3
1727 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1729 // the coefficient in res has q3 + scale digits
1730 // subtract scale from the exponent
1731 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1733 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1734 if (scale
+ q3
< p34
)
1735 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1738 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | C3
.w
[1];
1742 // use the following to avoid double rounding errors when operating on
1743 // mixed formats in rounding to nearest, and for correcting the result
1744 // if not rounding to nearest
1745 if ((p_sign
!= z_sign
) && (delta
== (q3
+ scale
+ 1))) {
1746 // there is a gap of exactly one digit between the scaled C3 and C4
1747 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1748 if ((q3
<= 19 && C3
.w
[0] != ten2k64
[q3
- 1]) ||
1749 (q3
== 20 && (C3
.w
[1] != 0 || C3
.w
[0] != ten2k64
[19])) ||
1750 (q3
>= 21 && (C3
.w
[1] != ten2k128
[q3
- 21].w
[1] ||
1751 C3
.w
[0] != ten2k128
[q3
- 21].w
[0]))) {
1752 // C3 * 10^ scale != 10^(q3-1)
1753 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1754 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1755 is_inexact_gt_midpoint
= 1; // if (z_sign), set as if for abs. value
1756 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1757 // ok from above e3 = (z_exp >> 49) - 6176;
1758 // the result is always inexact
1762 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1763 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1764 if (q4
<= 18) { // 2 <= q4 <= 18
1765 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
1766 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1767 &is_inexact_lt_midpoint
,
1768 &is_inexact_gt_midpoint
);
1769 } else if (q4
<= 38) {
1770 P128
.w
[1] = C4
.w
[1];
1771 P128
.w
[0] = C4
.w
[0];
1772 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
1773 &is_midpoint_lt_even
,
1774 &is_midpoint_gt_even
,
1775 &is_inexact_lt_midpoint
,
1776 &is_inexact_gt_midpoint
);
1777 R64
= R128
.w
[0]; // one decimal digit
1778 } else if (q4
<= 57) {
1779 P192
.w
[2] = C4
.w
[2];
1780 P192
.w
[1] = C4
.w
[1];
1781 P192
.w
[0] = C4
.w
[0];
1782 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
1783 &is_midpoint_lt_even
,
1784 &is_midpoint_gt_even
,
1785 &is_inexact_lt_midpoint
,
1786 &is_inexact_gt_midpoint
);
1787 R64
= R192
.w
[0]; // one decimal digit
1788 } else { // if (q4 <= 68)
1789 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
1790 &is_midpoint_lt_even
,
1791 &is_midpoint_gt_even
,
1792 &is_inexact_lt_midpoint
,
1793 &is_inexact_gt_midpoint
);
1794 R64
= R256
.w
[0]; // one decimal digit
1800 if (q4
== 1 && C4
.w
[0] == 5) {
1801 is_inexact_lt_midpoint
= 0;
1802 is_inexact_gt_midpoint
= 0;
1803 is_midpoint_lt_even
= 1;
1804 is_midpoint_gt_even
= 0;
1805 } else if ((e3
== expmin
) ||
1806 R64
< 5 || (R64
== 5 && is_inexact_gt_midpoint
)) {
1807 // result does not change
1808 is_inexact_lt_midpoint
= 0;
1809 is_inexact_gt_midpoint
= 1;
1810 is_midpoint_lt_even
= 0;
1811 is_midpoint_gt_even
= 0;
1813 is_inexact_lt_midpoint
= 1;
1814 is_inexact_gt_midpoint
= 0;
1815 is_midpoint_lt_even
= 0;
1816 is_midpoint_gt_even
= 0;
1817 // result decremented is 10^(q3+scale) - 1
1818 if ((q3
+ scale
) <= 19) {
1820 res
.w
[0] = ten2k64
[q3
+ scale
];
1821 } else { // if ((q3 + scale + 1) <= 35)
1822 res
.w
[1] = ten2k128
[q3
+ scale
- 20].w
[1];
1823 res
.w
[0] = ten2k128
[q3
+ scale
- 20].w
[0];
1825 res
.w
[0] = res
.w
[0] - 1; // borrow never occurs
1826 z_exp
= z_exp
- EXP_P1
;
1828 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
1831 if (R64
< 5 || (R64
== 5 && !is_inexact_lt_midpoint
)) {
1832 ; // result not tiny (in round-to-nearest mode)
1834 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1837 } // end 10^(q3+scale-1)
1838 // set the inexact flag
1839 *pfpsf
|= INEXACT_EXCEPTION
;
1841 if (p_sign
== z_sign
) {
1842 // if (z_sign), set as if for absolute value
1843 is_inexact_lt_midpoint
= 1;
1844 } else { // if (p_sign != z_sign)
1845 // if (z_sign), set as if for absolute value
1846 is_inexact_gt_midpoint
= 1;
1848 *pfpsf
|= INEXACT_EXCEPTION
;
1850 // the result is always inexact => set the inexact flag
1851 // Determine tininess:
1852 // if (exp > expmin)
1853 // the result is not tiny
1854 // else // if exp = emin
1855 // if (q3 + scale < p34)
1856 // the result is tiny
1857 // else // if (q3 + scale = p34)
1858 // if (C3 * 10^scale > 10^33)
1859 // the result is not tiny
1860 // else // if C3 * 10^scale = 10^33
1862 // the result is not tiny
1863 // else // if (xy * z < 0)
1865 // if rnd_mode != RP
1866 // the result is tiny
1868 // the result is not tiny
1869 // else // if (z < 0)
1870 // if rnd_mode != RM
1871 // the result is tiny
1873 // the result is not tiny
1880 if ((e3
== expmin
&& (q3
+ scale
) < p34
) ||
1881 (e3
== expmin
&& (q3
+ scale
) == p34
&&
1882 (res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&& // 10^33_high
1883 res
.w
[0] == 0x38c15b0a00000000ull
&& // 10^33_low
1884 z_sign
!= p_sign
&& ((!z_sign
&& rnd_mode
!= ROUNDING_UP
) ||
1885 (z_sign
&& rnd_mode
!= ROUNDING_DOWN
)))) {
1886 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1888 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1889 rounding_correction (rnd_mode
,
1890 is_inexact_lt_midpoint
,
1891 is_inexact_gt_midpoint
,
1892 is_midpoint_lt_even
, is_midpoint_gt_even
,
1895 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1896 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1897 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1898 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1902 } else if (p34
== delta
) { // Case (1''B)
1904 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1905 // and C3 can be scaled up to p34 digits if needed
1907 // scale C3 to p34 digits if needed
1908 scale
= p34
- q3
; // 0 <= scale <= p34 - 1
1912 } else if (q3
<= 19) { // z fits in 64 bits
1913 if (scale
<= 19) { // 10^scale fits in 64 bits
1914 // 64 x 64 C3.w[0] * ten2k64[scale]
1915 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1916 } else { // 10^scale fits in 128 bits
1917 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1918 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1920 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1921 // 64 x 128 ten2k64[scale] * C3
1922 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1924 // subtract scale from the exponent
1925 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1927 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1929 // determine whether x * y is less than, equal to, or greater than
1932 if (C4
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1934 } else if (C4
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1936 } else { // > 1/2 ulp
1939 } else if (q4
<= 38) {
1940 if (C4
.w
[2] == 0 && (C4
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1941 (C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1942 C4
.w
[0] < midpoint128
[q4
- 20].w
[0]))) { // < 1/2 ulp
1944 } else if (C4
.w
[2] == 0 && C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1945 C4
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1947 } else { // > 1/2 ulp
1950 } else if (q4
<= 58) {
1951 if (C4
.w
[3] == 0 && (C4
.w
[2] < midpoint192
[q4
- 39].w
[2] ||
1952 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1953 C4
.w
[1] < midpoint192
[q4
- 39].w
[1]) ||
1954 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1955 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1956 C4
.w
[0] < midpoint192
[q4
- 39].w
[0]))) { // < 1/2 ulp
1958 } else if (C4
.w
[3] == 0 && C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1959 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1960 C4
.w
[0] == midpoint192
[q4
- 39].w
[0]) { // = 1/2 ulp
1962 } else { // > 1/2 ulp
1966 if (C4
.w
[3] < midpoint256
[q4
- 59].w
[3] ||
1967 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1968 C4
.w
[2] < midpoint256
[q4
- 59].w
[2]) ||
1969 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1970 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1971 C4
.w
[1] < midpoint256
[q4
- 59].w
[1]) ||
1972 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1973 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1974 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1975 C4
.w
[0] < midpoint256
[q4
- 59].w
[0])) { // < 1/2 ulp
1977 } else if (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1978 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1979 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1980 C4
.w
[0] == midpoint256
[q4
- 59].w
[0]) { // = 1/2 ulp
1982 } else { // > 1/2 ulp
1987 if (p_sign
== z_sign
) {
1989 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1990 // use the following to avoid double rounding errors when operating on
1991 // mixed formats in rounding to nearest
1992 is_inexact_lt_midpoint
= 1; // if (z_sign), as if for absolute value
1993 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
1994 // add 1 ulp to the significand
1996 if (res
.w
[0] == 0x0ull
)
1998 // check for rounding overflow, when coeff == 10^34
1999 if ((res
.w
[1] & MASK_COEFF
) == 0x0001ed09bead87c0ull
&&
2000 res
.w
[0] == 0x378d8e6400000000ull
) { // coefficient = 10^34
2003 z_exp
= ((UINT64
) (e3
+ 6176) << 49) & MASK_EXP
;
2004 res
.w
[1] = 0x0000314dc6448d93ull
;
2005 res
.w
[0] = 0x38c15b0a00000000ull
;
2008 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2010 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2012 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2014 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2016 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2017 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2019 // the result is always inexact, and never tiny
2020 // set the inexact flag
2021 *pfpsf
|= INEXACT_EXCEPTION
;
2022 // check for overflow
2023 if (e3
> expmax
&& rnd_mode
== ROUNDING_TO_NEAREST
) {
2024 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2025 res
.w
[0] = 0x0000000000000000ull
;
2026 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2027 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2028 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2029 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2030 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2034 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2035 rounding_correction (rnd_mode
,
2036 is_inexact_lt_midpoint
,
2037 is_inexact_gt_midpoint
,
2038 is_midpoint_lt_even
, is_midpoint_gt_even
,
2040 z_exp
= res
.w
[1] & MASK_EXP
;
2042 } else { // if (p_sign != z_sign)
2043 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2044 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2045 res
.w
[0] != 0x38c15b0a00000000ull
) { // C3 * 10^scale != 10^33
2047 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2048 // use the following to avoid double rounding errors when operating
2049 // on mixed formats in rounding to nearest
2050 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2051 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
2052 // subtract 1 ulp from the significand
2054 if (res
.w
[0] == 0xffffffffffffffffull
)
2056 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2058 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2060 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2064 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2065 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2067 // the result is always inexact, and never tiny
2068 // check for overflow for RN
2070 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2071 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2072 res
.w
[0] = 0x0000000000000000ull
;
2073 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2075 rounding_correction (rnd_mode
,
2076 is_inexact_lt_midpoint
,
2077 is_inexact_gt_midpoint
,
2078 is_midpoint_lt_even
,
2079 is_midpoint_gt_even
, e3
, &res
,
2082 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2083 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2084 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2085 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2089 // set the inexact flag
2090 *pfpsf
|= INEXACT_EXCEPTION
;
2091 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2092 rounding_correction (rnd_mode
,
2093 is_inexact_lt_midpoint
,
2094 is_inexact_gt_midpoint
,
2095 is_midpoint_lt_even
,
2096 is_midpoint_gt_even
, e3
, &res
, pfpsf
);
2098 z_exp
= res
.w
[1] & MASK_EXP
;
2099 } else { // if C3 * 10^scale = 10^33
2100 e3
= (z_exp
>> 49) - 6176;
2102 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2103 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2105 // if q4 = 1 the result is exact
2106 // result coefficient = 10^34 - C4
2107 res
.w
[1] = 0x0001ed09bead87c0ull
;
2108 res
.w
[0] = 0x378d8e6400000000ull
- C4
.w
[0];
2109 z_exp
= z_exp
- EXP_P1
;
2111 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2113 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2114 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2115 if (q4
<= 18) { // 2 <= q4 <= 18
2116 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
2117 &is_midpoint_lt_even
,
2118 &is_midpoint_gt_even
,
2119 &is_inexact_lt_midpoint
,
2120 &is_inexact_gt_midpoint
);
2121 } else if (q4
<= 38) {
2122 P128
.w
[1] = C4
.w
[1];
2123 P128
.w
[0] = C4
.w
[0];
2124 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
2125 &is_midpoint_lt_even
,
2126 &is_midpoint_gt_even
,
2127 &is_inexact_lt_midpoint
,
2128 &is_inexact_gt_midpoint
);
2129 R64
= R128
.w
[0]; // one decimal digit
2130 } else if (q4
<= 57) {
2131 P192
.w
[2] = C4
.w
[2];
2132 P192
.w
[1] = C4
.w
[1];
2133 P192
.w
[0] = C4
.w
[0];
2134 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
2135 &is_midpoint_lt_even
,
2136 &is_midpoint_gt_even
,
2137 &is_inexact_lt_midpoint
,
2138 &is_inexact_gt_midpoint
);
2139 R64
= R192
.w
[0]; // one decimal digit
2140 } else { // if (q4 <= 68)
2141 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
2142 &is_midpoint_lt_even
,
2143 &is_midpoint_gt_even
,
2144 &is_inexact_lt_midpoint
,
2145 &is_inexact_gt_midpoint
);
2146 R64
= R256
.w
[0]; // one decimal digit
2148 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2149 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2150 // the result is exact: 10^34 - R64
2151 // incr_exp = 0 with certainty
2152 z_exp
= z_exp
- EXP_P1
;
2155 z_sign
| (z_exp
& MASK_EXP
) | 0x0001ed09bead87c0ull
;
2156 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2158 // We want R64 to be the top digit of C4, but we actually
2159 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2160 // because the top digit is (C4 * 10^(-q4+1))RZ
2161 // however, if incr_exp = 1 then R64 = 10 with certainty
2165 // the result is inexact as C4 has more than 1 significant digit
2166 // and C3 * 10^scale = 10^33
2167 // example of case that is treated here:
2168 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2169 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2170 // note that (e3 > expmin}
2171 // in order to round, subtract R64 from 10^34 and then compare
2172 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2173 // calculate 10^34 - R64
2174 res
.w
[1] = 0x0001ed09bead87c0ull
;
2175 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2176 z_exp
= z_exp
- EXP_P1
; // will be OR-ed with sign & significand
2177 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2178 // R64 is small, 1 <= R64 <= 9
2180 if (is_inexact_lt_midpoint
) {
2181 is_inexact_lt_midpoint
= 0;
2182 is_inexact_gt_midpoint
= 1;
2183 } else if (is_inexact_gt_midpoint
) {
2184 is_inexact_gt_midpoint
= 0;
2185 is_inexact_lt_midpoint
= 1;
2186 } else if (is_midpoint_lt_even
) {
2187 is_midpoint_lt_even
= 0;
2188 is_midpoint_gt_even
= 1;
2189 } else if (is_midpoint_gt_even
) {
2190 is_midpoint_gt_even
= 0;
2191 is_midpoint_lt_even
= 1;
2195 // the result is always inexact, and never tiny
2196 // check for overflow for RN
2198 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2199 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2200 res
.w
[0] = 0x0000000000000000ull
;
2201 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2203 rounding_correction (rnd_mode
,
2204 is_inexact_lt_midpoint
,
2205 is_inexact_gt_midpoint
,
2206 is_midpoint_lt_even
,
2207 is_midpoint_gt_even
, e3
, &res
,
2210 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2211 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2212 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2213 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2217 // set the inexact flag
2218 *pfpsf
|= INEXACT_EXCEPTION
;
2220 z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2221 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2222 rounding_correction (rnd_mode
,
2223 is_inexact_lt_midpoint
,
2224 is_inexact_gt_midpoint
,
2225 is_midpoint_lt_even
,
2226 is_midpoint_gt_even
, e3
, &res
,
2229 z_exp
= res
.w
[1] & MASK_EXP
;
2230 } // end result is inexact
2232 } else { // if (e3 = emin)
2233 // if e3 = expmin the result is also tiny (the condition for
2234 // tininess is C4 > 050...0 [q4 digits] which is met because
2235 // the msd of C4 is not zero)
2236 // the result is tiny and inexact in all rounding modes;
2237 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2238 // gt_half_ulp to calculate)
2239 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2241 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2242 if (gt_half_ulp
) { // res = 10^33 - 1
2243 res
.w
[1] = 0x0000314dc6448d93ull
;
2244 res
.w
[0] = 0x38c15b09ffffffffull
;
2246 res
.w
[1] = 0x0000314dc6448d93ull
;
2247 res
.w
[0] = 0x38c15b0a00000000ull
;
2249 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2250 *pfpsf
|= UNDERFLOW_EXCEPTION
; // inexact is set later
2253 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2254 } else if (lt_half_ulp
) {
2255 is_inexact_gt_midpoint
= 1; //if(z_sign), as if for absolute value
2256 } else { // if (gt_half_ulp)
2257 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2260 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2261 rounding_correction (rnd_mode
,
2262 is_inexact_lt_midpoint
,
2263 is_inexact_gt_midpoint
,
2264 is_midpoint_lt_even
,
2265 is_midpoint_gt_even
, e3
, &res
,
2267 z_exp
= res
.w
[1] & MASK_EXP
;
2270 // set the inexact flag (if the result was not exact)
2271 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2272 is_midpoint_lt_even
|| is_midpoint_gt_even
)
2273 *pfpsf
|= INEXACT_EXCEPTION
;
2275 } // end if (p_sign != z_sign)
2276 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2277 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2278 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2279 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2280 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2284 } else if (((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2285 (q3
<= delta
&& delta
+ q4
<= p34
) || // Case (3)
2286 (delta
< q3
&& p34
< delta
+ q4
) || // Case (4)
2287 (delta
< q3
&& q3
<= delta
+ q4
&& delta
+ q4
<= p34
) || // Case (5)
2288 (delta
+ q4
< q3
)) && // Case (6)
2289 !(delta
<= 1 && p_sign
!= z_sign
)) { // Case (2), (3), (4), (5) or (6)
2291 // the result has the sign of z
2293 if ((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2294 (delta
< q3
&& p34
< delta
+ q4
)) { // Case (4)
2295 // round first the sum x * y + z with unbounded exponent
2296 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2298 // calculate res = C3 * 10^scale
2300 x0
= delta
+ q4
- p34
;
2301 } else if (delta
+ q4
< q3
) { // Case (6)
2302 // make Case (6) look like Case (3) or Case (5) with scale = 0
2303 // by scaling up C4 by 10^(q3 - delta - q4)
2304 scale
= q3
- delta
- q4
; // 1 <= scale <= 33
2305 if (q4
<= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2306 if (scale
<= 19) { // 10^scale fits in 64 bits
2307 // 64 x 64 C4.w[0] * ten2k64[scale]
2308 __mul_64x64_to_128MACH (P128
, C4
.w
[0], ten2k64
[scale
]);
2309 } else { // 10^scale fits in 128 bits
2310 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2311 __mul_128x64_to_128 (P128
, C4
.w
[0], ten2k128
[scale
- 20]);
2313 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2314 // 64 x 128 ten2k64[scale] * C4
2315 __mul_128x64_to_128 (P128
, ten2k64
[scale
], C4
);
2317 C4
.w
[0] = P128
.w
[0];
2318 C4
.w
[1] = P128
.w
[1];
2319 // e4 does not need adjustment, as it is not used from this point on
2322 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2323 } else { // if Case (3) or Case (5)
2324 // Note: Case (3) is similar to Case (2), but scale differs and the
2325 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2326 // result with unbounded exponent)
2328 // calculate first the sum x * y + z with unbounded exponent (exact)
2329 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2331 // calculate res = C3 * 10^scale
2332 scale
= delta
+ q4
- q3
;
2334 // Note: the comments which follow refer [mainly] to Case (2)]
2338 if (scale
== 0) { // this could happen e.g. if we return to case2_repeat
2342 } else if (q3
<= 19) { // 1 <= scale <= 19; z fits in 64 bits
2343 if (scale
<= 19) { // 10^scale fits in 64 bits
2344 // 64 x 64 C3.w[0] * ten2k64[scale]
2345 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
2346 } else { // 10^scale fits in 128 bits
2347 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2348 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
2350 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2351 // 64 x 128 ten2k64[scale] * C3
2352 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
2354 // e3 is already calculated
2356 // now res = C3 * 10^scale and e3 = e3 - scale
2357 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2358 // because the result was too small
2360 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2361 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2362 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2363 // the rounding fits in 128 bits!)
2364 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2365 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2366 if (x0
== 0) { // this could happen only if we return to case2_repeat, or
2367 // for Case (3) or Case (6)
2368 R128
.w
[1] = C4
.w
[1];
2369 R128
.w
[0] = C4
.w
[0];
2370 } else if (q4
<= 18) {
2371 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2372 round64_2_18 (q4
, x0
, C4
.w
[0], &R64
, &incr_exp
,
2373 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2374 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
2376 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2377 R64
= ten2k64
[q4
- x0
];
2381 } else if (q4
<= 38) {
2382 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2383 P128
.w
[1] = C4
.w
[1];
2384 P128
.w
[0] = C4
.w
[0];
2385 round128_19_38 (q4
, x0
, P128
, &R128
, &incr_exp
,
2386 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2387 &is_inexact_lt_midpoint
,
2388 &is_inexact_gt_midpoint
);
2390 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2391 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2392 R128
.w
[0] = ten2k64
[q4
- x0
];
2393 // R128.w[1] stays 0
2394 } else { // 20 <= q4 - x0 <= 37
2395 R128
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2396 R128
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2399 } else if (q4
<= 57) {
2400 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2401 P192
.w
[2] = C4
.w
[2];
2402 P192
.w
[1] = C4
.w
[1];
2403 P192
.w
[0] = C4
.w
[0];
2404 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2405 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2406 &is_inexact_lt_midpoint
,
2407 &is_inexact_gt_midpoint
);
2408 // R192.w[2] is always 0
2410 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2411 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2412 R192
.w
[0] = ten2k64
[q4
- x0
];
2413 // R192.w[1] stays 0
2414 // R192.w[2] stays 0
2415 } else { // 20 <= q4 - x0 <= 33
2416 R192
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2417 R192
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2418 // R192.w[2] stays 0
2421 R128
.w
[1] = R192
.w
[1];
2422 R128
.w
[0] = R192
.w
[0];
2424 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2425 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2426 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2427 &is_inexact_lt_midpoint
,
2428 &is_inexact_gt_midpoint
);
2429 // R256.w[3] and R256.w[2] are always 0
2431 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2432 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2433 R256
.w
[0] = ten2k64
[q4
- x0
];
2434 // R256.w[1] stays 0
2435 // R256.w[2] stays 0
2436 // R256.w[3] stays 0
2437 } else { // 20 <= q4 - x0 <= 33
2438 R256
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2439 R256
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2440 // R256.w[2] stays 0
2441 // R256.w[3] stays 0
2444 R128
.w
[1] = R256
.w
[1];
2445 R128
.w
[0] = R256
.w
[0];
2447 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2448 // rounded to nearest, which were copied into R128
2449 if (z_sign
== p_sign
) {
2450 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale
2451 // the sum can result in [up to] p34 or p34 + 1 digits
2452 res
.w
[0] = res
.w
[0] + R128
.w
[0];
2453 res
.w
[1] = res
.w
[1] + R128
.w
[1];
2454 if (res
.w
[0] < R128
.w
[0])
2455 res
.w
[1]++; // carry
2456 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2457 if (res
.w
[1] > 0x0001ed09bead87c0ull
||
2458 (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2459 res
.w
[0] > 0x378d8e63ffffffffull
)) {
2460 // avoid double rounding error
2461 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2462 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2463 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2464 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2465 is_inexact_lt_midpoint
= 0;
2466 is_inexact_gt_midpoint
= 0;
2467 is_midpoint_lt_even
= 0;
2468 is_midpoint_gt_even
= 0;
2469 P128
.w
[1] = res
.w
[1];
2470 P128
.w
[0] = res
.w
[0];
2471 round128_19_38 (35, 1, P128
, &res
, &incr_exp
,
2472 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2473 &is_inexact_lt_midpoint
,
2474 &is_inexact_gt_midpoint
);
2475 // incr_exp is 0 with certainty in this case
2476 // avoid a double rounding error
2477 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2478 is_midpoint_lt_even
) { // double rounding error upward
2481 if (res
.w
[0] == 0xffffffffffffffffull
)
2483 // Note: a double rounding error upward is not possible; for this
2484 // the result after the first rounding would have to be 99...95
2485 // (35 digits in all), possibly followed by a number of zeros; this
2486 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2487 is_midpoint_lt_even
= 0;
2488 is_inexact_lt_midpoint
= 1;
2489 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2490 is_midpoint_gt_even
) { // double rounding error downward
2495 is_midpoint_gt_even
= 0;
2496 is_inexact_gt_midpoint
= 1;
2497 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2498 !is_inexact_lt_midpoint
2499 && !is_inexact_gt_midpoint
) {
2500 // if this second rounding was exact the result may still be
2501 // inexact because of the first rounding
2502 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2503 is_inexact_gt_midpoint
= 1;
2505 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2506 is_inexact_lt_midpoint
= 1;
2508 } else if (is_midpoint_gt_even
&&
2509 (is_inexact_gt_midpoint0
2510 || is_midpoint_lt_even0
)) {
2511 // pulled up to a midpoint
2512 is_inexact_lt_midpoint
= 1;
2513 is_inexact_gt_midpoint
= 0;
2514 is_midpoint_lt_even
= 0;
2515 is_midpoint_gt_even
= 0;
2516 } else if (is_midpoint_lt_even
&&
2517 (is_inexact_lt_midpoint0
2518 || is_midpoint_gt_even0
)) {
2519 // pulled down to a midpoint
2520 is_inexact_lt_midpoint
= 0;
2521 is_inexact_gt_midpoint
= 1;
2522 is_midpoint_lt_even
= 0;
2523 is_midpoint_gt_even
= 0;
2529 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2530 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2531 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2532 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2533 is_inexact_lt_midpoint
= 1;
2537 // this is the result rounded with unbounded exponent, unless a
2538 // correction is needed
2539 res
.w
[1] = res
.w
[1] & MASK_COEFF
;
2541 if (is_midpoint_gt_even
) {
2543 is_midpoint_gt_even
= 0;
2544 is_midpoint_lt_even
= 1;
2546 if (res
.w
[0] == 0x0)
2548 // check for rounding overflow
2549 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2550 res
.w
[0] == 0x378d8e6400000000ull
) {
2551 // res = 10^34 => rounding overflow
2552 res
.w
[1] = 0x0000314dc6448d93ull
;
2553 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2556 } else if (is_midpoint_lt_even
) {
2558 is_midpoint_lt_even
= 0;
2559 is_midpoint_gt_even
= 1;
2561 if (res
.w
[0] == 0xffffffffffffffffull
)
2563 // if the result is pure zero, the sign depends on the rounding
2564 // mode (x*y and z had opposite signs)
2565 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2566 if (rnd_mode
!= ROUNDING_DOWN
)
2567 z_sign
= 0x0000000000000000ull
;
2569 z_sign
= 0x8000000000000000ull
;
2570 // the exponent is max (e3, expmin)
2573 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2574 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2575 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2576 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2585 } else { // if (z_sign != p_sign)
2586 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2587 // used to swap rounding indicators if p_sign != z_sign
2588 // the sum can result in [up to] p34 or p34 - 1 digits
2590 res
.w
[0] = res
.w
[0] - R128
.w
[0];
2591 res
.w
[1] = res
.w
[1] - R128
.w
[1];
2592 if (res
.w
[0] > tmp64
)
2593 res
.w
[1]--; // borrow
2594 // if res < 10^33 and exp > expmin need to decrease x0 and
2595 // increase scale by 1
2596 if (e3
> expmin
&& ((res
.w
[1] < 0x0000314dc6448d93ull
||
2597 (res
.w
[1] == 0x0000314dc6448d93ull
&&
2598 res
.w
[0] < 0x38c15b0a00000000ull
)) ||
2599 (is_inexact_lt_midpoint
2600 && res
.w
[1] == 0x0000314dc6448d93ull
2601 && res
.w
[0] == 0x38c15b0a00000000ull
))
2604 // first restore e3, otherwise it will be too small
2607 is_inexact_lt_midpoint
= 0;
2608 is_inexact_gt_midpoint
= 0;
2609 is_midpoint_lt_even
= 0;
2610 is_midpoint_gt_even
= 0;
2614 // else this is the result rounded with unbounded exponent;
2615 // because the result has opposite sign to that of C4 which was
2616 // rounded, need to change the rounding indicators
2617 if (is_inexact_lt_midpoint
) {
2618 is_inexact_lt_midpoint
= 0;
2619 is_inexact_gt_midpoint
= 1;
2620 } else if (is_inexact_gt_midpoint
) {
2621 is_inexact_gt_midpoint
= 0;
2622 is_inexact_lt_midpoint
= 1;
2623 } else if (lsb
== 0) {
2624 if (is_midpoint_lt_even
) {
2625 is_midpoint_lt_even
= 0;
2626 is_midpoint_gt_even
= 1;
2627 } else if (is_midpoint_gt_even
) {
2628 is_midpoint_gt_even
= 0;
2629 is_midpoint_lt_even
= 1;
2633 } else if (lsb
== 1) {
2634 if (is_midpoint_lt_even
) {
2637 if (res
.w
[0] == 0x0)
2639 // check for rounding overflow
2640 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2641 res
.w
[0] == 0x378d8e6400000000ull
) {
2642 // res = 10^34 => rounding overflow
2643 res
.w
[1] = 0x0000314dc6448d93ull
;
2644 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2647 } else if (is_midpoint_gt_even
) {
2650 if (res
.w
[0] == 0xffffffffffffffffull
)
2652 // if the result is pure zero, the sign depends on the rounding
2653 // mode (x*y and z had opposite signs)
2654 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2655 if (rnd_mode
!= ROUNDING_DOWN
)
2656 z_sign
= 0x0000000000000000ull
;
2658 z_sign
= 0x8000000000000000ull
;
2659 // the exponent is max (e3, expmin)
2662 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2663 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2664 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2665 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2676 // check for underflow
2677 if (e3
== expmin
) { // and if significand < 10^33 => result is tiny
2678 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
2679 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
2680 res
.w
[0] < 0x38c15b0a00000000ull
)) {
2683 } else if (e3
< expmin
) {
2684 // the result is tiny, so we must truncate more of res
2687 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2688 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2689 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2690 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2691 is_inexact_lt_midpoint
= 0;
2692 is_inexact_gt_midpoint
= 0;
2693 is_midpoint_lt_even
= 0;
2694 is_midpoint_gt_even
= 0;
2695 // determine the number of decimal digits in res
2696 if (res
.w
[1] == 0x0) {
2697 // between 1 and 19 digits
2698 for (ind
= 1; ind
<= 19; ind
++) {
2699 if (res
.w
[0] < ten2k64
[ind
]) {
2704 } else if (res
.w
[1] < ten2k128
[0].w
[1] ||
2705 (res
.w
[1] == ten2k128
[0].w
[1]
2706 && res
.w
[0] < ten2k128
[0].w
[0])) {
2709 } else { // between 21 and 38 digits
2710 for (ind
= 1; ind
<= 18; ind
++) {
2711 if (res
.w
[1] < ten2k128
[ind
].w
[1] ||
2712 (res
.w
[1] == ten2k128
[ind
].w
[1] &&
2713 res
.w
[0] < ten2k128
[ind
].w
[0])) {
2721 // at this point ind >= x0; because delta >= 2 on this path, the case
2722 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2723 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2724 // the signs of x * y and z are opposite, and through cancellation
2725 // the most significant decimal digit in res has the weight
2726 // 10^(emin-1); however, it is clear that in this case the most
2727 // significant digit is 9, so the result before rounding is
2729 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2730 // result with weight of at least 10^emin, and correction for underflow
2731 // can be carried out using the round*_*_2_* () routines
2732 if (x0
== ind
) { // the result before rounding is 0.9... * 10^emin
2735 is_inexact_gt_midpoint
= 1;
2736 } else if (ind
<= 18) { // check that 2 <= ind
2737 // 2 <= ind <= 18, 1 <= x0 <= 17
2738 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
2739 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2740 &is_inexact_lt_midpoint
,
2741 &is_inexact_gt_midpoint
);
2743 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2744 R64
= ten2k64
[ind
- x0
];
2748 } else if (ind
<= 38) {
2750 P128
.w
[1] = res
.w
[1];
2751 P128
.w
[0] = res
.w
[0];
2752 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
2753 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2754 &is_inexact_lt_midpoint
,
2755 &is_inexact_gt_midpoint
);
2757 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2758 if (ind
- x0
<= 19) { // 1 <= ind - x0 <= 19
2759 res
.w
[0] = ten2k64
[ind
- x0
];
2761 } else { // 20 <= ind - x0 <= 37
2762 res
.w
[0] = ten2k128
[ind
- x0
- 20].w
[0];
2763 res
.w
[1] = ten2k128
[ind
- x0
- 20].w
[1];
2767 // avoid a double rounding error
2768 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2769 is_midpoint_lt_even
) { // double rounding error upward
2772 if (res
.w
[0] == 0xffffffffffffffffull
)
2774 // Note: a double rounding error upward is not possible; for this
2775 // the result after the first rounding would have to be 99...95
2776 // (35 digits in all), possibly followed by a number of zeros; this
2777 // not possible in Cases (2)-(6) which may get here
2778 is_midpoint_lt_even
= 0;
2779 is_inexact_lt_midpoint
= 1;
2780 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2781 is_midpoint_gt_even
) { // double rounding error downward
2786 is_midpoint_gt_even
= 0;
2787 is_inexact_gt_midpoint
= 1;
2788 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2789 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2790 // if this second rounding was exact the result may still be
2791 // inexact because of the first rounding
2792 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2793 is_inexact_gt_midpoint
= 1;
2795 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2796 is_inexact_lt_midpoint
= 1;
2798 } else if (is_midpoint_gt_even
&&
2799 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
2800 // pulled up to a midpoint
2801 is_inexact_lt_midpoint
= 1;
2802 is_inexact_gt_midpoint
= 0;
2803 is_midpoint_lt_even
= 0;
2804 is_midpoint_gt_even
= 0;
2805 } else if (is_midpoint_lt_even
&&
2806 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
2807 // pulled down to a midpoint
2808 is_inexact_lt_midpoint
= 0;
2809 is_inexact_gt_midpoint
= 1;
2810 is_midpoint_lt_even
= 0;
2811 is_midpoint_gt_even
= 0;
2817 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2818 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2819 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2820 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2821 is_inexact_lt_midpoint
= 1;
2827 // check for inexact result
2828 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2829 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
2830 // set the inexact flag
2831 *pfpsf
|= INEXACT_EXCEPTION
;
2833 *pfpsf
|= UNDERFLOW_EXCEPTION
;
2835 // now check for significand = 10^34 (may have resulted from going
2836 // back to case2_repeat)
2837 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2838 res
.w
[0] == 0x378d8e6400000000ull
) { // if res = 10^34
2839 res
.w
[1] = 0x0000314dc6448d93ull
; // res = 10^33
2840 res
.w
[0] = 0x38c15b0a00000000ull
;
2843 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2844 // check for overflow
2845 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e3
> expmax
) {
2846 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2847 res
.w
[0] = 0x0000000000000000ull
;
2848 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2850 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2851 rounding_correction (rnd_mode
,
2852 is_inexact_lt_midpoint
,
2853 is_inexact_gt_midpoint
,
2854 is_midpoint_lt_even
, is_midpoint_gt_even
,
2857 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2858 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2859 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2860 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2866 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2867 // the signs of x*y and z are opposite; in these cases massive
2868 // cancellation can occur, so it is better to scale either C3 or C4 and
2869 // to perform the subtraction before rounding; rounding is performed
2870 // next, depending on the number of decimal digits in the result and on
2871 // the exponent value
2872 // Note: overlow is not possible in this case
2873 // this is similar to Cases (15), (16), and (17)
2875 if (delta
+ q4
< q3
) { // from Case (6)
2876 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2877 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2878 // and call add_and_round; delta stays positive
2879 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2880 P128
.w
[1] = C3
.w
[1];
2881 P128
.w
[0] = C3
.w
[0];
2884 C4
.w
[1] = P128
.w
[1];
2885 C4
.w
[0] = P128
.w
[0];
2895 } else { // from Cases (2), (3), (4), (5)
2896 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2897 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2898 // (16), and (17) if we just change the sign of delta
2901 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
2902 rnd_mode
, &is_midpoint_lt_even
,
2903 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
2904 &is_inexact_gt_midpoint
, pfpsf
, &res
);
2905 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2906 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2907 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2908 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2914 } else { // if delta < 0
2918 if (p34
< q4
&& q4
<= delta
) { // Case (7)
2920 // truncate C4 to p34 digits into res
2921 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2924 P128
.w
[1] = C4
.w
[1];
2925 P128
.w
[0] = C4
.w
[0];
2926 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
2927 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2928 &is_inexact_lt_midpoint
,
2929 &is_inexact_gt_midpoint
);
2930 } else if (q4
<= 57) { // 35 <= q4 <= 57
2931 P192
.w
[2] = C4
.w
[2];
2932 P192
.w
[1] = C4
.w
[1];
2933 P192
.w
[0] = C4
.w
[0];
2934 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2935 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2936 &is_inexact_lt_midpoint
,
2937 &is_inexact_gt_midpoint
);
2938 res
.w
[0] = R192
.w
[0];
2939 res
.w
[1] = R192
.w
[1];
2940 } else { // if (q4 <= 68)
2941 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2942 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2943 &is_inexact_lt_midpoint
,
2944 &is_inexact_gt_midpoint
);
2945 res
.w
[0] = R256
.w
[0];
2946 res
.w
[1] = R256
.w
[1];
2952 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2953 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2954 // if C4 rounded to p34 digits is exact then the result is inexact,
2955 // in a way that depends on the signs of x * y and z
2956 if (p_sign
== z_sign
) {
2957 is_inexact_lt_midpoint
= 1;
2958 } else { // if (p_sign != z_sign)
2959 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2960 res
.w
[0] != 0x38c15b0a00000000ull
) { // res != 10^33
2961 is_inexact_gt_midpoint
= 1;
2962 } else { // res = 10^33 and exact is a special case
2963 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2964 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2965 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2966 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2967 if (delta
> p34
+ 1) { // C3 < 1/2
2968 // res = 10^33, unchanged
2969 is_inexact_gt_midpoint
= 1;
2970 } else { // if (delta == p34 + 1)
2972 if (C3
.w
[0] < midpoint64
[q3
- 1]) { // C3 < 1/2 ulp
2973 // res = 10^33, unchanged
2974 is_inexact_gt_midpoint
= 1;
2975 } else if (C3
.w
[0] == midpoint64
[q3
- 1]) { // C3 = 1/2 ulp
2976 // res = 10^33, unchanged
2977 is_midpoint_lt_even
= 1;
2978 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2979 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
2980 res
.w
[0] = 0x378d8e63ffffffffull
;
2982 is_inexact_lt_midpoint
= 1;
2984 } else { // if (20 <= q3 <=34)
2985 if (C3
.w
[1] < midpoint128
[q3
- 20].w
[1] ||
2986 (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2987 C3
.w
[0] < midpoint128
[q3
- 20].w
[0])) { // C3 < 1/2 ulp
2988 // res = 10^33, unchanged
2989 is_inexact_gt_midpoint
= 1;
2990 } else if (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2991 C3
.w
[0] == midpoint128
[q3
- 20].w
[0]) { // C3 = 1/2 ulp
2992 // res = 10^33, unchanged
2993 is_midpoint_lt_even
= 1;
2994 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2995 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
2996 res
.w
[0] = 0x378d8e63ffffffffull
;
2998 is_inexact_lt_midpoint
= 1;
3004 } else if (is_midpoint_lt_even
) {
3005 if (z_sign
!= p_sign
) {
3006 // needs correction: res = res - 1
3007 res
.w
[0] = res
.w
[0] - 1;
3008 if (res
.w
[0] == 0xffffffffffffffffull
)
3010 // if it is (10^33-1)*10^e4 then the corect result is
3011 // (10^34-1)*10(e4-1)
3012 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3013 res
.w
[0] == 0x38c15b09ffffffffull
) {
3014 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3015 res
.w
[0] = 0x378d8e63ffffffffull
;
3018 is_midpoint_lt_even
= 0;
3019 is_inexact_lt_midpoint
= 1;
3020 } else { // if (z_sign == p_sign)
3021 is_midpoint_lt_even
= 0;
3022 is_inexact_gt_midpoint
= 1;
3024 } else if (is_midpoint_gt_even
) {
3025 if (z_sign
== p_sign
) {
3026 // needs correction: res = res + 1 (cannot cross in the next binade)
3027 res
.w
[0] = res
.w
[0] + 1;
3028 if (res
.w
[0] == 0x0000000000000000ull
)
3030 is_midpoint_gt_even
= 0;
3031 is_inexact_gt_midpoint
= 1;
3032 } else { // if (z_sign != p_sign)
3033 is_midpoint_gt_even
= 0;
3034 is_inexact_lt_midpoint
= 1;
3037 ; // the rounded result is already correct
3039 // check for overflow
3040 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e4
> expmax
) {
3041 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3042 res
.w
[0] = 0x0000000000000000ull
;
3043 *pfpsf
|= (OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
3044 } else { // no overflow or not RN
3045 p_exp
= ((UINT64
) (e4
+ 6176) << 49);
3046 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
3048 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3049 rounding_correction (rnd_mode
,
3050 is_inexact_lt_midpoint
,
3051 is_inexact_gt_midpoint
,
3052 is_midpoint_lt_even
, is_midpoint_gt_even
,
3055 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
3056 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
3057 // set the inexact flag
3058 *pfpsf
|= INEXACT_EXCEPTION
;
3060 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3061 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3062 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3063 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3067 } else if ((q4
<= p34
&& p34
<= delta
) || // Case (8)
3068 (q4
<= delta
&& delta
< p34
&& p34
< delta
+ q3
) || // Case (9)
3069 (q4
<= delta
&& delta
+ q3
<= p34
) || // Case (10)
3070 (delta
< q4
&& q4
<= p34
&& p34
< delta
+ q3
) || // Case (13)
3071 (delta
< q4
&& q4
<= delta
+ q3
&& delta
+ q3
<= p34
) || // Case (14)
3072 (delta
+ q3
< q4
&& q4
<= p34
)) { // Case (18)
3074 // Case (8) is similar to Case (1), with C3 and C4 swapped
3075 // Case (9) is similar to Case (2), with C3 and C4 swapped
3076 // Case (10) is similar to Case (3), with C3 and C4 swapped
3077 // Case (13) is similar to Case (4), with C3 and C4 swapped
3078 // Case (14) is similar to Case (5), with C3 and C4 swapped
3079 // Case (18) is similar to Case (6), with C3 and C4 swapped
3081 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3082 // and go back to delta_ge_zero
3083 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3084 P128
.w
[1] = C3
.w
[1];
3085 P128
.w
[0] = C3
.w
[0];
3088 C4
.w
[1] = P128
.w
[1];
3089 C4
.w
[0] = P128
.w
[0];
3104 } else if ((p34
<= delta
&& delta
< q4
&& q4
< delta
+ q3
) || // Case (11)
3105 (delta
< p34
&& p34
< q4
&& q4
< delta
+ q3
)) { // Case (12)
3107 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3108 // 1 <= x0 <= q3 - 1 <= p34 - 1
3109 x0
= e4
- e3
; // or x0 = delta + q3 - q4
3110 if (q3
<= 18) { // 2 <= q3 <= 18
3111 round64_2_18 (q3
, x0
, C3
.w
[0], &R64
, &incr_exp
,
3112 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3113 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
3116 } else if (q3
<= 38) {
3117 round128_19_38 (q3
, x0
, C3
, &R128
, &incr_exp
,
3118 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3119 &is_inexact_lt_midpoint
,
3120 &is_inexact_gt_midpoint
);
3121 C3
.w
[1] = R128
.w
[1];
3122 C3
.w
[0] = R128
.w
[0];
3124 // the rounded result has q3 - x0 digits
3125 // we want the exponent to be e4, so if incr_exp = 1 then
3126 // multiply the rounded result by 10 - it will still fit in 113 bits
3129 P128
.w
[1] = C3
.w
[1];
3130 P128
.w
[0] = C3
.w
[0];
3131 __mul_64x128_to_128 (C3
, ten2k64
[1], P128
);
3133 e3
= e3
+ x0
; // this is e4
3134 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3135 // the result will have the sign of x * y; the exponent is e4
3138 R256
.w
[1] = C3
.w
[1];
3139 R256
.w
[0] = C3
.w
[0];
3140 if (p_sign
== z_sign
) { // R256 = C4 + R256
3141 add256 (C4
, R256
, &R256
);
3142 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3143 sub256 (C4
, R256
, &R256
); // the result cannot be pure zero
3144 // because the result has opposite sign to that of R256 which was
3145 // rounded, need to change the rounding indicators
3146 lsb
= C4
.w
[0] & 0x01;
3147 if (is_inexact_lt_midpoint
) {
3148 is_inexact_lt_midpoint
= 0;
3149 is_inexact_gt_midpoint
= 1;
3150 } else if (is_inexact_gt_midpoint
) {
3151 is_inexact_gt_midpoint
= 0;
3152 is_inexact_lt_midpoint
= 1;
3153 } else if (lsb
== 0) {
3154 if (is_midpoint_lt_even
) {
3155 is_midpoint_lt_even
= 0;
3156 is_midpoint_gt_even
= 1;
3157 } else if (is_midpoint_gt_even
) {
3158 is_midpoint_gt_even
= 0;
3159 is_midpoint_lt_even
= 1;
3163 } else if (lsb
== 1) {
3164 if (is_midpoint_lt_even
) {
3167 if (R256
.w
[0] == 0x0) {
3169 if (R256
.w
[1] == 0x0) {
3171 if (R256
.w
[2] == 0x0) {
3176 // no check for rounding overflow - R256 was a difference
3177 } else if (is_midpoint_gt_even
) {
3180 if (R256
.w
[0] == 0xffffffffffffffffull
) {
3182 if (R256
.w
[1] == 0xffffffffffffffffull
) {
3184 if (R256
.w
[2] == 0xffffffffffffffffull
) {
3196 // determine the number of decimal digits in R256
3197 ind
= nr_digits256 (R256
); // ind >= p34
3198 // if R256 is sum, then ind > p34; if R256 is a difference, then
3199 // ind >= p34; this means that we can calculate the result rounded to
3200 // the destination precision, with unbounded exponent, starting from R256
3201 // and using the indicators from the rounding of C3 to avoid a double
3206 } else if (ind
== p34
) {
3207 // the result rounded to the destination precision with
3208 // unbounded exponent
3209 // is (-1)^p_sign * R256 * 10^e4
3210 res
.w
[1] = R256
.w
[1];
3211 res
.w
[0] = R256
.w
[0];
3212 } else { // if (ind > p34)
3213 // if more than P digits, round to nearest to P digits
3214 // round R256 to p34 digits
3215 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
3216 // save C3 rounding indicators to help avoid double rounding error
3217 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3218 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3219 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3220 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3221 // initialize rounding indicators
3222 is_inexact_lt_midpoint
= 0;
3223 is_inexact_gt_midpoint
= 0;
3224 is_midpoint_lt_even
= 0;
3225 is_midpoint_gt_even
= 0;
3226 // round to p34 digits; the result fits in 113 bits
3228 P128
.w
[1] = R256
.w
[1];
3229 P128
.w
[0] = R256
.w
[0];
3230 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
3231 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3232 &is_inexact_lt_midpoint
,
3233 &is_inexact_gt_midpoint
);
3234 } else if (ind
<= 57) {
3235 P192
.w
[2] = R256
.w
[2];
3236 P192
.w
[1] = R256
.w
[1];
3237 P192
.w
[0] = R256
.w
[0];
3238 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
3239 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3240 &is_inexact_lt_midpoint
,
3241 &is_inexact_gt_midpoint
);
3242 R128
.w
[1] = R192
.w
[1];
3243 R128
.w
[0] = R192
.w
[0];
3244 } else { // if (ind <= 68)
3245 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
3246 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3247 &is_inexact_lt_midpoint
,
3248 &is_inexact_gt_midpoint
);
3249 R128
.w
[1] = R256
.w
[1];
3250 R128
.w
[0] = R256
.w
[0];
3252 // the rounded result has p34 = 34 digits
3253 e4
= e4
+ x0
+ incr_exp
;
3255 res
.w
[1] = R128
.w
[1];
3256 res
.w
[0] = R128
.w
[0];
3258 // avoid a double rounding error
3259 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3260 is_midpoint_lt_even
) { // double rounding error upward
3263 if (res
.w
[0] == 0xffffffffffffffffull
)
3265 is_midpoint_lt_even
= 0;
3266 is_inexact_lt_midpoint
= 1;
3267 // Note: a double rounding error upward is not possible; for this
3268 // the result after the first rounding would have to be 99...95
3269 // (35 digits in all), possibly followed by a number of zeros; this
3270 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3271 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3272 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3273 res
.w
[0] == 0x38c15b09ffffffffull
) { // 10^33 - 1
3274 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3275 res
.w
[0] = 0x378d8e63ffffffffull
;
3278 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3279 is_midpoint_gt_even
) { // double rounding error downward
3284 is_midpoint_gt_even
= 0;
3285 is_inexact_gt_midpoint
= 1;
3286 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3287 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
3288 // if this second rounding was exact the result may still be
3289 // inexact because of the first rounding
3290 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3291 is_inexact_gt_midpoint
= 1;
3293 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3294 is_inexact_lt_midpoint
= 1;
3296 } else if (is_midpoint_gt_even
&&
3297 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
3298 // pulled up to a midpoint
3299 is_inexact_lt_midpoint
= 1;
3300 is_inexact_gt_midpoint
= 0;
3301 is_midpoint_lt_even
= 0;
3302 is_midpoint_gt_even
= 0;
3303 } else if (is_midpoint_lt_even
&&
3304 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
3305 // pulled down to a midpoint
3306 is_inexact_lt_midpoint
= 0;
3307 is_inexact_gt_midpoint
= 1;
3308 is_midpoint_lt_even
= 0;
3309 is_midpoint_gt_even
= 0;
3315 // determine tininess
3316 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
3318 is_tiny
= 1; // for other rounding modes apply correction
3321 // for RM, RP, RZ, RA apply correction in order to determine tininess
3322 // but do not save the result; apply the correction to
3323 // (-1)^p_sign * res * 10^0
3324 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| res
.w
[1];
3325 P128
.w
[0] = res
.w
[0];
3326 rounding_correction (rnd_mode
,
3327 is_inexact_lt_midpoint
,
3328 is_inexact_gt_midpoint
,
3329 is_midpoint_lt_even
, is_midpoint_gt_even
,
3331 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
3332 // the number of digits in the significand is p34 = 34
3333 if (e4
+ scale
< expmin
) {
3338 // the result rounded to the destination precision with unbounded exponent
3339 // is (-1)^p_sign * res * 10^e4
3340 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1]; // RN
3341 // res.w[0] unchanged;
3342 // Note: res is correct only if expmin <= e4 <= expmax
3343 ind
= p34
; // the number of decimal digits in the signifcand of res
3345 // at this point we have the result rounded with unbounded exponent in
3346 // res and we know its tininess:
3347 // res = (-1)^p_sign * significand * 10^e4,
3348 // where q (significand) = ind = p34
3349 // Note: res is correct only if expmin <= e4 <= expmax
3351 // check for overflow if RN
3352 if (rnd_mode
== ROUNDING_TO_NEAREST
3353 && (ind
+ e4
) > (p34
+ expmax
)) {
3354 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3355 res
.w
[0] = 0x0000000000000000ull
;
3356 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
3357 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3358 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3359 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3360 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3363 } // else not overflow or not RN, so continue
3365 // from this point on this is similar to the last part of the computation
3366 // for Cases (15), (16), (17)
3368 // if (e4 >= expmin) we have the result rounded with bounded exponent
3370 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
3371 // where the result rounded [at most] once is
3372 // (-1)^p_sign * significand_res * 10^e4
3374 // avoid double rounding error
3375 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3376 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3377 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3378 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3379 is_inexact_lt_midpoint
= 0;
3380 is_inexact_gt_midpoint
= 0;
3381 is_midpoint_lt_even
= 0;
3382 is_midpoint_gt_even
= 0;
3385 // nothing is left of res when moving the decimal point left x0 digits
3386 is_inexact_lt_midpoint
= 1;
3387 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
3388 res
.w
[0] = 0x0000000000000000ull
;
3390 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
3391 // this is <, =, or > 1/2 ulp
3392 // compare the ind-digit value in the significand of res with
3393 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3394 // less than, equal to, or greater than 1/2 ulp (significand of res)
3395 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3396 R128
.w
[0] = res
.w
[0];
3398 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
3400 is_inexact_lt_midpoint
= 1;
3401 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
3403 is_midpoint_gt_even
= 1;
3404 } else { // > 1/2 ulp
3406 is_inexact_gt_midpoint
= 1;
3408 } else { // if (ind <= 38)
3409 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
3410 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3411 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
3413 is_inexact_lt_midpoint
= 1;
3414 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3415 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
3417 is_midpoint_gt_even
= 1;
3418 } else { // > 1/2 ulp
3420 is_inexact_gt_midpoint
= 1;
3423 if (lt_half_ulp
|| eq_half_ulp
) {
3424 // res = +0.0 * 10^expmin
3425 res
.w
[1] = 0x0000000000000000ull
;
3426 res
.w
[0] = 0x0000000000000000ull
;
3427 } else { // if (gt_half_ulp)
3428 // res = +1 * 10^expmin
3429 res
.w
[1] = 0x0000000000000000ull
;
3430 res
.w
[0] = 0x0000000000000001ull
;
3432 res
.w
[1] = p_sign
| res
.w
[1];
3434 } else { // if (1 <= x0 <= ind - 1 <= 33)
3435 // round the ind-digit result to ind - x0 digits
3437 if (ind
<= 18) { // 2 <= ind <= 18
3438 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
3439 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3440 &is_inexact_lt_midpoint
,
3441 &is_inexact_gt_midpoint
);
3444 } else if (ind
<= 38) {
3445 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3446 P128
.w
[0] = res
.w
[0];
3447 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
3448 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3449 &is_inexact_lt_midpoint
,
3450 &is_inexact_gt_midpoint
);
3452 e4
= e4
+ x0
; // expmin
3453 // we want the exponent to be expmin, so if incr_exp = 1 then
3454 // multiply the rounded result by 10 - it will still fit in 113 bits
3457 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3458 P128
.w
[0] = res
.w
[0];
3459 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
3462 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.
3464 // avoid a double rounding error
3465 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3466 is_midpoint_lt_even
) { // double rounding error upward
3469 if (res
.w
[0] == 0xffffffffffffffffull
)
3471 // Note: a double rounding error upward is not possible; for this
3472 // the result after the first rounding would have to be 99...95
3473 // (35 digits in all), possibly followed by a number of zeros; this
3474 // not possible in this underflow case
3475 is_midpoint_lt_even
= 0;
3476 is_inexact_lt_midpoint
= 1;
3477 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3478 is_midpoint_gt_even
) { // double rounding error downward
3483 is_midpoint_gt_even
= 0;
3484 is_inexact_gt_midpoint
= 1;
3485 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3486 !is_inexact_lt_midpoint
3487 && !is_inexact_gt_midpoint
) {
3488 // if this second rounding was exact the result may still be
3489 // inexact because of the first rounding
3490 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3491 is_inexact_gt_midpoint
= 1;
3493 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3494 is_inexact_lt_midpoint
= 1;
3496 } else if (is_midpoint_gt_even
&&
3497 (is_inexact_gt_midpoint0
3498 || is_midpoint_lt_even0
)) {
3499 // pulled up to a midpoint
3500 is_inexact_lt_midpoint
= 1;
3501 is_inexact_gt_midpoint
= 0;
3502 is_midpoint_lt_even
= 0;
3503 is_midpoint_gt_even
= 0;
3504 } else if (is_midpoint_lt_even
&&
3505 (is_inexact_lt_midpoint0
3506 || is_midpoint_gt_even0
)) {
3507 // pulled down to a midpoint
3508 is_inexact_lt_midpoint
= 0;
3509 is_inexact_gt_midpoint
= 1;
3510 is_midpoint_lt_even
= 0;
3511 is_midpoint_gt_even
= 0;
3517 // res contains the correct result
3518 // apply correction if not rounding to nearest
3519 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3520 rounding_correction (rnd_mode
,
3521 is_inexact_lt_midpoint
,
3522 is_inexact_gt_midpoint
,
3523 is_midpoint_lt_even
, is_midpoint_gt_even
,
3526 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
3527 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
3528 // set the inexact flag
3529 *pfpsf
|= INEXACT_EXCEPTION
;
3531 *pfpsf
|= UNDERFLOW_EXCEPTION
;
3533 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3534 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3535 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3536 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3540 } else if ((p34
<= delta
&& delta
+ q3
<= q4
) || // Case (15)
3541 (delta
< p34
&& p34
< delta
+ q3
&& delta
+ q3
<= q4
) || //Case (16)
3542 (delta
+ q3
<= p34
&& p34
< q4
)) { // Case (17)
3544 // calculate first the result rounded to the destination precision, with
3545 // unbounded exponent
3547 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
3548 rnd_mode
, &is_midpoint_lt_even
,
3549 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
3550 &is_inexact_gt_midpoint
, pfpsf
, &res
);
3551 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3552 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3553 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3554 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3562 } // end if delta < 0
3564 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3565 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3566 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3567 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3574 #if DECIMAL_CALL_BY_REFERENCE
3576 bid128_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
3577 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3579 UINT128 x
= *px
, y
= *py
, z
= *pz
;
3580 #if !DECIMAL_GLOBAL_ROUNDING
3581 unsigned int rnd_mode
= *prnd_mode
;
3585 bid128_fma (UINT128 x
, UINT128 y
, UINT128 z
3586 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3589 int is_midpoint_lt_even
, is_midpoint_gt_even
,
3590 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
;
3591 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3593 #if DECIMAL_CALL_BY_REFERENCE
3594 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3595 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3597 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3600 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3601 &is_inexact_lt_midpoint
,
3602 &is_inexact_gt_midpoint
, x
, y
,
3603 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3610 #if DECIMAL_CALL_BY_REFERENCE
3612 bid128ddd_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT64
* pz
3613 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3615 UINT64 x
= *px
, y
= *py
, z
= *pz
;
3616 #if !DECIMAL_GLOBAL_ROUNDING
3617 unsigned int rnd_mode
= *prnd_mode
;
3621 bid128ddd_fma (UINT64 x
, UINT64 y
, UINT64 z
3622 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3625 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3626 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3627 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3630 #if DECIMAL_CALL_BY_REFERENCE
3631 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3632 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3633 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3634 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3635 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3637 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3640 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3641 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3642 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3643 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3644 &is_inexact_lt_midpoint
,
3645 &is_inexact_gt_midpoint
, x1
, y1
,
3646 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3653 #if DECIMAL_CALL_BY_REFERENCE
3655 bid128ddq_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3656 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3658 UINT64 x
= *px
, y
= *py
;
3660 #if !DECIMAL_GLOBAL_ROUNDING
3661 unsigned int rnd_mode
= *prnd_mode
;
3665 bid128ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3666 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3669 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3670 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3671 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3674 #if DECIMAL_CALL_BY_REFERENCE
3675 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3676 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3677 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3678 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3680 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3683 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3684 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3685 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3686 &is_inexact_lt_midpoint
,
3687 &is_inexact_gt_midpoint
, x1
, y1
,
3688 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3695 #if DECIMAL_CALL_BY_REFERENCE
3697 bid128dqd_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3698 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3700 UINT64 x
= *px
, z
= *pz
;
3701 #if !DECIMAL_GLOBAL_ROUNDING
3702 unsigned int rnd_mode
= *prnd_mode
;
3706 bid128dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3707 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3710 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3711 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3712 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3715 #if DECIMAL_CALL_BY_REFERENCE
3716 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3717 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3718 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3719 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3721 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3724 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3725 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3726 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3727 &is_inexact_lt_midpoint
,
3728 &is_inexact_gt_midpoint
, x1
, y
,
3729 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3736 #if DECIMAL_CALL_BY_REFERENCE
3738 bid128dqq_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3739 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3742 #if !DECIMAL_GLOBAL_ROUNDING
3743 unsigned int rnd_mode
= *prnd_mode
;
3747 bid128dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3748 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3751 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3752 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3753 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3756 #if DECIMAL_CALL_BY_REFERENCE
3757 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3758 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3759 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3761 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3764 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3765 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3766 &is_inexact_lt_midpoint
,
3767 &is_inexact_gt_midpoint
, x1
, y
,
3768 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3775 #if DECIMAL_CALL_BY_REFERENCE
3777 bid128qdd_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
3778 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3780 UINT64 y
= *py
, z
= *pz
;
3781 #if !DECIMAL_GLOBAL_ROUNDING
3782 unsigned int rnd_mode
= *prnd_mode
;
3786 bid128qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
3787 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3790 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3791 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3792 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3795 #if DECIMAL_CALL_BY_REFERENCE
3796 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3797 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3798 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3799 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3801 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3804 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3805 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3806 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3807 &is_inexact_lt_midpoint
,
3808 &is_inexact_gt_midpoint
, x
, y1
,
3809 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3816 #if DECIMAL_CALL_BY_REFERENCE
3818 bid128qdq_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
3819 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3822 #if !DECIMAL_GLOBAL_ROUNDING
3823 unsigned int rnd_mode
= *prnd_mode
;
3827 bid128qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
3828 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3831 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3832 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3833 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3836 #if DECIMAL_CALL_BY_REFERENCE
3837 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3838 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3839 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3841 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3844 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3845 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3846 &is_inexact_lt_midpoint
,
3847 &is_inexact_gt_midpoint
, x
, y1
,
3848 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3855 #if DECIMAL_CALL_BY_REFERENCE
3857 bid128qqd_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
3858 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3861 #if !DECIMAL_GLOBAL_ROUNDING
3862 unsigned int rnd_mode
= *prnd_mode
;
3866 bid128qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
3867 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3870 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3871 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3872 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3875 #if DECIMAL_CALL_BY_REFERENCE
3876 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3877 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3878 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3880 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3883 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3884 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3885 &is_inexact_lt_midpoint
,
3886 &is_inexact_gt_midpoint
, x
, y
,
3887 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3893 // Note: bid128qqq_fma is represented by bid128_fma
3895 // Note: bid64ddd_fma is represented by bid64_fma
3897 #if DECIMAL_CALL_BY_REFERENCE
3899 bid64ddq_fma (UINT64
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3900 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3902 UINT64 x
= *px
, y
= *py
;
3903 #if !DECIMAL_GLOBAL_ROUNDING
3904 unsigned int rnd_mode
= *prnd_mode
;
3908 bid64ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3909 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3912 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3915 #if DECIMAL_CALL_BY_REFERENCE
3916 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3917 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3918 bid64qqq_fma (&res1
, &x1
, &y1
, pz
3919 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3922 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3923 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3924 res1
= bid64qqq_fma (x1
, y1
, z
3925 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3932 #if DECIMAL_CALL_BY_REFERENCE
3934 bid64dqd_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3935 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3937 UINT64 x
= *px
, z
= *pz
;
3938 #if !DECIMAL_GLOBAL_ROUNDING
3939 unsigned int rnd_mode
= *prnd_mode
;
3943 bid64dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3944 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3947 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3950 #if DECIMAL_CALL_BY_REFERENCE
3951 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3952 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3953 bid64qqq_fma (&res1
, &x1
, py
, &z1
3954 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3957 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3958 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3959 res1
= bid64qqq_fma (x1
, y
, z1
3960 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3967 #if DECIMAL_CALL_BY_REFERENCE
3969 bid64dqq_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3970 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3973 #if !DECIMAL_GLOBAL_ROUNDING
3974 unsigned int rnd_mode
= *prnd_mode
;
3978 bid64dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3979 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3982 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3985 #if DECIMAL_CALL_BY_REFERENCE
3986 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3987 bid64qqq_fma (&res1
, &x1
, py
, pz
3988 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3991 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3992 res1
= bid64qqq_fma (x1
, y
, z
3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4000 #if DECIMAL_CALL_BY_REFERENCE
4002 bid64qdd_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
4003 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4005 UINT64 y
= *py
, z
= *pz
;
4006 #if !DECIMAL_GLOBAL_ROUNDING
4007 unsigned int rnd_mode
= *prnd_mode
;
4011 bid64qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
4012 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4015 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4018 #if DECIMAL_CALL_BY_REFERENCE
4019 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4020 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4021 bid64qqq_fma (&res1
, px
, &y1
, &z1
4022 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4025 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4026 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4027 res1
= bid64qqq_fma (x
, y1
, z1
4028 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4035 #if DECIMAL_CALL_BY_REFERENCE
4037 bid64qdq_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
4038 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4041 #if !DECIMAL_GLOBAL_ROUNDING
4042 unsigned int rnd_mode
= *prnd_mode
;
4046 bid64qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
4047 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4050 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4053 #if DECIMAL_CALL_BY_REFERENCE
4054 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4055 bid64qqq_fma (&res1
, px
, &y1
, pz
4056 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4059 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4060 res1
= bid64qqq_fma (x
, y1
, z
4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4068 #if DECIMAL_CALL_BY_REFERENCE
4070 bid64qqd_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
4071 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4074 #if !DECIMAL_GLOBAL_ROUNDING
4075 unsigned int rnd_mode
= *prnd_mode
;
4079 bid64qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
4080 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4083 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4086 #if DECIMAL_CALL_BY_REFERENCE
4087 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4088 bid64qqq_fma (&res1
, px
, py
, &z1
4089 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4092 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4093 res1
= bid64qqq_fma (x
, y
, z1
4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4101 #if DECIMAL_CALL_BY_REFERENCE
4103 bid64qqq_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
4104 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4106 UINT128 x
= *px
, y
= *py
, z
= *pz
;
4107 #if !DECIMAL_GLOBAL_ROUNDING
4108 unsigned int rnd_mode
= *prnd_mode
;
4112 bid64qqq_fma (UINT128 x
, UINT128 y
, UINT128 z
4113 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4116 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0,
4117 is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
4118 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
4119 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
4121 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4122 UINT128 res128
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4123 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4124 unsigned int save_fpsf
; // needed because of the call to bid128_ext_fma
4133 int lt_half_ulp
= 0, eq_half_ulp
= 0;
4135 // Note: for rounding modes other than RN or RA, the result can be obtained
4136 // by rounding first to BID128 and then to BID64
4138 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
4141 #if DECIMAL_CALL_BY_REFERENCE
4142 bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4143 &is_inexact_lt_midpoint0
, &is_inexact_gt_midpoint0
,
4145 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4148 res
= bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4149 &is_inexact_lt_midpoint0
,
4150 &is_inexact_gt_midpoint0
, x
, y
,
4151 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4155 if ((rnd_mode
== ROUNDING_DOWN
) || (rnd_mode
== ROUNDING_UP
) ||
4156 (rnd_mode
== ROUNDING_TO_ZERO
) || // no double rounding error is possible
4157 ((res
.w
[HIGH_128W
] & MASK_NAN
) == MASK_NAN
) || //res=QNaN (cannot be SNaN)
4158 ((res
.w
[HIGH_128W
] & MASK_ANY_INF
) == MASK_INF
)) { // result is infinity
4159 #if DECIMAL_CALL_BY_REFERENCE
4160 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4162 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4164 // determine the unbiased exponent of the result
4165 unbexp
= ((res1
>> 53) & 0x3ff) - 398;
4167 // if subnormal, res1 must have exp = -398
4168 // if tiny and inexact set underflow and inexact status flags
4169 if (!((res1
& MASK_NAN
) == MASK_NAN
) && // res1 not NaN
4171 && ((res1
& MASK_BINARY_SIG1
) < 1000000000000000ull)
4172 && (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
4173 || is_midpoint_lt_even0
|| is_midpoint_gt_even0
)) {
4174 // set the inexact flag and the underflow flag
4175 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4176 } else if (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4177 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4178 // set the inexact flag and the underflow flag
4179 *pfpsf
|= INEXACT_EXCEPTION
;
4182 *pfpsf
|= save_fpsf
;
4184 } // else continue, and use rounding to nearest to round to 16 digits
4186 // at this point the result is rounded to nearest (even or away) to 34 digits
4187 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4188 sign
= res
.w
[HIGH_128W
] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
4189 exp
= res
.w
[HIGH_128W
] & MASK_EXP
; // biased and shifted left 49 bits
4190 unbexp
= (exp
>> 49) - 6176;
4191 C
.w
[1] = res
.w
[HIGH_128W
] & MASK_COEFF
;
4192 C
.w
[0] = res
.w
[LOW_128W
];
4194 if ((C
.w
[1] == 0x0 && C
.w
[0] == 0x0) || // result is zero
4195 (unbexp
<= (-398 - 35)) || (unbexp
>= (369 + 16))) {
4196 // clear under/overflow
4197 #if DECIMAL_CALL_BY_REFERENCE
4198 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4200 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4202 *pfpsf
|= save_fpsf
;
4206 // -398 - 34 <= unbexp <= 369 + 15
4207 if (rnd_mode
== ROUNDING_TIES_AWAY
) {
4208 // apply correction, if needed, to make the result rounded to nearest-even
4209 if (is_midpoint_gt_even
) {
4211 res1
--; // res1 is now even
4212 } // else the result is already correctly rounded to nearest-even
4214 // at this point the result is finite, non-zero canonical normal or subnormal,
4215 // and in most cases overflow or underflow will not occur
4217 // determine the number of digits q in the result
4218 // q = nr. of decimal digits in x
4219 // determine first the nr. of bits in x
4221 if (C
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
4222 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4223 if (C
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
4224 tmp
.d
= (double) (C
.w
[0] >> 32); // exact conversion
4226 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4227 } else { // x < 2^32
4228 tmp
.d
= (double) (C
.w
[0]); // exact conversion
4230 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4232 } else { // if x < 2^53
4233 tmp
.d
= (double) C
.w
[0]; // exact conversion
4235 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4237 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4238 tmp
.d
= (double) C
.w
[1]; // exact conversion
4240 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4242 q
= nr_digits
[nr_bits
- 1].digits
;
4244 q
= nr_digits
[nr_bits
- 1].digits1
;
4245 if (C
.w
[1] > nr_digits
[nr_bits
- 1].threshold_hi
||
4246 (C
.w
[1] == nr_digits
[nr_bits
- 1].threshold_hi
&&
4247 C
.w
[0] >= nr_digits
[nr_bits
- 1].threshold_lo
))
4250 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4251 // have to be truncated even more)
4255 round64_2_18 (q
, x0
, C
.w
[0], &res1
, &incr_exp
,
4256 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4257 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4258 } else { // 19 <= q <= 34
4259 round128_19_38 (q
, x0
, C
, &res128
, &incr_exp
,
4260 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4261 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4262 res1
= res128
.w
[0]; // the result fits in 64 bits
4264 unbexp
= unbexp
+ x0
;
4267 q
= 16; // need to set in case denormalization is necessary
4269 // the result does not require a second rounding (and it must have
4270 // been exact in the first rounding, since q <= 16)
4274 // avoid a double rounding error
4275 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4276 is_midpoint_lt_even
) { // double rounding error upward
4278 res1
--; // res1 becomes odd
4279 is_midpoint_lt_even
= 0;
4280 is_inexact_lt_midpoint
= 1;
4281 if (res1
== 0x00038d7ea4c67fffull
) { // 10^15 - 1
4282 res1
= 0x002386f26fc0ffffull
; // 10^16 - 1
4285 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4286 is_midpoint_gt_even
) { // double rounding error downward
4288 res1
++; // res1 becomes odd (so it cannot be 10^16)
4289 is_midpoint_gt_even
= 0;
4290 is_inexact_gt_midpoint
= 1;
4291 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4292 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4293 // if this second rounding was exact the result may still be
4294 // inexact because of the first rounding
4295 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4296 is_inexact_gt_midpoint
= 1;
4298 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4299 is_inexact_lt_midpoint
= 1;
4301 } else if (is_midpoint_gt_even
&&
4302 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4303 // pulled up to a midpoint
4304 is_inexact_lt_midpoint
= 1;
4305 is_inexact_gt_midpoint
= 0;
4306 is_midpoint_lt_even
= 0;
4307 is_midpoint_gt_even
= 0;
4308 } else if (is_midpoint_lt_even
&&
4309 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4310 // pulled down to a midpoint
4311 is_inexact_lt_midpoint
= 0;
4312 is_inexact_gt_midpoint
= 1;
4313 is_midpoint_lt_even
= 0;
4314 is_midpoint_gt_even
= 0;
4318 // this is the result rounded correctly to nearest even, with unbounded exp.
4320 // check for overflow
4321 if (q
+ unbexp
> P16
+ expmax16
) {
4322 res1
= sign
| 0x7800000000000000ull
;
4323 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
4324 *pfpsf
|= save_fpsf
;
4326 } else if (unbexp
> expmax16
) { // q + unbexp <= P16 + expmax16
4327 // not overflow; the result must be exact, and we can multiply res1 by
4328 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4329 scale
= unbexp
- expmax16
;
4330 res1
= res1
* ten2k64
[scale
]; // res1 * 10^scale
4331 unbexp
= expmax16
; // unbexp - scale
4336 // check for underflow
4337 if (q
+ unbexp
< P16
+ expmin16
) {
4338 if (unbexp
< expmin16
) {
4339 // we must truncate more of res
4340 x0
= expmin16
- unbexp
; // x0 >= 1
4341 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
4342 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
4343 is_midpoint_lt_even0
= is_midpoint_lt_even
;
4344 is_midpoint_gt_even0
= is_midpoint_gt_even
;
4345 is_inexact_lt_midpoint
= 0;
4346 is_inexact_gt_midpoint
= 0;
4347 is_midpoint_lt_even
= 0;
4348 is_midpoint_gt_even
= 0;
4349 // the number of decimal digits in res1 is q
4350 if (x0
< q
) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4351 // 2 <= q <= 16, 1 <= x0 <= 15
4352 round64_2_18 (q
, x0
, res1
, &res1
, &incr_exp
,
4353 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4354 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4356 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4357 res1
= ten2k64
[q
- x0
];
4359 unbexp
= unbexp
+ x0
; // expmin16
4360 } else if (x0
== q
) {
4361 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4362 // determine relationship with 1/2 ulp
4364 if (res1
< midpoint64
[q
- 1]) { // < 1/2 ulp
4366 is_inexact_lt_midpoint
= 1;
4367 } else if (res1
== midpoint64
[q
- 1]) { // = 1/2 ulp
4369 is_midpoint_gt_even
= 1;
4370 } else { // > 1/2 ulp
4372 is_inexact_gt_midpoint
= 1;
4374 if (lt_half_ulp
|| eq_half_ulp
) {
4375 // res = +0.0 * 10^expmin16
4376 res1
= 0x0000000000000000ull
;
4377 } else { // if (gt_half_ulp)
4378 // res = +1 * 10^expmin16
4379 res1
= 0x0000000000000001ull
;
4382 } else { // if (x0 > q)
4383 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4384 res1
= 0x0000000000000000ull
;
4386 is_inexact_lt_midpoint
= 1;
4388 // avoid a double rounding error
4389 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4390 is_midpoint_lt_even
) { // double rounding error upward
4392 res1
--; // res1 becomes odd
4393 is_midpoint_lt_even
= 0;
4394 is_inexact_lt_midpoint
= 1;
4395 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4396 is_midpoint_gt_even
) { // double rounding error downward
4398 res1
++; // res1 becomes odd
4399 is_midpoint_gt_even
= 0;
4400 is_inexact_gt_midpoint
= 1;
4401 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4402 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4403 // if this rounding was exact the result may still be
4404 // inexact because of the previous roundings
4405 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4406 is_inexact_gt_midpoint
= 1;
4408 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4409 is_inexact_lt_midpoint
= 1;
4411 } else if (is_midpoint_gt_even
&&
4412 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4413 // pulled up to a midpoint
4414 is_inexact_lt_midpoint
= 1;
4415 is_inexact_gt_midpoint
= 0;
4416 is_midpoint_lt_even
= 0;
4417 is_midpoint_gt_even
= 0;
4418 } else if (is_midpoint_lt_even
&&
4419 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4420 // pulled down to a midpoint
4421 is_inexact_lt_midpoint
= 0;
4422 is_inexact_gt_midpoint
= 1;
4423 is_midpoint_lt_even
= 0;
4424 is_midpoint_gt_even
= 0;
4429 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4430 // and the result is tiny and exact
4432 // check for inexact result
4433 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4434 is_midpoint_lt_even
|| is_midpoint_gt_even
||
4435 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4436 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4437 // set the inexact flag and the underflow flag
4438 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4440 } else if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4441 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
4442 *pfpsf
|= INEXACT_EXCEPTION
;
4444 // this is the result rounded correctly to nearest, with bounded exponent
4446 if (rnd_mode
== ROUNDING_TIES_AWAY
&& is_midpoint_gt_even
) { // correction
4448 res1
++; // res1 is now odd
4449 } // else the result is already correct
4451 // assemble the result
4452 if (res1
< 0x0020000000000000ull
) { // res < 2^53
4453 res1
= sign
| ((UINT64
) (unbexp
+ 398) << 53) | res1
;
4454 } else { // res1 >= 2^53
4455 res1
= sign
| MASK_STEERING_BITS
|
4456 ((UINT64
) (unbexp
+ 398) << 51) | (res1
& MASK_BINARY_SIG2
);
4458 *pfpsf
|= save_fpsf
;