1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 /*****************************************************************************
31 ****************************************************************************/
33 #include "bid_internal.h"
35 #if DECIMAL_CALL_BY_REFERENCE
37 __bid128_add (UINT128
* pres
, UINT128
* px
,
39 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
41 UINT128 x
= *px
, y
= *py
;
42 #if !DECIMAL_GLOBAL_ROUNDING
43 unsigned int rnd_mode
= *prnd_mode
;
47 __bid128_add (UINT128 x
,
48 UINT128 y _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
53 UINT64 x_sign
, y_sign
, tmp_sign
;
54 UINT64 x_exp
, y_exp
, tmp_exp
; // e1 = x_exp, e2 = y_exp
55 UINT64 C1_hi
, C2_hi
, tmp_signif_hi
;
56 UINT64 C1_lo
, C2_lo
, tmp_signif_lo
;
57 // Note: C1.w[1], C1.w[0] represent C1_hi, C1_lo (all are UINT64)
58 // Note: C2.w[1], C2.w[0] represent C2_hi, C2_lo (all are UINT64)
59 UINT64 tmp64
, tmp64A
, tmp64B
;
60 BID_UI64DOUBLE tmp1
, tmp2
;
61 int x_nr_bits
, y_nr_bits
;
62 int q1
, q2
, delta
, scale
, x1
, ind
, shift
, tmp_inexact
= 0;
67 UINT128 highf2star
; // top 128 bits in f2*; low 128 bits in R256[1], R256[0]
68 UINT256 P256
, Q256
, R256
;
69 int is_inexact
= 0, is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0;
70 int is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
73 // check for NaN or Infinity
74 if (((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
)
75 || ((y
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
)) {
76 // x is special or y is special
78 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
79 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
81 *pfpsf
|= INVALID_EXCEPTION
;
83 res
.w
[1] = x
.w
[1] & 0xfdffffffffffffffull
;
86 if ((y
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // y is SNAN
88 *pfpsf
|= INVALID_EXCEPTION
;
95 } else if ((y
.w
[1] & MASK_NAN
) == MASK_NAN
) { // y is NAN
96 if ((y
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // y is SNAN
98 *pfpsf
|= INVALID_EXCEPTION
;
100 res
.w
[1] = y
.w
[1] & 0xfdffffffffffffffull
;
102 } else { // y is QNaN
108 } else { // neither x not y is NaN; at least one is infinity
109 if ((x
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // x is infinity
110 if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y is infinity
111 // if same sign, return either of them
112 if ((x
.w
[1] & MASK_SIGN
) == (y
.w
[1] & MASK_SIGN
)) {
115 } else { // x and y are infinities of opposite signs
117 *pfpsf
|= INVALID_EXCEPTION
;
118 // return QNaN Indefinite
119 res
.w
[1] = 0x7c00000000000000ull
;
120 res
.w
[0] = 0x0000000000000000ull
;
122 } else { // y is 0 or finite
127 } else { // x is not NaN or infinity, so y must be infinity
134 // unpack the arguments
136 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
137 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
138 C1_hi
= x
.w
[1] & MASK_COEFF
;
141 y_sign
= y
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
142 y_exp
= y
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
143 C2_hi
= y
.w
[1] & MASK_COEFF
;
146 // test for non-canonical values:
147 // - values whose encoding begins with x00, x01, or x10 and whose
148 // coefficient is larger than 10^34 -1, or
149 // - values whose encoding begins with x1100, x1101, x1110 (if NaNs
150 // and infinitis were eliminated already this test is reduced to
151 // checking for x10x)
153 // test for non-canonical values of the argument x
154 if ((((C1_hi
> 0x0001ed09bead87c0ull
)
155 || ((C1_hi
== 0x0001ed09bead87c0ull
)
156 && (C1_lo
> 0x378d8e63ffffffffull
)))
157 && ((x
.w
[1] & 0x6000000000000000ull
) != 0x6000000000000000ull
))
158 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
159 // check for the case where the exponent is shifted right by 2 bits!
160 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) {
161 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // same position as for G[0]G[1] != 11
163 x
.w
[1] = x
.w
[1] & 0x8000000000000000ull
; // preserve the sign bit
168 // test for non-canonical values of the argument y
169 if ((((C2_hi
> 0x0001ed09bead87c0ull
)
170 || ((C2_hi
== 0x0001ed09bead87c0ull
)
171 && (C2_lo
> 0x378d8e63ffffffffull
)))
172 && ((y
.w
[1] & 0x6000000000000000ull
) != 0x6000000000000000ull
))
173 || ((y
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
174 // check for the case where the exponent is shifted right by 2 bits!
175 if ((y
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) {
176 y_exp
= (y
.w
[1] << 2) & MASK_EXP
; // same position as for G[0]G[1] != 11
178 y
.w
[1] = y
.w
[1] & 0x8000000000000000ull
; // preserve the sign bit
184 if ((C1_hi
== 0x0ull
) && (C1_lo
== 0x0ull
)) {
185 // x is 0 and y is not special
186 // if y is 0 return 0 with the smaller exponent
187 if ((C2_hi
== 0x0ull
) && (C2_lo
== 0x0ull
)) {
192 if (x_sign
&& y_sign
)
193 res
.w
[1] = res
.w
[1] | x_sign
; // both negative
194 else if (rnd_mode
== ROUNDING_DOWN
&& x_sign
!= y_sign
)
195 res
.w
[1] = res
.w
[1] | 0x8000000000000000ull
; // -0
199 // for 0 + y return y, with the preferred exponent
200 if (y_exp
<= x_exp
) {
203 } else { // if y_exp > x_exp
204 // return (C2 * 10^scale) * 10^(y_exp - scale)
205 // where scale = min (P34-q2, y_exp-x_exp)
206 // determine q2 = nr. of decimal digits in y
207 // determine first the nr. of bits in y (y_nr_bits)
209 if (C2_hi
== 0) { // y_bits is the nr. of bits in C2_lo
210 if (C2_lo
>= 0x0020000000000000ull
) { // y >= 2^53
211 // split the 64-bit value in two 32-bit halves to avoid
213 if (C2_lo
>= 0x0000000100000000ull
) { // y >= 2^32
214 tmp2
.d
= (double) (C2_lo
>> 32); // exact conversion
215 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
218 ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
220 tmp2
.d
= (double) (C2_lo
); // exact conversion
221 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
223 ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
225 } else { // if y < 2^53
226 tmp2
.d
= (double) C2_lo
; // exact conversion
227 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
229 ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
231 } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
232 tmp2
.d
= (double) C2_hi
; // exact conversion
233 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
235 64 + ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
237 q2
= __bid_nr_digits
[y_nr_bits
].digits
;
239 q2
= __bid_nr_digits
[y_nr_bits
].digits1
;
240 if (C2_hi
> __bid_nr_digits
[y_nr_bits
].threshold_hi
241 || (C2_hi
== __bid_nr_digits
[y_nr_bits
].threshold_hi
242 && C2_lo
>= __bid_nr_digits
[y_nr_bits
].threshold_lo
))
245 // return (C2 * 10^scale) * 10^(y_exp - scale)
246 // where scale = min (P34-q2, y_exp-x_exp)
248 ind
= (y_exp
- x_exp
) >> 49;
254 } else if (q2
<= 19) { // y fits in 64 bits
255 if (scale
<= 19) { // 10^scale fits in 64 bits
256 // 64 x 64 C2_lo * __bid_ten2k64[scale]
257 __mul_64x64_to_128MACH (res
, C2_lo
, __bid_ten2k64
[scale
]);
258 } else { // 10^scale fits in 128 bits
259 // 64 x 128 C2_lo * __bid_ten2k128[scale - 20]
260 __mul_128x64_to_128 (res
, C2_lo
, __bid_ten2k128
[scale
- 20]);
262 } else { // y fits in 128 bits, but 10^scale must fit in 64 bits
263 // 64 x 128 __bid_ten2k64[scale] * C2
266 __mul_128x64_to_128 (res
, __bid_ten2k64
[scale
], C2
);
268 // subtract scale from the exponent
269 y_exp
= y_exp
- ((UINT64
) scale
<< 49);
270 res
.w
[1] = res
.w
[1] | y_sign
| y_exp
;
274 } else if ((C2_hi
== 0x0ull
) && (C2_lo
== 0x0ull
)) {
275 // y is 0 and x is not special, and not zero
276 // for x + 0 return x, with the preferred exponent
277 if (x_exp
<= y_exp
) {
280 } else { // if x_exp > y_exp
281 // return (C1 * 10^scale) * 10^(x_exp - scale)
282 // where scale = min (P34-q1, x_exp-y_exp)
283 // determine q1 = nr. of decimal digits in x
284 // determine first the nr. of bits in x
285 if (C1_hi
== 0) { // x_bits is the nr. of bits in C1_lo
286 if (C1_lo
>= 0x0020000000000000ull
) { // x >= 2^53
287 // split the 64-bit value in two 32-bit halves to avoid
289 if (C1_lo
>= 0x0000000100000000ull
) { // x >= 2^32
290 tmp1
.d
= (double) (C1_lo
>> 32); // exact conversion
292 32 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) -
295 tmp1
.d
= (double) (C1_lo
); // exact conversion
297 ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
299 } else { // if x < 2^53
300 tmp1
.d
= (double) C1_lo
; // exact conversion
302 ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
304 } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
305 tmp1
.d
= (double) C1_hi
; // exact conversion
307 64 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
309 q1
= __bid_nr_digits
[x_nr_bits
].digits
;
311 q1
= __bid_nr_digits
[x_nr_bits
].digits1
;
312 if (C1_hi
> __bid_nr_digits
[x_nr_bits
].threshold_hi
313 || (C1_hi
== __bid_nr_digits
[x_nr_bits
].threshold_hi
314 && C1_lo
>= __bid_nr_digits
[x_nr_bits
].threshold_lo
))
317 // return (C1 * 10^scale) * 10^(x_exp - scale)
318 // where scale = min (P34-q1, x_exp-y_exp)
320 ind
= (x_exp
- y_exp
) >> 49;
326 } else if (q1
<= 19) { // x fits in 64 bits
327 if (scale
<= 19) { // 10^scale fits in 64 bits
328 // 64 x 64 C1_lo * __bid_ten2k64[scale]
329 __mul_64x64_to_128MACH (res
, C1_lo
, __bid_ten2k64
[scale
]);
330 } else { // 10^scale fits in 128 bits
331 // 64 x 128 C1_lo * __bid_ten2k128[scale - 20]
332 __mul_128x64_to_128 (res
, C1_lo
, __bid_ten2k128
[scale
- 20]);
334 } else { // x fits in 128 bits, but 10^scale must fit in 64 bits
335 // 64 x 128 __bid_ten2k64[scale] * C1
338 __mul_128x64_to_128 (res
, __bid_ten2k64
[scale
], C1
);
340 // subtract scale from the exponent
341 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
342 res
.w
[1] = res
.w
[1] | x_sign
| x_exp
;
345 } else { // x and y are not canonical, not special, and are not zero
346 // note that the result may still be zero, and then it has to have the
347 // preferred exponent
348 if (x_exp
< y_exp
) { // if exp_x < exp_y then swap x and y
351 tmp_signif_hi
= C1_hi
;
352 tmp_signif_lo
= C1_lo
;
359 C2_hi
= tmp_signif_hi
;
360 C2_lo
= tmp_signif_lo
;
362 // q1 = nr. of decimal digits in x
363 // determine first the nr. of bits in x
364 if (C1_hi
== 0) { // x_bits is the nr. of bits in C1_lo
365 if (C1_lo
>= 0x0020000000000000ull
) { // x >= 2^53
366 //split the 64-bit value in two 32-bit halves to avoid rounding errors
367 if (C1_lo
>= 0x0000000100000000ull
) { // x >= 2^32
368 tmp1
.d
= (double) (C1_lo
>> 32); // exact conversion
370 32 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
372 tmp1
.d
= (double) (C1_lo
); // exact conversion
374 ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
376 } else { // if x < 2^53
377 tmp1
.d
= (double) C1_lo
; // exact conversion
379 ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
381 } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
382 tmp1
.d
= (double) C1_hi
; // exact conversion
384 64 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
387 q1
= __bid_nr_digits
[x_nr_bits
].digits
;
389 q1
= __bid_nr_digits
[x_nr_bits
].digits1
;
390 if (C1_hi
> __bid_nr_digits
[x_nr_bits
].threshold_hi
391 || (C1_hi
== __bid_nr_digits
[x_nr_bits
].threshold_hi
392 && C1_lo
>= __bid_nr_digits
[x_nr_bits
].threshold_lo
))
395 // q2 = nr. of decimal digits in y
396 // determine first the nr. of bits in y (y_nr_bits)
397 if (C2_hi
== 0) { // y_bits is the nr. of bits in C2_lo
398 if (C2_lo
>= 0x0020000000000000ull
) { // y >= 2^53
399 //split the 64-bit value in two 32-bit halves to avoid rounding errors
400 if (C2_lo
>= 0x0000000100000000ull
) { // y >= 2^32
401 tmp2
.d
= (double) (C2_lo
>> 32); // exact conversion
402 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
404 32 + ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
406 tmp2
.d
= (double) (C2_lo
); // exact conversion
407 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
409 ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
411 } else { // if y < 2^53
412 tmp2
.d
= (double) C2_lo
; // exact conversion
413 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
415 ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
417 } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
418 tmp2
.d
= (double) C2_hi
; // exact conversion
419 ///tmp2_i64 = *(UINT64 *) & tmp2_d;
421 64 + ((((unsigned int) (tmp2
.ui64
>> 52)) & 0x7ff) - 0x3ff);
424 q2
= __bid_nr_digits
[y_nr_bits
].digits
;
426 q2
= __bid_nr_digits
[y_nr_bits
].digits1
;
427 if (C2_hi
> __bid_nr_digits
[y_nr_bits
].threshold_hi
428 || (C2_hi
== __bid_nr_digits
[y_nr_bits
].threshold_hi
429 && C2_lo
>= __bid_nr_digits
[y_nr_bits
].threshold_lo
))
433 delta
= q1
+ (int) (x_exp
>> 49) - q2
- (int) (y_exp
>> 49);
436 // round the result directly because 0 < C2 < ulp (C1 * 10^(x_exp-e2))
437 // n = C1 * 10^e1 or n = C1 +/- 10^(q1-P34)) * 10^e1
438 // the result is inexact; the preferred exponent is the least possible
440 if (delta
>= P34
+ 1) {
441 // for RN the result is the operand with the larger magnitude,
442 // possibly scaled up by 10^(P34-q1)
443 // an overflow cannot occur in this case (rounding to nearest)
444 if (q1
< P34
) { // scale C1 up by 10^(P34-q1)
445 // Note: because delta >= P34+1 it is certain that
446 // x_exp - ((UINT64)scale << 49) will stay above e_min
448 if (q1
<= 19) { // C1 fits in 64 bits
449 // 1 <= q1 <= 19 => 15 <= scale <= 33
450 if (scale
<= 19) { // 10^scale fits in 64 bits
451 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
452 } else { // if 20 <= scale <= 33
453 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
454 // (C1 * 10^(scale-19)) fits in 64 bits
455 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
456 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
458 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
459 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
462 // C1 = __bid_ten2k64[P34 - q1] * C1
463 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
465 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
469 // some special cases arise: if delta = P34 + 1 and C1 = 10^(P34-1)
470 // (after scaling) and x_sign != y_sign and C2 > 5*10^(q2-1) =>
472 // Note: do this only for rounding to nearest; for other rounding
473 // modes the correction will be applied next
474 if ((rnd_mode
== ROUNDING_TO_NEAREST
475 || rnd_mode
== ROUNDING_TIES_AWAY
) && delta
== (P34
+ 1)
476 && C1_hi
== 0x0000314dc6448d93ull
477 && C1_lo
== 0x38c15b0a00000000ull
&& x_sign
!= y_sign
478 && ((q2
<= 19 && C2_lo
> __bid_midpoint64
[q2
- 1]) || (q2
>= 20
479 && (C2_hi
> __bid_midpoint128
[q2
- 20].w
[1]
480 || (C2_hi
== __bid_midpoint128
[q2
- 20].w
[1]
481 && C2_lo
> __bid_midpoint128
[q2
- 20].w
[0]))))) {
482 // C1 = 10^34 - 1 and decrement x_exp by 1 (no underflow possible)
483 C1_hi
= 0x0001ed09bead87c0ull
;
484 C1_lo
= 0x378d8e63ffffffffull
;
485 x_exp
= x_exp
- EXP_P1
;
487 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
488 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
&& y_sign
)
489 || (rnd_mode
== ROUNDING_UP
&& !x_sign
&& !y_sign
)) {
490 // add 1 ulp and then check for overflow
492 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
495 if (C1_hi
== 0x0001ed09bead87c0ull
496 && C1_lo
== 0x378d8e6400000000ull
) {
497 // C1 = 10^34 => rounding overflow
498 C1_hi
= 0x0000314dc6448d93ull
;
499 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
500 x_exp
= x_exp
+ EXP_P1
;
501 if (x_exp
== EXP_MAX_P1
) { // overflow
502 C1_hi
= 0x7800000000000000ull
; // +inf
504 x_exp
= 0; // x_sign is preserved
505 // set overflow flag (the inexact flag was set too)
506 *pfpsf
|= OVERFLOW_EXCEPTION
;
509 } else if ((rnd_mode
== ROUNDING_DOWN
&& !x_sign
&& y_sign
)
510 || (rnd_mode
== ROUNDING_UP
&& x_sign
&& !y_sign
)
511 || (rnd_mode
== ROUNDING_TO_ZERO
&& x_sign
!= y_sign
)) {
512 // subtract 1 ulp from C1
513 // Note: because delta >= P34 + 1 the result cannot be zero
515 if (C1_lo
== 0xffffffffffffffffull
)
517 // if the coefficient is 10^33 - 1 then make it 10^34 - 1 and
518 // decrease the exponent by 1 (because delta >= P34 + 1 the
519 // exponent will not become less than e_min)
520 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
521 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
522 if (C1_hi
== 0x0000314dc6448d93ull
523 && C1_lo
== 0x38c15b09ffffffffull
) {
524 // make C1 = 10^34 - 1
525 C1_hi
= 0x0001ed09bead87c0ull
;
526 C1_lo
= 0x378d8e63ffffffffull
;
527 x_exp
= x_exp
- EXP_P1
;
530 ; // the result is already correct
533 // set the inexact flag
534 *pfpsf
|= INEXACT_EXCEPTION
;
535 // assemble the result
536 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
538 } else { // delta = P34
539 // in most cases, the smaller operand may be < or = or > 1/2 ulp of the
541 // however, the case C1 = 10^(q1-1) and x_sign != y_sign is special due
542 // to accuracy loss after subtraction, and will be treated separately
543 if (x_sign
== y_sign
|| (q1
<= 20
544 && (C1_hi
!= 0 || C1_lo
!= __bid_ten2k64
[q1
- 1])) || (q1
>= 21
545 && (C1_hi
!= __bid_ten2k128
[q1
- 21].w
[1]
546 || C1_lo
!= __bid_ten2k128
[q1
- 21].w
[0]))) {
547 // if x_sign == y_sign or C1 != 10^(q1-1)
548 // compare C2 with 1/2 ulp = 5 * 10^(q2-1), the latter read from table
549 // Note: cases q1<=19 and q1>=20 can be coalesced at some latency cost
550 if (q2
<= 19) { // C2 and 5*10^(q2-1) both fit in 64 bits
551 halfulp64
= __bid_midpoint64
[q2
- 1]; // 5 * 10^(q2-1)
552 if (C2_lo
< halfulp64
) { // n2 < 1/2 ulp (n1)
553 // for RN the result is the operand with the larger magnitude,
554 // possibly scaled up by 10^(P34-q1)
555 // an overflow cannot occur in this case (rounding to nearest)
556 if (q1
< P34
) { // scale C1 up by 10^(P34-q1)
557 // Note: because delta = P34 it is certain that
558 // x_exp - ((UINT64)scale << 49) will stay above e_min
560 if (q1
<= 19) { // C1 fits in 64 bits
561 // 1 <= q1 <= 19 => 15 <= scale <= 33
562 if (scale
<= 19) { // 10^scale fits in 64 bits
563 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
564 } else { // if 20 <= scale <= 33
565 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
566 // (C1 * 10^(scale-19)) fits in 64 bits
567 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
568 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
570 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
571 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
574 // C1 = __bid_ten2k64[P34 - q1] * C1
575 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
577 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
581 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
582 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
583 && y_sign
) || (rnd_mode
== ROUNDING_UP
584 && !x_sign
&& !y_sign
)) {
585 // add 1 ulp and then check for overflow
587 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
590 if (C1_hi
== 0x0001ed09bead87c0ull
591 && C1_lo
== 0x378d8e6400000000ull
) {
592 // C1 = 10^34 => rounding overflow
593 C1_hi
= 0x0000314dc6448d93ull
;
594 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
595 x_exp
= x_exp
+ EXP_P1
;
596 if (x_exp
== EXP_MAX_P1
) { // overflow
597 C1_hi
= 0x7800000000000000ull
; // +inf
599 x_exp
= 0; // x_sign is preserved
600 // set overflow flag (the inexact flag was set too)
601 *pfpsf
|= OVERFLOW_EXCEPTION
;
604 } else if ((rnd_mode
== ROUNDING_DOWN
&& !x_sign
&& y_sign
) ||
605 (rnd_mode
== ROUNDING_UP
&& x_sign
&& !y_sign
) ||
606 (rnd_mode
== ROUNDING_TO_ZERO
&& x_sign
!= y_sign
)) {
607 // subtract 1 ulp from C1
608 // Note: because delta >= P34 + 1 the result cannot be zero
610 if (C1_lo
== 0xffffffffffffffffull
)
612 // if the coefficient is 10^33-1 then make it 10^34-1 and
613 // decrease the exponent by 1 (because delta >= P34 + 1 the
614 // exponent will not become less than e_min)
615 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
616 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
617 if (C1_hi
== 0x0000314dc6448d93ull
618 && C1_lo
== 0x38c15b09ffffffffull
) {
619 // make C1 = 10^34 - 1
620 C1_hi
= 0x0001ed09bead87c0ull
;
621 C1_lo
= 0x378d8e63ffffffffull
;
622 x_exp
= x_exp
- EXP_P1
;
625 ; // the result is already correct
628 // set the inexact flag
629 *pfpsf
|= INEXACT_EXCEPTION
;
630 // assemble the result
631 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
633 } else if ((C2_lo
== halfulp64
)
634 && (q1
< P34
|| ((C1_lo
& 0x1) == 0))) {
635 // n2 = 1/2 ulp (n1) and C1 is even
636 // the result is the operand with the larger magnitude,
637 // possibly scaled up by 10^(P34-q1)
638 // an overflow cannot occur in this case (rounding to nearest)
639 if (q1
< P34
) { // scale C1 up by 10^(P34-q1)
640 // Note: because delta = P34 it is certain that
641 // x_exp - ((UINT64)scale << 49) will stay above e_min
643 if (q1
<= 19) { // C1 fits in 64 bits
644 // 1 <= q1 <= 19 => 15 <= scale <= 33
645 if (scale
<= 19) { // 10^scale fits in 64 bits
646 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
647 } else { // if 20 <= scale <= 33
648 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
649 // (C1 * 10^(scale-19)) fits in 64 bits
650 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
651 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
653 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
654 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
657 // C1 = __bid_ten2k64[P34 - q1] * C1
658 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
660 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
664 if ((rnd_mode
== ROUNDING_TO_NEAREST
665 && x_sign
== y_sign
&& (C1_lo
& 0x01))
666 || (rnd_mode
== ROUNDING_TIES_AWAY
667 && x_sign
== y_sign
) || (rnd_mode
== ROUNDING_UP
668 && !x_sign
&& !y_sign
)
669 || (rnd_mode
== ROUNDING_DOWN
&& x_sign
&& y_sign
)) {
670 // add 1 ulp and then check for overflow
672 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
675 if (C1_hi
== 0x0001ed09bead87c0ull
676 && C1_lo
== 0x378d8e6400000000ull
) {
677 // C1 = 10^34 => rounding overflow
678 C1_hi
= 0x0000314dc6448d93ull
;
679 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
680 x_exp
= x_exp
+ EXP_P1
;
681 if (x_exp
== EXP_MAX_P1
) { // overflow
682 C1_hi
= 0x7800000000000000ull
; // +inf
684 x_exp
= 0; // x_sign is preserved
685 // set overflow flag (the inexact flag was set too)
686 *pfpsf
|= OVERFLOW_EXCEPTION
;
689 } else if ((rnd_mode
== ROUNDING_TO_NEAREST
690 && x_sign
!= y_sign
&& (C1_lo
& 0x01))
691 || (rnd_mode
== ROUNDING_DOWN
&& !x_sign
692 && y_sign
) || (rnd_mode
== ROUNDING_UP
693 && x_sign
&& !y_sign
)
694 || (rnd_mode
== ROUNDING_TO_ZERO
695 && x_sign
!= y_sign
)) {
696 // subtract 1 ulp from C1
697 // Note: because delta >= P34 + 1 the result cannot be zero
699 if (C1_lo
== 0xffffffffffffffffull
)
701 // if the coefficient is 10^33 - 1 then make it 10^34 - 1
702 // and decrease the exponent by 1 (because delta >= P34 + 1
703 // the exponent will not become less than e_min)
704 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
705 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
706 if (C1_hi
== 0x0000314dc6448d93ull
707 && C1_lo
== 0x38c15b09ffffffffull
) {
708 // make C1 = 10^34 - 1
709 C1_hi
= 0x0001ed09bead87c0ull
;
710 C1_lo
= 0x378d8e63ffffffffull
;
711 x_exp
= x_exp
- EXP_P1
;
714 ; // the result is already correct
716 // set the inexact flag
717 *pfpsf
|= INEXACT_EXCEPTION
;
718 // assemble the result
719 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
721 } else { // if C2_lo > halfulp64 ||
722 // (C2_lo == halfulp64 && q1 == P34 && ((C1_lo & 0x1) == 1)), i.e.
723 // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
724 // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
725 if (q1
< P34
) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
726 // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
727 // because q1 < P34 we must first replace C1 by
728 // C1 * 10^(P34-q1), and must decrease the exponent by
729 // (P34-q1) (it will still be at least e_min)
731 if (q1
<= 19) { // C1 fits in 64 bits
732 // 1 <= q1 <= 19 => 15 <= scale <= 33
733 if (scale
<= 19) { // 10^scale fits in 64 bits
734 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
735 } else { // if 20 <= scale <= 33
736 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
737 // (C1 * 10^(scale-19)) fits in 64 bits
738 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
739 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
741 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
742 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
745 // C1 = __bid_ten2k64[P34 - q1] * C1
746 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
748 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
751 // check for rounding overflow
752 if (C1_hi
== 0x0001ed09bead87c0ull
753 && C1_lo
== 0x378d8e6400000000ull
) {
754 // C1 = 10^34 => rounding overflow
755 C1_hi
= 0x0000314dc6448d93ull
;
756 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
757 x_exp
= x_exp
+ EXP_P1
;
760 if ((rnd_mode
== ROUNDING_TO_NEAREST
762 || (rnd_mode
== ROUNDING_TIES_AWAY
763 && x_sign
!= y_sign
&& C2_lo
!= halfulp64
)
764 || (rnd_mode
== ROUNDING_DOWN
&& !x_sign
765 && y_sign
) || (rnd_mode
== ROUNDING_UP
&& x_sign
766 && !y_sign
) || (rnd_mode
== ROUNDING_TO_ZERO
767 && x_sign
!= y_sign
)) {
768 // the result is x - 1
769 // for RN n1 * n2 < 0; underflow not possible
771 if (C1_lo
== 0xffffffffffffffffull
)
773 // check if we crossed into the lower decade
774 if (C1_hi
== 0x0000314dc6448d93ull
&&
775 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
776 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
777 C1_lo
= 0x378d8e63ffffffffull
;
778 x_exp
= x_exp
- EXP_P1
; // no underflow, because n1 >> n2
780 } else if ((rnd_mode
== ROUNDING_TO_NEAREST
782 || (rnd_mode
== ROUNDING_TIES_AWAY
784 || (rnd_mode
== ROUNDING_DOWN
&& x_sign
785 && y_sign
) || (rnd_mode
== ROUNDING_UP
786 && !x_sign
&& !y_sign
)) {
787 // the result is x + 1
788 // for RN x_sign = y_sign, i.e. n1*n2 > 0
790 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
793 if (C1_hi
== 0x0001ed09bead87c0ull
794 && C1_lo
== 0x378d8e6400000000ull
) {
795 // C1 = 10^34 => rounding overflow
796 C1_hi
= 0x0000314dc6448d93ull
;
797 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
798 x_exp
= x_exp
+ EXP_P1
;
799 if (x_exp
== EXP_MAX_P1
) { // overflow
800 C1_hi
= 0x7800000000000000ull
; // +inf
802 x_exp
= 0; // x_sign is preserved
803 // set the overflow flag
804 *pfpsf
|= OVERFLOW_EXCEPTION
;
810 // set the inexact flag
811 *pfpsf
|= INEXACT_EXCEPTION
;
812 // assemble the result
813 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
816 } else { // if q2 >= 20 then 5*10^(q2-1) and C2 (the latter in
817 // most cases) fit only in more than 64 bits
818 halfulp128
= __bid_midpoint128
[q2
- 20]; // 5 * 10^(q2-1)
819 if ((C2_hi
< halfulp128
.w
[1])
820 || (C2_hi
== halfulp128
.w
[1]
821 && C2_lo
< halfulp128
.w
[0])) {
823 // the result is the operand with the larger magnitude,
824 // possibly scaled up by 10^(P34-q1)
825 // an overflow cannot occur in this case (rounding to nearest)
826 if (q1
< P34
) { // scale C1 up by 10^(P34-q1)
827 // Note: because delta = P34 it is certain that
828 // x_exp - ((UINT64)scale << 49) will stay above e_min
830 if (q1
<= 19) { // C1 fits in 64 bits
831 // 1 <= q1 <= 19 => 15 <= scale <= 33
832 if (scale
<= 19) { // 10^scale fits in 64 bits
833 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
834 } else { // if 20 <= scale <= 33
835 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
836 // (C1 * 10^(scale-19)) fits in 64 bits
837 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
838 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
840 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
841 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
844 // C1 = __bid_ten2k64[P34 - q1] * C1
845 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
849 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
851 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
852 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
853 && y_sign
) || (rnd_mode
== ROUNDING_UP
854 && !x_sign
&& !y_sign
)) {
855 // add 1 ulp and then check for overflow
857 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
860 if (C1_hi
== 0x0001ed09bead87c0ull
861 && C1_lo
== 0x378d8e6400000000ull
) {
862 // C1 = 10^34 => rounding overflow
863 C1_hi
= 0x0000314dc6448d93ull
;
864 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
865 x_exp
= x_exp
+ EXP_P1
;
866 if (x_exp
== EXP_MAX_P1
) { // overflow
867 C1_hi
= 0x7800000000000000ull
; // +inf
869 x_exp
= 0; // x_sign is preserved
870 // set overflow flag (the inexact flag was set too)
871 *pfpsf
|= OVERFLOW_EXCEPTION
;
874 } else if ((rnd_mode
== ROUNDING_DOWN
&& !x_sign
875 && y_sign
) || (rnd_mode
== ROUNDING_UP
876 && x_sign
&& !y_sign
)
877 || (rnd_mode
== ROUNDING_TO_ZERO
878 && x_sign
!= y_sign
)) {
879 // subtract 1 ulp from C1
880 // Note: because delta >= P34 + 1 the result cannot be zero
882 if (C1_lo
== 0xffffffffffffffffull
)
884 // if the coefficient is 10^33-1 then make it 10^34-1 and
885 // decrease the exponent by 1 (because delta >= P34 + 1 the
886 // exponent will not become less than e_min)
887 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
888 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
889 if (C1_hi
== 0x0000314dc6448d93ull
890 && C1_lo
== 0x38c15b09ffffffffull
) {
891 // make C1 = 10^34 - 1
892 C1_hi
= 0x0001ed09bead87c0ull
;
893 C1_lo
= 0x378d8e63ffffffffull
;
894 x_exp
= x_exp
- EXP_P1
;
897 ; // the result is already correct
900 // set the inexact flag
901 *pfpsf
|= INEXACT_EXCEPTION
;
902 // assemble the result
903 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
905 } else if ((C2_hi
== halfulp128
.w
[1]
906 && C2_lo
== halfulp128
.w
[0])
907 && (q1
< P34
|| ((C1_lo
& 0x1) == 0))) {
908 // midpoint & lsb in C1 is 0
909 // n2 = 1/2 ulp (n1) and C1 is even
910 // the result is the operand with the larger magnitude,
911 // possibly scaled up by 10^(P34-q1)
912 // an overflow cannot occur in this case (rounding to nearest)
913 if (q1
< P34
) { // scale C1 up by 10^(P34-q1)
914 // Note: because delta = P34 it is certain that
915 // x_exp - ((UINT64)scale << 49) will stay above e_min
917 if (q1
<= 19) { // C1 fits in 64 bits
918 // 1 <= q1 <= 19 => 15 <= scale <= 33
919 if (scale
<= 19) { // 10^scale fits in 64 bits
920 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
921 } else { // if 20 <= scale <= 33
922 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
923 // (C1 * 10^(scale-19)) fits in 64 bits
924 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
925 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
927 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
928 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
931 // C1 = __bid_ten2k64[P34 - q1] * C1
932 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
934 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
938 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
939 if ((rnd_mode
== ROUNDING_TIES_AWAY
941 || (rnd_mode
== ROUNDING_UP
&& !y_sign
)) {
942 // add 1 ulp and then check for overflow
944 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
947 if (C1_hi
== 0x0001ed09bead87c0ull
948 && C1_lo
== 0x378d8e6400000000ull
) {
949 // C1 = 10^34 => rounding overflow
950 C1_hi
= 0x0000314dc6448d93ull
;
951 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
952 x_exp
= x_exp
+ EXP_P1
;
953 if (x_exp
== EXP_MAX_P1
) { // overflow
954 C1_hi
= 0x7800000000000000ull
; // +inf
956 x_exp
= 0; // x_sign is preserved
957 // set overflow flag (the inexact flag was set too)
958 *pfpsf
|= OVERFLOW_EXCEPTION
;
961 } else if ((rnd_mode
== ROUNDING_DOWN
&& y_sign
)
962 || (rnd_mode
== ROUNDING_TO_ZERO
963 && x_sign
!= y_sign
)) {
964 // subtract 1 ulp from C1
965 // Note: because delta >= P34 + 1 the result cannot be zero
967 if (C1_lo
== 0xffffffffffffffffull
)
969 // if the coefficient is 10^33 - 1 then make it 10^34 - 1
970 // and decrease the exponent by 1 (because delta >= P34 + 1
971 // the exponent will not become less than e_min)
972 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
973 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
974 if (C1_hi
== 0x0000314dc6448d93ull
975 && C1_lo
== 0x38c15b09ffffffffull
) {
976 // make C1 = 10^34 - 1
977 C1_hi
= 0x0001ed09bead87c0ull
;
978 C1_lo
= 0x378d8e63ffffffffull
;
979 x_exp
= x_exp
- EXP_P1
;
982 ; // the result is already correct
985 // set the inexact flag
986 *pfpsf
|= INEXACT_EXCEPTION
;
987 // assemble the result
988 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
990 } else { // if C2 > halfulp128 ||
991 // (C2 == halfulp128 && q1 == P34 && ((C1 & 0x1) == 1)), i.e.
992 // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
993 // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
994 if (q1
< P34
) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
995 // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
996 // because q1 < P34 we must first replace C1 by C1*10^(P34-q1),
997 // and must decrease the exponent by (P34-q1) (it will still be
1000 if (q1
<= 19) { // C1 fits in 64 bits
1001 // 1 <= q1 <= 19 => 15 <= scale <= 33
1002 if (scale
<= 19) { // 10^scale fits in 64 bits
1003 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[scale
], C1_lo
);
1004 } else { // if 20 <= scale <= 33
1005 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
1006 // (C1 * 10^(scale-19)) fits in 64 bits
1007 C1_lo
= C1_lo
* __bid_ten2k64
[scale
- 19];
1008 __mul_64x64_to_128MACH (C1
, __bid_ten2k64
[19], C1_lo
);
1010 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
1011 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
1014 // C1 = __bid_ten2k64[P34 - q1] * C1
1015 __mul_128x64_to_128 (C1
, __bid_ten2k64
[P34
- q1
], C1
);
1019 x_exp
= x_exp
- ((UINT64
) scale
<< 49);
1021 if ((rnd_mode
== ROUNDING_TO_NEAREST
1022 && x_sign
!= y_sign
) || (rnd_mode
== ROUNDING_TIES_AWAY
1023 && x_sign
!= y_sign
&&
1024 (C2_hi
!= halfulp128
.w
[1] || C2_lo
!= halfulp128
.w
[0])) ||
1025 (rnd_mode
== ROUNDING_DOWN
&& !x_sign
1026 && y_sign
) || (rnd_mode
== ROUNDING_UP
&& x_sign
1027 && !y_sign
) || (rnd_mode
== ROUNDING_TO_ZERO
1028 && x_sign
!= y_sign
)) {
1029 // the result is x - 1
1030 // for RN n1 * n2 < 0; underflow not possible
1032 if (C1_lo
== 0xffffffffffffffffull
)
1034 // check if we crossed into the lower decade
1035 if (C1_hi
== 0x0000314dc6448d93ull
&&
1036 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
1037 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
1038 C1_lo
= 0x378d8e63ffffffffull
;
1039 x_exp
= x_exp
- EXP_P1
; // no underflow, because n1 >> n2
1041 } else if ((rnd_mode
== ROUNDING_TO_NEAREST
&& x_sign
== y_sign
)
1042 || (rnd_mode
== ROUNDING_TIES_AWAY
&& x_sign
== y_sign
) ||
1043 (rnd_mode
== ROUNDING_DOWN
&& x_sign
&& y_sign
) ||
1044 (rnd_mode
== ROUNDING_UP
&& !x_sign
&& !y_sign
)) {
1045 // the result is x + 1
1046 // for RN x_sign = y_sign, i.e. n1*n2 > 0
1048 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
1051 if (C1_hi
== 0x0001ed09bead87c0ull
1052 && C1_lo
== 0x378d8e6400000000ull
) {
1053 // C1 = 10^34 => rounding overflow
1054 C1_hi
= 0x0000314dc6448d93ull
;
1055 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
1056 x_exp
= x_exp
+ EXP_P1
;
1057 if (x_exp
== EXP_MAX_P1
) { // overflow
1058 C1_hi
= 0x7800000000000000ull
; // +inf
1060 x_exp
= 0; // x_sign is preserved
1061 // set the overflow flag
1062 *pfpsf
|= OVERFLOW_EXCEPTION
;
1066 ; // the result is x
1068 // set the inexact flag
1069 *pfpsf
|= INEXACT_EXCEPTION
;
1070 // assemble the result
1071 res
.w
[1] = x_sign
| x_exp
| C1_hi
;
1075 // end case where C1 != 10^(q1-1)
1076 } else { // C1 = 10^(q1-1) and x_sign != y_sign
1077 // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
1078 // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
1079 // where x1 = q2 - 1, 0 <= x1 <= P34 - 1
1080 // Because C1 = 10^(q1-1) and x_sign != y_sign, C' will have P34
1081 // digits and n = C' * 10^(e2+x1)
1082 // If the result has P34+1 digits, redo the steps above with x1+1
1083 // If the result has P34-1 digits or less, redo the steps above with
1084 // x1-1 but only if initially x1 >= 1
1085 // NOTE: these two steps can be improved, e.g we could guess if
1086 // P34+1 or P34-1 digits will be obtained by adding/subtracting
1087 // just the top 64 bits of the two operands
1088 // The result cannot be zero, and it cannot overflow
1089 x1
= q2
- 1; // 0 <= x1 <= P34-1
1090 // Calculate C1 * 10^(e1-e2-x1) where 1 <= e1-e2-x1 <= P34
1091 // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
1092 scale
= P34
- q1
+ 1; // scale=e1-e2-x1 = P34+1-q1; 1<=scale<=P34
1093 // either C1 or 10^(e1-e2-x1) may not fit is 64 bits,
1094 // but their product fits with certainty in 128 bits
1095 if (scale
>= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
1096 __mul_128x64_to_128 (C1
, C1_lo
, __bid_ten2k128
[scale
- 20]);
1097 } else { // if (scale >= 1
1098 // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
1099 if (q1
<= 19) { // C1 fits in 64 bits
1100 __mul_64x64_to_128MACH (C1
, C1_lo
, __bid_ten2k64
[scale
]);
1101 } else { // q1 >= 20
1104 __mul_128x64_to_128 (C1
, __bid_ten2k64
[scale
], C1
);
1107 tmp64
= C1
.w
[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
1109 // now round C2 to q2-x1 = 1 decimal digit
1110 // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
1111 ind
= x1
- 1; // -1 <= ind <= P34 - 2
1112 if (ind
>= 0) { // if (x1 >= 1)
1116 C2
.w
[0] = C2
.w
[0] + __bid_midpoint64
[ind
];
1117 if (C2
.w
[0] < C2_lo
)
1119 } else { // 19 <= ind <= 32
1120 C2
.w
[0] = C2
.w
[0] + __bid_midpoint128
[ind
- 19].w
[0];
1121 C2
.w
[1] = C2
.w
[1] + __bid_midpoint128
[ind
- 19].w
[1];
1122 if (C2
.w
[0] < C2_lo
)
1125 // the approximation of 10^(-x1) was rounded up to 118 bits
1126 __mul_128x128_to_256 (R256
, C2
, __bid_ten2mk128
[ind
]); // R256 = C2*, f2*
1127 // calculate C2* and f2*
1128 // C2* is actually floor(C2*) in this case
1129 // C2* and f2* need shifting and masking, as shown by
1130 // __bid_shiftright128[] and __bid_maskhigh128[]
1131 // the top Ex bits of 10^(-x1) are T* = __bid_ten2mk128trunc[ind], e.g.
1132 // if x1=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999
1133 // if (0 < f2* < 10^(-x1)) then
1134 // if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
1135 // shift; C2* has p decimal digits, correct by Prop. 1)
1136 // else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
1137 // shift; C2* has p decimal digits, correct by Pr. 1)
1139 // C2* = floor(C2*) (logical right shift; C has p decimal digits,
1140 // correct by Property 1)
1141 // n = C2* * 10^(e2+x1)
1144 highf2star
.w
[1] = 0x0;
1145 highf2star
.w
[0] = 0x0; // low f2* ok
1146 } else if (ind
<= 21) {
1147 highf2star
.w
[1] = 0x0;
1148 highf2star
.w
[0] = R256
.w
[2] & __bid_maskhigh128
[ind
]; // low f2* ok
1150 highf2star
.w
[1] = R256
.w
[3] & __bid_maskhigh128
[ind
];
1151 highf2star
.w
[0] = R256
.w
[2]; // low f2* is ok
1153 // shift right C2* by Ex-128 = __bid_shiftright128[ind]
1155 shift
= __bid_shiftright128
[ind
];
1156 if (shift
< 64) { // 3 <= shift <= 63
1158 (R256
.w
[2] >> shift
) | (R256
.w
[3] << (64 - shift
));
1159 R256
.w
[3] = (R256
.w
[3] >> shift
);
1160 } else { // 66 <= shift <= 102
1161 R256
.w
[2] = (R256
.w
[3] >> (shift
- 64));
1166 is_inexact_lt_midpoint
= 0;
1167 is_inexact_gt_midpoint
= 0;
1168 is_midpoint_lt_even
= 0;
1169 is_midpoint_gt_even
= 0;
1170 // determine inexactness of the rounding of C2*
1171 // (cannot be followed by a second rounding)
1172 // if (0 < f2* - 1/2 < 10^(-x1)) then
1173 // the result is exact
1174 // else (if f2* - 1/2 > T* then)
1175 // the result of is inexact
1177 if (R256
.w
[1] > 0x8000000000000000ull
||
1178 (R256
.w
[1] == 0x8000000000000000ull
&& R256
.w
[0] > 0x0ull
)) {
1179 // f2* > 1/2 and the result may be exact
1180 tmp64A
= R256
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
1181 if ((tmp64A
> __bid_ten2mk128trunc
[ind
].w
[1]
1182 || (tmp64A
== __bid_ten2mk128trunc
[ind
].w
[1]
1183 && R256
.w
[0] >= __bid_ten2mk128trunc
[ind
].w
[0]))) {
1184 // set the inexact flag
1185 *pfpsf
|= INEXACT_EXCEPTION
;
1186 // this rounding is applied to C2 only!
1188 is_inexact_gt_midpoint
= 1;
1189 } // else the result is exact
1190 // rounding down, unless a midpoint in [ODD, EVEN]
1191 } else { // the result is inexact; f2* <= 1/2
1192 // set the inexact flag
1193 *pfpsf
|= INEXACT_EXCEPTION
;
1194 // this rounding is applied to C2 only!
1196 is_inexact_lt_midpoint
= 1;
1198 } else if (ind
<= 21) { // if 3 <= ind <= 21
1199 if (highf2star
.w
[1] > 0x0 || (highf2star
.w
[1] == 0x0
1200 && highf2star
.w
[0] > __bid_one_half128
[ind
])
1201 || (highf2star
.w
[1] == 0x0
1202 && highf2star
.w
[0] == __bid_one_half128
[ind
]
1203 && (R256
.w
[1] || R256
.w
[0]))) {
1204 // f2* > 1/2 and the result may be exact
1205 // Calculate f2* - 1/2
1206 tmp64A
= highf2star
.w
[0] - __bid_one_half128
[ind
];
1207 tmp64B
= highf2star
.w
[1];
1208 if (tmp64A
> highf2star
.w
[0])
1210 if (tmp64B
|| tmp64A
1211 || R256
.w
[1] > __bid_ten2mk128trunc
[ind
].w
[1]
1212 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1213 && R256
.w
[0] > __bid_ten2mk128trunc
[ind
].w
[0])) {
1214 // set the inexact flag
1215 *pfpsf
|= INEXACT_EXCEPTION
;
1216 // this rounding is applied to C2 only!
1218 is_inexact_gt_midpoint
= 1;
1219 } // else the result is exact
1220 } else { // the result is inexact; f2* <= 1/2
1221 // set the inexact flag
1222 *pfpsf
|= INEXACT_EXCEPTION
;
1223 // this rounding is applied to C2 only!
1225 is_inexact_lt_midpoint
= 1;
1227 } else { // if 22 <= ind <= 33
1228 if (highf2star
.w
[1] > __bid_one_half128
[ind
]
1229 || (highf2star
.w
[1] == __bid_one_half128
[ind
]
1230 && (highf2star
.w
[0] || R256
.w
[1]
1232 // f2* > 1/2 and the result may be exact
1233 // Calculate f2* - 1/2
1234 // tmp64A = highf2star.w[0];
1235 tmp64B
= highf2star
.w
[1] - __bid_one_half128
[ind
];
1236 if (tmp64B
|| highf2star
.w
[0]
1237 || R256
.w
[1] > __bid_ten2mk128trunc
[ind
].w
[1]
1238 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1239 && R256
.w
[0] > __bid_ten2mk128trunc
[ind
].w
[0])) {
1240 // set the inexact flag
1241 *pfpsf
|= INEXACT_EXCEPTION
;
1242 // this rounding is applied to C2 only!
1244 is_inexact_gt_midpoint
= 1;
1245 } // else the result is exact
1246 } else { // the result is inexact; f2* <= 1/2
1247 // set the inexact flag
1248 *pfpsf
|= INEXACT_EXCEPTION
;
1249 // this rounding is applied to C2 only!
1251 is_inexact_lt_midpoint
= 1;
1254 // check for midpoints after determining inexactness
1255 if ((R256
.w
[1] || R256
.w
[0]) && (highf2star
.w
[1] == 0)
1256 && (highf2star
.w
[0] == 0)
1257 && (R256
.w
[1] < __bid_ten2mk128trunc
[ind
].w
[1]
1258 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1259 && R256
.w
[0] <= __bid_ten2mk128trunc
[ind
].w
[0]))) {
1260 // the result is a midpoint
1261 if ((tmp64
+ R256
.w
[2]) & 0x01) { // MP in [EVEN, ODD]
1262 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
1264 if (R256
.w
[2] == 0xffffffffffffffffull
)
1266 // this rounding is applied to C2 only!
1268 is_midpoint_lt_even
= 1;
1269 is_inexact_lt_midpoint
= 0;
1270 is_inexact_gt_midpoint
= 0;
1272 // else MP in [ODD, EVEN]
1273 // this rounding is applied to C2 only!
1275 is_midpoint_gt_even
= 1;
1276 is_inexact_lt_midpoint
= 0;
1277 is_inexact_gt_midpoint
= 0;
1280 } else { // if (ind == -1) only when x1 = 0
1283 is_midpoint_lt_even
= 0;
1284 is_midpoint_gt_even
= 0;
1285 is_inexact_lt_midpoint
= 0;
1286 is_inexact_gt_midpoint
= 0;
1288 // and now subtract C1 * 10^(e1-e2-x1) - (C2 * 10^(-x1))rnd,P34
1289 // because x_sign != y_sign this last operation is exact
1290 C1
.w
[0] = C1
.w
[0] - R256
.w
[2];
1291 C1
.w
[1] = C1
.w
[1] - R256
.w
[3];
1292 if (C1
.w
[0] > tmp64
)
1293 C1
.w
[1]--; // borrow
1294 if (C1
.w
[1] >= 0x8000000000000000ull
) { // negative coefficient!
1300 tmp_sign
= y_sign
; // the result will have the sign of y
1304 // the difference has exactly P34 digits
1307 y_exp
= y_exp
+ ((UINT64
) x1
<< 49);
1310 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
1311 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1313 ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
1314 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
) &&
1315 is_midpoint_gt_even
))) ||
1317 ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
1318 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_DOWN
)
1319 && is_midpoint_gt_even
)))) {
1322 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
1325 if (C1_hi
== 0x0001ed09bead87c0ull
1326 && C1_lo
== 0x378d8e6400000000ull
) {
1327 // C1 = 10^34 => rounding overflow
1328 C1_hi
= 0x0000314dc6448d93ull
;
1329 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
1330 y_exp
= y_exp
+ EXP_P1
;
1332 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
1334 (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TO_ZERO
)) ||
1336 (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TO_ZERO
)))) {
1339 if (C1_lo
== 0xffffffffffffffffull
)
1341 // check if we crossed into the lower decade
1342 if (C1_hi
== 0x0000314dc6448d93ull
&&
1343 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
1344 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
1345 C1_lo
= 0x378d8e63ffffffffull
;
1346 y_exp
= y_exp
- EXP_P1
;
1347 // no underflow, because delta + q2 >= P34 + 1
1350 ; // exact, the result is already correct
1353 // assemble the result
1354 res
.w
[1] = x_sign
| y_exp
| C1_hi
;
1357 } // end delta = P34
1358 } else { // if (|delta| <= P34 - 1)
1359 if (delta
>= 0) { // if (0 <= delta <= P34 - 1)
1360 if (delta
<= P34
- 1 - q2
) {
1361 // calculate C' directly; the result is exact
1362 // in this case 1<=q1<=P34-1, 1<=q2<=P34-1 and 0 <= e1-e2 <= P34-2
1363 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
1364 // exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
1365 // but their product fits with certainty in 128 bits (actually in 113)
1366 scale
= delta
- q1
+ q2
; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
1368 if (scale
>= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
1369 __mul_128x64_to_128 (C1
, C1_lo
, __bid_ten2k128
[scale
- 20]);
1372 } else if (scale
>= 1) {
1373 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
1374 if (q1
<= 19) { // C1 fits in 64 bits
1375 __mul_64x64_to_128MACH (C1
, C1_lo
, __bid_ten2k64
[scale
]);
1376 } else { // q1 >= 20
1379 __mul_128x64_to_128 (C1
, __bid_ten2k64
[scale
], C1
);
1383 } else { // if (scale == 0) C1 is unchanged
1384 C1
.w
[0] = C1_lo
; // C1.w[1] = C1_hi;
1387 if (x_sign
== y_sign
) {
1388 // the result cannot overflow
1389 C1_lo
= C1_lo
+ C2_lo
;
1390 C1_hi
= C1_hi
+ C2_hi
;
1391 if (C1_lo
< C1
.w
[0])
1393 } else { // if x_sign != y_sign
1394 C1_lo
= C1_lo
- C2_lo
;
1395 C1_hi
= C1_hi
- C2_hi
;
1396 if (C1_lo
> C1
.w
[0])
1398 // the result can be zero, but it cannot overflow
1399 if (C1_lo
== 0 && C1_hi
== 0) {
1400 // assemble the result
1406 if (rnd_mode
== ROUNDING_DOWN
) {
1407 res
.w
[1] |= 0x8000000000000000ull
;
1411 if (C1_hi
>= 0x8000000000000000ull
) { // negative coefficient!
1417 x_sign
= y_sign
; // the result will have the sign of y
1420 // assemble the result
1421 res
.w
[1] = x_sign
| y_exp
| C1_hi
;
1423 } else if (delta
== P34
- q2
) {
1424 // calculate C' directly; the result may be inexact if it requires
1425 // P34+1 decimal digits; in this case the 'cutoff' point for addition
1426 // is at the position of the lsb of C2, so 0 <= e1-e2 <= P34-1
1427 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
1428 // exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
1429 // but their product fits with certainty in 128 bits (actually in 113)
1430 scale
= delta
- q1
+ q2
; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
1431 if (scale
>= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
1432 __mul_128x64_to_128 (C1
, C1_lo
, __bid_ten2k128
[scale
- 20]);
1433 } else if (scale
>= 1) {
1434 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
1435 if (q1
<= 19) { // C1 fits in 64 bits
1436 __mul_64x64_to_128MACH (C1
, C1_lo
, __bid_ten2k64
[scale
]);
1437 } else { // q1 >= 20
1440 __mul_128x64_to_128 (C1
, __bid_ten2k64
[scale
], C1
);
1442 } else { // if (scale == 0) C1 is unchanged
1444 C1
.w
[0] = C1_lo
; // only the low part is necessary
1449 if (x_sign
== y_sign
) {
1450 // the result can overflow!
1451 C1_lo
= C1_lo
+ C2_lo
;
1452 C1_hi
= C1_hi
+ C2_hi
;
1453 if (C1_lo
< C1
.w
[0])
1455 // test for overflow, possible only when C1 >= 10^34
1456 if (C1_hi
> 0x0001ed09bead87c0ull
||
1457 (C1_hi
== 0x0001ed09bead87c0ull
&&
1458 C1_lo
>= 0x378d8e6400000000ull
)) { // C1 >= 10^34
1459 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply
1460 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
1462 // Calculate C'' = C' + 1/2 * 10^x
1463 if (C1_lo
>= 0xfffffffffffffffbull
) { // low half add has carry
1469 // the approximation of 10^(-1) was rounded up to 118 bits
1470 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
1471 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
1473 C1
.w
[0] = C1_lo
; // C''
1474 __bid_ten2m1
.w
[1] = 0x1999999999999999ull
;
1475 __bid_ten2m1
.w
[0] = 0x9999999999999a00ull
;
1476 __mul_128x128_to_256 (P256
, C1
, __bid_ten2m1
); // P256 = C*, f*
1477 // C* is actually floor(C*) in this case
1478 // the top Ex = 128 bits of 10^(-1) are
1479 // T* = 0x00199999999999999999999999999999
1480 // if (0 < f* < 10^(-x)) then
1481 // if floor(C*) is even then C = floor(C*) - logical right
1482 // shift; C has p decimal digits, correct by Prop. 1)
1483 // else if floor(C*) is odd C = floor(C*) - 1 (logical right
1484 // shift; C has p decimal digits, correct by Pr. 1)
1486 // C = floor(C*) (logical right shift; C has p decimal digits,
1487 // correct by Property 1)
1488 // n = C * 10^(e2+x)
1489 if ((P256
.w
[1] || P256
.w
[0])
1490 && (P256
.w
[1] < 0x1999999999999999ull
1491 || (P256
.w
[1] == 0x1999999999999999ull
1492 && P256
.w
[0] <= 0x9999999999999999ull
))) {
1493 // the result is a midpoint
1494 if (P256
.w
[2] & 0x01) {
1495 is_midpoint_gt_even
= 1;
1496 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0
1498 if (P256
.w
[2] == 0xffffffffffffffffull
)
1501 is_midpoint_lt_even
= 1;
1504 // n = Cstar * 10^(e2+1)
1505 y_exp
= y_exp
+ EXP_P1
;
1506 // C* != 10^P because C* has P34 digits
1507 // check for overflow
1508 if (y_exp
== EXP_MAX_P1
1509 && (rnd_mode
== ROUNDING_TO_NEAREST
1510 || rnd_mode
== ROUNDING_TIES_AWAY
)) {
1512 res
.w
[1] = x_sign
| 0x7800000000000000ull
; // +/-inf
1514 // set the inexact flag
1515 *pfpsf
|= INEXACT_EXCEPTION
;
1516 // set the overflow flag
1517 *pfpsf
|= OVERFLOW_EXCEPTION
;
1520 // if (0 < f* - 1/2 < 10^(-x)) then
1521 // the result of the addition is exact
1523 // the result of the addition is inexact
1524 if (P256
.w
[1] > 0x8000000000000000ull
||
1525 (P256
.w
[1] == 0x8000000000000000ull
&&
1526 P256
.w
[0] > 0x0ull
)) { // the result may be exact
1527 tmp64
= P256
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
1528 if ((tmp64
> 0x1999999999999999ull
1529 || (tmp64
== 0x1999999999999999ull
1530 && P256
.w
[0] >= 0x9999999999999999ull
))) {
1531 // set the inexact flag
1532 *pfpsf
|= INEXACT_EXCEPTION
;
1534 } // else the result is exact
1535 } else { // the result is inexact
1536 // set the inexact flag
1537 *pfpsf
|= INEXACT_EXCEPTION
;
1542 if (!is_midpoint_gt_even
&& !is_midpoint_lt_even
) {
1543 is_inexact_lt_midpoint
= is_inexact
1544 && (P256
.w
[1] & 0x8000000000000000ull
);
1545 is_inexact_gt_midpoint
= is_inexact
1546 && !(P256
.w
[1] & 0x8000000000000000ull
);
1548 // general correction from RN to RA, RM, RP, RZ;
1549 // result uses y_exp
1550 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1552 ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
1553 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
)
1554 && is_midpoint_gt_even
))) ||
1556 ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
1557 ((rnd_mode
== ROUNDING_TIES_AWAY
||
1558 rnd_mode
== ROUNDING_DOWN
) && is_midpoint_gt_even
)))) {
1561 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
1564 if (C1_hi
== 0x0001ed09bead87c0ull
1565 && C1_lo
== 0x378d8e6400000000ull
) {
1566 // C1 = 10^34 => rounding overflow
1567 C1_hi
= 0x0000314dc6448d93ull
;
1568 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
1569 y_exp
= y_exp
+ EXP_P1
;
1571 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
1572 ((x_sign
&& (rnd_mode
== ROUNDING_UP
||
1573 rnd_mode
== ROUNDING_TO_ZERO
)) || (!x_sign
&&
1574 (rnd_mode
== ROUNDING_DOWN
||
1575 rnd_mode
== ROUNDING_TO_ZERO
)))) {
1578 if (C1_lo
== 0xffffffffffffffffull
)
1580 // check if we crossed into the lower decade
1581 if (C1_hi
== 0x0000314dc6448d93ull
&&
1582 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
1583 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
1584 C1_lo
= 0x378d8e63ffffffffull
;
1585 y_exp
= y_exp
- EXP_P1
;
1586 // no underflow, because delta + q2 >= P34 + 1
1589 ; // exact, the result is already correct
1591 // in all cases check for overflow (RN and RA solved already)
1592 if (y_exp
== EXP_MAX_P1
) { // overflow
1593 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
) || // RM and res < 0
1594 (rnd_mode
== ROUNDING_UP
&& !x_sign
)) { // RP and res > 0
1595 C1_hi
= 0x7800000000000000ull
; // +inf
1597 } else { // RM and res > 0, RP and res < 0, or RZ
1598 C1_hi
= 0x5fffed09bead87c0ull
;
1599 C1_lo
= 0x378d8e63ffffffffull
;
1601 y_exp
= 0; // x_sign is preserved
1602 // set the inexact flag (in case the exact addition was exact)
1603 *pfpsf
|= INEXACT_EXCEPTION
;
1604 // set the overflow flag
1605 *pfpsf
|= OVERFLOW_EXCEPTION
;
1608 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
1609 } else { // if x_sign != y_sign the result is exact
1610 C1_lo
= C1_lo
- C2_lo
;
1611 C1_hi
= C1_hi
- C2_hi
;
1612 if (C1_lo
> C1
.w
[0])
1614 // the result can be zero, but it cannot overflow
1615 if (C1_lo
== 0 && C1_hi
== 0) {
1616 // assemble the result
1622 if (rnd_mode
== ROUNDING_DOWN
) {
1623 res
.w
[1] |= 0x8000000000000000ull
;
1627 if (C1_hi
>= 0x8000000000000000ull
) { // negative coefficient!
1633 x_sign
= y_sign
; // the result will have the sign of y
1636 // assemble the result
1637 res
.w
[1] = x_sign
| y_exp
| C1_hi
;
1639 } else { // if (delta >= P34 + 1 - q2)
1640 // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
1641 // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
1642 // where x1 = q1 + e1 - e2 - P34, 1 <= x1 <= P34 - 1
1643 // In most cases C' will have P34 digits, and n = C' * 10^(e2+x1)
1644 // If the result has P34+1 digits, redo the steps above with x1+1
1645 // If the result has P34-1 digits or less, redo the steps above with
1646 // x1-1 but only if initially x1 >= 1
1647 // NOTE: these two steps can be improved, e.g we could guess if
1648 // P34+1 or P34-1 digits will be obtained by adding/subtracting just
1649 // the top 64 bits of the two operands
1650 // The result cannot be zero, but it can overflow
1651 x1
= delta
+ q2
- P34
; // 1 <= x1 <= P34-1
1653 // Calculate C1 * 10^(e1-e2-x1) where 0 <= e1-e2-x1 <= P34 - 1
1654 // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
1655 scale
= delta
- q1
+ q2
- x1
; // scale = e1 - e2 - x1 = P34 - q1
1656 // either C1 or 10^(e1-e2-x1) may not fit is 64 bits,
1657 // but their product fits with certainty in 128 bits (actually in 113)
1658 if (scale
>= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
1659 __mul_128x64_to_128 (C1
, C1_lo
, __bid_ten2k128
[scale
- 20]);
1660 } else if (scale
>= 1) {
1661 // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
1662 if (q1
<= 19) { // C1 fits in 64 bits
1663 __mul_64x64_to_128MACH (C1
, C1_lo
, __bid_ten2k64
[scale
]);
1664 } else { // q1 >= 20
1667 __mul_128x64_to_128 (C1
, __bid_ten2k64
[scale
], C1
);
1669 } else { // if (scale == 0) C1 is unchanged
1673 tmp64
= C1
.w
[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
1675 // now round C2 to q2-x1 decimal digits, where 1<=x1<=q2-1<=P34-1
1676 // (but if we got here a second time after x1 = x1 - 1, then
1677 // x1 >= 0; note that for x1 = 0 C2 is unchanged)
1678 // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
1679 ind
= x1
- 1; // 0 <= ind <= q2-2<=P34-2=32; but note that if x1 = 0
1680 // during a second pass, then ind = -1
1681 if (ind
>= 0) { // if (x1 >= 1)
1685 C2
.w
[0] = C2
.w
[0] + __bid_midpoint64
[ind
];
1686 if (C2
.w
[0] < C2_lo
)
1688 } else { // 19 <= ind <= 32
1689 C2
.w
[0] = C2
.w
[0] + __bid_midpoint128
[ind
- 19].w
[0];
1690 C2
.w
[1] = C2
.w
[1] + __bid_midpoint128
[ind
- 19].w
[1];
1691 if (C2
.w
[0] < C2_lo
)
1694 // the approximation of 10^(-x1) was rounded up to 118 bits
1695 __mul_128x128_to_256 (R256
, C2
, __bid_ten2mk128
[ind
]); // R256 = C2*, f2*
1696 // calculate C2* and f2*
1697 // C2* is actually floor(C2*) in this case
1698 // C2* and f2* need shifting and masking, as shown by
1699 // __bid_shiftright128[] and __bid_maskhigh128[]
1700 // the top Ex bits of 10^(-x1) are T* = __bid_ten2mk128trunc[ind], e.g.
1701 // if x1=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999
1702 // if (0 < f2* < 10^(-x1)) then
1703 // if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
1704 // shift; C2* has p decimal digits, correct by Prop. 1)
1705 // else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
1706 // shift; C2* has p decimal digits, correct by Pr. 1)
1708 // C2* = floor(C2*) (logical right shift; C has p decimal digits,
1709 // correct by Property 1)
1710 // n = C2* * 10^(e2+x1)
1713 highf2star
.w
[1] = 0x0;
1714 highf2star
.w
[0] = 0x0; // low f2* ok
1715 } else if (ind
<= 21) {
1716 highf2star
.w
[1] = 0x0;
1717 highf2star
.w
[0] = R256
.w
[2] & __bid_maskhigh128
[ind
]; // low f2* ok
1719 highf2star
.w
[1] = R256
.w
[3] & __bid_maskhigh128
[ind
];
1720 highf2star
.w
[0] = R256
.w
[2]; // low f2* is ok
1722 // shift right C2* by Ex-128 = __bid_shiftright128[ind]
1724 shift
= __bid_shiftright128
[ind
];
1725 if (shift
< 64) { // 3 <= shift <= 63
1727 (R256
.w
[2] >> shift
) | (R256
.w
[3] << (64 - shift
));
1728 R256
.w
[3] = (R256
.w
[3] >> shift
);
1729 } else { // 66 <= shift <= 102
1730 R256
.w
[2] = (R256
.w
[3] >> (shift
- 64));
1735 is_inexact_lt_midpoint
= 0;
1736 is_inexact_gt_midpoint
= 0;
1737 is_midpoint_lt_even
= 0;
1738 is_midpoint_gt_even
= 0;
1740 // determine inexactness of the rounding of C2* (this may be
1741 // followed by a second rounding only if we get P34+1
1743 // if (0 < f2* - 1/2 < 10^(-x1)) then
1744 // the result is exact
1745 // else (if f2* - 1/2 > T* then)
1746 // the result of is inexact
1748 if (R256
.w
[1] > 0x8000000000000000ull
||
1749 (R256
.w
[1] == 0x8000000000000000ull
&& R256
.w
[0] > 0x0ull
)) {
1750 // f2* > 1/2 and the result may be exact
1751 tmp64A
= R256
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
1752 if ((tmp64A
> __bid_ten2mk128trunc
[ind
].w
[1]
1753 || (tmp64A
== __bid_ten2mk128trunc
[ind
].w
[1]
1754 && R256
.w
[0] >= __bid_ten2mk128trunc
[ind
].w
[0]))) {
1755 // set the inexact flag
1756 // *pfpsf |= INEXACT_EXCEPTION;
1757 tmp_inexact
= 1; // may be set again during a second pass
1758 // this rounding is applied to C2 only!
1759 if (x_sign
== y_sign
)
1760 is_inexact_lt_midpoint
= 1;
1761 else // if (x_sign != y_sign)
1762 is_inexact_gt_midpoint
= 1;
1763 } // else the result is exact
1764 // rounding down, unless a midpoint in [ODD, EVEN]
1765 } else { // the result is inexact; f2* <= 1/2
1766 // set the inexact flag
1767 // *pfpsf |= INEXACT_EXCEPTION;
1768 tmp_inexact
= 1; // just in case we will round a second time
1769 // rounding up, unless a midpoint in [EVEN, ODD]
1770 // this rounding is applied to C2 only!
1771 if (x_sign
== y_sign
)
1772 is_inexact_gt_midpoint
= 1;
1773 else // if (x_sign != y_sign)
1774 is_inexact_lt_midpoint
= 1;
1776 } else if (ind
<= 21) { // if 3 <= ind <= 21
1777 if (highf2star
.w
[1] > 0x0 || (highf2star
.w
[1] == 0x0
1778 && highf2star
.w
[0] > __bid_one_half128
[ind
])
1779 || ((highf2star
.w
[1] == 0x0
1780 && highf2star
.w
[0] == __bid_one_half128
[ind
])
1781 && (R256
.w
[1] || R256
.w
[0]))) {
1782 // f2* > 1/2 and the result may be exact
1783 // Calculate f2* - 1/2
1784 tmp64A
= highf2star
.w
[0] - __bid_one_half128
[ind
];
1785 tmp64B
= highf2star
.w
[1];
1786 if (tmp64A
> highf2star
.w
[0])
1788 if (tmp64B
|| tmp64A
1789 || R256
.w
[1] > __bid_ten2mk128trunc
[ind
].w
[1]
1790 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1791 && R256
.w
[0] > __bid_ten2mk128trunc
[ind
].w
[0])) {
1792 // set the inexact flag
1793 // *pfpsf |= INEXACT_EXCEPTION;
1794 tmp_inexact
= 1; // may be set again during a second pass
1795 // this rounding is applied to C2 only!
1796 if (x_sign
== y_sign
)
1797 is_inexact_lt_midpoint
= 1;
1798 else // if (x_sign != y_sign)
1799 is_inexact_gt_midpoint
= 1;
1800 } // else the result is exact
1801 } else { // the result is inexact; f2* <= 1/2
1802 // set the inexact flag
1803 // *pfpsf |= INEXACT_EXCEPTION;
1804 tmp_inexact
= 1; // may be set again during a second pass
1805 // rounding up, unless a midpoint in [EVEN, ODD]
1806 // this rounding is applied to C2 only!
1807 if (x_sign
== y_sign
)
1808 is_inexact_gt_midpoint
= 1;
1809 else // if (x_sign != y_sign)
1810 is_inexact_lt_midpoint
= 1;
1812 } else { // if 22 <= ind <= 33
1813 if (highf2star
.w
[1] > __bid_one_half128
[ind
]
1814 || (highf2star
.w
[1] == __bid_one_half128
[ind
]
1815 && (highf2star
.w
[0] || R256
.w
[1]
1817 // f2* > 1/2 and the result may be exact
1818 // Calculate f2* - 1/2
1819 // tmp64A = highf2star.w[0];
1820 tmp64B
= highf2star
.w
[1] - __bid_one_half128
[ind
];
1821 if (tmp64B
|| highf2star
.w
[0]
1822 || R256
.w
[1] > __bid_ten2mk128trunc
[ind
].w
[1]
1823 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1824 && R256
.w
[0] > __bid_ten2mk128trunc
[ind
].w
[0])) {
1825 // set the inexact flag
1826 // *pfpsf |= INEXACT_EXCEPTION;
1827 tmp_inexact
= 1; // may be set again during a second pass
1828 // this rounding is applied to C2 only!
1829 if (x_sign
== y_sign
)
1830 is_inexact_lt_midpoint
= 1;
1831 else // if (x_sign != y_sign)
1832 is_inexact_gt_midpoint
= 1;
1833 } // else the result is exact
1834 } else { // the result is inexact; f2* <= 1/2
1835 // set the inexact flag
1836 // *pfpsf |= INEXACT_EXCEPTION;
1837 tmp_inexact
= 1; // may be set again during a second pass
1838 // rounding up, unless a midpoint in [EVEN, ODD]
1839 // this rounding is applied to C2 only!
1840 if (x_sign
== y_sign
)
1841 is_inexact_gt_midpoint
= 1;
1842 else // if (x_sign != y_sign)
1843 is_inexact_lt_midpoint
= 1;
1846 // check for midpoints
1847 if ((R256
.w
[1] || R256
.w
[0]) && (highf2star
.w
[1] == 0)
1848 && (highf2star
.w
[0] == 0)
1849 && (R256
.w
[1] < __bid_ten2mk128trunc
[ind
].w
[1]
1850 || (R256
.w
[1] == __bid_ten2mk128trunc
[ind
].w
[1]
1851 && R256
.w
[0] <= __bid_ten2mk128trunc
[ind
].w
[0]))) {
1852 // the result is a midpoint
1853 if ((tmp64
+ R256
.w
[2]) & 0x01) { // MP in [EVEN, ODD]
1854 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
1856 if (R256
.w
[2] == 0xffffffffffffffffull
)
1858 // this rounding is applied to C2 only!
1859 if (x_sign
== y_sign
)
1860 is_midpoint_gt_even
= 1;
1861 else // if (x_sign != y_sign)
1862 is_midpoint_lt_even
= 1;
1863 is_inexact_lt_midpoint
= 0;
1864 is_inexact_gt_midpoint
= 0;
1866 // else MP in [ODD, EVEN]
1867 // this rounding is applied to C2 only!
1868 if (x_sign
== y_sign
)
1869 is_midpoint_lt_even
= 1;
1870 else // if (x_sign != y_sign)
1871 is_midpoint_gt_even
= 1;
1872 is_inexact_lt_midpoint
= 0;
1873 is_inexact_gt_midpoint
= 0;
1876 // end if (ind >= 0)
1877 } else { // if (ind == -1); only during a 2nd pass, and when x1 = 0
1881 // to correct a possible setting to 1 from 1st pass
1883 is_midpoint_lt_even
= 0;
1884 is_midpoint_gt_even
= 0;
1885 is_inexact_lt_midpoint
= 0;
1886 is_inexact_gt_midpoint
= 0;
1889 // and now add/subtract C1 * 10^(e1-e2-x1) +/- (C2 * 10^(-x1))rnd,P34
1890 if (x_sign
== y_sign
) { // addition; could overflow
1891 // no second pass is possible this way (only for x_sign != y_sign)
1892 C1
.w
[0] = C1
.w
[0] + R256
.w
[2];
1893 C1
.w
[1] = C1
.w
[1] + R256
.w
[3];
1894 if (C1
.w
[0] < tmp64
)
1896 // if the sum has P34+1 digits, i.e. C1>=10^34 redo the calculation
1898 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
1899 (C1
.w
[1] == 0x0001ed09bead87c0ull
&&
1900 C1
.w
[0] >= 0x378d8e6400000000ull
)) { // C1 >= 10^34
1901 // chop off one more digit from the sum, but make sure there is
1902 // no double-rounding error (see table - double rounding logic)
1903 // now round C1 from P34+1 to P34 decimal digits
1904 // C1' = C1 + 1/2 * 10 = C1 + 5
1905 if (C1
.w
[0] >= 0xfffffffffffffffbull
) { // low half add has carry
1906 C1
.w
[0] = C1
.w
[0] + 5;
1907 C1
.w
[1] = C1
.w
[1] + 1;
1909 C1
.w
[0] = C1
.w
[0] + 5;
1911 // the approximation of 10^(-1) was rounded up to 118 bits
1912 __mul_128x128_to_256 (Q256
, C1
, __bid_ten2mk128
[0]); // Q256 = C1*, f1*
1913 // C1* is actually floor(C1*) in this case
1914 // the top 128 bits of 10^(-1) are
1915 // T* = __bid_ten2mk128trunc[0]=0x19999999999999999999999999999999
1916 // if (0 < f1* < 10^(-1)) then
1917 // if floor(C1*) is even then C1* = floor(C1*) - logical right
1918 // shift; C1* has p decimal digits, correct by Prop. 1)
1919 // else if floor(C1*) is odd C1* = floor(C1*) - 1 (logical right
1920 // shift; C1* has p decimal digits, correct by Pr. 1)
1922 // C1* = floor(C1*) (logical right shift; C has p decimal digits
1923 // correct by Property 1)
1924 // n = C1* * 10^(e2+x1+1)
1925 if ((Q256
.w
[1] || Q256
.w
[0])
1926 && (Q256
.w
[1] < __bid_ten2mk128trunc
[0].w
[1]
1927 || (Q256
.w
[1] == __bid_ten2mk128trunc
[0].w
[1]
1928 && Q256
.w
[0] <= __bid_ten2mk128trunc
[0].w
[0]))) {
1929 // the result is a midpoint
1930 if (is_inexact_lt_midpoint
) { // for the 1st rounding
1931 is_inexact_gt_midpoint
= 1;
1932 is_inexact_lt_midpoint
= 0;
1933 is_midpoint_gt_even
= 0;
1934 is_midpoint_lt_even
= 0;
1935 } else if (is_inexact_gt_midpoint
) { // for the 1st rounding
1937 if (Q256
.w
[2] == 0xffffffffffffffffull
)
1939 is_inexact_gt_midpoint
= 0;
1940 is_inexact_lt_midpoint
= 1;
1941 is_midpoint_gt_even
= 0;
1942 is_midpoint_lt_even
= 0;
1943 } else if (is_midpoint_gt_even
) { // for the 1st rounding
1944 // Note: cannot have is_midpoint_lt_even
1945 is_inexact_gt_midpoint
= 0;
1946 is_inexact_lt_midpoint
= 1;
1947 is_midpoint_gt_even
= 0;
1948 is_midpoint_lt_even
= 0;
1949 } else { // the first rounding must have been exact
1950 if (Q256
.w
[2] & 0x01) { // MP in [EVEN, ODD]
1951 // the truncated result is correct
1953 if (Q256
.w
[2] == 0xffffffffffffffffull
)
1955 is_inexact_gt_midpoint
= 0;
1956 is_inexact_lt_midpoint
= 0;
1957 is_midpoint_gt_even
= 1;
1958 is_midpoint_lt_even
= 0;
1959 } else { // MP in [ODD, EVEN]
1960 is_inexact_gt_midpoint
= 0;
1961 is_inexact_lt_midpoint
= 0;
1962 is_midpoint_gt_even
= 0;
1963 is_midpoint_lt_even
= 1;
1966 tmp_inexact
= 1; // in all cases
1967 } else { // the result is not a midpoint
1968 // determine inexactness of the rounding of C1 (the sum C1+C2*)
1969 // if (0 < f1* - 1/2 < 10^(-1)) then
1970 // the result is exact
1971 // else (if f1* - 1/2 > T* then)
1972 // the result of is inexact
1974 if (Q256
.w
[1] > 0x8000000000000000ull
1975 || (Q256
.w
[1] == 0x8000000000000000ull
1976 && Q256
.w
[0] > 0x0ull
)) {
1977 // f1* > 1/2 and the result may be exact
1978 Q256
.w
[1] = Q256
.w
[1] - 0x8000000000000000ull
; // f1* - 1/2
1979 if ((Q256
.w
[1] > __bid_ten2mk128trunc
[0].w
[1]
1980 || (Q256
.w
[1] == __bid_ten2mk128trunc
[0].w
[1]
1981 && Q256
.w
[0] > __bid_ten2mk128trunc
[0].w
[0]))) {
1982 is_inexact_gt_midpoint
= 0;
1983 is_inexact_lt_midpoint
= 1;
1984 is_midpoint_gt_even
= 0;
1985 is_midpoint_lt_even
= 0;
1986 // set the inexact flag
1988 // *pfpsf |= INEXACT_EXCEPTION;
1989 } else { // else the result is exact for the 2nd rounding
1990 if (tmp_inexact
) { // if the previous rounding was inexact
1991 if (is_midpoint_lt_even
) {
1992 is_inexact_gt_midpoint
= 1;
1993 is_midpoint_lt_even
= 0;
1994 } else if (is_midpoint_gt_even
) {
1995 is_inexact_lt_midpoint
= 1;
1996 is_midpoint_gt_even
= 0;
2002 // rounding down, unless a midpoint in [ODD, EVEN]
2003 } else { // the result is inexact; f1* <= 1/2
2004 is_inexact_gt_midpoint
= 1;
2005 is_inexact_lt_midpoint
= 0;
2006 is_midpoint_gt_even
= 0;
2007 is_midpoint_lt_even
= 0;
2008 // set the inexact flag
2010 // *pfpsf |= INEXACT_EXCEPTION;
2012 } // end 'the result is not a midpoint'
2013 // n = C1 * 10^(e2+x1)
2014 C1
.w
[1] = Q256
.w
[3];
2015 C1
.w
[0] = Q256
.w
[2];
2016 y_exp
= y_exp
+ ((UINT64
) (x1
+ 1) << 49);
2017 } else { // C1 < 10^34
2018 // C1.w[1] and C1.w[0] already set
2019 // n = C1 * 10^(e2+x1)
2020 y_exp
= y_exp
+ ((UINT64
) x1
<< 49);
2022 // check for overflow
2023 if (y_exp
== EXP_MAX_P1
2024 && (rnd_mode
== ROUNDING_TO_NEAREST
2025 || rnd_mode
== ROUNDING_TIES_AWAY
)) {
2026 res
.w
[1] = 0x7800000000000000ull
| x_sign
; // +/-inf
2028 // set the inexact flag
2029 *pfpsf
|= INEXACT_EXCEPTION
;
2030 // set the overflow flag
2031 *pfpsf
|= OVERFLOW_EXCEPTION
;
2033 } // else no overflow
2034 } else { // if x_sign != y_sign the result of this subtract. is exact
2035 C1
.w
[0] = C1
.w
[0] - R256
.w
[2];
2036 C1
.w
[1] = C1
.w
[1] - R256
.w
[3];
2037 if (C1
.w
[0] > tmp64
)
2038 C1
.w
[1]--; // borrow
2039 if (C1
.w
[1] >= 0x8000000000000000ull
) { // negative coefficient!
2046 // the result will have the sign of y if last rnd
2050 // if the difference has P34-1 digits or less, i.e. C1 < 10^33 then
2051 // redo the calculation with x1=x1-1;
2052 // redo the calculation also if C1 = 10^33 and
2053 // (is_inexact_gt_midpoint or is_midpoint_lt_even);
2054 // (the last part should have really been
2055 // (is_inexact_lt_midpoint or is_midpoint_gt_even) from
2056 // the rounding of C2, but the position flags have been reversed)
2057 // 10^33 = 0x0000314dc6448d93 0x38c15b0a00000000
2058 if ((C1
.w
[1] < 0x0000314dc6448d93ull
||
2059 (C1
.w
[1] == 0x0000314dc6448d93ull
&&
2060 C1
.w
[0] < 0x38c15b0a00000000ull
)) ||
2061 (C1
.w
[1] == 0x0000314dc6448d93ull
&&
2062 C1
.w
[0] == 0x38c15b0a00000000ull
&&
2063 (is_inexact_gt_midpoint
|| is_midpoint_lt_even
))) { // C1=10^33
2064 x1
= x1
- 1; // x1 >= 0
2066 // clear position flags and tmp_inexact
2067 is_midpoint_lt_even
= 0;
2068 is_midpoint_gt_even
= 0;
2069 is_inexact_lt_midpoint
= 0;
2070 is_inexact_gt_midpoint
= 0;
2073 goto roundC2
; // else result has less than P34 digits
2076 // if the coefficient of the result is 10^34 it means that this
2077 // must be the second pass, and we are done
2078 if (C1
.w
[1] == 0x0001ed09bead87c0ull
&&
2079 C1
.w
[0] == 0x378d8e6400000000ull
) { // if C1 = 10^34
2080 C1
.w
[1] = 0x0000314dc6448d93ull
; // C1 = 10^33
2081 C1
.w
[0] = 0x38c15b0a00000000ull
;
2082 y_exp
= y_exp
+ ((UINT64
) 1 << 49);
2086 y_exp
= y_exp
+ ((UINT64
) x1
<< 49);
2087 // x1 = -1 is possible at the end of a second pass when the
2088 // first pass started with x1 = 1
2092 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
2093 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2095 && ((rnd_mode
== ROUNDING_UP
2096 && is_inexact_lt_midpoint
)
2097 || ((rnd_mode
== ROUNDING_TIES_AWAY
2098 || rnd_mode
== ROUNDING_UP
)
2099 && is_midpoint_gt_even
))) ||
2101 && ((rnd_mode
== ROUNDING_DOWN
2102 && is_inexact_lt_midpoint
)
2103 || ((rnd_mode
== ROUNDING_TIES_AWAY
2104 || rnd_mode
== ROUNDING_DOWN
)
2105 && is_midpoint_gt_even
)))) {
2108 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
2111 if (C1_hi
== 0x0001ed09bead87c0ull
2112 && C1_lo
== 0x378d8e6400000000ull
) {
2113 // C1 = 10^34 => rounding overflow
2114 C1_hi
= 0x0000314dc6448d93ull
;
2115 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
2116 y_exp
= y_exp
+ EXP_P1
;
2118 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
2120 (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TO_ZERO
)) ||
2122 (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TO_ZERO
)))) {
2125 if (C1_lo
== 0xffffffffffffffffull
)
2127 // check if we crossed into the lower decade
2128 if (C1_hi
== 0x0000314dc6448d93ull
&&
2129 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
2130 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
2131 C1_lo
= 0x378d8e63ffffffffull
;
2132 y_exp
= y_exp
- EXP_P1
;
2133 // no underflow, because delta + q2 >= P34 + 1
2136 ; // exact, the result is already correct
2138 // in all cases check for overflow (RN and RA solved already)
2139 if (y_exp
== EXP_MAX_P1
) { // overflow
2140 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
) || // RM and res < 0
2141 (rnd_mode
== ROUNDING_UP
&& !x_sign
)) { // RP and res > 0
2142 C1_hi
= 0x7800000000000000ull
; // +inf
2144 } else { // RM and res > 0, RP and res < 0, or RZ
2145 C1_hi
= 0x5fffed09bead87c0ull
;
2146 C1_lo
= 0x378d8e63ffffffffull
;
2148 y_exp
= 0; // x_sign is preserved
2149 // set the inexact flag (in case the exact addition was exact)
2150 *pfpsf
|= INEXACT_EXCEPTION
;
2151 // set the overflow flag
2152 *pfpsf
|= OVERFLOW_EXCEPTION
;
2155 // assemble the result
2156 res
.w
[1] = x_sign
| y_exp
| C1_hi
;
2159 *pfpsf
|= INEXACT_EXCEPTION
;
2161 } else { // if (-P34 + 1 <= delta <= -1) <=> 1 <= -delta <= P34 - 1
2162 // NOTE: the following, up to "} else { // if x_sign != y_sign
2163 // the result is exact" is identical to "else if (delta == P34 - q2) {"
2164 // from above; also, the code is not symmetric: a+b and b+a may take
2165 // different paths (need to unify eventually!)
2166 // calculate C' = C2 + C1 * 10^(e1-e2) directly; the result may be
2167 // inexact if it requires P34 + 1 decimal digits; in either case the
2168 // 'cutoff' point for addition is at the position of the lsb of C2
2169 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
2170 // exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
2171 // but their product fits with certainty in 128 bits (actually in 113)
2172 // Note that 0 <= e1 - e2 <= P34 - 2
2173 // -P34 + 1 <= delta <= -1 <=> -P34 + 1 <= delta <= -1 <=>
2174 // -P34 + 1 <= q1 + e1 - q2 - e2 <= -1 <=>
2175 // q2 - q1 - P34 + 1 <= e1 - e2 <= q2 - q1 - 1 <=>
2176 // 1 - P34 - P34 + 1 <= e1-e2 <= P34 - 1 - 1 => 0 <= e1-e2 <= P34 - 2
2177 scale
= delta
- q1
+ q2
; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
2178 if (scale
>= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
2179 __mul_128x64_to_128 (C1
, C1_lo
, __bid_ten2k128
[scale
- 20]);
2180 } else if (scale
>= 1) {
2181 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
2182 if (q1
<= 19) { // C1 fits in 64 bits
2183 __mul_64x64_to_128MACH (C1
, C1_lo
, __bid_ten2k64
[scale
]);
2184 } else { // q1 >= 20
2187 __mul_128x64_to_128 (C1
, __bid_ten2k64
[scale
], C1
);
2189 } else { // if (scale == 0) C1 is unchanged
2191 C1
.w
[0] = C1_lo
; // only the low part is necessary
2196 if (x_sign
== y_sign
) {
2197 // the result can overflow!
2198 C1_lo
= C1_lo
+ C2_lo
;
2199 C1_hi
= C1_hi
+ C2_hi
;
2200 if (C1_lo
< C1
.w
[0])
2202 // test for overflow, possible only when C1 >= 10^34
2203 if (C1_hi
> 0x0001ed09bead87c0ull
||
2204 (C1_hi
== 0x0001ed09bead87c0ull
&&
2205 C1_lo
>= 0x378d8e6400000000ull
)) { // C1 >= 10^34
2206 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply
2207 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
2209 // Calculate C'' = C' + 1/2 * 10^x
2210 if (C1_lo
>= 0xfffffffffffffffbull
) { // low half add has carry
2216 // the approximation of 10^(-1) was rounded up to 118 bits
2217 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
2218 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
2220 C1
.w
[0] = C1_lo
; // C''
2221 __bid_ten2m1
.w
[1] = 0x1999999999999999ull
;
2222 __bid_ten2m1
.w
[0] = 0x9999999999999a00ull
;
2223 __mul_128x128_to_256 (P256
, C1
, __bid_ten2m1
); // P256 = C*, f*
2224 // C* is actually floor(C*) in this case
2225 // the top Ex = 128 bits of 10^(-1) are
2226 // T* = 0x00199999999999999999999999999999
2227 // if (0 < f* < 10^(-x)) then
2228 // if floor(C*) is even then C = floor(C*) - logical right
2229 // shift; C has p decimal digits, correct by Prop. 1)
2230 // else if floor(C*) is odd C = floor(C*) - 1 (logical right
2231 // shift; C has p decimal digits, correct by Pr. 1)
2233 // C = floor(C*) (logical right shift; C has p decimal digits,
2234 // correct by Property 1)
2235 // n = C * 10^(e2+x)
2236 if ((P256
.w
[1] || P256
.w
[0])
2237 && (P256
.w
[1] < 0x1999999999999999ull
2238 || (P256
.w
[1] == 0x1999999999999999ull
2239 && P256
.w
[0] <= 0x9999999999999999ull
))) {
2240 // the result is a midpoint
2241 if (P256
.w
[2] & 0x01) {
2242 is_midpoint_gt_even
= 1;
2243 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0
2245 if (P256
.w
[2] == 0xffffffffffffffffull
)
2248 is_midpoint_lt_even
= 1;
2251 // n = Cstar * 10^(e2+1)
2252 y_exp
= y_exp
+ EXP_P1
;
2253 // C* != 10^P34 because C* has P34 digits
2254 // check for overflow
2255 if (y_exp
== EXP_MAX_P1
2256 && (rnd_mode
== ROUNDING_TO_NEAREST
2257 || rnd_mode
== ROUNDING_TIES_AWAY
)) {
2259 res
.w
[1] = x_sign
| 0x7800000000000000ull
; // +/-inf
2261 // set the inexact flag
2262 *pfpsf
|= INEXACT_EXCEPTION
;
2263 // set the overflow flag
2264 *pfpsf
|= OVERFLOW_EXCEPTION
;
2267 // if (0 < f* - 1/2 < 10^(-x)) then
2268 // the result of the addition is exact
2270 // the result of the addition is inexact
2271 if (P256
.w
[1] > 0x8000000000000000ull
||
2272 (P256
.w
[1] == 0x8000000000000000ull
&&
2273 P256
.w
[0] > 0x0ull
)) { // the result may be exact
2274 tmp64
= P256
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
2275 if ((tmp64
> 0x1999999999999999ull
2276 || (tmp64
== 0x1999999999999999ull
2277 && P256
.w
[0] >= 0x9999999999999999ull
))) {
2278 // set the inexact flag
2279 *pfpsf
|= INEXACT_EXCEPTION
;
2281 } // else the result is exact
2282 } else { // the result is inexact
2283 // set the inexact flag
2284 *pfpsf
|= INEXACT_EXCEPTION
;
2289 if (!is_midpoint_gt_even
&& !is_midpoint_lt_even
) {
2290 is_inexact_lt_midpoint
= is_inexact
2291 && (P256
.w
[1] & 0x8000000000000000ull
);
2292 is_inexact_gt_midpoint
= is_inexact
2293 && !(P256
.w
[1] & 0x8000000000000000ull
);
2295 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
2296 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2298 ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
2299 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
)
2300 && is_midpoint_gt_even
))) ||
2302 ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
2303 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_DOWN
)
2304 && is_midpoint_gt_even
)))) {
2307 if (C1_lo
== 0) { // rounding overflow in the low 64 bits
2310 if (C1_hi
== 0x0001ed09bead87c0ull
2311 && C1_lo
== 0x378d8e6400000000ull
) {
2312 // C1 = 10^34 => rounding overflow
2313 C1_hi
= 0x0000314dc6448d93ull
;
2314 C1_lo
= 0x38c15b0a00000000ull
; // 10^33
2315 y_exp
= y_exp
+ EXP_P1
;
2318 if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
2320 (rnd_mode
== ROUNDING_UP
||
2321 rnd_mode
== ROUNDING_TO_ZERO
)) ||
2323 (rnd_mode
== ROUNDING_DOWN
||
2324 rnd_mode
== ROUNDING_TO_ZERO
)))) {
2327 if (C1_lo
== 0xffffffffffffffffull
)
2329 // check if we crossed into the lower decade
2330 if (C1_hi
== 0x0000314dc6448d93ull
&&
2331 C1_lo
== 0x38c15b09ffffffffull
) { // 10^33 - 1
2332 C1_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
2333 C1_lo
= 0x378d8e63ffffffffull
;
2334 y_exp
= y_exp
- EXP_P1
;
2335 // no underflow, because delta + q2 >= P34 + 1
2338 ; // exact, the result is already correct
2340 // in all cases check for overflow (RN and RA solved already)
2341 if (y_exp
== EXP_MAX_P1
) { // overflow
2342 if ((rnd_mode
== ROUNDING_DOWN
&& x_sign
) || // RM and res < 0
2343 (rnd_mode
== ROUNDING_UP
&& !x_sign
)) { // RP and res > 0
2344 C1_hi
= 0x7800000000000000ull
; // +inf
2346 } else { // RM and res > 0, RP and res < 0, or RZ
2347 C1_hi
= 0x5fffed09bead87c0ull
;
2348 C1_lo
= 0x378d8e63ffffffffull
;
2350 y_exp
= 0; // x_sign is preserved
2351 // set the inexact flag (in case the exact addition was exact)
2352 *pfpsf
|= INEXACT_EXCEPTION
;
2353 // set the overflow flag
2354 *pfpsf
|= OVERFLOW_EXCEPTION
;
2357 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
2358 // assemble the result
2359 res
.w
[1] = x_sign
| y_exp
| C1_hi
;
2361 } else { // if x_sign != y_sign the result is exact
2362 C1_lo
= C2_lo
- C1_lo
;
2363 C1_hi
= C2_hi
- C1_hi
;
2366 if (C1_hi
>= 0x8000000000000000ull
) { // negative coefficient!
2372 x_sign
= y_sign
; // the result will have the sign of y
2374 // the result can be zero, but it cannot overflow
2375 if (C1_lo
== 0 && C1_hi
== 0) {
2376 // assemble the result
2382 if (rnd_mode
== ROUNDING_DOWN
) {
2383 res
.w
[1] |= 0x8000000000000000ull
;
2387 // assemble the result
2388 res
.w
[1] = y_sign
| y_exp
| C1_hi
;
2397 /*****************************************************************************
2399 ****************************************************************************/
2401 #if DECIMAL_CALL_BY_REFERENCE
2403 __bid128_sub (UINT128
* pres
, UINT128
* px
,
2405 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
2407 UINT128 x
= *px
, y
= *py
;
2408 #if !DECIMAL_GLOBAL_ROUNDING
2409 unsigned int rnd_mode
= *prnd_mode
;
2413 __bid128_sub (UINT128 x
,
2414 UINT128 y _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
2421 if ((y
.w
[1] & MASK_NAN
) != MASK_NAN
) { // y is not NAN
2423 y_sign
= y
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
2425 y
.w
[1] = y
.w
[1] & 0x7fffffffffffffffull
;
2427 y
.w
[1] = y
.w
[1] | 0x8000000000000000ull
;
2429 #if DECIMAL_CALL_BY_REFERENCE
2430 __bid128_add (&res
, &x
,
2431 &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
2436 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG