1 /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #include "bid_internal.h"
26 /*****************************************************************************
27 * BID64_round_integral_exact
28 ****************************************************************************/
30 #if DECIMAL_CALL_BY_REFERENCE
32 bid64_round_integral_exact (UINT64
* pres
,
34 px _RND_MODE_PARAM _EXC_FLAGS_PARAM
35 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
37 #if !DECIMAL_GLOBAL_ROUNDING
38 unsigned int rnd_mode
= *prnd_mode
;
42 bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
43 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
46 UINT64 res
= 0xbaddbaddbaddbaddull
;
48 int exp
; // unbiased exponent
49 // Note: C1 represents the significand (UINT64)
54 // UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
55 UINT128 fstar
= { {0x0ull
, 0x0ull
} };
58 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
60 // check for NaNs and infinities
61 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
62 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
63 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
65 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
66 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
68 *pfpsf
|= INVALID_EXCEPTION
;
69 // return quiet (SNaN)
70 res
= x
& 0xfdffffffffffffffull
;
75 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
76 res
= x_sign
| 0x7800000000000000ull
;
80 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
81 // if the steering bits are 11 (condition will be 0), then
82 // the exponent is G[0:w+1]
83 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
84 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
85 if (C1
> 9999999999999999ull) { // non-canonical
88 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
89 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
90 C1
= (x
& MASK_BINARY_SIG1
);
93 // if x is 0 or non-canonical return 0 preserving the sign bit and
94 // the preferred exponent of MAX(Q(x), 0)
98 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
101 // x is a finite non-zero number (not 0, non-canonical, or special)
104 case ROUNDING_TO_NEAREST
:
105 case ROUNDING_TIES_AWAY
:
106 // return 0 if (exp <= -(p+1))
108 res
= x_sign
| 0x31c0000000000000ull
;
109 *pfpsf
|= INEXACT_EXCEPTION
;
114 // return 0 if (exp <= -p)
117 res
= 0xb1c0000000000001ull
;
119 res
= 0x31c0000000000000ull
;
121 *pfpsf
|= INEXACT_EXCEPTION
;
126 // return 0 if (exp <= -p)
129 res
= 0xb1c0000000000000ull
;
131 res
= 0x31c0000000000001ull
;
133 *pfpsf
|= INEXACT_EXCEPTION
;
137 case ROUNDING_TO_ZERO
:
138 // return 0 if (exp <= -p)
140 res
= x_sign
| 0x31c0000000000000ull
;
141 *pfpsf
|= INEXACT_EXCEPTION
;
145 default: break; // default added to avoid compiler warning
148 // q = nr. of decimal digits in x (1 <= q <= 54)
149 // determine first the nr. of bits in x
150 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
152 } else { // if x < 2^53
153 tmp1
.d
= (double) C1
; // exact conversion
155 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
156 q
= nr_digits
[x_nr_bits
- 1].digits
;
158 q
= nr_digits
[x_nr_bits
- 1].digits1
;
159 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
164 if (exp
>= 0) { // -exp <= 0
165 // the argument is an integer already
171 case ROUNDING_TO_NEAREST
:
172 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
173 // need to shift right -exp digits from the coefficient; exp will be 0
174 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
175 // chop off ind digits from the lower part of C1
176 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
177 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
178 C1
= C1
+ midpoint64
[ind
- 1];
179 // calculate C* and f*
180 // C* is actually floor(C*) in this case
181 // C* and f* need shifting and masking, as shown by
182 // shiftright128[] and maskhigh128[]
184 // kx = 10^(-x) = ten2mk64[ind - 1]
185 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
186 // the approximation of 10^(-x) was rounded up to 64 bits
187 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
189 // if (0 < f* < 10^(-x)) then the result is a midpoint
190 // if floor(C*) is even then C* = floor(C*) - logical right
191 // shift; C* has p decimal digits, correct by Prop. 1)
192 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
193 // shift; C* has p decimal digits, correct by Pr. 1)
195 // C* = floor(C*) (logical right shift; C has p decimal digits,
196 // correct by Property 1)
199 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
202 fstar
.w
[0] = P128
.w
[0];
203 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
204 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
205 res
= (P128
.w
[1] >> shift
);
206 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
207 fstar
.w
[0] = P128
.w
[0];
209 // if (0 < f* < 10^(-x)) then the result is a midpoint
210 // since round_to_even, subtract 1 if current result is odd
211 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
212 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
215 // determine inexactness of the rounding of C*
216 // if (0 < f* - 1/2 < 10^(-x)) then
217 // the result is exact
218 // else // if (f* - 1/2 > T*) then
219 // the result is inexact
221 if (fstar
.w
[0] > 0x8000000000000000ull
) {
222 // f* > 1/2 and the result may be exact
223 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
224 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
225 // set the inexact flag
226 *pfpsf
|= INEXACT_EXCEPTION
;
227 } // else the result is exact
228 } else { // the result is inexact; f2* <= 1/2
229 // set the inexact flag
230 *pfpsf
|= INEXACT_EXCEPTION
;
232 } else { // if 3 <= ind - 1 <= 21
233 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
234 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
235 // f2* > 1/2 and the result may be exact
236 // Calculate f2* - 1/2
237 if (fstar
.w
[1] > onehalf128
[ind
- 1]
238 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
239 // set the inexact flag
240 *pfpsf
|= INEXACT_EXCEPTION
;
241 } // else the result is exact
242 } else { // the result is inexact; f2* <= 1/2
243 // set the inexact flag
244 *pfpsf
|= INEXACT_EXCEPTION
;
247 // set exponent to zero as it was negative before.
248 res
= x_sign
| 0x31c0000000000000ull
| res
;
250 } else { // if exp < 0 and q + exp < 0
251 // the result is +0 or -0
252 res
= x_sign
| 0x31c0000000000000ull
;
253 *pfpsf
|= INEXACT_EXCEPTION
;
257 case ROUNDING_TIES_AWAY
:
258 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
259 // need to shift right -exp digits from the coefficient; exp will be 0
260 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
261 // chop off ind digits from the lower part of C1
262 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
263 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
264 C1
= C1
+ midpoint64
[ind
- 1];
265 // calculate C* and f*
266 // C* is actually floor(C*) in this case
267 // C* and f* need shifting and masking, as shown by
268 // shiftright128[] and maskhigh128[]
270 // kx = 10^(-x) = ten2mk64[ind - 1]
271 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
272 // the approximation of 10^(-x) was rounded up to 64 bits
273 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
275 // if (0 < f* < 10^(-x)) then the result is a midpoint
276 // C* = floor(C*) - logical right shift; C* has p decimal digits,
277 // correct by Prop. 1)
279 // C* = floor(C*) (logical right shift; C has p decimal digits,
280 // correct by Property 1)
283 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
286 fstar
.w
[0] = P128
.w
[0];
287 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
288 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
289 res
= (P128
.w
[1] >> shift
);
290 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
291 fstar
.w
[0] = P128
.w
[0];
293 // midpoints are already rounded correctly
294 // determine inexactness of the rounding of C*
295 // if (0 < f* - 1/2 < 10^(-x)) then
296 // the result is exact
297 // else // if (f* - 1/2 > T*) then
298 // the result is inexact
300 if (fstar
.w
[0] > 0x8000000000000000ull
) {
301 // f* > 1/2 and the result may be exact
302 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
303 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
304 // set the inexact flag
305 *pfpsf
|= INEXACT_EXCEPTION
;
306 } // else the result is exact
307 } else { // the result is inexact; f2* <= 1/2
308 // set the inexact flag
309 *pfpsf
|= INEXACT_EXCEPTION
;
311 } else { // if 3 <= ind - 1 <= 21
312 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
313 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
314 // f2* > 1/2 and the result may be exact
315 // Calculate f2* - 1/2
316 if (fstar
.w
[1] > onehalf128
[ind
- 1]
317 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
318 // set the inexact flag
319 *pfpsf
|= INEXACT_EXCEPTION
;
320 } // else the result is exact
321 } else { // the result is inexact; f2* <= 1/2
322 // set the inexact flag
323 *pfpsf
|= INEXACT_EXCEPTION
;
326 // set exponent to zero as it was negative before.
327 res
= x_sign
| 0x31c0000000000000ull
| res
;
329 } else { // if exp < 0 and q + exp < 0
330 // the result is +0 or -0
331 res
= x_sign
| 0x31c0000000000000ull
;
332 *pfpsf
|= INEXACT_EXCEPTION
;
337 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
338 // need to shift right -exp digits from the coefficient; exp will be 0
339 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
340 // chop off ind digits from the lower part of C1
341 // C1 fits in 64 bits
342 // calculate C* and f*
343 // C* is actually floor(C*) in this case
344 // C* and f* need shifting and masking, as shown by
345 // shiftright128[] and maskhigh128[]
347 // kx = 10^(-x) = ten2mk64[ind - 1]
349 // the approximation of 10^(-x) was rounded up to 64 bits
350 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
352 // C* = floor(C*) (logical right shift; C has p decimal digits,
353 // correct by Property 1)
354 // if (0 < f* < 10^(-x)) then the result is exact
357 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
360 fstar
.w
[0] = P128
.w
[0];
361 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
362 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
363 res
= (P128
.w
[1] >> shift
);
364 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
365 fstar
.w
[0] = P128
.w
[0];
367 // if (f* > 10^(-x)) then the result is inexact
368 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
370 // if negative and not exact, increment magnitude
373 *pfpsf
|= INEXACT_EXCEPTION
;
375 // set exponent to zero as it was negative before.
376 res
= x_sign
| 0x31c0000000000000ull
| res
;
378 } else { // if exp < 0 and q + exp <= 0
379 // the result is +0 or -1
381 res
= 0xb1c0000000000001ull
;
383 res
= 0x31c0000000000000ull
;
385 *pfpsf
|= INEXACT_EXCEPTION
;
390 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
391 // need to shift right -exp digits from the coefficient; exp will be 0
392 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
393 // chop off ind digits from the lower part of C1
394 // C1 fits in 64 bits
395 // calculate C* and f*
396 // C* is actually floor(C*) in this case
397 // C* and f* need shifting and masking, as shown by
398 // shiftright128[] and maskhigh128[]
400 // kx = 10^(-x) = ten2mk64[ind - 1]
402 // the approximation of 10^(-x) was rounded up to 64 bits
403 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
405 // C* = floor(C*) (logical right shift; C has p decimal digits,
406 // correct by Property 1)
407 // if (0 < f* < 10^(-x)) then the result is exact
410 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
413 fstar
.w
[0] = P128
.w
[0];
414 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
415 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
416 res
= (P128
.w
[1] >> shift
);
417 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
418 fstar
.w
[0] = P128
.w
[0];
420 // if (f* > 10^(-x)) then the result is inexact
421 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
423 // if positive and not exact, increment magnitude
426 *pfpsf
|= INEXACT_EXCEPTION
;
428 // set exponent to zero as it was negative before.
429 res
= x_sign
| 0x31c0000000000000ull
| res
;
431 } else { // if exp < 0 and q + exp <= 0
432 // the result is -0 or +1
434 res
= 0xb1c0000000000000ull
;
436 res
= 0x31c0000000000001ull
;
438 *pfpsf
|= INEXACT_EXCEPTION
;
442 case ROUNDING_TO_ZERO
:
443 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
444 // need to shift right -exp digits from the coefficient; exp will be 0
445 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
446 // chop off ind digits from the lower part of C1
447 // C1 fits in 127 bits
448 // calculate C* and f*
449 // C* is actually floor(C*) in this case
450 // C* and f* need shifting and masking, as shown by
451 // shiftright128[] and maskhigh128[]
453 // kx = 10^(-x) = ten2mk64[ind - 1]
455 // the approximation of 10^(-x) was rounded up to 64 bits
456 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
458 // C* = floor(C*) (logical right shift; C has p decimal digits,
459 // correct by Property 1)
460 // if (0 < f* < 10^(-x)) then the result is exact
463 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
466 fstar
.w
[0] = P128
.w
[0];
467 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
468 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
469 res
= (P128
.w
[1] >> shift
);
470 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
471 fstar
.w
[0] = P128
.w
[0];
473 // if (f* > 10^(-x)) then the result is inexact
474 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
475 *pfpsf
|= INEXACT_EXCEPTION
;
477 // set exponent to zero as it was negative before.
478 res
= x_sign
| 0x31c0000000000000ull
| res
;
480 } else { // if exp < 0 and q + exp < 0
481 // the result is +0 or -0
482 res
= x_sign
| 0x31c0000000000000ull
;
483 *pfpsf
|= INEXACT_EXCEPTION
;
487 default: break; // default added to avoid compiler warning
492 /*****************************************************************************
493 * BID64_round_integral_nearest_even
494 ****************************************************************************/
496 #if DECIMAL_CALL_BY_REFERENCE
498 bid64_round_integral_nearest_even (UINT64
* pres
,
500 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
505 bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
506 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
509 UINT64 res
= 0xbaddbaddbaddbaddull
;
511 int exp
; // unbiased exponent
512 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
520 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
522 // check for NaNs and infinities
523 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
524 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
525 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
527 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
528 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
530 *pfpsf
|= INVALID_EXCEPTION
;
531 // return quiet (SNaN)
532 res
= x
& 0xfdffffffffffffffull
;
537 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
538 res
= x_sign
| 0x7800000000000000ull
;
542 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
543 // if the steering bits are 11 (condition will be 0), then
544 // the exponent is G[0:w+1]
545 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
546 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
547 if (C1
> 9999999999999999ull) { // non-canonical
550 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
551 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
552 C1
= (x
& MASK_BINARY_SIG1
);
555 // if x is 0 or non-canonical
559 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
562 // x is a finite non-zero number (not 0, non-canonical, or special)
564 // return 0 if (exp <= -(p+1))
566 res
= x_sign
| 0x31c0000000000000ull
;
569 // q = nr. of decimal digits in x (1 <= q <= 54)
570 // determine first the nr. of bits in x
571 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
573 } else { // if x < 2^53
574 tmp1
.d
= (double) C1
; // exact conversion
576 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
577 q
= nr_digits
[x_nr_bits
- 1].digits
;
579 q
= nr_digits
[x_nr_bits
- 1].digits1
;
580 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
585 if (exp
>= 0) { // -exp <= 0
586 // the argument is an integer already
589 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
590 // need to shift right -exp digits from the coefficient; the exp will be 0
591 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
592 // chop off ind digits from the lower part of C1
593 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
594 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
595 C1
= C1
+ midpoint64
[ind
- 1];
596 // calculate C* and f*
597 // C* is actually floor(C*) in this case
598 // C* and f* need shifting and masking, as shown by
599 // shiftright128[] and maskhigh128[]
601 // kx = 10^(-x) = ten2mk64[ind - 1]
602 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
603 // the approximation of 10^(-x) was rounded up to 64 bits
604 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
606 // if (0 < f* < 10^(-x)) then the result is a midpoint
607 // if floor(C*) is even then C* = floor(C*) - logical right
608 // shift; C* has p decimal digits, correct by Prop. 1)
609 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
610 // shift; C* has p decimal digits, correct by Pr. 1)
612 // C* = floor(C*) (logical right shift; C has p decimal digits,
613 // correct by Property 1)
616 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
619 fstar
.w
[0] = P128
.w
[0];
620 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
621 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
622 res
= (P128
.w
[1] >> shift
);
623 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
624 fstar
.w
[0] = P128
.w
[0];
626 // if (0 < f* < 10^(-x)) then the result is a midpoint
627 // since round_to_even, subtract 1 if current result is odd
628 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
629 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
632 // set exponent to zero as it was negative before.
633 res
= x_sign
| 0x31c0000000000000ull
| res
;
635 } else { // if exp < 0 and q + exp < 0
636 // the result is +0 or -0
637 res
= x_sign
| 0x31c0000000000000ull
;
642 /*****************************************************************************
643 * BID64_round_integral_negative
644 *****************************************************************************/
646 #if DECIMAL_CALL_BY_REFERENCE
648 bid64_round_integral_negative (UINT64
* pres
,
650 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
655 bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
656 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
659 UINT64 res
= 0xbaddbaddbaddbaddull
;
661 int exp
; // unbiased exponent
662 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
667 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
671 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
673 // check for NaNs and infinities
674 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
675 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
676 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
678 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
679 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
681 *pfpsf
|= INVALID_EXCEPTION
;
682 // return quiet (SNaN)
683 res
= x
& 0xfdffffffffffffffull
;
688 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
689 res
= x_sign
| 0x7800000000000000ull
;
693 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
694 // if the steering bits are 11 (condition will be 0), then
695 // the exponent is G[0:w+1]
696 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
697 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
698 if (C1
> 9999999999999999ull) { // non-canonical
701 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
702 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
703 C1
= (x
& MASK_BINARY_SIG1
);
706 // if x is 0 or non-canonical
710 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
713 // x is a finite non-zero number (not 0, non-canonical, or special)
715 // return 0 if (exp <= -p)
718 res
= 0xb1c0000000000001ull
;
720 res
= 0x31c0000000000000ull
;
724 // q = nr. of decimal digits in x (1 <= q <= 54)
725 // determine first the nr. of bits in x
726 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
728 } else { // if x < 2^53
729 tmp1
.d
= (double) C1
; // exact conversion
731 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
732 q
= nr_digits
[x_nr_bits
- 1].digits
;
734 q
= nr_digits
[x_nr_bits
- 1].digits1
;
735 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
740 if (exp
>= 0) { // -exp <= 0
741 // the argument is an integer already
744 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
745 // need to shift right -exp digits from the coefficient; the exp will be 0
746 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
747 // chop off ind digits from the lower part of C1
748 // C1 fits in 64 bits
749 // calculate C* and f*
750 // C* is actually floor(C*) in this case
751 // C* and f* need shifting and masking, as shown by
752 // shiftright128[] and maskhigh128[]
754 // kx = 10^(-x) = ten2mk64[ind - 1]
756 // the approximation of 10^(-x) was rounded up to 64 bits
757 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
759 // C* = floor(C*) (logical right shift; C has p decimal digits,
760 // correct by Property 1)
761 // if (0 < f* < 10^(-x)) then the result is exact
764 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
767 fstar
.w
[0] = P128
.w
[0];
768 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
769 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
770 res
= (P128
.w
[1] >> shift
);
771 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
772 fstar
.w
[0] = P128
.w
[0];
774 // if (f* > 10^(-x)) then the result is inexact
776 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
777 // if negative and not exact, increment magnitude
780 // set exponent to zero as it was negative before.
781 res
= x_sign
| 0x31c0000000000000ull
| res
;
783 } else { // if exp < 0 and q + exp <= 0
784 // the result is +0 or -1
786 res
= 0xb1c0000000000001ull
;
788 res
= 0x31c0000000000000ull
;
794 /*****************************************************************************
795 * BID64_round_integral_positive
796 ****************************************************************************/
798 #if DECIMAL_CALL_BY_REFERENCE
800 bid64_round_integral_positive (UINT64
* pres
,
802 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
807 bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
808 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
811 UINT64 res
= 0xbaddbaddbaddbaddull
;
813 int exp
; // unbiased exponent
814 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
819 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
823 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
825 // check for NaNs and infinities
826 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
827 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
828 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
830 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
831 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
833 *pfpsf
|= INVALID_EXCEPTION
;
834 // return quiet (SNaN)
835 res
= x
& 0xfdffffffffffffffull
;
840 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
841 res
= x_sign
| 0x7800000000000000ull
;
845 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
846 // if the steering bits are 11 (condition will be 0), then
847 // the exponent is G[0:w+1]
848 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
849 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
850 if (C1
> 9999999999999999ull) { // non-canonical
853 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
854 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
855 C1
= (x
& MASK_BINARY_SIG1
);
858 // if x is 0 or non-canonical
862 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
865 // x is a finite non-zero number (not 0, non-canonical, or special)
867 // return 0 if (exp <= -p)
870 res
= 0xb1c0000000000000ull
;
872 res
= 0x31c0000000000001ull
;
876 // q = nr. of decimal digits in x (1 <= q <= 54)
877 // determine first the nr. of bits in x
878 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
880 } else { // if x < 2^53
881 tmp1
.d
= (double) C1
; // exact conversion
883 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
884 q
= nr_digits
[x_nr_bits
- 1].digits
;
886 q
= nr_digits
[x_nr_bits
- 1].digits1
;
887 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
892 if (exp
>= 0) { // -exp <= 0
893 // the argument is an integer already
896 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
897 // need to shift right -exp digits from the coefficient; the exp will be 0
898 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
899 // chop off ind digits from the lower part of C1
900 // C1 fits in 64 bits
901 // calculate C* and f*
902 // C* is actually floor(C*) in this case
903 // C* and f* need shifting and masking, as shown by
904 // shiftright128[] and maskhigh128[]
906 // kx = 10^(-x) = ten2mk64[ind - 1]
908 // the approximation of 10^(-x) was rounded up to 64 bits
909 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
911 // C* = floor(C*) (logical right shift; C has p decimal digits,
912 // correct by Property 1)
913 // if (0 < f* < 10^(-x)) then the result is exact
916 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
919 fstar
.w
[0] = P128
.w
[0];
920 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
921 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
922 res
= (P128
.w
[1] >> shift
);
923 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
924 fstar
.w
[0] = P128
.w
[0];
926 // if (f* > 10^(-x)) then the result is inexact
928 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
929 // if positive and not exact, increment magnitude
932 // set exponent to zero as it was negative before.
933 res
= x_sign
| 0x31c0000000000000ull
| res
;
935 } else { // if exp < 0 and q + exp <= 0
936 // the result is -0 or +1
938 res
= 0xb1c0000000000000ull
;
940 res
= 0x31c0000000000001ull
;
946 /*****************************************************************************
947 * BID64_round_integral_zero
948 ****************************************************************************/
950 #if DECIMAL_CALL_BY_REFERENCE
952 bid64_round_integral_zero (UINT64
* pres
,
954 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
959 bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
963 UINT64 res
= 0xbaddbaddbaddbaddull
;
965 int exp
; // unbiased exponent
966 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
971 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
974 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
976 // check for NaNs and infinities
977 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
978 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
979 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
981 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
982 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
984 *pfpsf
|= INVALID_EXCEPTION
;
985 // return quiet (SNaN)
986 res
= x
& 0xfdffffffffffffffull
;
991 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
992 res
= x_sign
| 0x7800000000000000ull
;
996 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
997 // if the steering bits are 11 (condition will be 0), then
998 // the exponent is G[0:w+1]
999 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
1000 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
1001 if (C1
> 9999999999999999ull) { // non-canonical
1004 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1005 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1006 C1
= (x
& MASK_BINARY_SIG1
);
1009 // if x is 0 or non-canonical
1013 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1016 // x is a finite non-zero number (not 0, non-canonical, or special)
1018 // return 0 if (exp <= -p)
1020 res
= x_sign
| 0x31c0000000000000ull
;
1023 // q = nr. of decimal digits in x (1 <= q <= 54)
1024 // determine first the nr. of bits in x
1025 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1027 } else { // if x < 2^53
1028 tmp1
.d
= (double) C1
; // exact conversion
1030 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1031 q
= nr_digits
[x_nr_bits
- 1].digits
;
1033 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1034 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1039 if (exp
>= 0) { // -exp <= 0
1040 // the argument is an integer already
1043 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1044 // need to shift right -exp digits from the coefficient; the exp will be 0
1045 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1046 // chop off ind digits from the lower part of C1
1047 // C1 fits in 127 bits
1048 // calculate C* and f*
1049 // C* is actually floor(C*) in this case
1050 // C* and f* need shifting and masking, as shown by
1051 // shiftright128[] and maskhigh128[]
1053 // kx = 10^(-x) = ten2mk64[ind - 1]
1054 // C* = C1 * 10^(-x)
1055 // the approximation of 10^(-x) was rounded up to 64 bits
1056 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1058 // C* = floor(C*) (logical right shift; C has p decimal digits,
1059 // correct by Property 1)
1060 // if (0 < f* < 10^(-x)) then the result is exact
1061 // n = C* * 10^(e+x)
1063 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1065 // redundant fstar.w[1] = 0;
1066 // redundant fstar.w[0] = P128.w[0];
1067 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1068 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1069 res
= (P128
.w
[1] >> shift
);
1070 // redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
1071 // redundant fstar.w[0] = P128.w[0];
1073 // if (f* > 10^(-x)) then the result is inexact
1074 // if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
1077 // set exponent to zero as it was negative before.
1078 res
= x_sign
| 0x31c0000000000000ull
| res
;
1080 } else { // if exp < 0 and q + exp < 0
1081 // the result is +0 or -0
1082 res
= x_sign
| 0x31c0000000000000ull
;
1087 /*****************************************************************************
1088 * BID64_round_integral_nearest_away
1089 ****************************************************************************/
1091 #if DECIMAL_CALL_BY_REFERENCE
1093 bid64_round_integral_nearest_away (UINT64
* pres
,
1095 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
1100 bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
1101 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
1104 UINT64 res
= 0xbaddbaddbaddbaddull
;
1106 int exp
; // unbiased exponent
1107 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1108 BID_UI64DOUBLE tmp1
;
1114 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1116 // check for NaNs and infinities
1117 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
1118 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
1119 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
1121 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
1122 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
1124 *pfpsf
|= INVALID_EXCEPTION
;
1125 // return quiet (SNaN)
1126 res
= x
& 0xfdffffffffffffffull
;
1131 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
1132 res
= x_sign
| 0x7800000000000000ull
;
1136 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
1137 // if the steering bits are 11 (condition will be 0), then
1138 // the exponent is G[0:w+1]
1139 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
1140 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
1141 if (C1
> 9999999999999999ull) { // non-canonical
1144 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1145 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1146 C1
= (x
& MASK_BINARY_SIG1
);
1149 // if x is 0 or non-canonical
1153 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1156 // x is a finite non-zero number (not 0, non-canonical, or special)
1158 // return 0 if (exp <= -(p+1))
1160 res
= x_sign
| 0x31c0000000000000ull
;
1163 // q = nr. of decimal digits in x (1 <= q <= 54)
1164 // determine first the nr. of bits in x
1165 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1167 } else { // if x < 2^53
1168 tmp1
.d
= (double) C1
; // exact conversion
1170 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1171 q
= nr_digits
[x_nr_bits
- 1].digits
;
1173 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1174 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1179 if (exp
>= 0) { // -exp <= 0
1180 // the argument is an integer already
1183 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1184 // need to shift right -exp digits from the coefficient; the exp will be 0
1185 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1186 // chop off ind digits from the lower part of C1
1187 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
1188 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1189 C1
= C1
+ midpoint64
[ind
- 1];
1190 // calculate C* and f*
1191 // C* is actually floor(C*) in this case
1192 // C* and f* need shifting and masking, as shown by
1193 // shiftright128[] and maskhigh128[]
1195 // kx = 10^(-x) = ten2mk64[ind - 1]
1196 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1197 // the approximation of 10^(-x) was rounded up to 64 bits
1198 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1200 // if (0 < f* < 10^(-x)) then the result is a midpoint
1201 // C* = floor(C*) - logical right shift; C* has p decimal digits,
1202 // correct by Prop. 1)
1204 // C* = floor(C*) (logical right shift; C has p decimal digits,
1205 // correct by Property 1)
1206 // n = C* * 10^(e+x)
1208 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1210 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1211 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1212 res
= (P128
.w
[1] >> shift
);
1214 // midpoints are already rounded correctly
1215 // set exponent to zero as it was negative before.
1216 res
= x_sign
| 0x31c0000000000000ull
| res
;
1218 } else { // if exp < 0 and q + exp < 0
1219 // the result is +0 or -0
1220 res
= x_sign
| 0x31c0000000000000ull
;