]>
Commit | Line | Data |
---|---|---|
7adcbafe | 1 | /* Copyright (C) 2007-2022 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | /***************************************************************************** | |
25 | * | |
26 | * Helper add functions (for fma) | |
27 | * | |
28 | * __BID_INLINE__ UINT64 get_add64( | |
29 | * UINT64 sign_x, int exponent_x, UINT64 coefficient_x, | |
30 | * UINT64 sign_y, int exponent_y, UINT64 coefficient_y, | |
31 | * int rounding_mode) | |
32 | * | |
33 | * __BID_INLINE__ UINT64 get_add128( | |
34 | * UINT64 sign_x, int exponent_x, UINT64 coefficient_x, | |
35 | * UINT64 sign_y, int final_exponent_y, UINT128 CY, | |
36 | * int extra_digits, int rounding_mode) | |
37 | * | |
38 | ***************************************************************************** | |
39 | * | |
40 | * Algorithm description: | |
41 | * | |
42 | * get_add64: same as BID64 add, but arguments are unpacked and there | |
43 | * are no special case checks | |
44 | * | |
45 | * get_add128: add 64-bit coefficient to 128-bit product (which contains | |
46 | * 16+extra_digits decimal digits), | |
47 | * return BID64 result | |
48 | * - the exponents are compared and the two coefficients are | |
49 | * properly aligned for addition/subtraction | |
50 | * - multiple paths are needed | |
51 | * - final result exponent is calculated and the lower term is | |
52 | * rounded first if necessary, to avoid manipulating | |
53 | * coefficients longer than 128 bits | |
54 | * | |
55 | ****************************************************************************/ | |
56 | ||
57 | #ifndef _INLINE_BID_ADD_H_ | |
58 | #define _INLINE_BID_ADD_H_ | |
59 | ||
60 | #include "bid_internal.h" | |
61 | ||
62 | #define MAX_FORMAT_DIGITS 16 | |
63 | #define DECIMAL_EXPONENT_BIAS 398 | |
64 | #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull | |
65 | #define BINARY_EXPONENT_BIAS 0x3ff | |
66 | #define UPPER_EXPON_LIMIT 51 | |
67 | ||
68 | /////////////////////////////////////////////////////////////////////// | |
69 | // | |
70 | // get_add64() is essentially the same as bid_add(), except that | |
71 | // the arguments are unpacked | |
72 | // | |
73 | ////////////////////////////////////////////////////////////////////// | |
74 | __BID_INLINE__ UINT64 | |
75 | get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x, | |
76 | UINT64 sign_y, int exponent_y, UINT64 coefficient_y, | |
77 | int rounding_mode, unsigned *fpsc) { | |
78 | UINT128 CA, CT, CT_new; | |
79 | UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab, | |
80 | rem_a; | |
81 | UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp, | |
82 | C64_new; | |
83 | int_double tempx; | |
84 | int exponent_a, exponent_b, diff_dec_expon; | |
85 | int bin_expon_ca, extra_digits, amount, scale_k, scale_ca; | |
86 | unsigned rmode, status; | |
87 | ||
88 | // sort arguments by exponent | |
89 | if (exponent_x <= exponent_y) { | |
90 | sign_a = sign_y; | |
91 | exponent_a = exponent_y; | |
92 | coefficient_a = coefficient_y; | |
93 | sign_b = sign_x; | |
94 | exponent_b = exponent_x; | |
95 | coefficient_b = coefficient_x; | |
96 | } else { | |
97 | sign_a = sign_x; | |
98 | exponent_a = exponent_x; | |
99 | coefficient_a = coefficient_x; | |
100 | sign_b = sign_y; | |
101 | exponent_b = exponent_y; | |
102 | coefficient_b = coefficient_y; | |
103 | } | |
104 | ||
105 | // exponent difference | |
106 | diff_dec_expon = exponent_a - exponent_b; | |
107 | ||
108 | /* get binary coefficients of x and y */ | |
109 | ||
110 | //--- get number of bits in the coefficients of x and y --- | |
111 | ||
112 | tempx.d = (double) coefficient_a; | |
113 | bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
114 | ||
115 | if (!coefficient_a) { | |
116 | return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode, | |
117 | fpsc); | |
118 | } | |
119 | if (diff_dec_expon > MAX_FORMAT_DIGITS) { | |
120 | // normalize a to a 16-digit coefficient | |
121 | ||
b2a00c89 L |
122 | scale_ca = estimate_decimal_digits[bin_expon_ca]; |
123 | if (coefficient_a >= power10_table_128[scale_ca].w[0]) | |
200359e8 L |
124 | scale_ca++; |
125 | ||
126 | scale_k = 16 - scale_ca; | |
127 | ||
b2a00c89 | 128 | coefficient_a *= power10_table_128[scale_k].w[0]; |
200359e8 L |
129 | |
130 | diff_dec_expon -= scale_k; | |
131 | exponent_a -= scale_k; | |
132 | ||
133 | /* get binary coefficients of x and y */ | |
134 | ||
135 | //--- get number of bits in the coefficients of x and y --- | |
136 | tempx.d = (double) coefficient_a; | |
137 | bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
138 | ||
139 | if (diff_dec_expon > MAX_FORMAT_DIGITS) { | |
140 | #ifdef SET_STATUS_FLAGS | |
141 | if (coefficient_b) { | |
142 | __set_status_flags (fpsc, INEXACT_EXCEPTION); | |
143 | } | |
144 | #endif | |
145 | ||
146 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
147 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 148 | if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST |
200359e8 L |
149 | { |
150 | switch (rounding_mode) { | |
151 | case ROUNDING_DOWN: | |
152 | if (sign_b) { | |
153 | coefficient_a -= ((((SINT64) sign_a) >> 63) | 1); | |
154 | if (coefficient_a < 1000000000000000ull) { | |
155 | exponent_a--; | |
156 | coefficient_a = 9999999999999999ull; | |
157 | } else if (coefficient_a >= 10000000000000000ull) { | |
158 | exponent_a++; | |
159 | coefficient_a = 1000000000000000ull; | |
160 | } | |
161 | } | |
162 | break; | |
163 | case ROUNDING_UP: | |
164 | if (!sign_b) { | |
165 | coefficient_a += ((((SINT64) sign_a) >> 63) | 1); | |
166 | if (coefficient_a < 1000000000000000ull) { | |
167 | exponent_a--; | |
168 | coefficient_a = 9999999999999999ull; | |
169 | } else if (coefficient_a >= 10000000000000000ull) { | |
170 | exponent_a++; | |
171 | coefficient_a = 1000000000000000ull; | |
172 | } | |
173 | } | |
174 | break; | |
175 | default: // RZ | |
176 | if (sign_a != sign_b) { | |
177 | coefficient_a--; | |
178 | if (coefficient_a < 1000000000000000ull) { | |
179 | exponent_a--; | |
180 | coefficient_a = 9999999999999999ull; | |
181 | } | |
182 | } | |
183 | break; | |
184 | } | |
185 | } else | |
186 | #endif | |
187 | #endif | |
188 | // check special case here | |
189 | if ((coefficient_a == 1000000000000000ull) | |
190 | && (diff_dec_expon == MAX_FORMAT_DIGITS + 1) | |
b2a00c89 L |
191 | && (sign_a ^ sign_b) |
192 | && (coefficient_b > 5000000000000000ull)) { | |
200359e8 L |
193 | coefficient_a = 9999999999999999ull; |
194 | exponent_a--; | |
195 | } | |
196 | ||
197 | return get_BID64 (sign_a, exponent_a, coefficient_a, | |
198 | rounding_mode, fpsc); | |
199 | } | |
200 | } | |
201 | // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62 | |
b2a00c89 | 202 | if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) { |
200359e8 L |
203 | // coefficient_a*10^(exponent_a-exponent_b)<2^63 |
204 | ||
205 | // multiply by 10^(exponent_a-exponent_b) | |
b2a00c89 | 206 | coefficient_a *= power10_table_128[diff_dec_expon].w[0]; |
200359e8 L |
207 | |
208 | // sign mask | |
209 | sign_b = ((SINT64) sign_b) >> 63; | |
210 | // apply sign to coeff. of b | |
211 | coefficient_b = (coefficient_b + sign_b) ^ sign_b; | |
212 | ||
213 | // apply sign to coefficient a | |
214 | sign_a = ((SINT64) sign_a) >> 63; | |
215 | coefficient_a = (coefficient_a + sign_a) ^ sign_a; | |
216 | ||
217 | coefficient_a += coefficient_b; | |
218 | // get sign | |
219 | sign_s = ((SINT64) coefficient_a) >> 63; | |
220 | coefficient_a = (coefficient_a + sign_s) ^ sign_s; | |
221 | sign_s &= 0x8000000000000000ull; | |
222 | ||
223 | // coefficient_a < 10^16 ? | |
b2a00c89 | 224 | if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) { |
200359e8 L |
225 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY |
226 | #ifndef IEEE_ROUND_NEAREST | |
227 | if (rounding_mode == ROUNDING_DOWN && (!coefficient_a) | |
228 | && sign_a != sign_b) | |
229 | sign_s = 0x8000000000000000ull; | |
230 | #endif | |
231 | #endif | |
232 | return get_BID64 (sign_s, exponent_b, coefficient_a, | |
233 | rounding_mode, fpsc); | |
234 | } | |
235 | // otherwise rounding is necessary | |
236 | ||
237 | // already know coefficient_a<10^19 | |
238 | // coefficient_a < 10^17 ? | |
b2a00c89 | 239 | if (coefficient_a < power10_table_128[17].w[0]) |
200359e8 | 240 | extra_digits = 1; |
b2a00c89 | 241 | else if (coefficient_a < power10_table_128[18].w[0]) |
200359e8 L |
242 | extra_digits = 2; |
243 | else | |
244 | extra_digits = 3; | |
245 | ||
246 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
247 | #ifndef IEEE_ROUND_NEAREST | |
248 | rmode = rounding_mode; | |
249 | if (sign_s && (unsigned) (rmode - 1) < 2) | |
250 | rmode = 3 - rmode; | |
251 | #else | |
252 | rmode = 0; | |
253 | #endif | |
254 | #else | |
255 | rmode = 0; | |
256 | #endif | |
b2a00c89 | 257 | coefficient_a += round_const_table[rmode][extra_digits]; |
200359e8 L |
258 | |
259 | // get P*(2^M[extra_digits])/10^extra_digits | |
260 | __mul_64x64_to_128 (CT, coefficient_a, | |
b2a00c89 | 261 | reciprocals10_64[extra_digits]); |
200359e8 L |
262 | |
263 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
b2a00c89 | 264 | amount = short_recip_scale[extra_digits]; |
200359e8 L |
265 | C64 = CT.w[1] >> amount; |
266 | ||
267 | } else { | |
268 | // coefficient_a*10^(exponent_a-exponent_b) is large | |
269 | sign_s = sign_a; | |
270 | ||
271 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
272 | #ifndef IEEE_ROUND_NEAREST | |
273 | rmode = rounding_mode; | |
274 | if (sign_s && (unsigned) (rmode - 1) < 2) | |
275 | rmode = 3 - rmode; | |
276 | #else | |
277 | rmode = 0; | |
278 | #endif | |
279 | #else | |
280 | rmode = 0; | |
281 | #endif | |
282 | ||
283 | // check whether we can take faster path | |
b2a00c89 | 284 | scale_ca = estimate_decimal_digits[bin_expon_ca]; |
200359e8 L |
285 | |
286 | sign_ab = sign_a ^ sign_b; | |
287 | sign_ab = ((SINT64) sign_ab) >> 63; | |
288 | ||
289 | // T1 = 10^(16-diff_dec_expon) | |
b2a00c89 | 290 | T1 = power10_table_128[16 - diff_dec_expon].w[0]; |
200359e8 L |
291 | |
292 | // get number of digits in coefficient_a | |
b2a00c89 L |
293 | //P_ca = power10_table_128[scale_ca].w[0]; |
294 | //P_ca_m1 = power10_table_128[scale_ca-1].w[0]; | |
295 | if (coefficient_a >= power10_table_128[scale_ca].w[0]) { | |
200359e8 L |
296 | scale_ca++; |
297 | //P_ca_m1 = P_ca; | |
b2a00c89 | 298 | //P_ca = power10_table_128[scale_ca].w[0]; |
200359e8 L |
299 | } |
300 | ||
301 | scale_k = 16 - scale_ca; | |
302 | ||
303 | // apply sign | |
304 | //Ts = (T1 + sign_ab) ^ sign_ab; | |
305 | ||
306 | // test range of ca | |
307 | //X = coefficient_a + Ts - P_ca_m1; | |
308 | ||
309 | // addition | |
310 | saved_ca = coefficient_a - T1; | |
311 | coefficient_a = | |
b2a00c89 | 312 | (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0]; |
200359e8 L |
313 | extra_digits = diff_dec_expon - scale_k; |
314 | ||
315 | // apply sign | |
316 | saved_cb = (coefficient_b + sign_ab) ^ sign_ab; | |
317 | // add 10^16 and rounding constant | |
318 | coefficient_b = | |
319 | saved_cb + 10000000000000000ull + | |
b2a00c89 | 320 | round_const_table[rmode][extra_digits]; |
200359e8 L |
321 | |
322 | // get P*(2^M[extra_digits])/10^extra_digits | |
323 | __mul_64x64_to_128 (CT, coefficient_b, | |
b2a00c89 | 324 | reciprocals10_64[extra_digits]); |
200359e8 L |
325 | |
326 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
b2a00c89 | 327 | amount = short_recip_scale[extra_digits]; |
200359e8 L |
328 | C0_64 = CT.w[1] >> amount; |
329 | ||
330 | // result coefficient | |
331 | C64 = C0_64 + coefficient_a; | |
332 | // filter out difficult (corner) cases | |
333 | // the following test is equivalent to | |
334 | // ( (initial_coefficient_a + Ts) < P_ca && | |
335 | // (initial_coefficient_a + Ts) > P_ca_m1 ), | |
336 | // which ensures the number of digits in coefficient_a does not change | |
337 | // after adding (the appropriately scaled and rounded) coefficient_b | |
b2a00c89 L |
338 | if ((UINT64) (C64 - 1000000000000000ull - 1) > |
339 | 9000000000000000ull - 2) { | |
200359e8 L |
340 | if (C64 >= 10000000000000000ull) { |
341 | // result has more than 16 digits | |
342 | if (!scale_k) { | |
343 | // must divide coeff_a by 10 | |
344 | saved_ca = saved_ca + T1; | |
b2a00c89 L |
345 | __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull); |
346 | //reciprocals10_64[1]); | |
200359e8 L |
347 | coefficient_a = CA.w[1] >> 1; |
348 | rem_a = | |
349 | saved_ca - (coefficient_a << 3) - (coefficient_a << 1); | |
350 | coefficient_a = coefficient_a - T1; | |
351 | ||
352 | saved_cb += | |
353 | /*90000000000000000 */ +rem_a * | |
b2a00c89 | 354 | power10_table_128[diff_dec_expon].w[0]; |
200359e8 L |
355 | } else |
356 | coefficient_a = | |
357 | (SINT64) (saved_ca - T1 - | |
b2a00c89 | 358 | (T1 << 3)) * (SINT64) power10_table_128[scale_k - |
200359e8 L |
359 | 1].w[0]; |
360 | ||
361 | extra_digits++; | |
362 | coefficient_b = | |
363 | saved_cb + 100000000000000000ull + | |
b2a00c89 | 364 | round_const_table[rmode][extra_digits]; |
200359e8 L |
365 | |
366 | // get P*(2^M[extra_digits])/10^extra_digits | |
367 | __mul_64x64_to_128 (CT, coefficient_b, | |
b2a00c89 | 368 | reciprocals10_64[extra_digits]); |
200359e8 L |
369 | |
370 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
b2a00c89 | 371 | amount = short_recip_scale[extra_digits]; |
200359e8 L |
372 | C0_64 = CT.w[1] >> amount; |
373 | ||
374 | // result coefficient | |
375 | C64 = C0_64 + coefficient_a; | |
376 | } else if (C64 <= 1000000000000000ull) { | |
377 | // less than 16 digits in result | |
378 | coefficient_a = | |
b2a00c89 | 379 | (SINT64) saved_ca *(SINT64) power10_table_128[scale_k + |
200359e8 L |
380 | 1].w[0]; |
381 | //extra_digits --; | |
382 | exponent_b--; | |
383 | coefficient_b = | |
384 | (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull + | |
b2a00c89 | 385 | round_const_table[rmode][extra_digits]; |
200359e8 L |
386 | |
387 | // get P*(2^M[extra_digits])/10^extra_digits | |
388 | __mul_64x64_to_128 (CT_new, coefficient_b, | |
b2a00c89 | 389 | reciprocals10_64[extra_digits]); |
200359e8 L |
390 | |
391 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
b2a00c89 | 392 | amount = short_recip_scale[extra_digits]; |
200359e8 L |
393 | C0_64 = CT_new.w[1] >> amount; |
394 | ||
395 | // result coefficient | |
396 | C64_new = C0_64 + coefficient_a; | |
397 | if (C64_new < 10000000000000000ull) { | |
398 | C64 = C64_new; | |
399 | #ifdef SET_STATUS_FLAGS | |
400 | CT = CT_new; | |
401 | #endif | |
402 | } else | |
403 | exponent_b++; | |
404 | } | |
405 | ||
406 | } | |
407 | ||
408 | } | |
409 | ||
410 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
411 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 412 | if (rmode == 0) //ROUNDING_TO_NEAREST |
200359e8 L |
413 | #endif |
414 | if (C64 & 1) { | |
415 | // check whether fractional part of initial_P/10^extra_digits | |
416 | // is exactly .5 | |
417 | // this is the same as fractional part of | |
418 | // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero | |
419 | ||
420 | // get remainder | |
421 | remainder_h = CT.w[1] << (64 - amount); | |
422 | ||
423 | // test whether fractional part is 0 | |
b2a00c89 | 424 | if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) { |
200359e8 L |
425 | C64--; |
426 | } | |
427 | } | |
428 | #endif | |
429 | ||
430 | #ifdef SET_STATUS_FLAGS | |
431 | status = INEXACT_EXCEPTION; | |
432 | ||
433 | // get remainder | |
434 | remainder_h = CT.w[1] << (64 - amount); | |
435 | ||
436 | switch (rmode) { | |
437 | case ROUNDING_TO_NEAREST: | |
438 | case ROUNDING_TIES_AWAY: | |
439 | // test whether fractional part is 0 | |
440 | if ((remainder_h == 0x8000000000000000ull) | |
b2a00c89 | 441 | && (CT.w[0] < reciprocals10_64[extra_digits])) |
200359e8 L |
442 | status = EXACT_STATUS; |
443 | break; | |
444 | case ROUNDING_DOWN: | |
445 | case ROUNDING_TO_ZERO: | |
b2a00c89 | 446 | if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) |
200359e8 L |
447 | status = EXACT_STATUS; |
448 | break; | |
449 | default: | |
450 | // round up | |
451 | __add_carry_out (tmp, carry, CT.w[0], | |
b2a00c89 | 452 | reciprocals10_64[extra_digits]); |
200359e8 L |
453 | if ((remainder_h >> (64 - amount)) + carry >= |
454 | (((UINT64) 1) << amount)) | |
455 | status = EXACT_STATUS; | |
456 | break; | |
457 | } | |
458 | __set_status_flags (fpsc, status); | |
459 | ||
460 | #endif | |
461 | ||
462 | return get_BID64 (sign_s, exponent_b + extra_digits, C64, | |
463 | rounding_mode, fpsc); | |
464 | } | |
465 | ||
466 | ||
467 | /////////////////////////////////////////////////////////////////// | |
468 | // round 128-bit coefficient and return result in BID64 format | |
469 | // do not worry about midpoint cases | |
470 | ////////////////////////////////////////////////////////////////// | |
471 | static UINT64 | |
472 | __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P, | |
473 | int extra_digits, int rounding_mode, | |
474 | unsigned *fpsc) { | |
475 | UINT128 Q_high, Q_low, C128; | |
476 | UINT64 C64; | |
477 | int amount, rmode; | |
478 | ||
479 | rmode = rounding_mode; | |
480 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
481 | #ifndef IEEE_ROUND_NEAREST | |
482 | if (sign && (unsigned) (rmode - 1) < 2) | |
483 | rmode = 3 - rmode; | |
484 | #endif | |
485 | #endif | |
b2a00c89 | 486 | __add_128_64 (P, P, round_const_table[rmode][extra_digits]); |
200359e8 L |
487 | |
488 | // get P*(2^M[extra_digits])/10^extra_digits | |
489 | __mul_128x128_full (Q_high, Q_low, P, | |
b2a00c89 | 490 | reciprocals10_128[extra_digits]); |
200359e8 L |
491 | |
492 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
b2a00c89 | 493 | amount = recip_scale[extra_digits]; |
200359e8 L |
494 | __shr_128 (C128, Q_high, amount); |
495 | ||
496 | C64 = __low_64 (C128); | |
497 | ||
498 | #ifdef SET_STATUS_FLAGS | |
499 | ||
500 | __set_status_flags (fpsc, INEXACT_EXCEPTION); | |
501 | ||
502 | #endif | |
503 | ||
504 | return get_BID64 (sign, exponent, C64, rounding_mode, fpsc); | |
505 | } | |
506 | ||
507 | /////////////////////////////////////////////////////////////////// | |
508 | // round 128-bit coefficient and return result in BID64 format | |
509 | /////////////////////////////////////////////////////////////////// | |
510 | static UINT64 | |
511 | __bid_full_round64 (UINT64 sign, int exponent, UINT128 P, | |
512 | int extra_digits, int rounding_mode, | |
513 | unsigned *fpsc) { | |
514 | UINT128 Q_high, Q_low, C128, Stemp, PU; | |
515 | UINT64 remainder_h, C64, carry, CY; | |
516 | int amount, amount2, rmode, status = 0; | |
517 | ||
518 | if (exponent < 0) { | |
519 | if (exponent >= -16 && (extra_digits + exponent < 0)) { | |
520 | extra_digits = -exponent; | |
521 | #ifdef SET_STATUS_FLAGS | |
522 | if (extra_digits > 0) { | |
523 | rmode = rounding_mode; | |
524 | if (sign && (unsigned) (rmode - 1) < 2) | |
525 | rmode = 3 - rmode; | |
526 | __add_128_128 (PU, P, | |
b2a00c89 | 527 | round_const_table_128[rmode][extra_digits]); |
200359e8 | 528 | if (__unsigned_compare_gt_128 |
b2a00c89 | 529 | (power10_table_128[extra_digits + 15], PU)) |
200359e8 L |
530 | status = UNDERFLOW_EXCEPTION; |
531 | } | |
532 | #endif | |
533 | } | |
534 | } | |
535 | ||
536 | if (extra_digits > 0) { | |
537 | exponent += extra_digits; | |
538 | rmode = rounding_mode; | |
539 | if (sign && (unsigned) (rmode - 1) < 2) | |
540 | rmode = 3 - rmode; | |
b2a00c89 | 541 | __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]); |
200359e8 L |
542 | |
543 | // get P*(2^M[extra_digits])/10^extra_digits | |
544 | __mul_128x128_full (Q_high, Q_low, P, | |
b2a00c89 | 545 | reciprocals10_128[extra_digits]); |
200359e8 L |
546 | |
547 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
b2a00c89 | 548 | amount = recip_scale[extra_digits]; |
200359e8 L |
549 | __shr_128_long (C128, Q_high, amount); |
550 | ||
551 | C64 = __low_64 (C128); | |
552 | ||
553 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
554 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 555 | if (rmode == 0) //ROUNDING_TO_NEAREST |
200359e8 L |
556 | #endif |
557 | if (C64 & 1) { | |
558 | // check whether fractional part of initial_P/10^extra_digits | |
b2a00c89 | 559 | // is exactly .5 |
200359e8 L |
560 | |
561 | // get remainder | |
562 | amount2 = 64 - amount; | |
563 | remainder_h = 0; | |
564 | remainder_h--; | |
565 | remainder_h >>= amount2; | |
566 | remainder_h = remainder_h & Q_high.w[0]; | |
567 | ||
568 | if (!remainder_h | |
b2a00c89 L |
569 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
570 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 571 | && Q_low.w[0] < |
b2a00c89 | 572 | reciprocals10_128[extra_digits].w[0]))) { |
200359e8 L |
573 | C64--; |
574 | } | |
575 | } | |
576 | #endif | |
577 | ||
578 | #ifdef SET_STATUS_FLAGS | |
579 | status |= INEXACT_EXCEPTION; | |
580 | ||
581 | // get remainder | |
582 | remainder_h = Q_high.w[0] << (64 - amount); | |
583 | ||
584 | switch (rmode) { | |
585 | case ROUNDING_TO_NEAREST: | |
586 | case ROUNDING_TIES_AWAY: | |
587 | // test whether fractional part is 0 | |
588 | if (remainder_h == 0x8000000000000000ull | |
b2a00c89 L |
589 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
590 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 591 | && Q_low.w[0] < |
b2a00c89 | 592 | reciprocals10_128[extra_digits].w[0]))) |
200359e8 L |
593 | status = EXACT_STATUS; |
594 | break; | |
595 | case ROUNDING_DOWN: | |
596 | case ROUNDING_TO_ZERO: | |
597 | if (!remainder_h | |
b2a00c89 L |
598 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
599 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 600 | && Q_low.w[0] < |
b2a00c89 | 601 | reciprocals10_128[extra_digits].w[0]))) |
200359e8 L |
602 | status = EXACT_STATUS; |
603 | break; | |
604 | default: | |
605 | // round up | |
606 | __add_carry_out (Stemp.w[0], CY, Q_low.w[0], | |
b2a00c89 | 607 | reciprocals10_128[extra_digits].w[0]); |
200359e8 | 608 | __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1], |
b2a00c89 | 609 | reciprocals10_128[extra_digits].w[1], CY); |
200359e8 L |
610 | if ((remainder_h >> (64 - amount)) + carry >= |
611 | (((UINT64) 1) << amount)) | |
612 | status = EXACT_STATUS; | |
613 | } | |
614 | ||
615 | __set_status_flags (fpsc, status); | |
616 | ||
617 | #endif | |
618 | } else { | |
619 | C64 = P.w[0]; | |
620 | if (!C64) { | |
621 | sign = 0; | |
622 | if (rounding_mode == ROUNDING_DOWN) | |
623 | sign = 0x8000000000000000ull; | |
624 | } | |
625 | } | |
626 | return get_BID64 (sign, exponent, C64, rounding_mode, fpsc); | |
627 | } | |
628 | ||
629 | ///////////////////////////////////////////////////////////////////////////////// | |
630 | // round 192-bit coefficient (P, remainder_P) and return result in BID64 format | |
631 | // the lowest 64 bits (remainder_P) are used for midpoint checking only | |
632 | //////////////////////////////////////////////////////////////////////////////// | |
b2a00c89 | 633 | static UINT64 |
200359e8 L |
634 | __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P, |
635 | int extra_digits, UINT64 remainder_P, | |
636 | int rounding_mode, unsigned *fpsc, | |
637 | unsigned uf_status) { | |
638 | UINT128 Q_high, Q_low, C128, Stemp; | |
639 | UINT64 remainder_h, C64, carry, CY; | |
640 | int amount, amount2, rmode, status = uf_status; | |
641 | ||
642 | rmode = rounding_mode; | |
643 | if (sign && (unsigned) (rmode - 1) < 2) | |
644 | rmode = 3 - rmode; | |
645 | if (rmode == ROUNDING_UP && remainder_P) { | |
646 | P.w[0]++; | |
647 | if (!P.w[0]) | |
648 | P.w[1]++; | |
649 | } | |
650 | ||
651 | if (extra_digits) { | |
b2a00c89 | 652 | __add_128_64 (P, P, round_const_table[rmode][extra_digits]); |
200359e8 L |
653 | |
654 | // get P*(2^M[extra_digits])/10^extra_digits | |
655 | __mul_128x128_full (Q_high, Q_low, P, | |
b2a00c89 | 656 | reciprocals10_128[extra_digits]); |
200359e8 L |
657 | |
658 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
b2a00c89 | 659 | amount = recip_scale[extra_digits]; |
200359e8 L |
660 | __shr_128 (C128, Q_high, amount); |
661 | ||
662 | C64 = __low_64 (C128); | |
663 | ||
664 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
665 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 666 | if (rmode == 0) //ROUNDING_TO_NEAREST |
200359e8 L |
667 | #endif |
668 | if (!remainder_P && (C64 & 1)) { | |
669 | // check whether fractional part of initial_P/10^extra_digits | |
b2a00c89 | 670 | // is exactly .5 |
200359e8 L |
671 | |
672 | // get remainder | |
673 | amount2 = 64 - amount; | |
674 | remainder_h = 0; | |
675 | remainder_h--; | |
676 | remainder_h >>= amount2; | |
677 | remainder_h = remainder_h & Q_high.w[0]; | |
678 | ||
679 | if (!remainder_h | |
b2a00c89 L |
680 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
681 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 682 | && Q_low.w[0] < |
b2a00c89 | 683 | reciprocals10_128[extra_digits].w[0]))) { |
200359e8 L |
684 | C64--; |
685 | } | |
686 | } | |
687 | #endif | |
688 | ||
689 | #ifdef SET_STATUS_FLAGS | |
690 | status |= INEXACT_EXCEPTION; | |
691 | ||
692 | if (!remainder_P) { | |
693 | // get remainder | |
694 | remainder_h = Q_high.w[0] << (64 - amount); | |
695 | ||
696 | switch (rmode) { | |
697 | case ROUNDING_TO_NEAREST: | |
698 | case ROUNDING_TIES_AWAY: | |
699 | // test whether fractional part is 0 | |
700 | if (remainder_h == 0x8000000000000000ull | |
b2a00c89 L |
701 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
702 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 703 | && Q_low.w[0] < |
b2a00c89 | 704 | reciprocals10_128[extra_digits].w[0]))) |
200359e8 L |
705 | status = EXACT_STATUS; |
706 | break; | |
707 | case ROUNDING_DOWN: | |
708 | case ROUNDING_TO_ZERO: | |
709 | if (!remainder_h | |
b2a00c89 L |
710 | && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] |
711 | || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] | |
200359e8 | 712 | && Q_low.w[0] < |
b2a00c89 | 713 | reciprocals10_128[extra_digits].w[0]))) |
200359e8 L |
714 | status = EXACT_STATUS; |
715 | break; | |
716 | default: | |
717 | // round up | |
718 | __add_carry_out (Stemp.w[0], CY, Q_low.w[0], | |
b2a00c89 | 719 | reciprocals10_128[extra_digits].w[0]); |
200359e8 | 720 | __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1], |
b2a00c89 | 721 | reciprocals10_128[extra_digits].w[1], CY); |
200359e8 L |
722 | if ((remainder_h >> (64 - amount)) + carry >= |
723 | (((UINT64) 1) << amount)) | |
724 | status = EXACT_STATUS; | |
725 | } | |
726 | } | |
727 | __set_status_flags (fpsc, status); | |
728 | ||
729 | #endif | |
730 | } else { | |
731 | C64 = P.w[0]; | |
732 | #ifdef SET_STATUS_FLAGS | |
733 | if (remainder_P) { | |
734 | __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION); | |
735 | } | |
736 | #endif | |
737 | } | |
738 | ||
739 | return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode, | |
740 | fpsc); | |
741 | } | |
742 | ||
743 | ||
744 | /////////////////////////////////////////////////////////////////// | |
745 | // get P/10^extra_digits | |
746 | // result fits in 64 bits | |
747 | /////////////////////////////////////////////////////////////////// | |
748 | __BID_INLINE__ UINT64 | |
749 | __truncate (UINT128 P, int extra_digits) | |
750 | // extra_digits <= 16 | |
751 | { | |
752 | UINT128 Q_high, Q_low, C128; | |
753 | UINT64 C64; | |
754 | int amount; | |
755 | ||
756 | // get P*(2^M[extra_digits])/10^extra_digits | |
757 | __mul_128x128_full (Q_high, Q_low, P, | |
b2a00c89 | 758 | reciprocals10_128[extra_digits]); |
200359e8 L |
759 | |
760 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
b2a00c89 | 761 | amount = recip_scale[extra_digits]; |
200359e8 L |
762 | __shr_128 (C128, Q_high, amount); |
763 | ||
764 | C64 = __low_64 (C128); | |
765 | ||
766 | return C64; | |
767 | } | |
768 | ||
769 | ||
770 | /////////////////////////////////////////////////////////////////// | |
771 | // return number of decimal digits in 128-bit value X | |
772 | /////////////////////////////////////////////////////////////////// | |
773 | __BID_INLINE__ int | |
774 | __get_dec_digits64 (UINT128 X) { | |
775 | int_double tempx; | |
776 | int digits_x, bin_expon_cx; | |
777 | ||
778 | if (!X.w[1]) { | |
779 | //--- get number of bits in the coefficients of x and y --- | |
780 | tempx.d = (double) X.w[0]; | |
781 | bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
782 | // get number of decimal digits in the coeff_x | |
b2a00c89 L |
783 | digits_x = estimate_decimal_digits[bin_expon_cx]; |
784 | if (X.w[0] >= power10_table_128[digits_x].w[0]) | |
200359e8 L |
785 | digits_x++; |
786 | return digits_x; | |
787 | } | |
788 | tempx.d = (double) X.w[1]; | |
789 | bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
790 | // get number of decimal digits in the coeff_x | |
b2a00c89 L |
791 | digits_x = estimate_decimal_digits[bin_expon_cx + 64]; |
792 | if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x])) | |
200359e8 L |
793 | digits_x++; |
794 | ||
795 | return digits_x; | |
796 | } | |
797 | ||
798 | ||
799 | //////////////////////////////////////////////////////////////////////////////// | |
800 | // | |
801 | // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format | |
802 | // | |
803 | //////////////////////////////////////////////////////////////////////////////// | |
804 | __BID_INLINE__ UINT64 | |
805 | get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x, | |
806 | UINT64 sign_y, int final_exponent_y, UINT128 CY, | |
807 | int extra_digits, int rounding_mode, unsigned *fpsc) { | |
808 | UINT128 CY_L, CX, FS, F, CT, ST, T2; | |
809 | UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y; | |
810 | SINT64 D = 0; | |
811 | int_double tempx; | |
812 | int diff_dec_expon, extra_digits2, exponent_y, status; | |
813 | int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode; | |
814 | ||
815 | // CY has more than 16 decimal digits | |
816 | ||
817 | exponent_y = final_exponent_y - extra_digits; | |
818 | ||
819 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
820 | rounding_mode = 0; | |
821 | #endif | |
822 | #ifdef IEEE_ROUND_NEAREST | |
823 | rounding_mode = 0; | |
824 | #endif | |
825 | ||
826 | if (exponent_x > exponent_y) { | |
827 | // normalize x | |
828 | //--- get number of bits in the coefficients of x and y --- | |
829 | tempx.d = (double) coefficient_x; | |
830 | bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
831 | // get number of decimal digits in the coeff_x | |
b2a00c89 L |
832 | digits_x = estimate_decimal_digits[bin_expon_cx]; |
833 | if (coefficient_x >= power10_table_128[digits_x].w[0]) | |
200359e8 L |
834 | digits_x++; |
835 | ||
836 | extra_dx = 16 - digits_x; | |
b2a00c89 | 837 | coefficient_x *= power10_table_128[extra_dx].w[0]; |
200359e8 L |
838 | if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) { |
839 | extra_dx++; | |
840 | coefficient_x = 10000000000000000ull; | |
841 | } | |
842 | exponent_x -= extra_dx; | |
843 | ||
844 | if (exponent_x > exponent_y) { | |
845 | ||
846 | // exponent_x > exponent_y | |
847 | diff_dec_expon = exponent_x - exponent_y; | |
848 | ||
849 | if (exponent_x <= final_exponent_y + 1) { | |
850 | __mul_64x64_to_128 (CX, coefficient_x, | |
b2a00c89 | 851 | power10_table_128[diff_dec_expon].w[0]); |
200359e8 L |
852 | |
853 | if (sign_x == sign_y) { | |
854 | __add_128_128 (CT, CY, CX); | |
855 | if ((exponent_x > | |
856 | final_exponent_y) /*&& (final_exponent_y>0) */ ) | |
857 | extra_digits++; | |
858 | if (__unsigned_compare_ge_128 | |
b2a00c89 | 859 | (CT, power10_table_128[16 + extra_digits])) |
200359e8 L |
860 | extra_digits++; |
861 | } else { | |
862 | __sub_128_128 (CT, CY, CX); | |
863 | if (((SINT64) CT.w[1]) < 0) { | |
864 | CT.w[0] = 0 - CT.w[0]; | |
865 | CT.w[1] = 0 - CT.w[1]; | |
866 | if (CT.w[0]) | |
867 | CT.w[1]--; | |
868 | sign_y = sign_x; | |
869 | } else if (!(CT.w[1] | CT.w[0])) { | |
870 | sign_y = | |
871 | (rounding_mode != | |
872 | ROUNDING_DOWN) ? 0 : 0x8000000000000000ull; | |
873 | } | |
874 | if ((exponent_x + 1 >= | |
875 | final_exponent_y) /*&& (final_exponent_y>=0) */ ) { | |
876 | extra_digits = __get_dec_digits64 (CT) - 16; | |
877 | if (extra_digits <= 0) { | |
878 | if (!CT.w[0] && rounding_mode == ROUNDING_DOWN) | |
879 | sign_y = 0x8000000000000000ull; | |
880 | return get_BID64 (sign_y, exponent_y, CT.w[0], | |
881 | rounding_mode, fpsc); | |
882 | } | |
883 | } else | |
884 | if (__unsigned_compare_gt_128 | |
b2a00c89 | 885 | (power10_table_128[15 + extra_digits], CT)) |
200359e8 L |
886 | extra_digits--; |
887 | } | |
888 | ||
889 | return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits, | |
890 | rounding_mode, fpsc); | |
891 | } | |
892 | // diff_dec2+extra_digits is the number of digits to eliminate from | |
893 | // argument CY | |
894 | diff_dec2 = exponent_x - final_exponent_y; | |
895 | ||
896 | if (diff_dec2 >= 17) { | |
897 | #ifndef IEEE_ROUND_NEAREST | |
898 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
899 | if ((rounding_mode) & 3) { | |
900 | switch (rounding_mode) { | |
901 | case ROUNDING_UP: | |
902 | if (!sign_y) { | |
903 | D = ((SINT64) (sign_x ^ sign_y)) >> 63; | |
904 | D = D + D + 1; | |
905 | coefficient_x += D; | |
906 | } | |
907 | break; | |
908 | case ROUNDING_DOWN: | |
909 | if (sign_y) { | |
910 | D = ((SINT64) (sign_x ^ sign_y)) >> 63; | |
911 | D = D + D + 1; | |
912 | coefficient_x += D; | |
913 | } | |
914 | break; | |
915 | case ROUNDING_TO_ZERO: | |
916 | if (sign_y != sign_x) { | |
917 | D = 0 - 1; | |
918 | coefficient_x += D; | |
919 | } | |
920 | break; | |
921 | } | |
922 | if (coefficient_x < 1000000000000000ull) { | |
923 | coefficient_x -= D; | |
924 | coefficient_x = | |
925 | D + (coefficient_x << 1) + (coefficient_x << 3); | |
926 | exponent_x--; | |
927 | } | |
928 | } | |
929 | #endif | |
930 | #endif | |
931 | #ifdef SET_STATUS_FLAGS | |
932 | if (CY.w[1] | CY.w[0]) | |
933 | __set_status_flags (fpsc, INEXACT_EXCEPTION); | |
934 | #endif | |
935 | return get_BID64 (sign_x, exponent_x, coefficient_x, | |
936 | rounding_mode, fpsc); | |
937 | } | |
938 | // here exponent_x <= 16+final_exponent_y | |
939 | ||
940 | // truncate CY to 16 dec. digits | |
941 | CYh = __truncate (CY, extra_digits); | |
942 | ||
943 | // get remainder | |
b2a00c89 | 944 | T = power10_table_128[extra_digits].w[0]; |
200359e8 L |
945 | __mul_64x64_to_64 (CY0L, CYh, T); |
946 | ||
947 | remainder_y = CY.w[0] - CY0L; | |
948 | ||
949 | // align coeff_x, CYh | |
950 | __mul_64x64_to_128 (CX, coefficient_x, | |
b2a00c89 | 951 | power10_table_128[diff_dec2].w[0]); |
200359e8 L |
952 | |
953 | if (sign_x == sign_y) { | |
954 | __add_128_64 (CT, CX, CYh); | |
955 | if (__unsigned_compare_ge_128 | |
b2a00c89 | 956 | (CT, power10_table_128[16 + diff_dec2])) |
200359e8 L |
957 | diff_dec2++; |
958 | } else { | |
959 | if (remainder_y) | |
960 | CYh++; | |
961 | __sub_128_64 (CT, CX, CYh); | |
962 | if (__unsigned_compare_gt_128 | |
b2a00c89 | 963 | (power10_table_128[15 + diff_dec2], CT)) |
200359e8 L |
964 | diff_dec2--; |
965 | } | |
966 | ||
967 | return __bid_full_round64_remainder (sign_x, final_exponent_y, CT, | |
968 | diff_dec2, remainder_y, | |
969 | rounding_mode, fpsc, 0); | |
970 | } | |
971 | } | |
972 | // Here (exponent_x <= exponent_y) | |
973 | { | |
974 | diff_dec_expon = exponent_y - exponent_x; | |
975 | ||
976 | if (diff_dec_expon > MAX_FORMAT_DIGITS) { | |
977 | rmode = rounding_mode; | |
978 | ||
979 | if ((sign_x ^ sign_y)) { | |
980 | if (!CY.w[0]) | |
981 | CY.w[1]--; | |
982 | CY.w[0]--; | |
983 | if (__unsigned_compare_gt_128 | |
b2a00c89 | 984 | (power10_table_128[15 + extra_digits], CY)) { |
200359e8 L |
985 | if (rmode & 3) { |
986 | extra_digits--; | |
987 | final_exponent_y--; | |
988 | } else { | |
989 | CY.w[0] = 1000000000000000ull; | |
990 | CY.w[1] = 0; | |
991 | extra_digits = 0; | |
992 | } | |
993 | } | |
994 | } | |
995 | __scale128_10 (CY, CY); | |
996 | extra_digits++; | |
997 | CY.w[0] |= 1; | |
998 | ||
999 | return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY, | |
1000 | extra_digits, rmode, fpsc); | |
1001 | } | |
1002 | // apply sign to coeff_x | |
1003 | sign_x ^= sign_y; | |
1004 | sign_x = ((SINT64) sign_x) >> 63; | |
1005 | CX.w[0] = (coefficient_x + sign_x) ^ sign_x; | |
1006 | CX.w[1] = sign_x; | |
1007 | ||
1008 | // check whether CY (rounded to 16 digits) and CX have | |
1009 | // any digits in the same position | |
1010 | diff_dec2 = final_exponent_y - exponent_x; | |
1011 | ||
1012 | if (diff_dec2 <= 17) { | |
1013 | // align CY to 10^ex | |
b2a00c89 | 1014 | S = power10_table_128[diff_dec_expon].w[0]; |
200359e8 L |
1015 | __mul_64x128_short (CY_L, S, CY); |
1016 | ||
1017 | __add_128_128 (ST, CY_L, CX); | |
1018 | extra_digits2 = __get_dec_digits64 (ST) - 16; | |
1019 | return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2, | |
1020 | rounding_mode, fpsc); | |
1021 | } | |
1022 | // truncate CY to 16 dec. digits | |
1023 | CYh = __truncate (CY, extra_digits); | |
1024 | ||
1025 | // get remainder | |
b2a00c89 | 1026 | T = power10_table_128[extra_digits].w[0]; |
200359e8 L |
1027 | __mul_64x64_to_64 (CY0L, CYh, T); |
1028 | ||
1029 | coefficient_y = CY.w[0] - CY0L; | |
1030 | // add rounding constant | |
1031 | rmode = rounding_mode; | |
1032 | if (sign_y && (unsigned) (rmode - 1) < 2) | |
1033 | rmode = 3 - rmode; | |
1034 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1035 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 1036 | if (!(rmode & 3)) //ROUNDING_TO_NEAREST |
200359e8 L |
1037 | #endif |
1038 | #endif | |
1039 | { | |
b2a00c89 | 1040 | coefficient_y += round_const_table[rmode][extra_digits]; |
200359e8 L |
1041 | } |
1042 | // align coefficient_y, coefficient_x | |
b2a00c89 | 1043 | S = power10_table_128[diff_dec_expon].w[0]; |
200359e8 L |
1044 | __mul_64x64_to_128 (F, coefficient_y, S); |
1045 | ||
1046 | // fraction | |
1047 | __add_128_128 (FS, F, CX); | |
1048 | ||
1049 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1050 | #ifndef IEEE_ROUND_NEAREST | |
b2a00c89 | 1051 | if (rmode == 0) //ROUNDING_TO_NEAREST |
200359e8 L |
1052 | #endif |
1053 | { | |
1054 | // rounding code, here RN_EVEN | |
1055 | // 10^(extra_digits+diff_dec_expon) | |
b2a00c89 | 1056 | T2 = power10_table_128[diff_dec_expon + extra_digits]; |
200359e8 L |
1057 | if (__unsigned_compare_gt_128 (FS, T2) |
1058 | || ((CYh & 1) && __test_equal_128 (FS, T2))) { | |
1059 | CYh++; | |
1060 | __sub_128_128 (FS, FS, T2); | |
1061 | } | |
1062 | } | |
1063 | #endif | |
1064 | #ifndef IEEE_ROUND_NEAREST | |
1065 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
b2a00c89 | 1066 | if (rmode == 4) //ROUNDING_TO_NEAREST |
200359e8 L |
1067 | #endif |
1068 | { | |
1069 | // rounding code, here RN_AWAY | |
1070 | // 10^(extra_digits+diff_dec_expon) | |
b2a00c89 | 1071 | T2 = power10_table_128[diff_dec_expon + extra_digits]; |
200359e8 L |
1072 | if (__unsigned_compare_ge_128 (FS, T2)) { |
1073 | CYh++; | |
1074 | __sub_128_128 (FS, FS, T2); | |
1075 | } | |
1076 | } | |
1077 | #endif | |
1078 | #ifndef IEEE_ROUND_NEAREST | |
1079 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1080 | switch (rmode) { | |
1081 | case ROUNDING_DOWN: | |
1082 | case ROUNDING_TO_ZERO: | |
1083 | if ((SINT64) FS.w[1] < 0) { | |
1084 | CYh--; | |
1085 | if (CYh < 1000000000000000ull) { | |
1086 | CYh = 9999999999999999ull; | |
1087 | final_exponent_y--; | |
1088 | } | |
1089 | } else { | |
b2a00c89 | 1090 | T2 = power10_table_128[diff_dec_expon + extra_digits]; |
200359e8 L |
1091 | if (__unsigned_compare_ge_128 (FS, T2)) { |
1092 | CYh++; | |
1093 | __sub_128_128 (FS, FS, T2); | |
1094 | } | |
1095 | } | |
1096 | break; | |
1097 | case ROUNDING_UP: | |
1098 | if ((SINT64) FS.w[1] < 0) | |
1099 | break; | |
b2a00c89 | 1100 | T2 = power10_table_128[diff_dec_expon + extra_digits]; |
200359e8 L |
1101 | if (__unsigned_compare_gt_128 (FS, T2)) { |
1102 | CYh += 2; | |
1103 | __sub_128_128 (FS, FS, T2); | |
1104 | } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) { | |
1105 | CYh++; | |
1106 | FS.w[1] = FS.w[0] = 0; | |
1107 | } else if (FS.w[1] | FS.w[0]) | |
1108 | CYh++; | |
1109 | break; | |
1110 | } | |
1111 | #endif | |
1112 | #endif | |
1113 | ||
1114 | #ifdef SET_STATUS_FLAGS | |
1115 | status = INEXACT_EXCEPTION; | |
1116 | #ifndef IEEE_ROUND_NEAREST | |
1117 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1118 | if (!(rmode & 3)) | |
1119 | #endif | |
1120 | #endif | |
1121 | { | |
1122 | // RN modes | |
1123 | if ((FS.w[1] == | |
b2a00c89 | 1124 | round_const_table_128[0][diff_dec_expon + extra_digits].w[1]) |
200359e8 | 1125 | && (FS.w[0] == |
b2a00c89 | 1126 | round_const_table_128[0][diff_dec_expon + |
200359e8 L |
1127 | extra_digits].w[0])) |
1128 | status = EXACT_STATUS; | |
1129 | } | |
1130 | #ifndef IEEE_ROUND_NEAREST | |
1131 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1132 | else if (!FS.w[1] && !FS.w[0]) | |
1133 | status = EXACT_STATUS; | |
1134 | #endif | |
1135 | #endif | |
1136 | ||
1137 | __set_status_flags (fpsc, status); | |
1138 | #endif | |
1139 | ||
1140 | return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode, | |
1141 | fpsc); | |
1142 | } | |
1143 | ||
1144 | } | |
1145 | ||
b2a00c89 L |
1146 | ////////////////////////////////////////////////////////////////////////// |
1147 | // | |
1148 | // If coefficient_z is less than 16 digits long, normalize to 16 digits | |
1149 | // | |
1150 | ///////////////////////////////////////////////////////////////////////// | |
1151 | static UINT64 | |
1152 | BID_normalize (UINT64 sign_z, int exponent_z, | |
1153 | UINT64 coefficient_z, UINT64 round_dir, int round_flag, | |
1154 | int rounding_mode, unsigned *fpsc) { | |
1155 | SINT64 D; | |
1156 | int_double tempx; | |
1157 | int digits_z, bin_expon, scale, rmode; | |
1158 | ||
1159 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1160 | #ifndef IEEE_ROUND_NEAREST | |
1161 | rmode = rounding_mode; | |
1162 | if (sign_z && (unsigned) (rmode - 1) < 2) | |
1163 | rmode = 3 - rmode; | |
1164 | #else | |
1165 | if (coefficient_z >= power10_table_128[15].w[0]) | |
1166 | return z; | |
1167 | #endif | |
1168 | #endif | |
1169 | ||
1170 | //--- get number of bits in the coefficients of x and y --- | |
1171 | tempx.d = (double) coefficient_z; | |
1172 | bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
1173 | // get number of decimal digits in the coeff_x | |
1174 | digits_z = estimate_decimal_digits[bin_expon]; | |
1175 | if (coefficient_z >= power10_table_128[digits_z].w[0]) | |
1176 | digits_z++; | |
1177 | ||
1178 | scale = 16 - digits_z; | |
1179 | exponent_z -= scale; | |
1180 | if (exponent_z < 0) { | |
1181 | scale += exponent_z; | |
1182 | exponent_z = 0; | |
1183 | } | |
1184 | coefficient_z *= power10_table_128[scale].w[0]; | |
1185 | ||
1186 | #ifdef SET_STATUS_FLAGS | |
1187 | if (round_flag) { | |
1188 | __set_status_flags (fpsc, INEXACT_EXCEPTION); | |
1189 | if (coefficient_z < 1000000000000000ull) | |
1190 | __set_status_flags (fpsc, UNDERFLOW_EXCEPTION); | |
1191 | else if ((coefficient_z == 1000000000000000ull) && !exponent_z | |
1192 | && ((SINT64) (round_dir ^ sign_z) < 0) && round_flag | |
1193 | && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO)) | |
1194 | __set_status_flags (fpsc, UNDERFLOW_EXCEPTION); | |
1195 | } | |
1196 | #endif | |
1197 | ||
1198 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
1199 | #ifndef IEEE_ROUND_NEAREST | |
1200 | if (round_flag && (rmode & 3)) { | |
1201 | D = round_dir ^ sign_z; | |
1202 | ||
1203 | if (rmode == ROUNDING_UP) { | |
1204 | if (D >= 0) | |
1205 | coefficient_z++; | |
1206 | } else { | |
1207 | if (D < 0) | |
1208 | coefficient_z--; | |
1209 | if (coefficient_z < 1000000000000000ull && exponent_z) { | |
1210 | coefficient_z = 9999999999999999ull; | |
1211 | exponent_z--; | |
1212 | } | |
1213 | } | |
1214 | } | |
1215 | #endif | |
1216 | #endif | |
1217 | ||
1218 | return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode, | |
1219 | fpsc); | |
1220 | } | |
1221 | ||
200359e8 L |
1222 | |
1223 | ////////////////////////////////////////////////////////////////////////// | |
1224 | // | |
1225 | // 0*10^ey + cz*10^ez, ey<ez | |
1226 | // | |
1227 | ////////////////////////////////////////////////////////////////////////// | |
1228 | ||
1229 | __BID_INLINE__ UINT64 | |
1230 | add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z, | |
1231 | UINT64 coefficient_z, unsigned *prounding_mode, | |
1232 | unsigned *fpsc) { | |
1233 | int_double tempx; | |
1234 | int bin_expon, scale_k, scale_cz; | |
1235 | int diff_expon; | |
1236 | ||
1237 | diff_expon = exponent_z - exponent_y; | |
1238 | ||
1239 | tempx.d = (double) coefficient_z; | |
1240 | bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
b2a00c89 L |
1241 | scale_cz = estimate_decimal_digits[bin_expon]; |
1242 | if (coefficient_z >= power10_table_128[scale_cz].w[0]) | |
200359e8 L |
1243 | scale_cz++; |
1244 | ||
1245 | scale_k = 16 - scale_cz; | |
1246 | if (diff_expon < scale_k) | |
1247 | scale_k = diff_expon; | |
b2a00c89 | 1248 | coefficient_z *= power10_table_128[scale_k].w[0]; |
200359e8 L |
1249 | |
1250 | return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z, | |
1251 | *prounding_mode, fpsc); | |
1252 | } | |
1253 | #endif |