]>
Commit | Line | Data |
---|---|---|
f1717362 | 1 | /* Copyright (C) 2007-2016 Free Software Foundation, Inc. |
9b6b0236 | 2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
6bc9506f | 7 | Software Foundation; either version 3, or (at your option) any later |
9b6b0236 | 8 | version. |
9 | ||
9b6b0236 | 10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
6bc9506f | 15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
9b6b0236 | 23 | |
24 | /***************************************************************************** | |
25 | * BID64 divide | |
26 | ***************************************************************************** | |
27 | * | |
28 | * Algorithm description: | |
29 | * | |
30 | * if(coefficient_x<coefficient_y) | |
31 | * p = number_digits(coefficient_y) - number_digits(coefficient_x) | |
32 | * A = coefficient_x*10^p | |
33 | * B = coefficient_y | |
34 | * CA= A*10^(15+j), j=0 for A>=B, 1 otherwise | |
35 | * Q = 0 | |
36 | * else | |
37 | * get Q=(int)(coefficient_x/coefficient_y) | |
38 | * (based on double precision divide) | |
39 | * check for exact divide case | |
40 | * Let R = coefficient_x - Q*coefficient_y | |
41 | * Let m=16-number_digits(Q) | |
42 | * CA=R*10^m, Q=Q*10^m | |
43 | * B = coefficient_y | |
44 | * endif | |
45 | * if (CA<2^64) | |
46 | * Q += CA/B (64-bit unsigned divide) | |
47 | * else | |
48 | * get final Q using double precision divide, followed by 3 integer | |
49 | * iterations | |
50 | * if exact result, eliminate trailing zeros | |
51 | * check for underflow | |
52 | * round coefficient to nearest | |
53 | * | |
54 | ****************************************************************************/ | |
55 | ||
56 | #include "bid_internal.h" | |
84d1fc49 | 57 | #include "bid_div_macros.h" |
58 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
59 | #include <fenv.h> | |
9b6b0236 | 60 | |
84d1fc49 | 61 | #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT |
62 | #endif | |
63 | ||
64 | extern UINT32 convert_table[5][128][2]; | |
65 | extern SINT8 factors[][2]; | |
66 | extern UINT8 packed_10000_zeros[]; | |
9b6b0236 | 67 | |
68 | ||
69 | #if DECIMAL_CALL_BY_REFERENCE | |
70 | ||
71 | void | |
84d1fc49 | 72 | bid64_div (UINT64 * pres, UINT64 * px, |
9b6b0236 | 73 | UINT64 * |
74 | py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
75 | _EXC_INFO_PARAM) { | |
76 | UINT64 x, y; | |
77 | #else | |
78 | ||
79 | UINT64 | |
84d1fc49 | 80 | bid64_div (UINT64 x, |
9b6b0236 | 81 | UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM |
82 | _EXC_MASKS_PARAM _EXC_INFO_PARAM) { | |
83 | #endif | |
84 | UINT128 CA, CT; | |
85 | UINT64 sign_x, sign_y, coefficient_x, coefficient_y, A, B, QX, PD; | |
86 | UINT64 A2, Q, Q2, B2, B4, B5, R, T, DU, res; | |
87 | UINT64 valid_x, valid_y; | |
88 | SINT64 D; | |
89 | int_double t_scale, tempq, temp_b; | |
90 | int_float tempx, tempy; | |
91 | double da, db, dq, da_h, da_l; | |
84d1fc49 | 92 | int exponent_x, exponent_y, bin_expon_cx; |
9b6b0236 | 93 | int diff_expon, ed1, ed2, bin_index; |
94 | int rmode, amount; | |
95 | int nzeros, i, j, k, d5; | |
96 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
84d1fc49 | 97 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
98 | fexcept_t binaryflags = 0; | |
99 | #endif | |
9b6b0236 | 100 | |
101 | #if DECIMAL_CALL_BY_REFERENCE | |
102 | #if !DECIMAL_GLOBAL_ROUNDING | |
103 | _IDEC_round rnd_mode = *prnd_mode; | |
104 | #endif | |
105 | x = *px; | |
106 | y = *py; | |
107 | #endif | |
108 | ||
109 | valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); | |
110 | valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); | |
111 | ||
112 | // unpack arguments, check for NaN or Infinity | |
113 | if (!valid_x) { | |
114 | // x is Inf. or NaN | |
115 | #ifdef SET_STATUS_FLAGS | |
116 | if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN | |
117 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
118 | #endif | |
119 | ||
120 | // test if x is NaN | |
121 | if ((x & NAN_MASK64) == NAN_MASK64) { | |
122 | #ifdef SET_STATUS_FLAGS | |
123 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
124 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
125 | #endif | |
84d1fc49 | 126 | BID_RETURN (coefficient_x & QUIET_MASK64); |
9b6b0236 | 127 | } |
128 | // x is Infinity? | |
129 | if ((x & INFINITY_MASK64) == INFINITY_MASK64) { | |
130 | // check if y is Inf or NaN | |
131 | if ((y & INFINITY_MASK64) == INFINITY_MASK64) { | |
132 | // y==Inf, return NaN | |
84d1fc49 | 133 | if ((y & NAN_MASK64) == INFINITY_MASK64) { // Inf/Inf |
9b6b0236 | 134 | #ifdef SET_STATUS_FLAGS |
9b6b0236 | 135 | __set_status_flags (pfpsf, INVALID_EXCEPTION); |
136 | #endif | |
84d1fc49 | 137 | BID_RETURN (NAN_MASK64); |
138 | } | |
139 | } else { | |
140 | // otherwise return +/-Inf | |
141 | BID_RETURN (((x ^ y) & 0x8000000000000000ull) | | |
142 | INFINITY_MASK64); | |
9b6b0236 | 143 | } |
9b6b0236 | 144 | } |
145 | // x==0 | |
84d1fc49 | 146 | if (((y & INFINITY_MASK64) != INFINITY_MASK64) |
147 | && !(coefficient_y)) { | |
9b6b0236 | 148 | // y==0 , return NaN |
149 | #ifdef SET_STATUS_FLAGS | |
150 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
151 | #endif | |
152 | BID_RETURN (NAN_MASK64); | |
153 | } | |
154 | if (((y & INFINITY_MASK64) != INFINITY_MASK64)) { | |
155 | if ((y & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) | |
156 | exponent_y = ((UINT32) (y >> 51)) & 0x3ff; | |
157 | else | |
158 | exponent_y = ((UINT32) (y >> 53)) & 0x3ff; | |
159 | sign_y = y & 0x8000000000000000ull; | |
160 | ||
161 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
162 | if (exponent_x > DECIMAL_MAX_EXPON_64) | |
163 | exponent_x = DECIMAL_MAX_EXPON_64; | |
164 | else if (exponent_x < 0) | |
165 | exponent_x = 0; | |
166 | BID_RETURN ((sign_x ^ sign_y) | (((UINT64) exponent_x) << 53)); | |
167 | } | |
168 | ||
169 | } | |
170 | if (!valid_y) { | |
171 | // y is Inf. or NaN | |
172 | ||
173 | // test if y is NaN | |
174 | if ((y & NAN_MASK64) == NAN_MASK64) { | |
175 | #ifdef SET_STATUS_FLAGS | |
176 | if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
177 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
178 | #endif | |
84d1fc49 | 179 | BID_RETURN (coefficient_y & QUIET_MASK64); |
9b6b0236 | 180 | } |
181 | // y is Infinity? | |
182 | if ((y & INFINITY_MASK64) == INFINITY_MASK64) { | |
183 | // return +/-0 | |
184 | BID_RETURN (((x ^ y) & 0x8000000000000000ull)); | |
185 | } | |
186 | // y is 0 | |
187 | #ifdef SET_STATUS_FLAGS | |
188 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
189 | #endif | |
190 | BID_RETURN ((sign_x ^ sign_y) | INFINITY_MASK64); | |
191 | } | |
84d1fc49 | 192 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
193 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
194 | #endif | |
9b6b0236 | 195 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
196 | ||
197 | if (coefficient_x < coefficient_y) { | |
198 | // get number of decimal digits for c_x, c_y | |
199 | ||
200 | //--- get number of bits in the coefficients of x and y --- | |
201 | tempx.d = (float) coefficient_x; | |
202 | tempy.d = (float) coefficient_y; | |
203 | bin_index = (tempy.i - tempx.i) >> 23; | |
204 | ||
84d1fc49 | 205 | A = coefficient_x * power10_index_binexp[bin_index]; |
9b6b0236 | 206 | B = coefficient_y; |
207 | ||
208 | temp_b.d = (double) B; | |
209 | ||
210 | // compare A, B | |
211 | DU = (A - B) >> 63; | |
212 | ed1 = 15 + (int) DU; | |
84d1fc49 | 213 | ed2 = estimate_decimal_digits[bin_index] + ed1; |
214 | T = power10_table_128[ed1].w[0]; | |
9b6b0236 | 215 | __mul_64x64_to_128 (CA, A, T); |
216 | ||
217 | Q = 0; | |
218 | diff_expon = diff_expon - ed2; | |
219 | ||
220 | // adjust double precision db, to ensure that later A/B - (int)(da/db) > -1 | |
221 | if (coefficient_y < 0x0020000000000000ull) { | |
222 | temp_b.i += 1; | |
223 | db = temp_b.d; | |
224 | } else | |
225 | db = (double) (B + 2 + (B & 1)); | |
226 | ||
227 | } else { | |
228 | // get c_x/c_y | |
229 | ||
230 | // set last bit before conversion to DP | |
231 | A2 = coefficient_x | 1; | |
232 | da = (double) A2; | |
233 | ||
234 | db = (double) coefficient_y; | |
235 | ||
236 | tempq.d = da / db; | |
237 | Q = (UINT64) tempq.d; | |
238 | ||
239 | R = coefficient_x - coefficient_y * Q; | |
240 | ||
241 | // will use to get number of dec. digits of Q | |
242 | bin_expon_cx = (tempq.i >> 52) - 0x3ff; | |
243 | ||
244 | // R<0 ? | |
245 | D = ((SINT64) R) >> 63; | |
246 | Q += D; | |
247 | R += (coefficient_y & D); | |
248 | ||
249 | // exact result ? | |
250 | if (((SINT64) R) <= 0) { | |
251 | // can have R==-1 for coeff_y==1 | |
252 | res = | |
253 | get_BID64 (sign_x ^ sign_y, diff_expon, (Q + R), rnd_mode, | |
254 | pfpsf); | |
84d1fc49 | 255 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
256 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
257 | #endif | |
9b6b0236 | 258 | BID_RETURN (res); |
259 | } | |
260 | // get decimal digits of Q | |
84d1fc49 | 261 | DU = power10_index_binexp[bin_expon_cx] - Q - 1; |
9b6b0236 | 262 | DU >>= 63; |
263 | ||
84d1fc49 | 264 | ed2 = 16 - estimate_decimal_digits[bin_expon_cx] - (int) DU; |
9b6b0236 | 265 | |
84d1fc49 | 266 | T = power10_table_128[ed2].w[0]; |
9b6b0236 | 267 | __mul_64x64_to_128 (CA, R, T); |
268 | B = coefficient_y; | |
269 | ||
84d1fc49 | 270 | Q *= power10_table_128[ed2].w[0]; |
9b6b0236 | 271 | diff_expon -= ed2; |
272 | ||
273 | } | |
274 | ||
275 | if (!CA.w[1]) { | |
276 | Q2 = CA.w[0] / B; | |
277 | B2 = B + B; | |
278 | B4 = B2 + B2; | |
279 | R = CA.w[0] - Q2 * B; | |
280 | Q += Q2; | |
281 | } else { | |
282 | ||
283 | // 2^64 | |
284 | t_scale.i = 0x43f0000000000000ull; | |
285 | // convert CA to DP | |
286 | da_h = CA.w[1]; | |
287 | da_l = CA.w[0]; | |
288 | da = da_h * t_scale.d + da_l; | |
289 | ||
290 | // quotient | |
291 | dq = da / db; | |
292 | Q2 = (UINT64) dq; | |
293 | ||
294 | // get w[0] remainder | |
295 | R = CA.w[0] - Q2 * B; | |
296 | ||
297 | // R<0 ? | |
298 | D = ((SINT64) R) >> 63; | |
299 | Q2 += D; | |
300 | R += (B & D); | |
301 | ||
302 | // now R<6*B | |
303 | ||
304 | // quick divide | |
305 | ||
306 | // 4*B | |
307 | B2 = B + B; | |
308 | B4 = B2 + B2; | |
309 | ||
310 | R = R - B4; | |
311 | // R<0 ? | |
312 | D = ((SINT64) R) >> 63; | |
313 | // restore R if negative | |
314 | R += (B4 & D); | |
315 | Q2 += ((~D) & 4); | |
316 | ||
317 | R = R - B2; | |
318 | // R<0 ? | |
319 | D = ((SINT64) R) >> 63; | |
320 | // restore R if negative | |
321 | R += (B2 & D); | |
322 | Q2 += ((~D) & 2); | |
323 | ||
324 | R = R - B; | |
325 | // R<0 ? | |
326 | D = ((SINT64) R) >> 63; | |
327 | // restore R if negative | |
328 | R += (B & D); | |
329 | Q2 += ((~D) & 1); | |
330 | ||
331 | Q += Q2; | |
332 | } | |
333 | ||
334 | #ifdef SET_STATUS_FLAGS | |
335 | if (R) { | |
336 | // set status flags | |
337 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
338 | } | |
339 | #ifndef LEAVE_TRAILING_ZEROS | |
340 | else | |
341 | #endif | |
342 | #else | |
343 | #ifndef LEAVE_TRAILING_ZEROS | |
344 | if (!R) | |
345 | #endif | |
346 | #endif | |
347 | #ifndef LEAVE_TRAILING_ZEROS | |
348 | { | |
349 | // eliminate trailing zeros | |
350 | ||
351 | // check whether CX, CY are short | |
352 | if ((coefficient_x <= 1024) && (coefficient_y <= 1024)) { | |
353 | i = (int) coefficient_y - 1; | |
354 | j = (int) coefficient_x - 1; | |
84d1fc49 | 355 | // difference in powers of 2 factors for Y and X |
356 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
357 | // difference in powers of 5 factors | |
358 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
9b6b0236 | 359 | if (d5 < nzeros) |
360 | nzeros = d5; | |
361 | ||
84d1fc49 | 362 | __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]); |
9b6b0236 | 363 | |
364 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
84d1fc49 | 365 | amount = short_recip_scale[nzeros]; |
9b6b0236 | 366 | Q = CT.w[1] >> amount; |
367 | ||
368 | diff_expon += nzeros; | |
369 | } else { | |
370 | tdigit[0] = Q & 0x3ffffff; | |
371 | tdigit[1] = 0; | |
372 | QX = Q >> 26; | |
373 | QX32 = QX; | |
374 | nzeros = 0; | |
375 | ||
376 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
377 | k = (QX32 & 127); | |
84d1fc49 | 378 | tdigit[0] += convert_table[j][k][0]; |
379 | tdigit[1] += convert_table[j][k][1]; | |
9b6b0236 | 380 | if (tdigit[0] >= 100000000) { |
381 | tdigit[0] -= 100000000; | |
382 | tdigit[1]++; | |
383 | } | |
384 | } | |
385 | ||
386 | digit = tdigit[0]; | |
387 | if (!digit && !tdigit[1]) | |
388 | nzeros += 16; | |
389 | else { | |
390 | if (!digit) { | |
391 | nzeros += 8; | |
392 | digit = tdigit[1]; | |
393 | } | |
394 | // decompose digit | |
395 | PD = (UINT64) digit *0x068DB8BBull; | |
396 | digit_h = (UINT32) (PD >> 40); | |
397 | digit_low = digit - digit_h * 10000; | |
398 | ||
399 | if (!digit_low) | |
400 | nzeros += 4; | |
401 | else | |
402 | digit_h = digit_low; | |
403 | ||
404 | if (!(digit_h & 1)) | |
405 | nzeros += | |
84d1fc49 | 406 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> |
9b6b0236 | 407 | (digit_h & 7)); |
408 | } | |
409 | ||
410 | if (nzeros) { | |
84d1fc49 | 411 | __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]); |
9b6b0236 | 412 | |
413 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 | |
84d1fc49 | 414 | amount = short_recip_scale[nzeros]; |
9b6b0236 | 415 | Q = CT.w[1] >> amount; |
416 | } | |
417 | diff_expon += nzeros; | |
418 | ||
419 | } | |
420 | if (diff_expon >= 0) { | |
421 | res = | |
422 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, | |
423 | rnd_mode, pfpsf); | |
84d1fc49 | 424 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
425 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
426 | #endif | |
9b6b0236 | 427 | BID_RETURN (res); |
428 | } | |
429 | } | |
430 | #endif | |
431 | ||
432 | if (diff_expon >= 0) { | |
433 | #ifdef IEEE_ROUND_NEAREST | |
434 | // round to nearest code | |
435 | // R*10 | |
436 | R += R; | |
437 | R = (R << 2) + R; | |
438 | B5 = B4 + B; | |
439 | ||
440 | // compare 10*R to 5*B | |
441 | R = B5 - R; | |
442 | // correction for (R==0 && (Q&1)) | |
443 | R -= (Q & 1); | |
444 | // R<0 ? | |
445 | D = ((UINT64) R) >> 63; | |
446 | Q += D; | |
447 | #else | |
448 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
449 | // round to nearest code | |
450 | // R*10 | |
451 | R += R; | |
452 | R = (R << 2) + R; | |
453 | B5 = B4 + B; | |
454 | ||
455 | // compare 10*R to 5*B | |
456 | R = B5 - R; | |
457 | // correction for (R==0 && (Q&1)) | |
458 | R -= (Q & 1); | |
459 | // R<0 ? | |
460 | D = ((UINT64) R) >> 63; | |
461 | Q += D; | |
462 | #else | |
463 | rmode = rnd_mode; | |
464 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
465 | rmode = 3 - rmode; | |
466 | switch (rmode) { | |
467 | case 0: // round to nearest code | |
468 | case ROUNDING_TIES_AWAY: | |
469 | // R*10 | |
470 | R += R; | |
471 | R = (R << 2) + R; | |
472 | B5 = B4 + B; | |
473 | // compare 10*R to 5*B | |
474 | R = B5 - R; | |
475 | // correction for (R==0 && (Q&1)) | |
476 | R -= ((Q | (rmode >> 2)) & 1); | |
477 | // R<0 ? | |
478 | D = ((UINT64) R) >> 63; | |
479 | Q += D; | |
480 | break; | |
481 | case ROUNDING_DOWN: | |
482 | case ROUNDING_TO_ZERO: | |
483 | break; | |
484 | default: // rounding up | |
485 | Q++; | |
486 | break; | |
487 | } | |
488 | #endif | |
489 | #endif | |
490 | ||
491 | res = | |
492 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, rnd_mode, | |
493 | pfpsf); | |
84d1fc49 | 494 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
495 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
496 | #endif | |
9b6b0236 | 497 | BID_RETURN (res); |
498 | } else { | |
499 | // UF occurs | |
500 | ||
501 | #ifdef SET_STATUS_FLAGS | |
502 | if ((diff_expon + 16 < 0)) { | |
503 | // set status flags | |
504 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
505 | } | |
506 | #endif | |
507 | rmode = rnd_mode; | |
508 | res = | |
509 | get_BID64_UF (sign_x ^ sign_y, diff_expon, Q, R, rmode, pfpsf); | |
84d1fc49 | 510 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
511 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
512 | #endif | |
513 | BID_RETURN (res); | |
514 | ||
515 | } | |
516 | } | |
517 | ||
518 | ||
519 | ||
520 | TYPE0_FUNCTION_ARGTYPE1_ARG128 (UINT64, bid64dq_div, UINT64, x, y) | |
521 | UINT256 CA4 = | |
522 | { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; | |
a3365171 | 523 | UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
84d1fc49 | 524 | UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res; |
525 | int_float fx, fy, f64; | |
526 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
527 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
528 | digits_q, amount; | |
529 | int nzeros, i, j, k, d5, done = 0; | |
530 | unsigned rmode; | |
531 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
532 | fexcept_t binaryflags = 0; | |
533 | #endif | |
534 | ||
535 | valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); | |
536 | ||
537 | // unpack arguments, check for NaN or Infinity | |
538 | CX.w[1] = 0; | |
539 | if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) { | |
540 | #ifdef SET_STATUS_FLAGS | |
541 | if (((y.w[1] & SNAN_MASK64) == SNAN_MASK64) || // y is sNaN | |
542 | ((x & SNAN_MASK64) == SNAN_MASK64)) | |
543 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
544 | #endif | |
545 | // test if x is NaN | |
546 | if (((x) & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
547 | res = CX.w[0]; | |
548 | BID_RETURN (res & QUIET_MASK64); | |
549 | } | |
550 | // x is Infinity? | |
551 | if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
552 | // check if y is Inf. | |
553 | if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
554 | // return NaN | |
555 | { | |
556 | #ifdef SET_STATUS_FLAGS | |
557 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
558 | #endif | |
559 | res = 0x7c00000000000000ull; | |
560 | BID_RETURN (res); | |
561 | } | |
562 | if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
563 | // otherwise return +/-Inf | |
564 | res = | |
565 | (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
566 | BID_RETURN (res); | |
567 | } | |
568 | } | |
569 | // x is 0 | |
570 | if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) { | |
571 | if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
572 | #ifdef SET_STATUS_FLAGS | |
573 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
574 | #endif | |
575 | // x=y=0, return NaN | |
576 | res = 0x7c00000000000000ull; | |
577 | BID_RETURN (res); | |
578 | } | |
579 | // return 0 | |
580 | res = ((x) ^ y.w[1]) & 0x8000000000000000ull; | |
581 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
582 | if (exponent_x > DECIMAL_MAX_EXPON_64) | |
583 | exponent_x = DECIMAL_MAX_EXPON_64; | |
584 | else if (exponent_x < 0) | |
585 | exponent_x = 0; | |
586 | res |= (((UINT64) exponent_x) << 53); | |
587 | BID_RETURN (res); | |
588 | } | |
589 | } | |
590 | exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS); | |
591 | if (!valid_y) { | |
592 | // y is Inf. or NaN | |
593 | ||
594 | // test if y is NaN | |
595 | if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
596 | #ifdef SET_STATUS_FLAGS | |
597 | if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
598 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
599 | #endif | |
600 | Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull); | |
601 | Tmp.w[0] = CY.w[0]; | |
602 | TP128 = reciprocals10_128[18]; | |
a3365171 | 603 | __mul_128x128_high (Qh, Tmp, TP128); |
84d1fc49 | 604 | amount = recip_scale[18]; |
605 | __shr_128 (Tmp, Qh, amount); | |
606 | res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; | |
607 | BID_RETURN (res); | |
608 | } | |
609 | // y is Infinity? | |
610 | if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
611 | // return +/-0 | |
612 | res = sign_x ^ sign_y; | |
613 | BID_RETURN (res); | |
614 | } | |
615 | // y is 0, return +/-Inf | |
616 | res = | |
617 | (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
618 | #ifdef SET_STATUS_FLAGS | |
619 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
620 | #endif | |
621 | BID_RETURN (res); | |
622 | } | |
623 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
624 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
625 | #endif | |
626 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
627 | ||
628 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
629 | // CX < CY | |
630 | ||
631 | // 2^64 | |
632 | f64.i = 0x5f800000; | |
633 | ||
634 | // fx ~ CX, fy ~ CY | |
635 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
636 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
637 | // expon_cy - expon_cx | |
638 | bin_index = (fy.i - fx.i) >> 23; | |
639 | ||
640 | if (CX.w[1]) { | |
641 | T = power10_index_binexp_128[bin_index].w[0]; | |
642 | __mul_64x128_short (CA, T, CX); | |
643 | } else { | |
644 | T128 = power10_index_binexp_128[bin_index]; | |
645 | __mul_64x128_short (CA, CX.w[0], T128); | |
646 | } | |
647 | ||
648 | ed2 = 15; | |
649 | if (__unsigned_compare_gt_128 (CY, CA)) | |
650 | ed2++; | |
651 | ||
652 | T128 = power10_table_128[ed2]; | |
653 | __mul_128x128_to_256 (CA4, CA, T128); | |
654 | ||
655 | ed2 += estimate_decimal_digits[bin_index]; | |
656 | CQ.w[0] = CQ.w[1] = 0; | |
657 | diff_expon = diff_expon - ed2; | |
658 | ||
659 | } else { | |
660 | // get CQ = CX/CY | |
661 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
662 | ||
663 | // get number of decimal digits in CQ | |
664 | // 2^64 | |
665 | f64.i = 0x5f800000; | |
666 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
667 | // binary expon. of CQ | |
668 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
669 | ||
670 | digits_q = estimate_decimal_digits[bin_expon]; | |
671 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
672 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
673 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
674 | digits_q++; | |
675 | ||
676 | if (digits_q <= 16) { | |
677 | if (!CR.w[1] && !CR.w[0]) { | |
678 | res = get_BID64 (sign_x ^ sign_y, diff_expon, | |
679 | CQ.w[0], rnd_mode, pfpsf); | |
680 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
681 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
682 | #endif | |
683 | BID_RETURN (res); | |
684 | } | |
685 | ||
686 | ed2 = 16 - digits_q; | |
687 | T128.w[0] = power10_table_128[ed2].w[0]; | |
688 | __mul_64x128_to_192 (CA4, (T128.w[0]), CR); | |
689 | diff_expon = diff_expon - ed2; | |
690 | CQ.w[0] *= T128.w[0]; | |
691 | } else { | |
692 | ed2 = digits_q - 16; | |
693 | diff_expon += ed2; | |
694 | T128 = reciprocals10_128[ed2]; | |
695 | __mul_128x128_to_256 (P256, CQ, T128); | |
696 | amount = recip_scale[ed2]; | |
697 | CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); | |
698 | CQ.w[1] = 0; | |
699 | ||
700 | __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); | |
701 | ||
702 | __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); | |
703 | QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; | |
704 | ||
705 | CA4.w[1] = CX.w[1] - QB256.w[1]; | |
706 | CA4.w[0] = CX.w[0] - QB256.w[0]; | |
707 | if (CX.w[0] < QB256.w[0]) | |
708 | CA4.w[1]--; | |
709 | if (CR.w[0] || CR.w[1]) | |
710 | CA4.w[0] |= 1; | |
711 | done = 1; | |
712 | ||
713 | } | |
714 | ||
715 | } | |
716 | if (!done) { | |
717 | __div_256_by_128 (&CQ, &CA4, CY); | |
718 | } | |
719 | ||
720 | ||
721 | ||
722 | #ifdef SET_STATUS_FLAGS | |
723 | if (CA4.w[0] || CA4.w[1]) { | |
724 | // set status flags | |
725 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
726 | } | |
727 | #ifndef LEAVE_TRAILING_ZEROS | |
728 | else | |
729 | #endif | |
730 | #else | |
731 | #ifndef LEAVE_TRAILING_ZEROS | |
732 | if (!CA4.w[0] && !CA4.w[1]) | |
733 | #endif | |
734 | #endif | |
735 | #ifndef LEAVE_TRAILING_ZEROS | |
736 | // check whether result is exact | |
737 | { | |
738 | // check whether CX, CY are short | |
739 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
740 | i = (int) CY.w[0] - 1; | |
741 | j = (int) CX.w[0] - 1; | |
742 | // difference in powers of 2 factors for Y and X | |
743 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
744 | // difference in powers of 5 factors | |
745 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
746 | if (d5 < nzeros) | |
747 | nzeros = d5; | |
748 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 749 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 750 | |
751 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
752 | amount = recip_scale[nzeros]; | |
753 | __shr_128_long (CQ, Qh, amount); | |
754 | ||
755 | diff_expon += nzeros; | |
756 | } else { | |
757 | // decompose Q as Qh*10^17 + Ql | |
758 | Q_low = CQ.w[0]; | |
759 | ||
760 | { | |
761 | tdigit[0] = Q_low & 0x3ffffff; | |
762 | tdigit[1] = 0; | |
763 | QX = Q_low >> 26; | |
764 | QX32 = QX; | |
765 | nzeros = 0; | |
766 | ||
767 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
768 | k = (QX32 & 127); | |
769 | tdigit[0] += convert_table[j][k][0]; | |
770 | tdigit[1] += convert_table[j][k][1]; | |
771 | if (tdigit[0] >= 100000000) { | |
772 | tdigit[0] -= 100000000; | |
773 | tdigit[1]++; | |
774 | } | |
775 | } | |
776 | ||
777 | if (tdigit[1] >= 100000000) { | |
778 | tdigit[1] -= 100000000; | |
779 | if (tdigit[1] >= 100000000) | |
780 | tdigit[1] -= 100000000; | |
781 | } | |
782 | ||
783 | digit = tdigit[0]; | |
784 | if (!digit && !tdigit[1]) | |
785 | nzeros += 16; | |
786 | else { | |
787 | if (!digit) { | |
788 | nzeros += 8; | |
789 | digit = tdigit[1]; | |
790 | } | |
791 | // decompose digit | |
792 | PD = (UINT64) digit *0x068DB8BBull; | |
793 | digit_h = (UINT32) (PD >> 40); | |
794 | digit_low = digit - digit_h * 10000; | |
795 | ||
796 | if (!digit_low) | |
797 | nzeros += 4; | |
798 | else | |
799 | digit_h = digit_low; | |
800 | ||
801 | if (!(digit_h & 1)) | |
802 | nzeros += | |
803 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
804 | (digit_h & 7)); | |
805 | } | |
806 | ||
807 | if (nzeros) { | |
808 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 809 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 810 | |
811 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
812 | amount = recip_scale[nzeros]; | |
813 | __shr_128 (CQ, Qh, amount); | |
814 | } | |
815 | diff_expon += nzeros; | |
816 | ||
817 | } | |
818 | } | |
819 | if(diff_expon>=0){ | |
820 | res = | |
821 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], | |
822 | rnd_mode, pfpsf); | |
823 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
824 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
825 | #endif | |
826 | BID_RETURN (res); | |
827 | } | |
828 | } | |
829 | #endif | |
830 | ||
831 | if (diff_expon >= 0) { | |
832 | #ifdef IEEE_ROUND_NEAREST | |
833 | // rounding | |
834 | // 2*CA4 - CY | |
835 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
836 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
837 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
838 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
839 | ||
840 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
841 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
842 | ||
843 | CQ.w[0] += carry64; | |
844 | #else | |
845 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
846 | // rounding | |
847 | // 2*CA4 - CY | |
848 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
849 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
850 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
851 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
852 | ||
853 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
854 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
855 | ||
856 | CQ.w[0] += carry64; | |
857 | if (CQ.w[0] < carry64) | |
858 | CQ.w[1]++; | |
859 | #else | |
860 | rmode = rnd_mode; | |
861 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
862 | rmode = 3 - rmode; | |
863 | switch (rmode) { | |
864 | case ROUNDING_TO_NEAREST: // round to nearest code | |
865 | // rounding | |
866 | // 2*CA4 - CY | |
867 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
868 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
869 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
870 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
871 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
872 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
873 | CQ.w[0] += carry64; | |
874 | if (CQ.w[0] < carry64) | |
875 | CQ.w[1]++; | |
876 | break; | |
877 | case ROUNDING_TIES_AWAY: | |
878 | // rounding | |
879 | // 2*CA4 - CY | |
880 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
881 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
882 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
883 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
884 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
885 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
886 | CQ.w[0] += carry64; | |
887 | if (CQ.w[0] < carry64) | |
888 | CQ.w[1]++; | |
889 | break; | |
890 | case ROUNDING_DOWN: | |
891 | case ROUNDING_TO_ZERO: | |
892 | break; | |
893 | default: // rounding up | |
894 | CQ.w[0]++; | |
895 | if (!CQ.w[0]) | |
896 | CQ.w[1]++; | |
897 | break; | |
898 | } | |
899 | #endif | |
900 | #endif | |
901 | ||
902 | res = | |
903 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, | |
904 | pfpsf); | |
905 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
906 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
907 | #endif | |
908 | BID_RETURN (res); | |
909 | } else { | |
910 | // UF occurs | |
911 | ||
912 | #ifdef SET_STATUS_FLAGS | |
913 | if ((diff_expon + 16 < 0)) { | |
914 | // set status flags | |
915 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
916 | } | |
917 | #endif | |
918 | rmode = rnd_mode; | |
919 | res = | |
920 | get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); | |
921 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
922 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
923 | #endif | |
924 | BID_RETURN (res); | |
925 | ||
926 | } | |
927 | ||
928 | } | |
929 | ||
930 | ||
931 | //#define LEAVE_TRAILING_ZEROS | |
932 | ||
933 | TYPE0_FUNCTION_ARG128_ARGTYPE2 (UINT64, bid64qd_div, x, UINT64, y) | |
934 | ||
935 | UINT256 CA4 = | |
936 | { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; | |
a3365171 | 937 | UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
84d1fc49 | 938 | UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, PD, res, valid_y; |
939 | int_float fx, fy, f64; | |
940 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
941 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
942 | digits_q, amount; | |
943 | int nzeros, i, j, k, d5, done = 0; | |
944 | unsigned rmode; | |
945 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
946 | fexcept_t binaryflags = 0; | |
947 | #endif | |
948 | ||
949 | valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], (y)); | |
950 | ||
951 | // unpack arguments, check for NaN or Infinity | |
952 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
953 | // test if x is NaN | |
954 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
955 | #ifdef SET_STATUS_FLAGS | |
956 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN | |
957 | (y & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
958 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
959 | #endif | |
960 | Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull); | |
961 | Tmp.w[0] = CX.w[0]; | |
962 | TP128 = reciprocals10_128[18]; | |
a3365171 | 963 | __mul_128x128_high (Qh, Tmp, TP128); |
84d1fc49 | 964 | amount = recip_scale[18]; |
965 | __shr_128 (Tmp, Qh, amount); | |
966 | res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; | |
967 | BID_RETURN (res); | |
968 | } | |
969 | // x is Infinity? | |
970 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
971 | // check if y is Inf. | |
972 | if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
973 | // return NaN | |
974 | { | |
975 | #ifdef SET_STATUS_FLAGS | |
976 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
977 | #endif | |
978 | res = 0x7c00000000000000ull; | |
979 | BID_RETURN (res); | |
980 | } | |
981 | if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
982 | // otherwise return +/-Inf | |
983 | res = | |
984 | ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
985 | BID_RETURN (res); | |
986 | } | |
987 | } | |
988 | // x is 0 | |
989 | if (((y & INFINITY_MASK64) != INFINITY_MASK64) && | |
990 | !(CY.w[0])) { | |
991 | #ifdef SET_STATUS_FLAGS | |
992 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
993 | #endif | |
994 | // x=y=0, return NaN | |
995 | res = 0x7c00000000000000ull; | |
996 | BID_RETURN (res); | |
997 | } | |
998 | // return 0 | |
999 | if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
1000 | if (!CY.w[0]) { | |
1001 | #ifdef SET_STATUS_FLAGS | |
1002 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1003 | #endif | |
1004 | res = 0x7c00000000000000ull; | |
1005 | BID_RETURN (res); | |
1006 | } | |
1007 | exponent_x = | |
1008 | exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 + | |
1009 | (DECIMAL_EXPONENT_BIAS << 1); | |
1010 | if (exponent_x > DECIMAL_MAX_EXPON_64) | |
1011 | exponent_x = DECIMAL_MAX_EXPON_64; | |
1012 | else if (exponent_x < 0) | |
1013 | exponent_x = 0; | |
1014 | res = (sign_x ^ sign_y) | (((UINT64) exponent_x) << 53); | |
1015 | BID_RETURN (res); | |
1016 | } | |
1017 | } | |
1018 | CY.w[1] = 0; | |
1019 | if (!valid_y) { | |
1020 | // y is Inf. or NaN | |
1021 | ||
1022 | // test if y is NaN | |
1023 | if ((y & NAN_MASK64) == NAN_MASK64) { | |
1024 | #ifdef SET_STATUS_FLAGS | |
1025 | if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
1026 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1027 | #endif | |
1028 | BID_RETURN (CY.w[0] & QUIET_MASK64); | |
1029 | } | |
1030 | // y is Infinity? | |
1031 | if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1032 | // return +/-0 | |
1033 | res = sign_x ^ sign_y; | |
1034 | BID_RETURN (res); | |
1035 | } | |
1036 | // y is 0, return +/-Inf | |
1037 | res = | |
1038 | ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1039 | #ifdef SET_STATUS_FLAGS | |
1040 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1041 | #endif | |
1042 | BID_RETURN (res); | |
1043 | } | |
1044 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1045 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1046 | #endif | |
1047 | diff_expon = | |
1048 | exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 + | |
1049 | (DECIMAL_EXPONENT_BIAS << 1); | |
1050 | ||
1051 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
1052 | // CX < CY | |
1053 | ||
1054 | // 2^64 | |
1055 | f64.i = 0x5f800000; | |
1056 | ||
1057 | // fx ~ CX, fy ~ CY | |
1058 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1059 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1060 | // expon_cy - expon_cx | |
1061 | bin_index = (fy.i - fx.i) >> 23; | |
1062 | ||
1063 | if (CX.w[1]) { | |
1064 | T = power10_index_binexp_128[bin_index].w[0]; | |
1065 | __mul_64x128_short (CA, T, CX); | |
1066 | } else { | |
1067 | T128 = power10_index_binexp_128[bin_index]; | |
1068 | __mul_64x128_short (CA, CX.w[0], T128); | |
1069 | } | |
1070 | ||
1071 | ed2 = 15; | |
1072 | if (__unsigned_compare_gt_128 (CY, CA)) | |
1073 | ed2++; | |
1074 | ||
1075 | T128 = power10_table_128[ed2]; | |
1076 | __mul_128x128_to_256 (CA4, CA, T128); | |
1077 | ||
1078 | ed2 += estimate_decimal_digits[bin_index]; | |
1079 | CQ.w[0] = CQ.w[1] = 0; | |
1080 | diff_expon = diff_expon - ed2; | |
1081 | ||
1082 | } else { | |
1083 | // get CQ = CX/CY | |
1084 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
1085 | ||
1086 | // get number of decimal digits in CQ | |
1087 | // 2^64 | |
1088 | f64.i = 0x5f800000; | |
1089 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1090 | // binary expon. of CQ | |
1091 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
1092 | ||
1093 | digits_q = estimate_decimal_digits[bin_expon]; | |
1094 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1095 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1096 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1097 | digits_q++; | |
1098 | ||
1099 | if (digits_q <= 16) { | |
1100 | if (!CR.w[1] && !CR.w[0]) { | |
1101 | res = get_BID64 (sign_x ^ sign_y, diff_expon, | |
1102 | CQ.w[0], rnd_mode, pfpsf); | |
1103 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1104 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1105 | #endif | |
1106 | BID_RETURN (res); | |
1107 | } | |
1108 | ||
1109 | ed2 = 16 - digits_q; | |
1110 | T128.w[0] = power10_table_128[ed2].w[0]; | |
1111 | __mul_64x128_to_192 (CA4, (T128.w[0]), CR); | |
1112 | diff_expon = diff_expon - ed2; | |
1113 | CQ.w[0] *= T128.w[0]; | |
1114 | } else { | |
1115 | ed2 = digits_q - 16; | |
1116 | diff_expon += ed2; | |
1117 | T128 = reciprocals10_128[ed2]; | |
1118 | __mul_128x128_to_256 (P256, CQ, T128); | |
1119 | amount = recip_scale[ed2]; | |
1120 | CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); | |
1121 | CQ.w[1] = 0; | |
1122 | ||
1123 | __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); | |
1124 | ||
1125 | __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); | |
1126 | QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; | |
1127 | ||
1128 | CA4.w[1] = CX.w[1] - QB256.w[1]; | |
1129 | CA4.w[0] = CX.w[0] - QB256.w[0]; | |
1130 | if (CX.w[0] < QB256.w[0]) | |
1131 | CA4.w[1]--; | |
1132 | if (CR.w[0] || CR.w[1]) | |
1133 | CA4.w[0] |= 1; | |
1134 | done = 1; | |
1135 | if(CA4.w[1]|CA4.w[0]) { | |
1136 | __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY); | |
1137 | } | |
1138 | ||
1139 | } | |
1140 | ||
1141 | } | |
1142 | ||
1143 | if (!done) { | |
1144 | __div_256_by_128 (&CQ, &CA4, CY); | |
1145 | } | |
1146 | ||
1147 | #ifdef SET_STATUS_FLAGS | |
1148 | if (CA4.w[0] || CA4.w[1]) { | |
1149 | // set status flags | |
1150 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1151 | } | |
1152 | #ifndef LEAVE_TRAILING_ZEROS | |
1153 | else | |
1154 | #endif | |
1155 | #else | |
1156 | #ifndef LEAVE_TRAILING_ZEROS | |
1157 | if (!CA4.w[0] && !CA4.w[1]) | |
1158 | #endif | |
1159 | #endif | |
1160 | #ifndef LEAVE_TRAILING_ZEROS | |
1161 | // check whether result is exact | |
1162 | { | |
1163 | if(!done) { | |
1164 | // check whether CX, CY are short | |
1165 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1166 | i = (int) CY.w[0] - 1; | |
1167 | j = (int) CX.w[0] - 1; | |
1168 | // difference in powers of 2 factors for Y and X | |
1169 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1170 | // difference in powers of 5 factors | |
1171 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
1172 | if (d5 < nzeros) | |
1173 | nzeros = d5; | |
1174 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 1175 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 1176 | //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
1177 | ||
1178 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1179 | amount = recip_scale[nzeros]; | |
1180 | __shr_128_long (CQ, Qh, amount); | |
1181 | ||
1182 | diff_expon += nzeros; | |
1183 | } else { | |
1184 | // decompose Q as Qh*10^17 + Ql | |
1185 | //T128 = reciprocals10_128[17]; | |
1186 | Q_low = CQ.w[0]; | |
1187 | ||
1188 | { | |
1189 | tdigit[0] = Q_low & 0x3ffffff; | |
1190 | tdigit[1] = 0; | |
1191 | QX = Q_low >> 26; | |
1192 | QX32 = QX; | |
1193 | nzeros = 0; | |
1194 | ||
1195 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1196 | k = (QX32 & 127); | |
1197 | tdigit[0] += convert_table[j][k][0]; | |
1198 | tdigit[1] += convert_table[j][k][1]; | |
1199 | if (tdigit[0] >= 100000000) { | |
1200 | tdigit[0] -= 100000000; | |
1201 | tdigit[1]++; | |
1202 | } | |
1203 | } | |
1204 | ||
1205 | if (tdigit[1] >= 100000000) { | |
1206 | tdigit[1] -= 100000000; | |
1207 | if (tdigit[1] >= 100000000) | |
1208 | tdigit[1] -= 100000000; | |
1209 | } | |
1210 | ||
1211 | digit = tdigit[0]; | |
1212 | if (!digit && !tdigit[1]) | |
1213 | nzeros += 16; | |
1214 | else { | |
1215 | if (!digit) { | |
1216 | nzeros += 8; | |
1217 | digit = tdigit[1]; | |
1218 | } | |
1219 | // decompose digit | |
1220 | PD = (UINT64) digit *0x068DB8BBull; | |
1221 | digit_h = (UINT32) (PD >> 40); | |
1222 | digit_low = digit - digit_h * 10000; | |
1223 | ||
1224 | if (!digit_low) | |
1225 | nzeros += 4; | |
1226 | else | |
1227 | digit_h = digit_low; | |
1228 | ||
1229 | if (!(digit_h & 1)) | |
1230 | nzeros += | |
1231 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1232 | (digit_h & 7)); | |
1233 | } | |
1234 | ||
1235 | if (nzeros) { | |
1236 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 1237 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 1238 | |
1239 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1240 | amount = recip_scale[nzeros]; | |
1241 | __shr_128 (CQ, Qh, amount); | |
1242 | } | |
1243 | diff_expon += nzeros; | |
1244 | ||
1245 | } | |
1246 | } | |
1247 | } | |
1248 | if(diff_expon>=0){ | |
1249 | res = | |
1250 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], | |
1251 | rnd_mode, pfpsf); | |
1252 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1253 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1254 | #endif | |
9b6b0236 | 1255 | BID_RETURN (res); |
84d1fc49 | 1256 | } |
1257 | } | |
1258 | #endif | |
9b6b0236 | 1259 | |
84d1fc49 | 1260 | if (diff_expon >= 0) { |
1261 | #ifdef IEEE_ROUND_NEAREST | |
1262 | // rounding | |
1263 | // 2*CA4 - CY | |
1264 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1265 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1266 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1267 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1268 | ||
1269 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1270 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1271 | ||
1272 | CQ.w[0] += carry64; | |
1273 | //if(CQ.w[0]<carry64) | |
1274 | //CQ.w[1] ++; | |
1275 | #else | |
1276 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1277 | // rounding | |
1278 | // 2*CA4 - CY | |
1279 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1280 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1281 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1282 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1283 | ||
1284 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1285 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1286 | ||
1287 | CQ.w[0] += carry64; | |
1288 | if (CQ.w[0] < carry64) | |
1289 | CQ.w[1]++; | |
1290 | #else | |
1291 | rmode = rnd_mode; | |
1292 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1293 | rmode = 3 - rmode; | |
1294 | switch (rmode) { | |
1295 | case ROUNDING_TO_NEAREST: // round to nearest code | |
1296 | // rounding | |
1297 | // 2*CA4 - CY | |
1298 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1299 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1300 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1301 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1302 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1303 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1304 | CQ.w[0] += carry64; | |
1305 | if (CQ.w[0] < carry64) | |
1306 | CQ.w[1]++; | |
1307 | break; | |
1308 | case ROUNDING_TIES_AWAY: | |
1309 | // rounding | |
1310 | // 2*CA4 - CY | |
1311 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1312 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1313 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1314 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1315 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1316 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1317 | CQ.w[0] += carry64; | |
1318 | if (CQ.w[0] < carry64) | |
1319 | CQ.w[1]++; | |
1320 | break; | |
1321 | case ROUNDING_DOWN: | |
1322 | case ROUNDING_TO_ZERO: | |
1323 | break; | |
1324 | default: // rounding up | |
1325 | CQ.w[0]++; | |
1326 | if (!CQ.w[0]) | |
1327 | CQ.w[1]++; | |
1328 | break; | |
9b6b0236 | 1329 | } |
84d1fc49 | 1330 | #endif |
1331 | #endif | |
9b6b0236 | 1332 | |
84d1fc49 | 1333 | |
1334 | res = | |
1335 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, | |
1336 | pfpsf); | |
1337 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1338 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1339 | #endif | |
1340 | BID_RETURN (res); | |
1341 | } else { | |
1342 | // UF occurs | |
1343 | ||
1344 | #ifdef SET_STATUS_FLAGS | |
1345 | if ((diff_expon + 16 < 0)) { | |
1346 | // set status flags | |
1347 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1348 | } | |
1349 | #endif | |
1350 | rmode = rnd_mode; | |
1351 | res = | |
1352 | get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); | |
1353 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1354 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1355 | #endif | |
1356 | BID_RETURN (res); | |
1357 | ||
1358 | } | |
1359 | ||
1360 | } | |
1361 | ||
1362 | //#define LEAVE_TRAILING_ZEROS | |
1363 | ||
1364 | extern UINT32 convert_table[5][128][2]; | |
1365 | extern SINT8 factors[][2]; | |
1366 | extern UINT8 packed_10000_zeros[]; | |
1367 | ||
1368 | ||
1369 | //UINT64* bid64_div128x128(UINT64 res, UINT128 *px, UINT128 *py, unsigned rnd_mode, unsigned *pfpsf) | |
1370 | ||
1371 | TYPE0_FUNCTION_ARG128_ARG128 (UINT64, bid64qq_div, x, y) | |
1372 | UINT256 CA4 = | |
1373 | { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; | |
a3365171 | 1374 | UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
84d1fc49 | 1375 | UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res; |
1376 | int_float fx, fy, f64; | |
1377 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
1378 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
1379 | digits_q, amount; | |
1380 | int nzeros, i, j, k, d5, done = 0; | |
1381 | unsigned rmode; | |
1382 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1383 | fexcept_t binaryflags = 0; | |
1384 | #endif | |
1385 | ||
1386 | valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); | |
1387 | ||
1388 | // unpack arguments, check for NaN or Infinity | |
1389 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
1390 | // test if x is NaN | |
1391 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1392 | #ifdef SET_STATUS_FLAGS | |
1393 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN | |
1394 | (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
1395 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1396 | #endif | |
1397 | Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull); | |
1398 | Tmp.w[0] = CX.w[0]; | |
1399 | TP128 = reciprocals10_128[18]; | |
a3365171 | 1400 | __mul_128x128_high (Qh, Tmp, TP128); |
84d1fc49 | 1401 | amount = recip_scale[18]; |
1402 | __shr_128 (Tmp, Qh, amount); | |
1403 | res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; | |
1404 | BID_RETURN (res); | |
1405 | } | |
1406 | // x is Infinity? | |
1407 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1408 | // check if y is Inf. | |
1409 | if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
1410 | // return NaN | |
1411 | { | |
1412 | #ifdef SET_STATUS_FLAGS | |
1413 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1414 | #endif | |
1415 | res = 0x7c00000000000000ull; | |
1416 | BID_RETURN (res); | |
1417 | } | |
1418 | if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
1419 | // otherwise return +/-Inf | |
1420 | res = | |
1421 | ((x.w[1] ^ y. | |
1422 | w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1423 | BID_RETURN (res); | |
1424 | } | |
1425 | } | |
1426 | // x is 0 | |
1427 | if (((y.w[1] & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
1428 | if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
1429 | #ifdef SET_STATUS_FLAGS | |
1430 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1431 | #endif | |
1432 | // x=y=0, return NaN | |
1433 | res = 0x7c00000000000000ull; | |
1434 | BID_RETURN (res); | |
1435 | } | |
1436 | // return 0 | |
1437 | res = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull; | |
1438 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1439 | if (exponent_x > DECIMAL_MAX_EXPON_64) | |
1440 | exponent_x = DECIMAL_MAX_EXPON_64; | |
1441 | else if (exponent_x < 0) | |
1442 | exponent_x = 0; | |
1443 | res |= (((UINT64) exponent_x) << 53); | |
1444 | BID_RETURN (res); | |
1445 | } | |
1446 | } | |
1447 | if (!valid_y) { | |
1448 | // y is Inf. or NaN | |
1449 | ||
1450 | // test if y is NaN | |
1451 | if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1452 | #ifdef SET_STATUS_FLAGS | |
1453 | if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
1454 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1455 | #endif | |
1456 | Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull); | |
1457 | Tmp.w[0] = CY.w[0]; | |
1458 | TP128 = reciprocals10_128[18]; | |
a3365171 | 1459 | __mul_128x128_high (Qh, Tmp, TP128); |
84d1fc49 | 1460 | amount = recip_scale[18]; |
1461 | __shr_128 (Tmp, Qh, amount); | |
1462 | res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; | |
1463 | BID_RETURN (res); | |
1464 | } | |
1465 | // y is Infinity? | |
1466 | if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1467 | // return +/-0 | |
1468 | res = sign_x ^ sign_y; | |
1469 | BID_RETURN (res); | |
1470 | } | |
1471 | // y is 0, return +/-Inf | |
1472 | res = | |
1473 | ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1474 | #ifdef SET_STATUS_FLAGS | |
1475 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1476 | #endif | |
1477 | BID_RETURN (res); | |
1478 | } | |
1479 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1480 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1481 | #endif | |
1482 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1483 | ||
1484 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
1485 | // CX < CY | |
1486 | ||
1487 | // 2^64 | |
1488 | f64.i = 0x5f800000; | |
1489 | ||
1490 | // fx ~ CX, fy ~ CY | |
1491 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1492 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1493 | // expon_cy - expon_cx | |
1494 | bin_index = (fy.i - fx.i) >> 23; | |
1495 | ||
1496 | if (CX.w[1]) { | |
1497 | T = power10_index_binexp_128[bin_index].w[0]; | |
1498 | __mul_64x128_short (CA, T, CX); | |
1499 | } else { | |
1500 | T128 = power10_index_binexp_128[bin_index]; | |
1501 | __mul_64x128_short (CA, CX.w[0], T128); | |
1502 | } | |
1503 | ||
1504 | ed2 = 15; | |
1505 | if (__unsigned_compare_gt_128 (CY, CA)) | |
1506 | ed2++; | |
1507 | ||
1508 | T128 = power10_table_128[ed2]; | |
1509 | __mul_128x128_to_256 (CA4, CA, T128); | |
1510 | ||
1511 | ed2 += estimate_decimal_digits[bin_index]; | |
1512 | CQ.w[0] = CQ.w[1] = 0; | |
1513 | diff_expon = diff_expon - ed2; | |
1514 | ||
1515 | } else { | |
1516 | // get CQ = CX/CY | |
1517 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
1518 | ||
1519 | // get number of decimal digits in CQ | |
1520 | // 2^64 | |
1521 | f64.i = 0x5f800000; | |
1522 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1523 | // binary expon. of CQ | |
1524 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
1525 | ||
1526 | digits_q = estimate_decimal_digits[bin_expon]; | |
1527 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1528 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1529 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1530 | digits_q++; | |
1531 | ||
1532 | if (digits_q <= 16) { | |
1533 | if (!CR.w[1] && !CR.w[0]) { | |
1534 | res = get_BID64 (sign_x ^ sign_y, diff_expon, | |
1535 | CQ.w[0], rnd_mode, pfpsf); | |
1536 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1537 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1538 | #endif | |
1539 | BID_RETURN (res); | |
1540 | } | |
1541 | ||
1542 | ed2 = 16 - digits_q; | |
1543 | T128.w[0] = power10_table_128[ed2].w[0]; | |
1544 | __mul_64x128_to_192 (CA4, (T128.w[0]), CR); | |
1545 | diff_expon = diff_expon - ed2; | |
1546 | CQ.w[0] *= T128.w[0]; | |
1547 | } else { | |
1548 | ed2 = digits_q - 16; | |
1549 | diff_expon += ed2; | |
1550 | T128 = reciprocals10_128[ed2]; | |
1551 | __mul_128x128_to_256 (P256, CQ, T128); | |
1552 | amount = recip_scale[ed2]; | |
1553 | CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); | |
1554 | CQ.w[1] = 0; | |
1555 | ||
1556 | __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); | |
1557 | ||
1558 | __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); | |
1559 | QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; | |
1560 | ||
1561 | CA4.w[1] = CX.w[1] - QB256.w[1]; | |
1562 | CA4.w[0] = CX.w[0] - QB256.w[0]; | |
1563 | if (CX.w[0] < QB256.w[0]) | |
1564 | CA4.w[1]--; | |
1565 | if (CR.w[0] || CR.w[1]) | |
1566 | CA4.w[0] |= 1; | |
1567 | done = 1; | |
1568 | if(CA4.w[1]|CA4.w[0]) { | |
1569 | __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY); | |
1570 | } | |
1571 | } | |
1572 | ||
1573 | } | |
1574 | ||
1575 | if (!done) { | |
1576 | __div_256_by_128 (&CQ, &CA4, CY); | |
1577 | } | |
1578 | ||
1579 | ||
1580 | ||
1581 | #ifdef SET_STATUS_FLAGS | |
1582 | if (CA4.w[0] || CA4.w[1]) { | |
1583 | // set status flags | |
1584 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1585 | } | |
1586 | #ifndef LEAVE_TRAILING_ZEROS | |
1587 | else | |
1588 | #endif | |
1589 | #else | |
1590 | #ifndef LEAVE_TRAILING_ZEROS | |
1591 | if (!CA4.w[0] && !CA4.w[1]) | |
1592 | #endif | |
1593 | #endif | |
1594 | #ifndef LEAVE_TRAILING_ZEROS | |
1595 | // check whether result is exact | |
1596 | { | |
1597 | if(!done) { | |
1598 | // check whether CX, CY are short | |
1599 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1600 | i = (int) CY.w[0] - 1; | |
1601 | j = (int) CX.w[0] - 1; | |
1602 | // difference in powers of 2 factors for Y and X | |
1603 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1604 | // difference in powers of 5 factors | |
1605 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
1606 | if (d5 < nzeros) | |
1607 | nzeros = d5; | |
1608 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 1609 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 1610 | //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
1611 | ||
1612 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1613 | amount = recip_scale[nzeros]; | |
1614 | __shr_128_long (CQ, Qh, amount); | |
1615 | ||
1616 | diff_expon += nzeros; | |
1617 | } else { | |
1618 | // decompose Q as Qh*10^17 + Ql | |
1619 | //T128 = reciprocals10_128[17]; | |
1620 | Q_low = CQ.w[0]; | |
1621 | ||
1622 | { | |
1623 | tdigit[0] = Q_low & 0x3ffffff; | |
1624 | tdigit[1] = 0; | |
1625 | QX = Q_low >> 26; | |
1626 | QX32 = QX; | |
1627 | nzeros = 0; | |
1628 | ||
1629 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1630 | k = (QX32 & 127); | |
1631 | tdigit[0] += convert_table[j][k][0]; | |
1632 | tdigit[1] += convert_table[j][k][1]; | |
1633 | if (tdigit[0] >= 100000000) { | |
1634 | tdigit[0] -= 100000000; | |
1635 | tdigit[1]++; | |
1636 | } | |
1637 | } | |
1638 | ||
1639 | if (tdigit[1] >= 100000000) { | |
1640 | tdigit[1] -= 100000000; | |
1641 | if (tdigit[1] >= 100000000) | |
1642 | tdigit[1] -= 100000000; | |
1643 | } | |
1644 | ||
1645 | digit = tdigit[0]; | |
1646 | if (!digit && !tdigit[1]) | |
1647 | nzeros += 16; | |
1648 | else { | |
1649 | if (!digit) { | |
1650 | nzeros += 8; | |
1651 | digit = tdigit[1]; | |
1652 | } | |
1653 | // decompose digit | |
1654 | PD = (UINT64) digit *0x068DB8BBull; | |
1655 | digit_h = (UINT32) (PD >> 40); | |
1656 | digit_low = digit - digit_h * 10000; | |
1657 | ||
1658 | if (!digit_low) | |
1659 | nzeros += 4; | |
1660 | else | |
1661 | digit_h = digit_low; | |
1662 | ||
1663 | if (!(digit_h & 1)) | |
1664 | nzeros += | |
1665 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1666 | (digit_h & 7)); | |
1667 | } | |
1668 | ||
1669 | if (nzeros) { | |
1670 | // get P*(2^M[extra_digits])/10^extra_digits | |
a3365171 | 1671 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
84d1fc49 | 1672 | |
1673 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1674 | amount = recip_scale[nzeros]; | |
1675 | __shr_128 (CQ, Qh, amount); | |
1676 | } | |
1677 | diff_expon += nzeros; | |
1678 | ||
1679 | } | |
1680 | } | |
1681 | } | |
1682 | if(diff_expon>=0){ | |
1683 | res = | |
1684 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], | |
1685 | rnd_mode, pfpsf); | |
1686 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1687 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1688 | #endif | |
1689 | BID_RETURN (res); | |
1690 | } | |
1691 | } | |
1692 | #endif | |
1693 | ||
1694 | if(diff_expon>=0) { | |
1695 | ||
1696 | #ifdef IEEE_ROUND_NEAREST | |
1697 | // rounding | |
1698 | // 2*CA4 - CY | |
1699 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1700 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1701 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1702 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1703 | ||
1704 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1705 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1706 | ||
1707 | CQ.w[0] += carry64; | |
1708 | //if(CQ.w[0]<carry64) | |
1709 | //CQ.w[1] ++; | |
1710 | #else | |
1711 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1712 | // rounding | |
1713 | // 2*CA4 - CY | |
1714 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1715 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1716 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1717 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1718 | ||
1719 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1720 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1721 | ||
1722 | CQ.w[0] += carry64; | |
1723 | if (CQ.w[0] < carry64) | |
1724 | CQ.w[1]++; | |
1725 | #else | |
1726 | rmode = rnd_mode; | |
1727 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1728 | rmode = 3 - rmode; | |
1729 | switch (rmode) { | |
1730 | case ROUNDING_TO_NEAREST: // round to nearest code | |
1731 | // rounding | |
1732 | // 2*CA4 - CY | |
1733 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1734 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1735 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1736 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1737 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1738 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1739 | CQ.w[0] += carry64; | |
1740 | if (CQ.w[0] < carry64) | |
1741 | CQ.w[1]++; | |
1742 | break; | |
1743 | case ROUNDING_TIES_AWAY: | |
1744 | // rounding | |
1745 | // 2*CA4 - CY | |
1746 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1747 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1748 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1749 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1750 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1751 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1752 | CQ.w[0] += carry64; | |
1753 | if (CQ.w[0] < carry64) | |
1754 | CQ.w[1]++; | |
1755 | break; | |
1756 | case ROUNDING_DOWN: | |
1757 | case ROUNDING_TO_ZERO: | |
1758 | break; | |
1759 | default: // rounding up | |
1760 | CQ.w[0]++; | |
1761 | if (!CQ.w[0]) | |
1762 | CQ.w[1]++; | |
1763 | break; | |
1764 | } | |
1765 | #endif | |
1766 | #endif | |
1767 | ||
1768 | ||
1769 | res = | |
1770 | fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, | |
1771 | pfpsf); | |
1772 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1773 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1774 | #endif | |
1775 | BID_RETURN (res); | |
1776 | } else { | |
1777 | // UF occurs | |
1778 | ||
1779 | #ifdef SET_STATUS_FLAGS | |
1780 | if ((diff_expon + 16 < 0)) { | |
1781 | // set status flags | |
1782 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1783 | } | |
1784 | #endif | |
1785 | rmode = rnd_mode; | |
1786 | res = | |
1787 | get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); | |
1788 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1789 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1790 | #endif | |
1791 | BID_RETURN (res); | |
1792 | ||
1793 | } | |
9b6b0236 | 1794 | |
1795 | } |