]>
Commit | Line | Data |
---|---|---|
7adcbafe | 1 | /* Copyright (C) 2007-2022 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | #define BID_128RES | |
b2a00c89 L |
25 | #include "bid_div_macros.h" |
26 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
27 | #include <fenv.h> | |
200359e8 | 28 | |
b2a00c89 L |
29 | #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT |
30 | #endif | |
31 | ||
32 | extern UINT32 convert_table[5][128][2]; | |
33 | extern SINT8 factors[][2]; | |
34 | extern UINT8 packed_10000_zeros[]; | |
200359e8 | 35 | |
b2a00c89 | 36 | BID128_FUNCTION_ARG2 (bid128_div, x, y) |
200359e8 | 37 | |
b2a00c89 | 38 | UINT256 CA4, CA4r, P256; |
0b6df824 | 39 | UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; |
b2a00c89 L |
40 | UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, |
41 | valid_y; | |
42 | int_float fx, fy, f64; | |
43 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
44 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
45 | digits_q, amount; | |
46 | int nzeros, i, j, k, d5; | |
47 | unsigned rmode; | |
48 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
49 | fexcept_t binaryflags = 0; | |
50 | #endif | |
200359e8 | 51 | |
b2a00c89 | 52 | valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); |
200359e8 L |
53 | |
54 | // unpack arguments, check for NaN or Infinity | |
b2a00c89 | 55 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { |
200359e8 | 56 | // test if x is NaN |
b2a00c89 | 57 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
200359e8 | 58 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
59 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN |
60 | (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
61 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
200359e8 | 62 | #endif |
b2a00c89 L |
63 | res.w[1] = (CX.w[1]) & QUIET_MASK64; |
64 | res.w[0] = CX.w[0]; | |
65 | BID_RETURN (res); | |
66 | } | |
200359e8 | 67 | // x is Infinity? |
b2a00c89 L |
68 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
69 | // check if y is Inf. | |
70 | if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
71 | // return NaN | |
72 | { | |
200359e8 | 73 | #ifdef SET_STATUS_FLAGS |
b2a00c89 | 74 | __set_status_flags (pfpsf, INVALID_EXCEPTION); |
200359e8 | 75 | #endif |
b2a00c89 L |
76 | res.w[1] = 0x7c00000000000000ull; |
77 | res.w[0] = 0; | |
78 | BID_RETURN (res); | |
79 | } | |
80 | // y is NaN? | |
81 | if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) | |
82 | // return NaN | |
83 | { | |
84 | // return +/-Inf | |
85 | res.w[1] = ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | | |
86 | 0x7800000000000000ull; | |
87 | res.w[0] = 0; | |
88 | BID_RETURN (res); | |
89 | } | |
90 | } | |
200359e8 | 91 | // x is 0 |
b2a00c89 L |
92 | if ((y.w[1] & 0x7800000000000000ull) < 0x7800000000000000ull) { |
93 | if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
200359e8 | 94 | #ifdef SET_STATUS_FLAGS |
b2a00c89 | 95 | __set_status_flags (pfpsf, INVALID_EXCEPTION); |
200359e8 | 96 | #endif |
b2a00c89 L |
97 | // x=y=0, return NaN |
98 | res.w[1] = 0x7c00000000000000ull; | |
99 | res.w[0] = 0; | |
100 | BID_RETURN (res); | |
200359e8 | 101 | } |
b2a00c89 L |
102 | // return 0 |
103 | res.w[1] = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull; | |
104 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
105 | if (exponent_x > DECIMAL_MAX_EXPON_128) | |
106 | exponent_x = DECIMAL_MAX_EXPON_128; | |
107 | else if (exponent_x < 0) | |
108 | exponent_x = 0; | |
109 | res.w[1] |= (((UINT64) exponent_x) << 49); | |
110 | res.w[0] = 0; | |
111 | BID_RETURN (res); | |
112 | } | |
113 | } | |
114 | if (!valid_y) { | |
115 | // y is Inf. or NaN | |
200359e8 | 116 | |
b2a00c89 L |
117 | // test if y is NaN |
118 | if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
200359e8 | 119 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
120 | if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN |
121 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
200359e8 | 122 | #endif |
b2a00c89 L |
123 | res.w[1] = CY.w[1] & QUIET_MASK64; |
124 | res.w[0] = CY.w[0]; | |
125 | BID_RETURN (res); | |
126 | } | |
127 | // y is Infinity? | |
128 | if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
129 | // return +/-0 | |
130 | res.w[1] = sign_x ^ sign_y; | |
131 | res.w[0] = 0; | |
132 | BID_RETURN (res); | |
133 | } | |
134 | // y is 0, return +/-Inf | |
200359e8 | 135 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
136 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); |
137 | #endif | |
138 | res.w[1] = | |
139 | ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
140 | res.w[0] = 0; | |
141 | BID_RETURN (res); | |
142 | } | |
143 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
144 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
145 | #endif | |
146 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
147 | ||
148 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
149 | // CX < CY | |
150 | ||
151 | // 2^64 | |
152 | f64.i = 0x5f800000; | |
153 | ||
154 | // fx ~ CX, fy ~ CY | |
155 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
156 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
157 | // expon_cy - expon_cx | |
158 | bin_index = (fy.i - fx.i) >> 23; | |
159 | ||
160 | if (CX.w[1]) { | |
161 | T = power10_index_binexp_128[bin_index].w[0]; | |
162 | __mul_64x128_short (CA, T, CX); | |
163 | } else { | |
164 | T128 = power10_index_binexp_128[bin_index]; | |
165 | __mul_64x128_short (CA, CX.w[0], T128); | |
166 | } | |
167 | ||
168 | ed2 = 33; | |
169 | if (__unsigned_compare_gt_128 (CY, CA)) | |
170 | ed2++; | |
171 | ||
172 | T128 = power10_table_128[ed2]; | |
173 | __mul_128x128_to_256 (CA4, CA, T128); | |
174 | ||
175 | ed2 += estimate_decimal_digits[bin_index]; | |
176 | CQ.w[0] = CQ.w[1] = 0; | |
177 | diff_expon = diff_expon - ed2; | |
178 | ||
179 | } else { | |
180 | // get CQ = CX/CY | |
181 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
182 | ||
183 | if (!CR.w[1] && !CR.w[0]) { | |
184 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
185 | pfpsf); | |
186 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
187 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
200359e8 | 188 | #endif |
200359e8 L |
189 | BID_RETURN (res); |
190 | } | |
b2a00c89 L |
191 | // get number of decimal digits in CQ |
192 | // 2^64 | |
193 | f64.i = 0x5f800000; | |
194 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
195 | // binary expon. of CQ | |
196 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
197 | ||
198 | digits_q = estimate_decimal_digits[bin_expon]; | |
199 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
200 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
201 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
202 | digits_q++; | |
203 | ||
204 | ed2 = 34 - digits_q; | |
205 | T128.w[0] = power10_table_128[ed2].w[0]; | |
206 | T128.w[1] = power10_table_128[ed2].w[1]; | |
207 | __mul_128x128_to_256 (CA4, CR, T128); | |
208 | diff_expon = diff_expon - ed2; | |
209 | __mul_128x128_low (CQ, CQ, T128); | |
210 | ||
211 | } | |
212 | ||
213 | __div_256_by_128 (&CQ, &CA4, CY); | |
214 | ||
215 | #ifdef SET_STATUS_FLAGS | |
216 | if (CA4.w[0] || CA4.w[1]) { | |
217 | // set status flags | |
218 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
219 | } | |
220 | #ifndef LEAVE_TRAILING_ZEROS | |
221 | else | |
222 | #endif | |
223 | #else | |
224 | #ifndef LEAVE_TRAILING_ZEROS | |
225 | if (!CA4.w[0] && !CA4.w[1]) | |
226 | #endif | |
227 | #endif | |
228 | #ifndef LEAVE_TRAILING_ZEROS | |
229 | // check whether result is exact | |
230 | { | |
231 | // check whether CX, CY are short | |
232 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
233 | i = (int) CY.w[0] - 1; | |
234 | j = (int) CX.w[0] - 1; | |
235 | // difference in powers of 2 factors for Y and X | |
236 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
237 | // difference in powers of 5 factors | |
238 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
239 | if (d5 < nzeros) | |
240 | nzeros = d5; | |
241 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 242 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
200359e8 | 243 | |
b2a00c89 L |
244 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
245 | amount = recip_scale[nzeros]; | |
246 | __shr_128_long (CQ, Qh, amount); | |
200359e8 | 247 | |
b2a00c89 L |
248 | diff_expon += nzeros; |
249 | } else { | |
250 | // decompose Q as Qh*10^17 + Ql | |
251 | //T128 = reciprocals10_128[17]; | |
252 | T128.w[0] = 0x44909befeb9fad49ull; | |
253 | T128.w[1] = 0x000b877aa3236a4bull; | |
254 | __mul_128x128_to_256 (P256, CQ, T128); | |
255 | //amount = recip_scale[17]; | |
256 | Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
257 | Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
258 | ||
259 | if (!Q_low) { | |
260 | diff_expon += 17; | |
261 | ||
262 | tdigit[0] = Q_high & 0x3ffffff; | |
263 | tdigit[1] = 0; | |
264 | QX = Q_high >> 26; | |
265 | QX32 = QX; | |
266 | nzeros = 0; | |
267 | ||
268 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
269 | k = (QX32 & 127); | |
270 | tdigit[0] += convert_table[j][k][0]; | |
271 | tdigit[1] += convert_table[j][k][1]; | |
272 | if (tdigit[0] >= 100000000) { | |
273 | tdigit[0] -= 100000000; | |
274 | tdigit[1]++; | |
275 | } | |
276 | } | |
277 | ||
278 | if (tdigit[1] >= 100000000) { | |
279 | tdigit[1] -= 100000000; | |
280 | if (tdigit[1] >= 100000000) | |
281 | tdigit[1] -= 100000000; | |
282 | } | |
283 | ||
284 | digit = tdigit[0]; | |
285 | if (!digit && !tdigit[1]) | |
286 | nzeros += 16; | |
287 | else { | |
288 | if (!digit) { | |
289 | nzeros += 8; | |
290 | digit = tdigit[1]; | |
291 | } | |
292 | // decompose digit | |
293 | PD = (UINT64) digit *0x068DB8BBull; | |
294 | digit_h = (UINT32) (PD >> 40); | |
295 | digit_low = digit - digit_h * 10000; | |
200359e8 | 296 | |
b2a00c89 L |
297 | if (!digit_low) |
298 | nzeros += 4; | |
299 | else | |
300 | digit_h = digit_low; | |
200359e8 | 301 | |
b2a00c89 L |
302 | if (!(digit_h & 1)) |
303 | nzeros += | |
304 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
305 | (digit_h & 7)); | |
306 | } | |
307 | ||
308 | if (nzeros) { | |
309 | __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
310 | ||
311 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
312 | amount = short_recip_scale[nzeros]; | |
313 | CQ.w[0] = CQ.w[1] >> amount; | |
314 | } else | |
315 | CQ.w[0] = Q_high; | |
316 | CQ.w[1] = 0; | |
317 | ||
318 | diff_expon += nzeros; | |
200359e8 | 319 | } else { |
b2a00c89 L |
320 | tdigit[0] = Q_low & 0x3ffffff; |
321 | tdigit[1] = 0; | |
322 | QX = Q_low >> 26; | |
323 | QX32 = QX; | |
324 | nzeros = 0; | |
325 | ||
326 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
327 | k = (QX32 & 127); | |
328 | tdigit[0] += convert_table[j][k][0]; | |
329 | tdigit[1] += convert_table[j][k][1]; | |
330 | if (tdigit[0] >= 100000000) { | |
331 | tdigit[0] -= 100000000; | |
332 | tdigit[1]++; | |
333 | } | |
334 | } | |
335 | ||
336 | if (tdigit[1] >= 100000000) { | |
337 | tdigit[1] -= 100000000; | |
338 | if (tdigit[1] >= 100000000) | |
339 | tdigit[1] -= 100000000; | |
340 | } | |
341 | ||
342 | digit = tdigit[0]; | |
343 | if (!digit && !tdigit[1]) | |
344 | nzeros += 16; | |
345 | else { | |
346 | if (!digit) { | |
347 | nzeros += 8; | |
348 | digit = tdigit[1]; | |
349 | } | |
350 | // decompose digit | |
351 | PD = (UINT64) digit *0x068DB8BBull; | |
352 | digit_h = (UINT32) (PD >> 40); | |
353 | digit_low = digit - digit_h * 10000; | |
354 | ||
355 | if (!digit_low) | |
356 | nzeros += 4; | |
357 | else | |
358 | digit_h = digit_low; | |
359 | ||
360 | if (!(digit_h & 1)) | |
361 | nzeros += | |
362 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
363 | (digit_h & 7)); | |
364 | } | |
365 | ||
366 | if (nzeros) { | |
367 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 368 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
369 | |
370 | //now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
371 | amount = recip_scale[nzeros]; | |
372 | __shr_128 (CQ, Qh, amount); | |
373 | } | |
374 | diff_expon += nzeros; | |
375 | ||
200359e8 | 376 | } |
b2a00c89 L |
377 | } |
378 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
379 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
380 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
381 | #endif | |
382 | BID_RETURN (res); | |
383 | } | |
384 | #endif | |
385 | ||
386 | if (diff_expon >= 0) { | |
387 | #ifdef IEEE_ROUND_NEAREST | |
388 | // rounding | |
389 | // 2*CA4 - CY | |
390 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
391 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
392 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
393 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
394 | ||
395 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
396 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
397 | ||
398 | CQ.w[0] += carry64; | |
399 | if (CQ.w[0] < carry64) | |
400 | CQ.w[1]++; | |
401 | #else | |
402 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
403 | // rounding | |
404 | // 2*CA4 - CY | |
405 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
406 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
407 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
408 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
409 | ||
410 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
411 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
412 | ||
413 | CQ.w[0] += carry64; | |
414 | if (CQ.w[0] < carry64) | |
415 | CQ.w[1]++; | |
416 | #else | |
417 | rmode = rnd_mode; | |
418 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
419 | rmode = 3 - rmode; | |
420 | switch (rmode) { | |
421 | case ROUNDING_TO_NEAREST: // round to nearest code | |
422 | // rounding | |
423 | // 2*CA4 - CY | |
424 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
425 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
426 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
427 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
428 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
429 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
430 | CQ.w[0] += carry64; | |
431 | if (CQ.w[0] < carry64) | |
432 | CQ.w[1]++; | |
433 | break; | |
434 | case ROUNDING_TIES_AWAY: | |
435 | // rounding | |
436 | // 2*CA4 - CY | |
437 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
438 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
439 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
440 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
441 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
442 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
443 | CQ.w[0] += carry64; | |
444 | if (CQ.w[0] < carry64) | |
445 | CQ.w[1]++; | |
446 | break; | |
447 | case ROUNDING_DOWN: | |
448 | case ROUNDING_TO_ZERO: | |
449 | break; | |
450 | default: // rounding up | |
451 | CQ.w[0]++; | |
452 | if (!CQ.w[0]) | |
453 | CQ.w[1]++; | |
454 | break; | |
455 | } | |
456 | #endif | |
457 | #endif | |
458 | ||
459 | } else { | |
460 | #ifdef SET_STATUS_FLAGS | |
461 | if (CA4.w[0] || CA4.w[1]) { | |
462 | // set status flags | |
463 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
464 | } | |
465 | #endif | |
466 | ||
467 | handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
468 | CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
469 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
470 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
471 | #endif | |
472 | BID_RETURN (res); | |
473 | ||
474 | } | |
475 | ||
476 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
477 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
478 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
479 | #endif | |
480 | BID_RETURN (res); | |
481 | } | |
482 | ||
483 | ||
484 | //#define LEAVE_TRAILING_ZEROS | |
485 | ||
486 | TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2 (UINT128, bid128dd_div, UINT64, x, | |
487 | UINT64, y) | |
488 | ||
489 | UINT256 CA4, CA4r, P256; | |
0b6df824 | 490 | UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; |
b2a00c89 L |
491 | UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, |
492 | valid_y; | |
493 | int_float fx, fy, f64; | |
494 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
495 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
496 | digits_q, amount; | |
497 | int nzeros, i, j, k, d5; | |
498 | unsigned rmode; | |
499 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
500 | fexcept_t binaryflags = 0; | |
501 | #endif | |
502 | ||
503 | valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); | |
504 | ||
505 | // unpack arguments, check for NaN or Infinity | |
506 | CX.w[1] = 0; | |
507 | if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) { | |
508 | #ifdef SET_STATUS_FLAGS | |
509 | if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN | |
510 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
511 | #endif | |
512 | ||
513 | // test if x is NaN | |
514 | if ((x & NAN_MASK64) == NAN_MASK64) { | |
515 | #ifdef SET_STATUS_FLAGS | |
516 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
517 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
518 | #endif | |
519 | res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
520 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
521 | res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
522 | BID_RETURN (res); | |
523 | } | |
524 | // x is Infinity? | |
525 | if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
526 | // check if y is Inf. | |
527 | if ((((y) & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
528 | // return NaN | |
529 | { | |
530 | #ifdef SET_STATUS_FLAGS | |
531 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
532 | #endif | |
533 | res.w[1] = 0x7c00000000000000ull; | |
534 | res.w[0] = 0; | |
535 | BID_RETURN (res); | |
536 | } | |
537 | if ((((y) & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
538 | // otherwise return +/-Inf | |
539 | res.w[1] = | |
540 | (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
541 | res.w[0] = 0; | |
542 | BID_RETURN (res); | |
543 | } | |
544 | } | |
545 | // x is 0 | |
546 | if ((((y) & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
547 | if(!CY.w[0]) { | |
548 | #ifdef SET_STATUS_FLAGS | |
549 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
550 | #endif | |
551 | // x=y=0, return NaN | |
552 | res.w[1] = 0x7c00000000000000ull; | |
553 | res.w[0] = 0; | |
554 | BID_RETURN (res); | |
555 | } | |
556 | // return 0 | |
557 | res.w[1] = ((x) ^ (y)) & 0x8000000000000000ull; | |
558 | if (((y) & 0x6000000000000000ull) == 0x6000000000000000ull) | |
559 | exponent_y = ((UINT32) ((y) >> 51)) & 0x3ff; | |
560 | else | |
561 | exponent_y = ((UINT32) ((y) >> 53)) & 0x3ff; | |
562 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
563 | if (exponent_x > DECIMAL_MAX_EXPON_128) | |
564 | exponent_x = DECIMAL_MAX_EXPON_128; | |
565 | else if (exponent_x < 0) | |
566 | exponent_x = 0; | |
567 | res.w[1] |= (((UINT64) exponent_x) << 49); | |
568 | res.w[0] = 0; | |
569 | BID_RETURN (res); | |
570 | } | |
571 | } | |
572 | ||
573 | CY.w[1] = 0; | |
574 | if (!valid_y) { | |
575 | // y is Inf. or NaN | |
200359e8 | 576 | |
b2a00c89 L |
577 | // test if y is NaN |
578 | if ((y & NAN_MASK64) == NAN_MASK64) { | |
579 | #ifdef SET_STATUS_FLAGS | |
580 | if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
581 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
582 | #endif | |
583 | res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); | |
584 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
585 | res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); | |
586 | BID_RETURN (res); | |
587 | } | |
588 | // y is Infinity? | |
589 | if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
590 | // return +/-0 | |
591 | res.w[1] = sign_x ^ sign_y; | |
592 | res.w[0] = 0; | |
593 | BID_RETURN (res); | |
594 | } | |
595 | // y is 0, return +/-Inf | |
596 | res.w[1] = | |
597 | (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
598 | res.w[0] = 0; | |
599 | #ifdef SET_STATUS_FLAGS | |
600 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
601 | #endif | |
602 | BID_RETURN (res); | |
603 | } | |
604 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
605 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
606 | #endif | |
607 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
200359e8 | 608 | |
b2a00c89 L |
609 | if (__unsigned_compare_gt_128 (CY, CX)) { |
610 | // CX < CY | |
200359e8 | 611 | |
b2a00c89 L |
612 | // 2^64 |
613 | f64.i = 0x5f800000; | |
200359e8 | 614 | |
b2a00c89 L |
615 | // fx ~ CX, fy ~ CY |
616 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
617 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
618 | // expon_cy - expon_cx | |
619 | bin_index = (fy.i - fx.i) >> 23; | |
620 | ||
621 | if (CX.w[1]) { | |
622 | T = power10_index_binexp_128[bin_index].w[0]; | |
623 | __mul_64x128_short (CA, T, CX); | |
200359e8 | 624 | } else { |
b2a00c89 L |
625 | T128 = power10_index_binexp_128[bin_index]; |
626 | __mul_64x128_short (CA, CX.w[0], T128); | |
627 | } | |
200359e8 | 628 | |
b2a00c89 L |
629 | ed2 = 33; |
630 | if (__unsigned_compare_gt_128 (CY, CA)) | |
631 | ed2++; | |
632 | ||
633 | T128 = power10_table_128[ed2]; | |
634 | __mul_128x128_to_256 (CA4, CA, T128); | |
200359e8 | 635 | |
b2a00c89 L |
636 | ed2 += estimate_decimal_digits[bin_index]; |
637 | CQ.w[0] = CQ.w[1] = 0; | |
638 | diff_expon = diff_expon - ed2; | |
200359e8 | 639 | |
b2a00c89 L |
640 | } else { |
641 | // get CQ = CX/CY | |
642 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
200359e8 | 643 | |
b2a00c89 L |
644 | if (!CR.w[1] && !CR.w[0]) { |
645 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
646 | pfpsf); | |
647 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
648 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
649 | #endif | |
650 | BID_RETURN (res); | |
200359e8 | 651 | } |
b2a00c89 L |
652 | // get number of decimal digits in CQ |
653 | // 2^64 | |
654 | f64.i = 0x5f800000; | |
655 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
656 | // binary expon. of CQ | |
657 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
658 | ||
659 | digits_q = estimate_decimal_digits[bin_expon]; | |
660 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
661 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
662 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
663 | digits_q++; | |
664 | ||
665 | ed2 = 34 - digits_q; | |
666 | T128.w[0] = power10_table_128[ed2].w[0]; | |
667 | T128.w[1] = power10_table_128[ed2].w[1]; | |
668 | __mul_128x128_to_256 (CA4, CR, T128); | |
669 | diff_expon = diff_expon - ed2; | |
670 | __mul_128x128_low (CQ, CQ, T128); | |
671 | ||
672 | } | |
673 | ||
674 | __div_256_by_128 (&CQ, &CA4, CY); | |
200359e8 | 675 | |
200359e8 L |
676 | |
677 | #ifdef SET_STATUS_FLAGS | |
678 | if (CA4.w[0] || CA4.w[1]) { | |
679 | // set status flags | |
680 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
681 | } | |
682 | #ifndef LEAVE_TRAILING_ZEROS | |
683 | else | |
684 | #endif | |
685 | #else | |
686 | #ifndef LEAVE_TRAILING_ZEROS | |
687 | if (!CA4.w[0] && !CA4.w[1]) | |
688 | #endif | |
689 | #endif | |
690 | #ifndef LEAVE_TRAILING_ZEROS | |
691 | // check whether result is exact | |
692 | { | |
693 | // check whether CX, CY are short | |
694 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
695 | i = (int) CY.w[0] - 1; | |
696 | j = (int) CX.w[0] - 1; | |
b2a00c89 L |
697 | // difference in powers of 2 factors for Y and X |
698 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
699 | // difference in powers of 5 factors | |
700 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
200359e8 | 701 | if (d5 < nzeros) |
b2a00c89 | 702 | nzeros = d5; |
200359e8 | 703 | // get P*(2^M[extra_digits])/10^extra_digits |
0b6df824 | 704 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 | 705 | //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
200359e8 L |
706 | |
707 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
b2a00c89 | 708 | amount = recip_scale[nzeros]; |
200359e8 L |
709 | __shr_128_long (CQ, Qh, amount); |
710 | ||
711 | diff_expon += nzeros; | |
712 | } else { | |
713 | // decompose Q as Qh*10^17 + Ql | |
b2a00c89 | 714 | //T128 = reciprocals10_128[17]; |
200359e8 L |
715 | T128.w[0] = 0x44909befeb9fad49ull; |
716 | T128.w[1] = 0x000b877aa3236a4bull; | |
717 | __mul_128x128_to_256 (P256, CQ, T128); | |
b2a00c89 | 718 | //amount = recip_scale[17]; |
200359e8 L |
719 | Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); |
720 | Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
721 | ||
722 | if (!Q_low) { | |
b2a00c89 L |
723 | diff_expon += 17; |
724 | ||
725 | tdigit[0] = Q_high & 0x3ffffff; | |
726 | tdigit[1] = 0; | |
727 | QX = Q_high >> 26; | |
728 | QX32 = QX; | |
729 | nzeros = 0; | |
730 | ||
731 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
732 | k = (QX32 & 127); | |
733 | tdigit[0] += convert_table[j][k][0]; | |
734 | tdigit[1] += convert_table[j][k][1]; | |
735 | if (tdigit[0] >= 100000000) { | |
736 | tdigit[0] -= 100000000; | |
737 | tdigit[1]++; | |
738 | } | |
739 | } | |
740 | ||
741 | ||
742 | if (tdigit[1] >= 100000000) { | |
743 | tdigit[1] -= 100000000; | |
744 | if (tdigit[1] >= 100000000) | |
745 | tdigit[1] -= 100000000; | |
746 | } | |
747 | ||
748 | digit = tdigit[0]; | |
749 | if (!digit && !tdigit[1]) | |
750 | nzeros += 16; | |
751 | else { | |
752 | if (!digit) { | |
753 | nzeros += 8; | |
754 | digit = tdigit[1]; | |
755 | } | |
756 | // decompose digit | |
757 | PD = (UINT64) digit *0x068DB8BBull; | |
758 | digit_h = (UINT32) (PD >> 40); | |
759 | digit_low = digit - digit_h * 10000; | |
760 | ||
761 | if (!digit_low) | |
762 | nzeros += 4; | |
763 | else | |
764 | digit_h = digit_low; | |
765 | ||
766 | if (!(digit_h & 1)) | |
767 | nzeros += | |
768 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
769 | (digit_h & 7)); | |
770 | } | |
771 | ||
772 | if (nzeros) { | |
773 | __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
774 | ||
775 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
776 | amount = short_recip_scale[nzeros]; | |
777 | CQ.w[0] = CQ.w[1] >> amount; | |
778 | } else | |
779 | CQ.w[0] = Q_high; | |
780 | CQ.w[1] = 0; | |
781 | ||
782 | diff_expon += nzeros; | |
200359e8 | 783 | } else { |
b2a00c89 L |
784 | tdigit[0] = Q_low & 0x3ffffff; |
785 | tdigit[1] = 0; | |
786 | QX = Q_low >> 26; | |
787 | QX32 = QX; | |
788 | nzeros = 0; | |
789 | ||
790 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
791 | k = (QX32 & 127); | |
792 | tdigit[0] += convert_table[j][k][0]; | |
793 | tdigit[1] += convert_table[j][k][1]; | |
794 | if (tdigit[0] >= 100000000) { | |
795 | tdigit[0] -= 100000000; | |
796 | tdigit[1]++; | |
797 | } | |
798 | } | |
799 | ||
800 | if (tdigit[1] >= 100000000) { | |
801 | tdigit[1] -= 100000000; | |
802 | if (tdigit[1] >= 100000000) | |
803 | tdigit[1] -= 100000000; | |
804 | } | |
805 | ||
806 | digit = tdigit[0]; | |
807 | if (!digit && !tdigit[1]) | |
808 | nzeros += 16; | |
809 | else { | |
810 | if (!digit) { | |
811 | nzeros += 8; | |
812 | digit = tdigit[1]; | |
813 | } | |
814 | // decompose digit | |
815 | PD = (UINT64) digit *0x068DB8BBull; | |
816 | digit_h = (UINT32) (PD >> 40); | |
817 | digit_low = digit - digit_h * 10000; | |
818 | ||
819 | if (!digit_low) | |
820 | nzeros += 4; | |
821 | else | |
822 | digit_h = digit_low; | |
823 | ||
824 | if (!(digit_h & 1)) | |
825 | nzeros += | |
826 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
827 | (digit_h & 7)); | |
828 | } | |
829 | ||
830 | if (nzeros) { | |
831 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 832 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
833 | |
834 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
835 | amount = recip_scale[nzeros]; | |
836 | __shr_128 (CQ, Qh, amount); | |
837 | } | |
838 | diff_expon += nzeros; | |
200359e8 L |
839 | |
840 | } | |
841 | } | |
b2a00c89 L |
842 | get_BID128(&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); |
843 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
844 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
845 | #endif | |
200359e8 L |
846 | BID_RETURN (res); |
847 | } | |
848 | #endif | |
849 | ||
b2a00c89 | 850 | if (diff_expon >= 0) { |
200359e8 | 851 | #ifdef IEEE_ROUND_NEAREST |
b2a00c89 L |
852 | // rounding |
853 | // 2*CA4 - CY | |
854 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
855 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
856 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
857 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
858 | ||
859 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
860 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
861 | ||
862 | CQ.w[0] += carry64; | |
863 | if (CQ.w[0] < carry64) | |
864 | CQ.w[1]++; | |
865 | #else | |
866 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
867 | // rounding | |
868 | // 2*CA4 - CY | |
869 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
870 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
871 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
872 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
873 | ||
874 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
875 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
876 | ||
877 | CQ.w[0] += carry64; | |
878 | if (CQ.w[0] < carry64) | |
879 | CQ.w[1]++; | |
880 | #else | |
881 | rmode = rnd_mode; | |
882 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
883 | rmode = 3 - rmode; | |
884 | switch (rmode) { | |
885 | case ROUNDING_TO_NEAREST: // round to nearest code | |
200359e8 L |
886 | // rounding |
887 | // 2*CA4 - CY | |
888 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
889 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
890 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
891 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
200359e8 L |
892 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
893 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
200359e8 L |
894 | CQ.w[0] += carry64; |
895 | if (CQ.w[0] < carry64) | |
896 | CQ.w[1]++; | |
b2a00c89 L |
897 | break; |
898 | case ROUNDING_TIES_AWAY: | |
200359e8 L |
899 | // rounding |
900 | // 2*CA4 - CY | |
901 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
902 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
903 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
904 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
200359e8 L |
905 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
906 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
200359e8 L |
907 | CQ.w[0] += carry64; |
908 | if (CQ.w[0] < carry64) | |
909 | CQ.w[1]++; | |
b2a00c89 L |
910 | break; |
911 | case ROUNDING_DOWN: | |
912 | case ROUNDING_TO_ZERO: | |
913 | break; | |
914 | default: // rounding up | |
915 | CQ.w[0]++; | |
916 | if (!CQ.w[0]) | |
917 | CQ.w[1]++; | |
918 | break; | |
919 | } | |
200359e8 L |
920 | #endif |
921 | #endif | |
922 | ||
b2a00c89 | 923 | } else { |
200359e8 | 924 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
925 | if (CA4.w[0] || CA4.w[1]) { |
926 | // set status flags | |
927 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
928 | } | |
200359e8 | 929 | #endif |
b2a00c89 L |
930 | handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, |
931 | CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
932 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
933 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
934 | #endif | |
935 | BID_RETURN (res); | |
200359e8 | 936 | |
b2a00c89 | 937 | } |
200359e8 | 938 | |
b2a00c89 L |
939 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); |
940 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
941 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
942 | #endif | |
943 | BID_RETURN (res); | |
944 | } | |
200359e8 | 945 | |
b2a00c89 L |
946 | |
947 | BID128_FUNCTION_ARGTYPE1_ARG128 (bid128dq_div, UINT64, x, y) | |
948 | UINT256 CA4, CA4r, P256; | |
0b6df824 | 949 | UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; |
b2a00c89 L |
950 | UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, valid_y, |
951 | PD; | |
952 | int_float fx, fy, f64; | |
953 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
954 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
955 | digits_q, amount; | |
956 | int nzeros, i, j, k, d5; | |
957 | unsigned rmode; | |
958 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
959 | fexcept_t binaryflags = 0; | |
960 | #endif | |
961 | ||
962 | valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); | |
963 | ||
964 | // unpack arguments, check for NaN or Infinity | |
965 | CX.w[1] = 0; | |
966 | if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { | |
967 | #ifdef SET_STATUS_FLAGS | |
968 | if ((y.w[1] & SNAN_MASK64) == SNAN_MASK64) // y is sNaN | |
969 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
970 | #endif | |
971 | ||
972 | // test if x is NaN | |
973 | if ((x & NAN_MASK64) == NAN_MASK64) { | |
974 | #ifdef SET_STATUS_FLAGS | |
975 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
976 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
977 | #endif | |
978 | res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
979 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
980 | res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
981 | BID_RETURN (res); | |
982 | } | |
983 | // x is Infinity? | |
984 | if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
985 | // check if y is Inf. | |
986 | if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
987 | // return NaN | |
988 | { | |
989 | #ifdef SET_STATUS_FLAGS | |
990 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
991 | #endif | |
992 | res.w[1] = 0x7c00000000000000ull; | |
993 | res.w[0] = 0; | |
994 | BID_RETURN (res); | |
995 | } | |
996 | if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
997 | // otherwise return +/-Inf | |
998 | res.w[1] = | |
999 | ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1000 | res.w[0] = 0; | |
1001 | BID_RETURN (res); | |
1002 | } | |
1003 | } | |
1004 | // x is 0 | |
1005 | if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) { | |
1006 | if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
1007 | #ifdef SET_STATUS_FLAGS | |
1008 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1009 | #endif | |
1010 | // x=y=0, return NaN | |
1011 | res.w[1] = 0x7c00000000000000ull; | |
1012 | res.w[0] = 0; | |
1013 | BID_RETURN (res); | |
1014 | } | |
1015 | // return 0 | |
1016 | res.w[1] = (x ^ y.w[1]) & 0x8000000000000000ull; | |
1017 | exponent_x = exponent_x - exponent_y + (DECIMAL_EXPONENT_BIAS_128<<1) - DECIMAL_EXPONENT_BIAS; | |
1018 | if (exponent_x > DECIMAL_MAX_EXPON_128) | |
1019 | exponent_x = DECIMAL_MAX_EXPON_128; | |
1020 | else if (exponent_x < 0) | |
1021 | exponent_x = 0; | |
1022 | res.w[1] |= (((UINT64) exponent_x) << 49); | |
1023 | res.w[0] = 0; | |
1024 | BID_RETURN (res); | |
1025 | } | |
1026 | } | |
1027 | exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS); | |
1028 | ||
1029 | if (!valid_y) { | |
1030 | // y is Inf. or NaN | |
1031 | ||
1032 | // test if y is NaN | |
1033 | if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1034 | #ifdef SET_STATUS_FLAGS | |
1035 | if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
1036 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1037 | #endif | |
1038 | res.w[1] = CY.w[1] & QUIET_MASK64; | |
1039 | res.w[0] = CY.w[0]; | |
1040 | BID_RETURN (res); | |
1041 | } | |
1042 | // y is Infinity? | |
1043 | if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1044 | // return +/-0 | |
1045 | res.w[1] = sign_x ^ sign_y; | |
1046 | res.w[0] = 0; | |
1047 | BID_RETURN (res); | |
1048 | } | |
1049 | // y is 0, return +/-Inf | |
1050 | res.w[1] = | |
1051 | ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1052 | res.w[0] = 0; | |
1053 | #ifdef SET_STATUS_FLAGS | |
1054 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1055 | #endif | |
1056 | BID_RETURN (res); | |
1057 | } | |
1058 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1059 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1060 | #endif | |
1061 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
1062 | ||
1063 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
1064 | // CX < CY | |
1065 | ||
1066 | // 2^64 | |
1067 | f64.i = 0x5f800000; | |
1068 | ||
1069 | // fx ~ CX, fy ~ CY | |
1070 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1071 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1072 | // expon_cy - expon_cx | |
1073 | bin_index = (fy.i - fx.i) >> 23; | |
1074 | ||
1075 | if (CX.w[1]) { | |
1076 | T = power10_index_binexp_128[bin_index].w[0]; | |
1077 | __mul_64x128_short (CA, T, CX); | |
1078 | } else { | |
1079 | T128 = power10_index_binexp_128[bin_index]; | |
1080 | __mul_64x128_short (CA, CX.w[0], T128); | |
1081 | } | |
1082 | ||
1083 | ed2 = 33; | |
1084 | if (__unsigned_compare_gt_128 (CY, CA)) | |
1085 | ed2++; | |
1086 | ||
1087 | T128 = power10_table_128[ed2]; | |
1088 | __mul_128x128_to_256 (CA4, CA, T128); | |
1089 | ||
1090 | ed2 += estimate_decimal_digits[bin_index]; | |
1091 | CQ.w[0] = CQ.w[1] = 0; | |
1092 | diff_expon = diff_expon - ed2; | |
1093 | ||
1094 | } else { | |
1095 | // get CQ = CX/CY | |
1096 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
1097 | ||
1098 | if (!CR.w[1] && !CR.w[0]) { | |
1099 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1100 | pfpsf); | |
1101 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1102 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1103 | #endif | |
1104 | BID_RETURN (res); | |
1105 | } | |
1106 | // get number of decimal digits in CQ | |
1107 | // 2^64 | |
1108 | f64.i = 0x5f800000; | |
1109 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1110 | // binary expon. of CQ | |
1111 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
1112 | ||
1113 | digits_q = estimate_decimal_digits[bin_expon]; | |
1114 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1115 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1116 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1117 | digits_q++; | |
1118 | ||
1119 | ed2 = 34 - digits_q; | |
1120 | T128.w[0] = power10_table_128[ed2].w[0]; | |
1121 | T128.w[1] = power10_table_128[ed2].w[1]; | |
1122 | __mul_128x128_to_256 (CA4, CR, T128); | |
1123 | diff_expon = diff_expon - ed2; | |
1124 | __mul_128x128_low (CQ, CQ, T128); | |
1125 | ||
1126 | } | |
1127 | ||
1128 | __div_256_by_128 (&CQ, &CA4, CY); | |
1129 | ||
1130 | #ifdef SET_STATUS_FLAGS | |
1131 | if (CA4.w[0] || CA4.w[1]) { | |
1132 | // set status flags | |
1133 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1134 | } | |
1135 | #ifndef LEAVE_TRAILING_ZEROS | |
1136 | else | |
1137 | #endif | |
1138 | #else | |
1139 | #ifndef LEAVE_TRAILING_ZEROS | |
1140 | if (!CA4.w[0] && !CA4.w[1]) | |
1141 | #endif | |
1142 | #endif | |
1143 | #ifndef LEAVE_TRAILING_ZEROS | |
1144 | // check whether result is exact | |
1145 | { | |
1146 | //printf("ed2=%d,nz=%d,a=%d,CQ="LX16","LX16", RH="LX16", RL="LX16"\n",ed2,nzeros,amount,CQ.w[1],CQ.w[0],reciprocals10_128[nzeros].w[1],reciprocals10_128[nzeros].w[0]);fflush(stdout); | |
1147 | // check whether CX, CY are short | |
1148 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1149 | i = (int) CY.w[0] - 1; | |
1150 | j = (int) CX.w[0] - 1; | |
1151 | // difference in powers of 2 factors for Y and X | |
1152 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1153 | // difference in powers of 5 factors | |
1154 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
1155 | if (d5 < nzeros) | |
1156 | nzeros = d5; | |
1157 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 1158 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
1159 | //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
1160 | ||
1161 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1162 | amount = recip_scale[nzeros]; | |
1163 | __shr_128_long (CQ, Qh, amount); | |
1164 | ||
1165 | diff_expon += nzeros; | |
1166 | } else { | |
1167 | // decompose Q as Qh*10^17 + Ql | |
1168 | //T128 = reciprocals10_128[17]; | |
1169 | T128.w[0] = 0x44909befeb9fad49ull; | |
1170 | T128.w[1] = 0x000b877aa3236a4bull; | |
1171 | __mul_128x128_to_256 (P256, CQ, T128); | |
1172 | //amount = recip_scale[17]; | |
1173 | Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
1174 | Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
1175 | ||
1176 | if (!Q_low) { | |
1177 | diff_expon += 17; | |
1178 | ||
1179 | tdigit[0] = Q_high & 0x3ffffff; | |
1180 | tdigit[1] = 0; | |
1181 | QX = Q_high >> 26; | |
1182 | QX32 = QX; | |
1183 | nzeros = 0; | |
1184 | ||
1185 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1186 | k = (QX32 & 127); | |
1187 | tdigit[0] += convert_table[j][k][0]; | |
1188 | tdigit[1] += convert_table[j][k][1]; | |
1189 | if (tdigit[0] >= 100000000) { | |
1190 | tdigit[0] -= 100000000; | |
1191 | tdigit[1]++; | |
1192 | } | |
1193 | } | |
1194 | ||
1195 | ||
1196 | if (tdigit[1] >= 100000000) { | |
1197 | tdigit[1] -= 100000000; | |
1198 | if (tdigit[1] >= 100000000) | |
1199 | tdigit[1] -= 100000000; | |
1200 | } | |
1201 | ||
1202 | digit = tdigit[0]; | |
1203 | if (!digit && !tdigit[1]) | |
1204 | nzeros += 16; | |
1205 | else { | |
1206 | if (!digit) { | |
1207 | nzeros += 8; | |
1208 | digit = tdigit[1]; | |
1209 | } | |
1210 | // decompose digit | |
1211 | PD = (UINT64) digit *0x068DB8BBull; | |
1212 | digit_h = (UINT32) (PD >> 40); | |
1213 | //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); | |
1214 | digit_low = digit - digit_h * 10000; | |
1215 | ||
1216 | if (!digit_low) | |
1217 | nzeros += 4; | |
1218 | else | |
1219 | digit_h = digit_low; | |
1220 | ||
1221 | if (!(digit_h & 1)) | |
1222 | nzeros += | |
1223 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1224 | (digit_h & 7)); | |
1225 | } | |
1226 | ||
1227 | if (nzeros) { | |
1228 | __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
1229 | ||
1230 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
1231 | amount = short_recip_scale[nzeros]; | |
1232 | CQ.w[0] = CQ.w[1] >> amount; | |
1233 | } else | |
1234 | CQ.w[0] = Q_high; | |
1235 | CQ.w[1] = 0; | |
1236 | ||
1237 | diff_expon += nzeros; | |
1238 | } else { | |
1239 | tdigit[0] = Q_low & 0x3ffffff; | |
1240 | tdigit[1] = 0; | |
1241 | QX = Q_low >> 26; | |
1242 | QX32 = QX; | |
1243 | nzeros = 0; | |
1244 | ||
1245 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1246 | k = (QX32 & 127); | |
1247 | tdigit[0] += convert_table[j][k][0]; | |
1248 | tdigit[1] += convert_table[j][k][1]; | |
1249 | if (tdigit[0] >= 100000000) { | |
1250 | tdigit[0] -= 100000000; | |
1251 | tdigit[1]++; | |
1252 | } | |
1253 | } | |
1254 | ||
1255 | if (tdigit[1] >= 100000000) { | |
1256 | tdigit[1] -= 100000000; | |
1257 | if (tdigit[1] >= 100000000) | |
1258 | tdigit[1] -= 100000000; | |
1259 | } | |
1260 | ||
1261 | digit = tdigit[0]; | |
1262 | if (!digit && !tdigit[1]) | |
1263 | nzeros += 16; | |
1264 | else { | |
1265 | if (!digit) { | |
1266 | nzeros += 8; | |
1267 | digit = tdigit[1]; | |
1268 | } | |
1269 | // decompose digit | |
1270 | PD = (UINT64) digit *0x068DB8BBull; | |
1271 | digit_h = (UINT32) (PD >> 40); | |
1272 | //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); | |
1273 | digit_low = digit - digit_h * 10000; | |
1274 | ||
1275 | if (!digit_low) | |
1276 | nzeros += 4; | |
1277 | else | |
1278 | digit_h = digit_low; | |
1279 | ||
1280 | if (!(digit_h & 1)) | |
1281 | nzeros += | |
1282 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1283 | (digit_h & 7)); | |
1284 | } | |
1285 | ||
1286 | if (nzeros) { | |
1287 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 1288 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
1289 | |
1290 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1291 | amount = recip_scale[nzeros]; | |
1292 | __shr_128 (CQ, Qh, amount); | |
1293 | } | |
1294 | diff_expon += nzeros; | |
1295 | ||
1296 | } | |
1297 | } | |
1298 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1299 | pfpsf); | |
1300 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1301 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1302 | #endif | |
1303 | BID_RETURN (res); | |
1304 | } | |
1305 | #endif | |
1306 | ||
1307 | if (diff_expon >= 0) { | |
1308 | #ifdef IEEE_ROUND_NEAREST | |
1309 | // rounding | |
1310 | // 2*CA4 - CY | |
1311 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1312 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1313 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1314 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1315 | ||
1316 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1317 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1318 | ||
1319 | CQ.w[0] += carry64; | |
1320 | if (CQ.w[0] < carry64) | |
1321 | CQ.w[1]++; | |
1322 | #else | |
1323 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1324 | // rounding | |
1325 | // 2*CA4 - CY | |
1326 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1327 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1328 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1329 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1330 | ||
1331 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1332 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1333 | ||
1334 | CQ.w[0] += carry64; | |
1335 | if (CQ.w[0] < carry64) | |
1336 | CQ.w[1]++; | |
1337 | #else | |
1338 | rmode = rnd_mode; | |
1339 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1340 | rmode = 3 - rmode; | |
1341 | switch (rmode) { | |
1342 | case ROUNDING_TO_NEAREST: // round to nearest code | |
1343 | // rounding | |
1344 | // 2*CA4 - CY | |
1345 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1346 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1347 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1348 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1349 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1350 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1351 | CQ.w[0] += carry64; | |
1352 | if (CQ.w[0] < carry64) | |
1353 | CQ.w[1]++; | |
1354 | break; | |
1355 | case ROUNDING_TIES_AWAY: | |
1356 | // rounding | |
1357 | // 2*CA4 - CY | |
1358 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1359 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1360 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1361 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1362 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1363 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1364 | CQ.w[0] += carry64; | |
1365 | if (CQ.w[0] < carry64) | |
1366 | CQ.w[1]++; | |
1367 | break; | |
1368 | case ROUNDING_DOWN: | |
1369 | case ROUNDING_TO_ZERO: | |
1370 | break; | |
1371 | default: // rounding up | |
1372 | CQ.w[0]++; | |
1373 | if (!CQ.w[0]) | |
1374 | CQ.w[1]++; | |
1375 | break; | |
1376 | } | |
1377 | #endif | |
1378 | #endif | |
1379 | ||
1380 | } else { | |
1381 | #ifdef SET_STATUS_FLAGS | |
1382 | if (CA4.w[0] || CA4.w[1]) { | |
1383 | // set status flags | |
1384 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1385 | } | |
1386 | #endif | |
1387 | handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
1388 | CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
1389 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1390 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1391 | #endif | |
1392 | BID_RETURN (res); | |
1393 | } | |
1394 | ||
1395 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
1396 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1397 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1398 | #endif | |
1399 | BID_RETURN (res); | |
1400 | ||
1401 | } | |
1402 | ||
1403 | ||
1404 | BID128_FUNCTION_ARG128_ARGTYPE2 (bid128qd_div, x, UINT64, y) | |
1405 | UINT256 CA4, CA4r, P256; | |
0b6df824 | 1406 | UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; |
b2a00c89 L |
1407 | UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, |
1408 | valid_y; | |
1409 | int_float fx, fy, f64; | |
1410 | UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
1411 | int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
1412 | digits_q, amount; | |
1413 | int nzeros, i, j, k, d5, rmode; | |
1414 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1415 | fexcept_t binaryflags = 0; | |
1416 | #endif | |
1417 | ||
1418 | ||
1419 | valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); | |
1420 | // unpack arguments, check for NaN or Infinity | |
1421 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
1422 | // test if x is NaN | |
1423 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1424 | #ifdef SET_STATUS_FLAGS | |
1425 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN | |
1426 | (y & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
1427 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1428 | #endif | |
1429 | res.w[1] = (CX.w[1]) & QUIET_MASK64; | |
1430 | res.w[0] = CX.w[0]; | |
1431 | BID_RETURN (res); | |
1432 | } | |
1433 | // x is Infinity? | |
1434 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1435 | // check if y is Inf. | |
1436 | if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
1437 | // return NaN | |
1438 | { | |
1439 | #ifdef SET_STATUS_FLAGS | |
1440 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1441 | #endif | |
1442 | res.w[1] = 0x7c00000000000000ull; | |
1443 | res.w[0] = 0; | |
1444 | BID_RETURN (res); | |
1445 | } | |
1446 | // y is NaN? | |
1447 | if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) | |
1448 | // return NaN | |
1449 | { | |
1450 | // return +/-Inf | |
1451 | res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull) | | |
1452 | 0x7800000000000000ull; | |
1453 | res.w[0] = 0; | |
1454 | BID_RETURN (res); | |
1455 | } | |
1456 | } | |
1457 | // x is 0 | |
1458 | if ((y & 0x7800000000000000ull) < 0x7800000000000000ull) { | |
1459 | if (!CY.w[0]) { | |
1460 | #ifdef SET_STATUS_FLAGS | |
1461 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1462 | #endif | |
1463 | // x=y=0, return NaN | |
1464 | res.w[1] = 0x7c00000000000000ull; | |
1465 | res.w[0] = 0; | |
1466 | BID_RETURN (res); | |
1467 | } | |
1468 | // return 0 | |
1469 | res.w[1] = (x.w[1] ^ y) & 0x8000000000000000ull; | |
1470 | exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1471 | if (exponent_x > DECIMAL_MAX_EXPON_128) | |
1472 | exponent_x = DECIMAL_MAX_EXPON_128; | |
1473 | else if (exponent_x < 0) | |
1474 | exponent_x = 0; | |
1475 | res.w[1] |= (((UINT64) exponent_x) << 49); | |
1476 | res.w[0] = 0; | |
1477 | BID_RETURN (res); | |
1478 | } | |
1479 | } | |
1480 | CY.w[1] = 0; | |
1481 | if (!valid_y) { | |
1482 | // y is Inf. or NaN | |
1483 | ||
1484 | // test if y is NaN | |
1485 | if ((y & NAN_MASK64) == NAN_MASK64) { | |
1486 | #ifdef SET_STATUS_FLAGS | |
1487 | if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
1488 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1489 | #endif | |
1490 | res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); | |
1491 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
1492 | res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); | |
1493 | BID_RETURN (res); | |
1494 | } | |
1495 | // y is Infinity? | |
1496 | if ((y & INFINITY_MASK64) == INFINITY_MASK64) { | |
1497 | // return +/-0 | |
1498 | res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull); | |
1499 | res.w[0] = 0; | |
1500 | BID_RETURN (res); | |
1501 | } | |
1502 | // y is 0 | |
1503 | #ifdef SET_STATUS_FLAGS | |
1504 | __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1505 | #endif | |
1506 | res.w[1] = (sign_x ^ sign_y) | INFINITY_MASK64; | |
1507 | res.w[0] = 0; | |
1508 | BID_RETURN (res); | |
1509 | } | |
1510 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1511 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1512 | #endif | |
1513 | diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1514 | ||
1515 | if (__unsigned_compare_gt_128 (CY, CX)) { | |
1516 | // CX < CY | |
1517 | ||
1518 | // 2^64 | |
1519 | f64.i = 0x5f800000; | |
1520 | ||
1521 | // fx ~ CX, fy ~ CY | |
1522 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1523 | fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1524 | // expon_cy - expon_cx | |
1525 | bin_index = (fy.i - fx.i) >> 23; | |
1526 | ||
1527 | if (CX.w[1]) { | |
1528 | T = power10_index_binexp_128[bin_index].w[0]; | |
1529 | __mul_64x128_short (CA, T, CX); | |
1530 | } else { | |
1531 | T128 = power10_index_binexp_128[bin_index]; | |
1532 | __mul_64x128_short (CA, CX.w[0], T128); | |
1533 | } | |
1534 | ||
1535 | ed2 = 33; | |
1536 | if (__unsigned_compare_gt_128 (CY, CA)) | |
1537 | ed2++; | |
1538 | ||
1539 | T128 = power10_table_128[ed2]; | |
1540 | __mul_128x128_to_256 (CA4, CA, T128); | |
1541 | ||
1542 | ed2 += estimate_decimal_digits[bin_index]; | |
1543 | CQ.w[0] = CQ.w[1] = 0; | |
1544 | diff_expon = diff_expon - ed2; | |
1545 | ||
1546 | } else { | |
1547 | // get CQ = CX/CY | |
1548 | __div_128_by_128 (&CQ, &CR, CX, CY); | |
1549 | ||
1550 | if (!CR.w[1] && !CR.w[0]) { | |
1551 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1552 | pfpsf); | |
1553 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1554 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1555 | #endif | |
1556 | BID_RETURN (res); | |
1557 | } | |
1558 | // get number of decimal digits in CQ | |
1559 | // 2^64 | |
1560 | f64.i = 0x5f800000; | |
1561 | fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1562 | // binary expon. of CQ | |
1563 | bin_expon = (fx.i - 0x3f800000) >> 23; | |
1564 | ||
1565 | digits_q = estimate_decimal_digits[bin_expon]; | |
1566 | TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1567 | TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1568 | if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1569 | digits_q++; | |
1570 | ||
1571 | ed2 = 34 - digits_q; | |
1572 | T128.w[0] = power10_table_128[ed2].w[0]; | |
1573 | T128.w[1] = power10_table_128[ed2].w[1]; | |
1574 | __mul_128x128_to_256 (CA4, CR, T128); | |
1575 | diff_expon = diff_expon - ed2; | |
1576 | __mul_128x128_low (CQ, CQ, T128); | |
1577 | ||
1578 | } | |
1579 | ||
1580 | __div_256_by_128 (&CQ, &CA4, CY); | |
1581 | ||
1582 | ||
1583 | #ifdef SET_STATUS_FLAGS | |
1584 | if (CA4.w[0] || CA4.w[1]) { | |
1585 | // set status flags | |
1586 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1587 | } | |
1588 | #ifndef LEAVE_TRAILING_ZEROS | |
1589 | else | |
1590 | #endif | |
1591 | #else | |
1592 | #ifndef LEAVE_TRAILING_ZEROS | |
1593 | if (!CA4.w[0] && !CA4.w[1]) | |
1594 | #endif | |
1595 | #endif | |
1596 | #ifndef LEAVE_TRAILING_ZEROS | |
1597 | // check whether result is exact | |
1598 | { | |
1599 | // check whether CX, CY are short | |
1600 | if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1601 | i = (int) CY.w[0] - 1; | |
1602 | j = (int) CX.w[0] - 1; | |
1603 | // difference in powers of 2 factors for Y and X | |
1604 | nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1605 | // difference in powers of 5 factors | |
1606 | d5 = ed2 - factors[i][1] + factors[j][1]; | |
1607 | if (d5 < nzeros) | |
1608 | nzeros = d5; | |
1609 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 1610 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
1611 | //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
1612 | ||
1613 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1614 | amount = recip_scale[nzeros]; | |
1615 | __shr_128_long (CQ, Qh, amount); | |
1616 | ||
1617 | diff_expon += nzeros; | |
1618 | } else { | |
1619 | // decompose Q as Qh*10^17 + Ql | |
1620 | //T128 = reciprocals10_128[17]; | |
1621 | T128.w[0] = 0x44909befeb9fad49ull; | |
1622 | T128.w[1] = 0x000b877aa3236a4bull; | |
1623 | __mul_128x128_to_256 (P256, CQ, T128); | |
1624 | //amount = recip_scale[17]; | |
1625 | Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
1626 | Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
1627 | ||
1628 | if (!Q_low) { | |
1629 | diff_expon += 17; | |
1630 | ||
1631 | tdigit[0] = Q_high & 0x3ffffff; | |
1632 | tdigit[1] = 0; | |
1633 | QX = Q_high >> 26; | |
1634 | QX32 = QX; | |
1635 | nzeros = 0; | |
1636 | ||
1637 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1638 | k = (QX32 & 127); | |
1639 | tdigit[0] += convert_table[j][k][0]; | |
1640 | tdigit[1] += convert_table[j][k][1]; | |
1641 | if (tdigit[0] >= 100000000) { | |
1642 | tdigit[0] -= 100000000; | |
1643 | tdigit[1]++; | |
1644 | } | |
1645 | } | |
1646 | ||
1647 | ||
1648 | if (tdigit[1] >= 100000000) { | |
1649 | tdigit[1] -= 100000000; | |
1650 | if (tdigit[1] >= 100000000) | |
1651 | tdigit[1] -= 100000000; | |
1652 | } | |
1653 | ||
1654 | digit = tdigit[0]; | |
1655 | if (!digit && !tdigit[1]) | |
1656 | nzeros += 16; | |
1657 | else { | |
1658 | if (!digit) { | |
1659 | nzeros += 8; | |
1660 | digit = tdigit[1]; | |
1661 | } | |
1662 | // decompose digit | |
1663 | PD = (UINT64) digit *0x068DB8BBull; | |
1664 | digit_h = (UINT32) (PD >> 40); | |
1665 | digit_low = digit - digit_h * 10000; | |
1666 | ||
1667 | if (!digit_low) | |
1668 | nzeros += 4; | |
1669 | else | |
1670 | digit_h = digit_low; | |
1671 | ||
1672 | if (!(digit_h & 1)) | |
1673 | nzeros += | |
1674 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1675 | (digit_h & 7)); | |
1676 | } | |
1677 | ||
1678 | if (nzeros) { | |
1679 | __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
1680 | ||
1681 | // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
1682 | amount = short_recip_scale[nzeros]; | |
1683 | CQ.w[0] = CQ.w[1] >> amount; | |
1684 | } else | |
1685 | CQ.w[0] = Q_high; | |
1686 | CQ.w[1] = 0; | |
1687 | ||
1688 | diff_expon += nzeros; | |
1689 | } else { | |
1690 | tdigit[0] = Q_low & 0x3ffffff; | |
1691 | tdigit[1] = 0; | |
1692 | QX = Q_low >> 26; | |
1693 | QX32 = QX; | |
1694 | nzeros = 0; | |
1695 | ||
1696 | for (j = 0; QX32; j++, QX32 >>= 7) { | |
1697 | k = (QX32 & 127); | |
1698 | tdigit[0] += convert_table[j][k][0]; | |
1699 | tdigit[1] += convert_table[j][k][1]; | |
1700 | if (tdigit[0] >= 100000000) { | |
1701 | tdigit[0] -= 100000000; | |
1702 | tdigit[1]++; | |
1703 | } | |
1704 | } | |
1705 | ||
1706 | if (tdigit[1] >= 100000000) { | |
1707 | tdigit[1] -= 100000000; | |
1708 | if (tdigit[1] >= 100000000) | |
1709 | tdigit[1] -= 100000000; | |
1710 | } | |
1711 | ||
1712 | digit = tdigit[0]; | |
1713 | if (!digit && !tdigit[1]) | |
1714 | nzeros += 16; | |
1715 | else { | |
1716 | if (!digit) { | |
1717 | nzeros += 8; | |
1718 | digit = tdigit[1]; | |
1719 | } | |
1720 | // decompose digit | |
1721 | PD = (UINT64) digit *0x068DB8BBull; | |
1722 | digit_h = (UINT32) (PD >> 40); | |
1723 | digit_low = digit - digit_h * 10000; | |
1724 | ||
1725 | if (!digit_low) | |
1726 | nzeros += 4; | |
1727 | else | |
1728 | digit_h = digit_low; | |
1729 | ||
1730 | if (!(digit_h & 1)) | |
1731 | nzeros += | |
1732 | 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1733 | (digit_h & 7)); | |
1734 | } | |
1735 | ||
1736 | if (nzeros) { | |
1737 | // get P*(2^M[extra_digits])/10^extra_digits | |
0b6df824 | 1738 | __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
b2a00c89 L |
1739 | |
1740 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1741 | amount = recip_scale[nzeros]; | |
1742 | __shr_128 (CQ, Qh, amount); | |
1743 | } | |
1744 | diff_expon += nzeros; | |
1745 | ||
1746 | } | |
1747 | } | |
1748 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); | |
1749 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1750 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1751 | #endif | |
1752 | BID_RETURN (res); | |
1753 | } | |
1754 | #endif | |
1755 | ||
1756 | if (diff_expon >= 0) { | |
1757 | #ifdef IEEE_ROUND_NEAREST | |
1758 | // rounding | |
1759 | // 2*CA4 - CY | |
1760 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1761 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1762 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1763 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1764 | ||
1765 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1766 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1767 | ||
1768 | CQ.w[0] += carry64; | |
1769 | if (CQ.w[0] < carry64) | |
1770 | CQ.w[1]++; | |
1771 | #else | |
1772 | #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1773 | // rounding | |
1774 | // 2*CA4 - CY | |
1775 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1776 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1777 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1778 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1779 | ||
1780 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1781 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1782 | ||
1783 | CQ.w[0] += carry64; | |
1784 | if (CQ.w[0] < carry64) | |
1785 | CQ.w[1]++; | |
1786 | #else | |
1787 | rmode = rnd_mode; | |
1788 | if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1789 | rmode = 3 - rmode; | |
1790 | switch (rmode) { | |
1791 | case ROUNDING_TO_NEAREST: // round to nearest code | |
1792 | // rounding | |
1793 | // 2*CA4 - CY | |
1794 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1795 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1796 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1797 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1798 | D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1799 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1800 | CQ.w[0] += carry64; | |
1801 | if (CQ.w[0] < carry64) | |
1802 | CQ.w[1]++; | |
1803 | break; | |
1804 | case ROUNDING_TIES_AWAY: | |
1805 | // rounding | |
1806 | // 2*CA4 - CY | |
1807 | CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1808 | CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1809 | __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1810 | CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1811 | D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1812 | carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1813 | CQ.w[0] += carry64; | |
1814 | if (CQ.w[0] < carry64) | |
1815 | CQ.w[1]++; | |
1816 | break; | |
1817 | case ROUNDING_DOWN: | |
1818 | case ROUNDING_TO_ZERO: | |
1819 | break; | |
1820 | default: // rounding up | |
1821 | CQ.w[0]++; | |
1822 | if (!CQ.w[0]) | |
1823 | CQ.w[1]++; | |
1824 | break; | |
1825 | } | |
1826 | #endif | |
1827 | #endif | |
1828 | ||
1829 | } else { | |
1830 | #ifdef SET_STATUS_FLAGS | |
1831 | if (CA4.w[0] || CA4.w[1]) { | |
1832 | // set status flags | |
1833 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1834 | } | |
1835 | #endif | |
1836 | handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
1837 | CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
1838 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1839 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1840 | #endif | |
1841 | BID_RETURN (res); | |
1842 | ||
1843 | } | |
1844 | ||
1845 | get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
1846 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1847 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1848 | #endif | |
1849 | BID_RETURN (res); | |
200359e8 L |
1850 | |
1851 | } |