]>
Commit | Line | Data |
---|---|---|
748086b7 | 1 | /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | /***************************************************************************** | |
25 | * BID64 square root | |
26 | ***************************************************************************** | |
27 | * | |
28 | * Algorithm description: | |
29 | * | |
30 | * if(exponent_x is odd) | |
31 | * scale coefficient_x by 10, adjust exponent | |
32 | * - get lower estimate for number of digits in coefficient_x | |
33 | * - scale coefficient x to between 31 and 33 decimal digits | |
34 | * - in parallel, check for exact case and return if true | |
35 | * - get high part of result coefficient using double precision sqrt | |
36 | * - compute remainder and refine coefficient in one iteration (which | |
37 | * modifies it by at most 1) | |
38 | * - result exponent is easy to compute from the adjusted arg. exponent | |
39 | * | |
40 | ****************************************************************************/ | |
41 | ||
42 | #include "bid_internal.h" | |
b2a00c89 L |
43 | #include "bid_sqrt_macros.h" |
44 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
45 | #include <fenv.h> | |
46 | ||
47 | #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT | |
48 | #endif | |
200359e8 L |
49 | |
50 | extern double sqrt (double); | |
51 | ||
52 | #if DECIMAL_CALL_BY_REFERENCE | |
53 | ||
54 | void | |
b2a00c89 | 55 | bid64_sqrt (UINT64 * pres, |
200359e8 L |
56 | UINT64 * |
57 | px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
58 | _EXC_INFO_PARAM) { | |
59 | UINT64 x; | |
60 | #else | |
61 | ||
62 | UINT64 | |
b2a00c89 | 63 | bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM |
200359e8 L |
64 | _EXC_MASKS_PARAM _EXC_INFO_PARAM) { |
65 | #endif | |
66 | UINT128 CA, CT; | |
67 | UINT64 sign_x, coefficient_x; | |
68 | UINT64 Q, Q2, A10, C4, R, R2, QE, res; | |
69 | SINT64 D; | |
70 | int_double t_scale; | |
71 | int_float tempx; | |
72 | double da, dq, da_h, da_l, dqe; | |
73 | int exponent_x, exponent_q, bin_expon_cx; | |
74 | int digits_x; | |
75 | int scale; | |
b2a00c89 L |
76 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
77 | fexcept_t binaryflags = 0; | |
78 | #endif | |
200359e8 L |
79 | |
80 | #if DECIMAL_CALL_BY_REFERENCE | |
81 | #if !DECIMAL_GLOBAL_ROUNDING | |
82 | _IDEC_round rnd_mode = *prnd_mode; | |
83 | #endif | |
84 | x = *px; | |
85 | #endif | |
86 | ||
87 | // unpack arguments, check for NaN or Infinity | |
88 | if (!unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x)) { | |
89 | // x is Inf. or NaN or 0 | |
200359e8 | 90 | if ((x & INFINITY_MASK64) == INFINITY_MASK64) { |
b2a00c89 L |
91 | res = coefficient_x; |
92 | if ((coefficient_x & SSNAN_MASK64) == SINFINITY_MASK64) // -Infinity | |
200359e8 L |
93 | { |
94 | res = NAN_MASK64; | |
95 | #ifdef SET_STATUS_FLAGS | |
96 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
97 | #endif | |
98 | } | |
99 | #ifdef SET_STATUS_FLAGS | |
100 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
101 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
102 | #endif | |
103 | BID_RETURN (res & QUIET_MASK64); | |
104 | } | |
105 | // x is 0 | |
106 | exponent_x = (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1; | |
107 | res = sign_x | (((UINT64) exponent_x) << 53); | |
108 | BID_RETURN (res); | |
109 | } | |
110 | // x<0? | |
111 | if (sign_x && coefficient_x) { | |
112 | res = NAN_MASK64; | |
113 | #ifdef SET_STATUS_FLAGS | |
114 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
115 | #endif | |
116 | BID_RETURN (res); | |
117 | } | |
b2a00c89 L |
118 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
119 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
120 | #endif | |
200359e8 L |
121 | //--- get number of bits in the coefficient of x --- |
122 | tempx.d = (float) coefficient_x; | |
123 | bin_expon_cx = ((tempx.i >> 23) & 0xff) - 0x7f; | |
b2a00c89 | 124 | digits_x = estimate_decimal_digits[bin_expon_cx]; |
200359e8 | 125 | // add test for range |
b2a00c89 | 126 | if (coefficient_x >= power10_index_binexp[bin_expon_cx]) |
200359e8 L |
127 | digits_x++; |
128 | ||
129 | A10 = coefficient_x; | |
130 | if (exponent_x & 1) { | |
131 | A10 = (A10 << 2) + A10; | |
132 | A10 += A10; | |
133 | } | |
134 | ||
135 | dqe = sqrt ((double) A10); | |
136 | QE = (UINT32) dqe; | |
137 | if (QE * QE == A10) { | |
138 | res = | |
139 | very_fast_get_BID64 (0, (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1, | |
140 | QE); | |
b2a00c89 L |
141 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
142 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
143 | #endif | |
200359e8 L |
144 | BID_RETURN (res); |
145 | } | |
146 | // if exponent is odd, scale coefficient by 10 | |
147 | scale = 31 - digits_x; | |
148 | exponent_q = exponent_x - scale; | |
149 | scale += (exponent_q & 1); // exp. bias is even | |
150 | ||
b2a00c89 | 151 | CT = power10_table_128[scale]; |
200359e8 L |
152 | __mul_64x128_short (CA, coefficient_x, CT); |
153 | ||
154 | // 2^64 | |
155 | t_scale.i = 0x43f0000000000000ull; | |
156 | // convert CA to DP | |
157 | da_h = CA.w[1]; | |
158 | da_l = CA.w[0]; | |
159 | da = da_h * t_scale.d + da_l; | |
160 | ||
161 | dq = sqrt (da); | |
162 | ||
163 | Q = (UINT64) dq; | |
164 | ||
165 | // get sign(sqrt(CA)-Q) | |
166 | R = CA.w[0] - Q * Q; | |
167 | R = ((SINT64) R) >> 63; | |
168 | D = R + R + 1; | |
169 | ||
170 | exponent_q = (exponent_q + DECIMAL_EXPONENT_BIAS) >> 1; | |
171 | ||
172 | #ifdef SET_STATUS_FLAGS | |
173 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
174 | #endif | |
175 | ||
176 | #ifndef IEEE_ROUND_NEAREST | |
177 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
178 | if (!((rnd_mode) & 3)) { | |
179 | #endif | |
180 | #endif | |
181 | ||
182 | // midpoint to check | |
183 | Q2 = Q + Q + D; | |
184 | C4 = CA.w[0] << 2; | |
185 | ||
186 | // get sign(-sqrt(CA)+Midpoint) | |
187 | R2 = Q2 * Q2 - C4; | |
188 | R2 = ((SINT64) R2) >> 63; | |
189 | ||
190 | // adjust Q if R!=R2 | |
191 | Q += (D & (R ^ R2)); | |
192 | #ifndef IEEE_ROUND_NEAREST | |
193 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
194 | } else { | |
195 | C4 = CA.w[0]; | |
196 | Q += D; | |
197 | if ((SINT64) (Q * Q - C4) > 0) | |
198 | Q--; | |
199 | if (rnd_mode == ROUNDING_UP) | |
200 | Q++; | |
201 | } | |
202 | #endif | |
203 | #endif | |
204 | ||
205 | res = fast_get_BID64 (0, exponent_q, Q); | |
b2a00c89 L |
206 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
207 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
208 | #endif | |
209 | BID_RETURN (res); | |
210 | } | |
211 | ||
212 | ||
213 | TYPE0_FUNCTION_ARG1 (UINT64, bid64q_sqrt, x) | |
214 | ||
215 | UINT256 M256, C4, C8; | |
216 | UINT128 CX, CX2, A10, S2, T128, CS, CSM, CS2, C256, CS1, | |
217 | mul_factor2_long = { {0x0ull, 0x0ull} }, QH, Tmp, TP128, Qh, Ql; | |
218 | UINT64 sign_x, Carry, B10, res, mul_factor, mul_factor2 = 0x0ull, CS0; | |
219 | SINT64 D; | |
220 | int_float fx, f64; | |
221 | int exponent_x, bin_expon_cx, done = 0; | |
222 | int digits, scale, exponent_q = 0, exact = 1, amount, extra_digits; | |
223 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
224 | fexcept_t binaryflags = 0; | |
225 | #endif | |
226 | ||
227 | // unpack arguments, check for NaN or Infinity | |
228 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
229 | res = CX.w[1]; | |
230 | // NaN ? | |
231 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
232 | #ifdef SET_STATUS_FLAGS | |
233 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
234 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
235 | #endif | |
236 | Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull); | |
237 | Tmp.w[0] = CX.w[0]; | |
238 | TP128 = reciprocals10_128[18]; | |
239 | __mul_128x128_full (Qh, Ql, Tmp, TP128); | |
240 | amount = recip_scale[18]; | |
241 | __shr_128 (Tmp, Qh, amount); | |
242 | res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; | |
243 | BID_RETURN (res); | |
244 | } | |
245 | // x is Infinity? | |
246 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
247 | if (sign_x) { | |
248 | // -Inf, return NaN | |
249 | res = 0x7c00000000000000ull; | |
250 | #ifdef SET_STATUS_FLAGS | |
251 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
252 | #endif | |
253 | } | |
254 | BID_RETURN (res); | |
255 | } | |
256 | // x is 0 otherwise | |
257 | ||
258 | exponent_x = | |
259 | ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) + | |
260 | DECIMAL_EXPONENT_BIAS; | |
261 | if (exponent_x < 0) | |
262 | exponent_x = 0; | |
263 | if (exponent_x > DECIMAL_MAX_EXPON_64) | |
264 | exponent_x = DECIMAL_MAX_EXPON_64; | |
265 | //res= sign_x | (((UINT64)exponent_x)<<53); | |
266 | res = get_BID64 (sign_x, exponent_x, 0, rnd_mode, pfpsf); | |
267 | BID_RETURN (res); | |
268 | } | |
269 | if (sign_x) { | |
270 | res = 0x7c00000000000000ull; | |
271 | #ifdef SET_STATUS_FLAGS | |
272 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
273 | #endif | |
200359e8 L |
274 | BID_RETURN (res); |
275 | } | |
b2a00c89 L |
276 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS |
277 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
278 | #endif | |
279 | ||
280 | // 2^64 | |
281 | f64.i = 0x5f800000; | |
282 | ||
283 | // fx ~ CX | |
284 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
285 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
286 | digits = estimate_decimal_digits[bin_expon_cx]; | |
287 | ||
288 | A10 = CX; | |
289 | if (exponent_x & 1) { | |
290 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
291 | A10.w[0] = CX.w[0] << 3; | |
292 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
293 | CX2.w[0] = CX.w[0] << 1; | |
294 | __add_128_128 (A10, A10, CX2); | |
295 | } | |
296 | ||
297 | C256.w[1] = A10.w[1]; | |
298 | C256.w[0] = A10.w[0]; | |
299 | CS.w[0] = short_sqrt128 (A10); | |
300 | CS.w[1] = 0; | |
301 | mul_factor = 0; | |
302 | // check for exact result | |
303 | if (CS.w[0] < 10000000000000000ull) { | |
304 | if (CS.w[0] * CS.w[0] == A10.w[0]) { | |
305 | __sqr64_fast (S2, CS.w[0]); | |
306 | if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) | |
307 | { | |
308 | res = | |
309 | get_BID64 (0, | |
310 | ((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) + | |
311 | DECIMAL_EXPONENT_BIAS, CS.w[0], rnd_mode, pfpsf); | |
312 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
313 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
314 | #endif | |
315 | BID_RETURN (res); | |
316 | } | |
317 | } | |
318 | if (CS.w[0] >= 1000000000000000ull) { | |
319 | done = 1; | |
320 | exponent_q = exponent_x; | |
321 | C256.w[1] = A10.w[1]; | |
322 | C256.w[0] = A10.w[0]; | |
323 | } | |
324 | #ifdef SET_STATUS_FLAGS | |
325 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
326 | #endif | |
327 | exact = 0; | |
328 | } else { | |
329 | B10 = 0x3333333333333334ull; | |
330 | __mul_64x64_to_128_full (CS2, CS.w[0], B10); | |
331 | CS0 = CS2.w[1] >> 1; | |
332 | if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) { | |
333 | #ifdef SET_STATUS_FLAGS | |
334 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
335 | #endif | |
336 | exact = 0; | |
337 | } | |
338 | done = 1; | |
339 | CS.w[0] = CS0; | |
340 | exponent_q = exponent_x + 2; | |
341 | mul_factor = 10; | |
342 | mul_factor2 = 100; | |
343 | if (CS.w[0] >= 10000000000000000ull) { | |
344 | __mul_64x64_to_128_full (CS2, CS.w[0], B10); | |
345 | CS0 = CS2.w[1] >> 1; | |
346 | if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) { | |
347 | #ifdef SET_STATUS_FLAGS | |
348 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
349 | #endif | |
350 | exact = 0; | |
351 | } | |
352 | exponent_q += 2; | |
353 | CS.w[0] = CS0; | |
354 | mul_factor = 100; | |
355 | mul_factor2 = 10000; | |
356 | } | |
357 | if (exact) { | |
358 | CS0 = CS.w[0] * mul_factor; | |
359 | __sqr64_fast (CS1, CS0) | |
360 | if ((CS1.w[0] != A10.w[0]) || (CS1.w[1] != A10.w[1])) { | |
361 | #ifdef SET_STATUS_FLAGS | |
362 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
363 | #endif | |
364 | exact = 0; | |
365 | } | |
366 | } | |
367 | } | |
368 | ||
369 | if (!done) { | |
370 | // get number of digits in CX | |
371 | D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; | |
372 | if (D > 0 | |
373 | || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) | |
374 | digits++; | |
375 | ||
376 | // if exponent is odd, scale coefficient by 10 | |
377 | scale = 31 - digits; | |
378 | exponent_q = exponent_x - scale; | |
379 | scale += (exponent_q & 1); // exp. bias is even | |
380 | ||
381 | T128 = power10_table_128[scale]; | |
382 | __mul_128x128_low (C256, CX, T128); | |
383 | ||
384 | ||
385 | CS.w[0] = short_sqrt128 (C256); | |
386 | } | |
387 | //printf("CS=%016I64x\n",CS.w[0]); | |
388 | ||
389 | exponent_q = | |
390 | ((exponent_q - DECIMAL_EXPONENT_BIAS_128) >> 1) + | |
391 | DECIMAL_EXPONENT_BIAS; | |
392 | if ((exponent_q < 0) && (exponent_q + MAX_FORMAT_DIGITS >= 0)) { | |
393 | extra_digits = -exponent_q; | |
394 | exponent_q = 0; | |
395 | ||
396 | // get coeff*(2^M[extra_digits])/10^extra_digits | |
397 | __mul_64x64_to_128 (QH, CS.w[0], reciprocals10_64[extra_digits]); | |
398 | ||
399 | // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
400 | amount = short_recip_scale[extra_digits]; | |
401 | ||
402 | CS0 = QH.w[1] >> amount; | |
403 | ||
404 | #ifdef SET_STATUS_FLAGS | |
405 | if (exact) { | |
406 | if (CS.w[0] != CS0 * power10_table_128[extra_digits].w[0]) | |
407 | exact = 0; | |
408 | } | |
409 | if (!exact) | |
410 | __set_status_flags (pfpsf, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION); | |
411 | #endif | |
412 | ||
413 | CS.w[0] = CS0; | |
414 | if (!mul_factor) | |
415 | mul_factor = 1; | |
416 | mul_factor *= power10_table_128[extra_digits].w[0]; | |
417 | __mul_64x64_to_128 (mul_factor2_long, mul_factor, mul_factor); | |
418 | if (mul_factor2_long.w[1]) | |
419 | mul_factor2 = 0; | |
420 | else | |
421 | mul_factor2 = mul_factor2_long.w[1]; | |
422 | } | |
423 | // 4*C256 | |
424 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
425 | C4.w[0] = C256.w[0] << 2; | |
426 | ||
427 | #ifndef IEEE_ROUND_NEAREST | |
428 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
429 | if (!((rnd_mode) & 3)) { | |
430 | #endif | |
431 | #endif | |
432 | // compare to midpoints | |
433 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
434 | //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]); | |
435 | if (mul_factor) | |
436 | CSM.w[0] *= mul_factor; | |
437 | // CSM^2 | |
438 | __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]); | |
439 | //__mul_128x128_to_256(M256, CSM, CSM); | |
440 | ||
441 | if (C4.w[1] > M256.w[1] || | |
442 | (C4.w[1] == M256.w[1] && C4.w[0] > M256.w[0])) { | |
443 | // round up | |
444 | CS.w[0]++; | |
445 | } else { | |
446 | C8.w[0] = CS.w[0] << 3; | |
447 | C8.w[1] = 0; | |
448 | if (mul_factor) { | |
449 | if (mul_factor2) { | |
450 | __mul_64x64_to_128 (C8, C8.w[0], mul_factor2); | |
451 | } else { | |
452 | __mul_64x128_low (C8, C8.w[0], mul_factor2_long); | |
453 | } | |
454 | } | |
455 | // M256 - 8*CSM | |
456 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
457 | M256.w[1] = M256.w[1] - C8.w[1] - Carry; | |
458 | ||
459 | // if CSM' > C256, round up | |
460 | if (M256.w[1] > C4.w[1] || | |
461 | (M256.w[1] == C4.w[1] && M256.w[0] > C4.w[0])) { | |
462 | // round down | |
463 | if (CS.w[0]) | |
464 | CS.w[0]--; | |
465 | } | |
466 | } | |
467 | #ifndef IEEE_ROUND_NEAREST | |
468 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
469 | } else { | |
470 | CS.w[0]++; | |
471 | CSM.w[0] = CS.w[0]; | |
472 | C8.w[0] = CSM.w[0] << 1; | |
473 | if (mul_factor) | |
474 | CSM.w[0] *= mul_factor; | |
475 | __mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]); | |
476 | C8.w[1] = 0; | |
477 | if (mul_factor) { | |
478 | if (mul_factor2) { | |
479 | __mul_64x64_to_128 (C8, C8.w[0], mul_factor2); | |
480 | } else { | |
481 | __mul_64x128_low (C8, C8.w[0], mul_factor2_long); | |
482 | } | |
483 | } | |
484 | //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]); | |
485 | ||
486 | if (M256.w[1] > C256.w[1] || | |
487 | (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) { | |
488 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
489 | M256.w[1] = M256.w[1] - Carry - C8.w[1]; | |
490 | M256.w[0]++; | |
491 | if (!M256.w[0]) { | |
492 | M256.w[1]++; | |
493 | ||
494 | } | |
495 | ||
496 | if ((M256.w[1] > C256.w[1] || | |
497 | (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) | |
498 | && (CS.w[0] > 1)) { | |
499 | ||
500 | CS.w[0]--; | |
501 | ||
502 | if (CS.w[0] > 1) { | |
503 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
504 | M256.w[1] = M256.w[1] - Carry - C8.w[1]; | |
505 | M256.w[0]++; | |
506 | if (!M256.w[0]) { | |
507 | M256.w[1]++; | |
508 | } | |
509 | ||
510 | if (M256.w[1] > C256.w[1] || | |
511 | (M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) | |
512 | CS.w[0]--; | |
513 | } | |
514 | } | |
515 | } | |
516 | ||
517 | else { | |
518 | /*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]); | |
519 | M256.w[1] = M256.w[1] + Carry + C8.w[1]; | |
520 | M256.w[0]++; | |
521 | if(!M256.w[0]) | |
522 | { | |
523 | M256.w[1]++; | |
524 | } | |
525 | CS.w[0]++; | |
526 | if(M256.w[1]<C256.w[1] || | |
527 | (M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0])) | |
528 | { | |
529 | CS.w[0]++; | |
530 | }*/ | |
531 | CS.w[0]++; | |
532 | } | |
533 | //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact); | |
534 | // RU? | |
535 | if (((rnd_mode) != ROUNDING_UP) || exact) { | |
536 | if (CS.w[0]) | |
537 | CS.w[0]--; | |
538 | } | |
539 | ||
540 | } | |
541 | #endif | |
542 | #endif | |
543 | //printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact); | |
544 | ||
545 | res = get_BID64 (0, exponent_q, CS.w[0], rnd_mode, pfpsf); | |
546 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
547 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
548 | #endif | |
549 | BID_RETURN (res); | |
550 | ||
551 | ||
552 | } |