]>
Commit | Line | Data |
---|---|---|
200359e8 L |
1 | /* Copyright (C) 2007 Free Software Foundation, Inc. |
2 | ||
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
7 | Software Foundation; either version 2, or (at your option) any later | |
8 | version. | |
9 | ||
10 | In addition to the permissions in the GNU General Public License, the | |
11 | Free Software Foundation gives you unlimited permission to link the | |
12 | compiled version of this file into combinations with other programs, | |
13 | and to distribute those combinations without any restriction coming | |
14 | from the use of this file. (The General Public License restrictions | |
15 | do apply in other respects; for example, they cover modification of | |
16 | the file, and distribution when not linked into a combine | |
17 | executable.) | |
18 | ||
19 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
20 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
21 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
22 | for more details. | |
23 | ||
24 | You should have received a copy of the GNU General Public License | |
25 | along with GCC; see the file COPYING. If not, write to the Free | |
26 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA | |
27 | 02110-1301, USA. */ | |
28 | ||
29 | #define BID_128RES | |
b2a00c89 L |
30 | #include "bid_internal.h" |
31 | #include "bid_sqrt_macros.h" | |
32 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
33 | #include <fenv.h> | |
200359e8 | 34 | |
b2a00c89 L |
35 | #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT |
36 | #endif | |
37 | ||
38 | BID128_FUNCTION_ARG1 (bid128_sqrt, x) | |
200359e8 | 39 | |
b2a00c89 L |
40 | UINT256 M256, C256, C4, C8; |
41 | UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; | |
42 | UINT64 sign_x, Carry; | |
43 | SINT64 D; | |
44 | int_float fx, f64; | |
45 | int exponent_x, bin_expon_cx; | |
46 | int digits, scale, exponent_q; | |
47 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
48 | fexcept_t binaryflags = 0; | |
49 | #endif | |
200359e8 L |
50 | |
51 | // unpack arguments, check for NaN or Infinity | |
b2a00c89 L |
52 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { |
53 | res.w[1] = CX.w[1]; | |
54 | res.w[0] = CX.w[0]; | |
200359e8 | 55 | // NaN ? |
b2a00c89 | 56 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
200359e8 | 57 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
58 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN |
59 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
200359e8 | 60 | #endif |
b2a00c89 L |
61 | res.w[1] = CX.w[1] & QUIET_MASK64; |
62 | BID_RETURN (res); | |
63 | } | |
200359e8 | 64 | // x is Infinity? |
b2a00c89 L |
65 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
66 | res.w[1] = CX.w[1]; | |
200359e8 | 67 | if (sign_x) { |
b2a00c89 | 68 | // -Inf, return NaN |
200359e8 | 69 | res.w[1] = 0x7c00000000000000ull; |
200359e8 L |
70 | #ifdef SET_STATUS_FLAGS |
71 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
72 | #endif | |
200359e8 | 73 | } |
b2a00c89 L |
74 | BID_RETURN (res); |
75 | } | |
76 | // x is 0 otherwise | |
77 | ||
78 | res.w[1] = | |
79 | sign_x | | |
80 | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49); | |
81 | res.w[0] = 0; | |
82 | BID_RETURN (res); | |
83 | } | |
84 | if (sign_x) { | |
85 | res.w[1] = 0x7c00000000000000ull; | |
86 | res.w[0] = 0; | |
87 | #ifdef SET_STATUS_FLAGS | |
88 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
89 | #endif | |
90 | BID_RETURN (res); | |
91 | } | |
92 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
93 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
94 | #endif | |
200359e8 | 95 | // 2^64 |
b2a00c89 | 96 | f64.i = 0x5f800000; |
200359e8 L |
97 | |
98 | // fx ~ CX | |
b2a00c89 L |
99 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; |
100 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
101 | digits = estimate_decimal_digits[bin_expon_cx]; | |
200359e8 | 102 | |
b2a00c89 L |
103 | A10 = CX; |
104 | if (exponent_x & 1) { | |
105 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
106 | A10.w[0] = CX.w[0] << 3; | |
107 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
108 | CX2.w[0] = CX.w[0] << 1; | |
109 | __add_128_128 (A10, A10, CX2); | |
110 | } | |
111 | ||
112 | CS.w[0] = short_sqrt128 (A10); | |
113 | CS.w[1] = 0; | |
200359e8 | 114 | // check for exact result |
b2a00c89 L |
115 | if (CS.w[0] * CS.w[0] == A10.w[0]) { |
116 | __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); | |
117 | if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) | |
118 | { | |
119 | get_BID128_very_fast (&res, 0, | |
120 | (exponent_x + | |
121 | DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
122 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
123 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
124 | #endif | |
125 | BID_RETURN (res); | |
200359e8 | 126 | } |
b2a00c89 | 127 | } |
200359e8 | 128 | // get number of digits in CX |
b2a00c89 L |
129 | D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; |
130 | if (D > 0 | |
131 | || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) | |
132 | digits++; | |
200359e8 L |
133 | |
134 | // if exponent is odd, scale coefficient by 10 | |
b2a00c89 L |
135 | scale = 67 - digits; |
136 | exponent_q = exponent_x - scale; | |
137 | scale += (exponent_q & 1); // exp. bias is even | |
200359e8 | 138 | |
b2a00c89 L |
139 | if (scale > 38) { |
140 | T128 = power10_table_128[scale - 37]; | |
141 | __mul_128x128_low (CX1, CX, T128); | |
200359e8 | 142 | |
b2a00c89 L |
143 | TP128 = power10_table_128[37]; |
144 | __mul_128x128_to_256 (C256, CX1, TP128); | |
145 | } else { | |
146 | T128 = power10_table_128[scale]; | |
147 | __mul_128x128_to_256 (C256, CX, T128); | |
148 | } | |
200359e8 L |
149 | |
150 | ||
151 | // 4*C256 | |
b2a00c89 L |
152 | C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); |
153 | C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); | |
154 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
155 | C4.w[0] = C256.w[0] << 2; | |
200359e8 | 156 | |
b2a00c89 | 157 | long_sqrt128 (&CS, C256); |
200359e8 L |
158 | |
159 | #ifndef IEEE_ROUND_NEAREST | |
160 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
b2a00c89 | 161 | if (!((rnd_mode) & 3)) { |
200359e8 L |
162 | #endif |
163 | #endif | |
b2a00c89 L |
164 | // compare to midpoints |
165 | CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
166 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
167 | // CSM^2 | |
168 | //__mul_128x128_to_256(M256, CSM, CSM); | |
169 | __sqr128_to_256 (M256, CSM); | |
170 | ||
171 | if (C4.w[3] > M256.w[3] | |
172 | || (C4.w[3] == M256.w[3] | |
173 | && (C4.w[2] > M256.w[2] | |
174 | || (C4.w[2] == M256.w[2] | |
175 | && (C4.w[1] > M256.w[1] | |
176 | || (C4.w[1] == M256.w[1] | |
177 | && C4.w[0] > M256.w[0])))))) { | |
178 | // round up | |
179 | CS.w[0]++; | |
180 | if (!CS.w[0]) | |
181 | CS.w[1]++; | |
182 | } else { | |
183 | C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); | |
184 | C8.w[0] = CS.w[0] << 3; | |
185 | // M256 - 8*CSM | |
186 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
187 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
188 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
189 | M256.w[3] = M256.w[3] - Carry; | |
190 | ||
191 | // if CSM' > C256, round up | |
192 | if (M256.w[3] > C4.w[3] | |
193 | || (M256.w[3] == C4.w[3] | |
194 | && (M256.w[2] > C4.w[2] | |
195 | || (M256.w[2] == C4.w[2] | |
196 | && (M256.w[1] > C4.w[1] | |
197 | || (M256.w[1] == C4.w[1] | |
198 | && M256.w[0] > C4.w[0])))))) { | |
199 | // round down | |
200359e8 | 200 | if (!CS.w[0]) |
b2a00c89 L |
201 | CS.w[1]--; |
202 | CS.w[0]--; | |
200359e8 | 203 | } |
b2a00c89 | 204 | } |
200359e8 L |
205 | #ifndef IEEE_ROUND_NEAREST |
206 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
b2a00c89 L |
207 | } else { |
208 | __sqr128_to_256 (M256, CS); | |
209 | C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
210 | C8.w[0] = CS.w[0] << 1; | |
211 | if (M256.w[3] > C256.w[3] | |
212 | || (M256.w[3] == C256.w[3] | |
213 | && (M256.w[2] > C256.w[2] | |
214 | || (M256.w[2] == C256.w[2] | |
215 | && (M256.w[1] > C256.w[1] | |
216 | || (M256.w[1] == C256.w[1] | |
217 | && M256.w[0] > C256.w[0])))))) { | |
218 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
219 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
220 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
221 | M256.w[3] = M256.w[3] - Carry; | |
222 | M256.w[0]++; | |
223 | if (!M256.w[0]) { | |
224 | M256.w[1]++; | |
225 | if (!M256.w[1]) { | |
226 | M256.w[2]++; | |
227 | if (!M256.w[2]) | |
228 | M256.w[3]++; | |
229 | } | |
230 | } | |
231 | ||
232 | if (!CS.w[0]) | |
233 | CS.w[1]--; | |
234 | CS.w[0]--; | |
235 | ||
200359e8 L |
236 | if (M256.w[3] > C256.w[3] |
237 | || (M256.w[3] == C256.w[3] | |
238 | && (M256.w[2] > C256.w[2] | |
239 | || (M256.w[2] == C256.w[2] | |
240 | && (M256.w[1] > C256.w[1] | |
241 | || (M256.w[1] == C256.w[1] | |
242 | && M256.w[0] > C256.w[0])))))) { | |
200359e8 L |
243 | |
244 | if (!CS.w[0]) | |
245 | CS.w[1]--; | |
246 | CS.w[0]--; | |
b2a00c89 L |
247 | } |
248 | } | |
200359e8 | 249 | |
b2a00c89 L |
250 | else { |
251 | __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
252 | __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
253 | __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
254 | M256.w[3] = M256.w[3] + Carry; | |
255 | M256.w[0]++; | |
256 | if (!M256.w[0]) { | |
257 | M256.w[1]++; | |
258 | if (!M256.w[1]) { | |
259 | M256.w[2]++; | |
260 | if (!M256.w[2]) | |
261 | M256.w[3]++; | |
200359e8 L |
262 | } |
263 | } | |
b2a00c89 L |
264 | if (M256.w[3] < C256.w[3] |
265 | || (M256.w[3] == C256.w[3] | |
266 | && (M256.w[2] < C256.w[2] | |
267 | || (M256.w[2] == C256.w[2] | |
268 | && (M256.w[1] < C256.w[1] | |
269 | || (M256.w[1] == C256.w[1] | |
270 | && M256.w[0] <= C256.w[0])))))) { | |
200359e8 | 271 | |
b2a00c89 L |
272 | CS.w[0]++; |
273 | if (!CS.w[0]) | |
274 | CS.w[1]++; | |
275 | } | |
276 | } | |
277 | // RU? | |
278 | if ((rnd_mode) == ROUNDING_UP) { | |
279 | CS.w[0]++; | |
280 | if (!CS.w[0]) | |
281 | CS.w[1]++; | |
282 | } | |
283 | ||
284 | } | |
285 | #endif | |
286 | #endif | |
287 | ||
288 | #ifdef SET_STATUS_FLAGS | |
289 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
290 | #endif | |
291 | get_BID128_fast (&res, 0, | |
292 | (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
293 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
294 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
295 | #endif | |
296 | BID_RETURN (res); | |
297 | } | |
298 | ||
299 | ||
300 | ||
301 | BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x) | |
302 | ||
303 | UINT256 M256, C256, C4, C8; | |
304 | UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; | |
305 | UINT64 sign_x, Carry; | |
306 | SINT64 D; | |
307 | int_float fx, f64; | |
308 | int exponent_x, bin_expon_cx; | |
309 | int digits, scale, exponent_q; | |
310 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
311 | fexcept_t binaryflags = 0; | |
312 | #endif | |
313 | ||
314 | // unpack arguments, check for NaN or Infinity | |
315 | // unpack arguments, check for NaN or Infinity | |
316 | CX.w[1] = 0; | |
317 | if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { | |
318 | res.w[1] = CX.w[0]; | |
319 | res.w[0] = 0; | |
320 | // NaN ? | |
321 | if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
322 | #ifdef SET_STATUS_FLAGS | |
323 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
324 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
325 | #endif | |
326 | res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
327 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
328 | res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
329 | BID_RETURN (res); | |
330 | } | |
331 | // x is Infinity? | |
332 | if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
333 | if (sign_x) { | |
334 | // -Inf, return NaN | |
335 | res.w[1] = 0x7c00000000000000ull; | |
336 | #ifdef SET_STATUS_FLAGS | |
337 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
338 | #endif | |
339 | } | |
340 | BID_RETURN (res); | |
341 | } | |
342 | // x is 0 otherwise | |
343 | ||
344 | exponent_x = | |
345 | exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; | |
346 | res.w[1] = | |
347 | sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) | |
348 | << 49); | |
349 | res.w[0] = 0; | |
350 | BID_RETURN (res); | |
351 | } | |
352 | if (sign_x) { | |
353 | res.w[1] = 0x7c00000000000000ull; | |
354 | res.w[0] = 0; | |
355 | #ifdef SET_STATUS_FLAGS | |
356 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
357 | #endif | |
358 | BID_RETURN (res); | |
359 | } | |
360 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
361 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
362 | #endif | |
363 | exponent_x = | |
364 | exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; | |
365 | ||
366 | // 2^64 | |
367 | f64.i = 0x5f800000; | |
368 | ||
369 | // fx ~ CX | |
370 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
371 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
372 | digits = estimate_decimal_digits[bin_expon_cx]; | |
373 | ||
374 | A10 = CX; | |
375 | if (exponent_x & 1) { | |
376 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
377 | A10.w[0] = CX.w[0] << 3; | |
378 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
379 | CX2.w[0] = CX.w[0] << 1; | |
380 | __add_128_128 (A10, A10, CX2); | |
381 | } | |
382 | ||
383 | CS.w[0] = short_sqrt128 (A10); | |
384 | CS.w[1] = 0; | |
385 | // check for exact result | |
386 | if (CS.w[0] * CS.w[0] == A10.w[0]) { | |
387 | __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); | |
388 | if (S2.w[1] == A10.w[1]) { | |
389 | get_BID128_very_fast (&res, 0, | |
390 | (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1, | |
391 | CS); | |
392 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
393 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
394 | #endif | |
395 | BID_RETURN (res); | |
396 | } | |
397 | } | |
398 | // get number of digits in CX | |
399 | D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; | |
400 | if (D > 0 | |
401 | || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) | |
402 | digits++; | |
403 | ||
404 | // if exponent is odd, scale coefficient by 10 | |
405 | scale = 67 - digits; | |
406 | exponent_q = exponent_x - scale; | |
407 | scale += (exponent_q & 1); // exp. bias is even | |
408 | ||
409 | if (scale > 38) { | |
410 | T128 = power10_table_128[scale - 37]; | |
411 | __mul_128x128_low (CX1, CX, T128); | |
412 | ||
413 | TP128 = power10_table_128[37]; | |
414 | __mul_128x128_to_256 (C256, CX1, TP128); | |
415 | } else { | |
416 | T128 = power10_table_128[scale]; | |
417 | __mul_128x128_to_256 (C256, CX, T128); | |
418 | } | |
419 | ||
420 | ||
421 | // 4*C256 | |
422 | C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); | |
423 | C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); | |
424 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
425 | C4.w[0] = C256.w[0] << 2; | |
426 | ||
427 | long_sqrt128 (&CS, C256); | |
428 | ||
429 | #ifndef IEEE_ROUND_NEAREST | |
430 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
431 | if (!((rnd_mode) & 3)) { | |
432 | #endif | |
433 | #endif | |
434 | // compare to midpoints | |
435 | CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
436 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
437 | // CSM^2 | |
438 | //__mul_128x128_to_256(M256, CSM, CSM); | |
439 | __sqr128_to_256 (M256, CSM); | |
440 | ||
441 | if (C4.w[3] > M256.w[3] | |
442 | || (C4.w[3] == M256.w[3] | |
443 | && (C4.w[2] > M256.w[2] | |
444 | || (C4.w[2] == M256.w[2] | |
445 | && (C4.w[1] > M256.w[1] | |
446 | || (C4.w[1] == M256.w[1] | |
447 | && C4.w[0] > M256.w[0])))))) { | |
448 | // round up | |
449 | CS.w[0]++; | |
450 | if (!CS.w[0]) | |
451 | CS.w[1]++; | |
452 | } else { | |
453 | C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); | |
454 | C8.w[0] = CS.w[0] << 3; | |
455 | // M256 - 8*CSM | |
456 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
457 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
458 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
459 | M256.w[3] = M256.w[3] - Carry; | |
460 | ||
461 | // if CSM' > C256, round up | |
462 | if (M256.w[3] > C4.w[3] | |
463 | || (M256.w[3] == C4.w[3] | |
464 | && (M256.w[2] > C4.w[2] | |
465 | || (M256.w[2] == C4.w[2] | |
466 | && (M256.w[1] > C4.w[1] | |
467 | || (M256.w[1] == C4.w[1] | |
468 | && M256.w[0] > C4.w[0])))))) { | |
469 | // round down | |
470 | if (!CS.w[0]) | |
471 | CS.w[1]--; | |
472 | CS.w[0]--; | |
473 | } | |
474 | } | |
475 | #ifndef IEEE_ROUND_NEAREST | |
476 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
477 | } else { | |
478 | __sqr128_to_256 (M256, CS); | |
479 | C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
480 | C8.w[0] = CS.w[0] << 1; | |
481 | if (M256.w[3] > C256.w[3] | |
482 | || (M256.w[3] == C256.w[3] | |
483 | && (M256.w[2] > C256.w[2] | |
484 | || (M256.w[2] == C256.w[2] | |
485 | && (M256.w[1] > C256.w[1] | |
486 | || (M256.w[1] == C256.w[1] | |
487 | && M256.w[0] > C256.w[0])))))) { | |
488 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
489 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
490 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
491 | M256.w[3] = M256.w[3] - Carry; | |
492 | M256.w[0]++; | |
493 | if (!M256.w[0]) { | |
494 | M256.w[1]++; | |
495 | if (!M256.w[1]) { | |
496 | M256.w[2]++; | |
497 | if (!M256.w[2]) | |
498 | M256.w[3]++; | |
200359e8 | 499 | } |
b2a00c89 L |
500 | } |
501 | ||
502 | if (!CS.w[0]) | |
503 | CS.w[1]--; | |
504 | CS.w[0]--; | |
505 | ||
506 | if (M256.w[3] > C256.w[3] | |
507 | || (M256.w[3] == C256.w[3] | |
508 | && (M256.w[2] > C256.w[2] | |
509 | || (M256.w[2] == C256.w[2] | |
510 | && (M256.w[1] > C256.w[1] | |
511 | || (M256.w[1] == C256.w[1] | |
512 | && M256.w[0] > C256.w[0])))))) { | |
513 | ||
514 | if (!CS.w[0]) | |
515 | CS.w[1]--; | |
516 | CS.w[0]--; | |
517 | } | |
518 | } | |
519 | ||
520 | else { | |
521 | __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
522 | __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
523 | __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
524 | M256.w[3] = M256.w[3] + Carry; | |
525 | M256.w[0]++; | |
526 | if (!M256.w[0]) { | |
527 | M256.w[1]++; | |
528 | if (!M256.w[1]) { | |
529 | M256.w[2]++; | |
530 | if (!M256.w[2]) | |
531 | M256.w[3]++; | |
200359e8 L |
532 | } |
533 | } | |
b2a00c89 L |
534 | if (M256.w[3] < C256.w[3] |
535 | || (M256.w[3] == C256.w[3] | |
536 | && (M256.w[2] < C256.w[2] | |
537 | || (M256.w[2] == C256.w[2] | |
538 | && (M256.w[1] < C256.w[1] | |
539 | || (M256.w[1] == C256.w[1] | |
540 | && M256.w[0] <= C256.w[0])))))) { | |
541 | ||
200359e8 L |
542 | CS.w[0]++; |
543 | if (!CS.w[0]) | |
544 | CS.w[1]++; | |
545 | } | |
200359e8 | 546 | } |
b2a00c89 L |
547 | // RU? |
548 | if ((rnd_mode) == ROUNDING_UP) { | |
549 | CS.w[0]++; | |
550 | if (!CS.w[0]) | |
551 | CS.w[1]++; | |
552 | } | |
553 | ||
554 | } | |
200359e8 L |
555 | #endif |
556 | #endif | |
557 | ||
558 | #ifdef SET_STATUS_FLAGS | |
b2a00c89 | 559 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); |
200359e8 | 560 | #endif |
b2a00c89 L |
561 | get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, |
562 | CS); | |
563 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
564 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
565 | #endif | |
566 | BID_RETURN (res); | |
567 | ||
568 | ||
200359e8 | 569 | } |