]>
Commit | Line | Data |
---|---|---|
cbe34bb5 | 1 | /* Copyright (C) 2007-2017 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | #define BID_128RES | |
b2a00c89 L |
25 | #include "bid_internal.h" |
26 | #include "bid_sqrt_macros.h" | |
27 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
28 | #include <fenv.h> | |
200359e8 | 29 | |
b2a00c89 L |
30 | #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT |
31 | #endif | |
32 | ||
33 | BID128_FUNCTION_ARG1 (bid128_sqrt, x) | |
200359e8 | 34 | |
b2a00c89 L |
35 | UINT256 M256, C256, C4, C8; |
36 | UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; | |
37 | UINT64 sign_x, Carry; | |
38 | SINT64 D; | |
39 | int_float fx, f64; | |
40 | int exponent_x, bin_expon_cx; | |
41 | int digits, scale, exponent_q; | |
42 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
43 | fexcept_t binaryflags = 0; | |
44 | #endif | |
200359e8 L |
45 | |
46 | // unpack arguments, check for NaN or Infinity | |
b2a00c89 L |
47 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { |
48 | res.w[1] = CX.w[1]; | |
49 | res.w[0] = CX.w[0]; | |
200359e8 | 50 | // NaN ? |
b2a00c89 | 51 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
200359e8 | 52 | #ifdef SET_STATUS_FLAGS |
b2a00c89 L |
53 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN |
54 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
200359e8 | 55 | #endif |
b2a00c89 L |
56 | res.w[1] = CX.w[1] & QUIET_MASK64; |
57 | BID_RETURN (res); | |
58 | } | |
200359e8 | 59 | // x is Infinity? |
b2a00c89 L |
60 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
61 | res.w[1] = CX.w[1]; | |
200359e8 | 62 | if (sign_x) { |
b2a00c89 | 63 | // -Inf, return NaN |
200359e8 | 64 | res.w[1] = 0x7c00000000000000ull; |
200359e8 L |
65 | #ifdef SET_STATUS_FLAGS |
66 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
67 | #endif | |
200359e8 | 68 | } |
b2a00c89 L |
69 | BID_RETURN (res); |
70 | } | |
71 | // x is 0 otherwise | |
72 | ||
73 | res.w[1] = | |
74 | sign_x | | |
75 | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49); | |
76 | res.w[0] = 0; | |
77 | BID_RETURN (res); | |
78 | } | |
79 | if (sign_x) { | |
80 | res.w[1] = 0x7c00000000000000ull; | |
81 | res.w[0] = 0; | |
82 | #ifdef SET_STATUS_FLAGS | |
83 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
84 | #endif | |
85 | BID_RETURN (res); | |
86 | } | |
87 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
88 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
89 | #endif | |
200359e8 | 90 | // 2^64 |
b2a00c89 | 91 | f64.i = 0x5f800000; |
200359e8 L |
92 | |
93 | // fx ~ CX | |
b2a00c89 L |
94 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; |
95 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
96 | digits = estimate_decimal_digits[bin_expon_cx]; | |
200359e8 | 97 | |
b2a00c89 L |
98 | A10 = CX; |
99 | if (exponent_x & 1) { | |
100 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
101 | A10.w[0] = CX.w[0] << 3; | |
102 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
103 | CX2.w[0] = CX.w[0] << 1; | |
104 | __add_128_128 (A10, A10, CX2); | |
105 | } | |
106 | ||
107 | CS.w[0] = short_sqrt128 (A10); | |
108 | CS.w[1] = 0; | |
200359e8 | 109 | // check for exact result |
b2a00c89 L |
110 | if (CS.w[0] * CS.w[0] == A10.w[0]) { |
111 | __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); | |
112 | if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) | |
113 | { | |
114 | get_BID128_very_fast (&res, 0, | |
115 | (exponent_x + | |
116 | DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
117 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
118 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
119 | #endif | |
120 | BID_RETURN (res); | |
200359e8 | 121 | } |
b2a00c89 | 122 | } |
200359e8 | 123 | // get number of digits in CX |
b2a00c89 L |
124 | D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; |
125 | if (D > 0 | |
126 | || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) | |
127 | digits++; | |
200359e8 L |
128 | |
129 | // if exponent is odd, scale coefficient by 10 | |
b2a00c89 L |
130 | scale = 67 - digits; |
131 | exponent_q = exponent_x - scale; | |
132 | scale += (exponent_q & 1); // exp. bias is even | |
200359e8 | 133 | |
b2a00c89 L |
134 | if (scale > 38) { |
135 | T128 = power10_table_128[scale - 37]; | |
136 | __mul_128x128_low (CX1, CX, T128); | |
200359e8 | 137 | |
b2a00c89 L |
138 | TP128 = power10_table_128[37]; |
139 | __mul_128x128_to_256 (C256, CX1, TP128); | |
140 | } else { | |
141 | T128 = power10_table_128[scale]; | |
142 | __mul_128x128_to_256 (C256, CX, T128); | |
143 | } | |
200359e8 L |
144 | |
145 | ||
146 | // 4*C256 | |
b2a00c89 L |
147 | C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); |
148 | C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); | |
149 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
150 | C4.w[0] = C256.w[0] << 2; | |
200359e8 | 151 | |
b2a00c89 | 152 | long_sqrt128 (&CS, C256); |
200359e8 L |
153 | |
154 | #ifndef IEEE_ROUND_NEAREST | |
155 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
b2a00c89 | 156 | if (!((rnd_mode) & 3)) { |
200359e8 L |
157 | #endif |
158 | #endif | |
b2a00c89 L |
159 | // compare to midpoints |
160 | CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
161 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
162 | // CSM^2 | |
163 | //__mul_128x128_to_256(M256, CSM, CSM); | |
164 | __sqr128_to_256 (M256, CSM); | |
165 | ||
166 | if (C4.w[3] > M256.w[3] | |
167 | || (C4.w[3] == M256.w[3] | |
168 | && (C4.w[2] > M256.w[2] | |
169 | || (C4.w[2] == M256.w[2] | |
170 | && (C4.w[1] > M256.w[1] | |
171 | || (C4.w[1] == M256.w[1] | |
172 | && C4.w[0] > M256.w[0])))))) { | |
173 | // round up | |
174 | CS.w[0]++; | |
175 | if (!CS.w[0]) | |
176 | CS.w[1]++; | |
177 | } else { | |
178 | C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); | |
179 | C8.w[0] = CS.w[0] << 3; | |
180 | // M256 - 8*CSM | |
181 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
182 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
183 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
184 | M256.w[3] = M256.w[3] - Carry; | |
185 | ||
186 | // if CSM' > C256, round up | |
187 | if (M256.w[3] > C4.w[3] | |
188 | || (M256.w[3] == C4.w[3] | |
189 | && (M256.w[2] > C4.w[2] | |
190 | || (M256.w[2] == C4.w[2] | |
191 | && (M256.w[1] > C4.w[1] | |
192 | || (M256.w[1] == C4.w[1] | |
193 | && M256.w[0] > C4.w[0])))))) { | |
194 | // round down | |
200359e8 | 195 | if (!CS.w[0]) |
b2a00c89 L |
196 | CS.w[1]--; |
197 | CS.w[0]--; | |
200359e8 | 198 | } |
b2a00c89 | 199 | } |
200359e8 L |
200 | #ifndef IEEE_ROUND_NEAREST |
201 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
b2a00c89 L |
202 | } else { |
203 | __sqr128_to_256 (M256, CS); | |
204 | C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
205 | C8.w[0] = CS.w[0] << 1; | |
206 | if (M256.w[3] > C256.w[3] | |
207 | || (M256.w[3] == C256.w[3] | |
208 | && (M256.w[2] > C256.w[2] | |
209 | || (M256.w[2] == C256.w[2] | |
210 | && (M256.w[1] > C256.w[1] | |
211 | || (M256.w[1] == C256.w[1] | |
212 | && M256.w[0] > C256.w[0])))))) { | |
213 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
214 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
215 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
216 | M256.w[3] = M256.w[3] - Carry; | |
217 | M256.w[0]++; | |
218 | if (!M256.w[0]) { | |
219 | M256.w[1]++; | |
220 | if (!M256.w[1]) { | |
221 | M256.w[2]++; | |
222 | if (!M256.w[2]) | |
223 | M256.w[3]++; | |
224 | } | |
225 | } | |
226 | ||
227 | if (!CS.w[0]) | |
228 | CS.w[1]--; | |
229 | CS.w[0]--; | |
230 | ||
200359e8 L |
231 | if (M256.w[3] > C256.w[3] |
232 | || (M256.w[3] == C256.w[3] | |
233 | && (M256.w[2] > C256.w[2] | |
234 | || (M256.w[2] == C256.w[2] | |
235 | && (M256.w[1] > C256.w[1] | |
236 | || (M256.w[1] == C256.w[1] | |
237 | && M256.w[0] > C256.w[0])))))) { | |
200359e8 L |
238 | |
239 | if (!CS.w[0]) | |
240 | CS.w[1]--; | |
241 | CS.w[0]--; | |
b2a00c89 L |
242 | } |
243 | } | |
200359e8 | 244 | |
b2a00c89 L |
245 | else { |
246 | __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
247 | __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
248 | __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
249 | M256.w[3] = M256.w[3] + Carry; | |
250 | M256.w[0]++; | |
251 | if (!M256.w[0]) { | |
252 | M256.w[1]++; | |
253 | if (!M256.w[1]) { | |
254 | M256.w[2]++; | |
255 | if (!M256.w[2]) | |
256 | M256.w[3]++; | |
200359e8 L |
257 | } |
258 | } | |
b2a00c89 L |
259 | if (M256.w[3] < C256.w[3] |
260 | || (M256.w[3] == C256.w[3] | |
261 | && (M256.w[2] < C256.w[2] | |
262 | || (M256.w[2] == C256.w[2] | |
263 | && (M256.w[1] < C256.w[1] | |
264 | || (M256.w[1] == C256.w[1] | |
265 | && M256.w[0] <= C256.w[0])))))) { | |
200359e8 | 266 | |
b2a00c89 L |
267 | CS.w[0]++; |
268 | if (!CS.w[0]) | |
269 | CS.w[1]++; | |
270 | } | |
271 | } | |
272 | // RU? | |
273 | if ((rnd_mode) == ROUNDING_UP) { | |
274 | CS.w[0]++; | |
275 | if (!CS.w[0]) | |
276 | CS.w[1]++; | |
277 | } | |
278 | ||
279 | } | |
280 | #endif | |
281 | #endif | |
282 | ||
283 | #ifdef SET_STATUS_FLAGS | |
284 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
285 | #endif | |
286 | get_BID128_fast (&res, 0, | |
287 | (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
288 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
289 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
290 | #endif | |
291 | BID_RETURN (res); | |
292 | } | |
293 | ||
294 | ||
295 | ||
296 | BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x) | |
297 | ||
298 | UINT256 M256, C256, C4, C8; | |
299 | UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; | |
300 | UINT64 sign_x, Carry; | |
301 | SINT64 D; | |
302 | int_float fx, f64; | |
303 | int exponent_x, bin_expon_cx; | |
304 | int digits, scale, exponent_q; | |
305 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
306 | fexcept_t binaryflags = 0; | |
307 | #endif | |
308 | ||
309 | // unpack arguments, check for NaN or Infinity | |
310 | // unpack arguments, check for NaN or Infinity | |
311 | CX.w[1] = 0; | |
312 | if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { | |
313 | res.w[1] = CX.w[0]; | |
314 | res.w[0] = 0; | |
315 | // NaN ? | |
316 | if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
317 | #ifdef SET_STATUS_FLAGS | |
318 | if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
319 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
320 | #endif | |
321 | res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
322 | __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
323 | res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
324 | BID_RETURN (res); | |
325 | } | |
326 | // x is Infinity? | |
327 | if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
328 | if (sign_x) { | |
329 | // -Inf, return NaN | |
330 | res.w[1] = 0x7c00000000000000ull; | |
331 | #ifdef SET_STATUS_FLAGS | |
332 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
333 | #endif | |
334 | } | |
335 | BID_RETURN (res); | |
336 | } | |
337 | // x is 0 otherwise | |
338 | ||
339 | exponent_x = | |
340 | exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; | |
341 | res.w[1] = | |
342 | sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) | |
343 | << 49); | |
344 | res.w[0] = 0; | |
345 | BID_RETURN (res); | |
346 | } | |
347 | if (sign_x) { | |
348 | res.w[1] = 0x7c00000000000000ull; | |
349 | res.w[0] = 0; | |
350 | #ifdef SET_STATUS_FLAGS | |
351 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
352 | #endif | |
353 | BID_RETURN (res); | |
354 | } | |
355 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
356 | (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
357 | #endif | |
358 | exponent_x = | |
359 | exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; | |
360 | ||
361 | // 2^64 | |
362 | f64.i = 0x5f800000; | |
363 | ||
364 | // fx ~ CX | |
365 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
366 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
367 | digits = estimate_decimal_digits[bin_expon_cx]; | |
368 | ||
369 | A10 = CX; | |
370 | if (exponent_x & 1) { | |
371 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
372 | A10.w[0] = CX.w[0] << 3; | |
373 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
374 | CX2.w[0] = CX.w[0] << 1; | |
375 | __add_128_128 (A10, A10, CX2); | |
376 | } | |
377 | ||
378 | CS.w[0] = short_sqrt128 (A10); | |
379 | CS.w[1] = 0; | |
380 | // check for exact result | |
381 | if (CS.w[0] * CS.w[0] == A10.w[0]) { | |
382 | __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); | |
383 | if (S2.w[1] == A10.w[1]) { | |
384 | get_BID128_very_fast (&res, 0, | |
385 | (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1, | |
386 | CS); | |
387 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
388 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
389 | #endif | |
390 | BID_RETURN (res); | |
391 | } | |
392 | } | |
393 | // get number of digits in CX | |
394 | D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; | |
395 | if (D > 0 | |
396 | || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) | |
397 | digits++; | |
398 | ||
399 | // if exponent is odd, scale coefficient by 10 | |
400 | scale = 67 - digits; | |
401 | exponent_q = exponent_x - scale; | |
402 | scale += (exponent_q & 1); // exp. bias is even | |
403 | ||
404 | if (scale > 38) { | |
405 | T128 = power10_table_128[scale - 37]; | |
406 | __mul_128x128_low (CX1, CX, T128); | |
407 | ||
408 | TP128 = power10_table_128[37]; | |
409 | __mul_128x128_to_256 (C256, CX1, TP128); | |
410 | } else { | |
411 | T128 = power10_table_128[scale]; | |
412 | __mul_128x128_to_256 (C256, CX, T128); | |
413 | } | |
414 | ||
415 | ||
416 | // 4*C256 | |
417 | C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); | |
418 | C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); | |
419 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
420 | C4.w[0] = C256.w[0] << 2; | |
421 | ||
422 | long_sqrt128 (&CS, C256); | |
423 | ||
424 | #ifndef IEEE_ROUND_NEAREST | |
425 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
426 | if (!((rnd_mode) & 3)) { | |
427 | #endif | |
428 | #endif | |
429 | // compare to midpoints | |
430 | CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
431 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
432 | // CSM^2 | |
433 | //__mul_128x128_to_256(M256, CSM, CSM); | |
434 | __sqr128_to_256 (M256, CSM); | |
435 | ||
436 | if (C4.w[3] > M256.w[3] | |
437 | || (C4.w[3] == M256.w[3] | |
438 | && (C4.w[2] > M256.w[2] | |
439 | || (C4.w[2] == M256.w[2] | |
440 | && (C4.w[1] > M256.w[1] | |
441 | || (C4.w[1] == M256.w[1] | |
442 | && C4.w[0] > M256.w[0])))))) { | |
443 | // round up | |
444 | CS.w[0]++; | |
445 | if (!CS.w[0]) | |
446 | CS.w[1]++; | |
447 | } else { | |
448 | C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); | |
449 | C8.w[0] = CS.w[0] << 3; | |
450 | // M256 - 8*CSM | |
451 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
452 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
453 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
454 | M256.w[3] = M256.w[3] - Carry; | |
455 | ||
456 | // if CSM' > C256, round up | |
457 | if (M256.w[3] > C4.w[3] | |
458 | || (M256.w[3] == C4.w[3] | |
459 | && (M256.w[2] > C4.w[2] | |
460 | || (M256.w[2] == C4.w[2] | |
461 | && (M256.w[1] > C4.w[1] | |
462 | || (M256.w[1] == C4.w[1] | |
463 | && M256.w[0] > C4.w[0])))))) { | |
464 | // round down | |
465 | if (!CS.w[0]) | |
466 | CS.w[1]--; | |
467 | CS.w[0]--; | |
468 | } | |
469 | } | |
470 | #ifndef IEEE_ROUND_NEAREST | |
471 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
472 | } else { | |
473 | __sqr128_to_256 (M256, CS); | |
474 | C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
475 | C8.w[0] = CS.w[0] << 1; | |
476 | if (M256.w[3] > C256.w[3] | |
477 | || (M256.w[3] == C256.w[3] | |
478 | && (M256.w[2] > C256.w[2] | |
479 | || (M256.w[2] == C256.w[2] | |
480 | && (M256.w[1] > C256.w[1] | |
481 | || (M256.w[1] == C256.w[1] | |
482 | && M256.w[0] > C256.w[0])))))) { | |
483 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
484 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
485 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
486 | M256.w[3] = M256.w[3] - Carry; | |
487 | M256.w[0]++; | |
488 | if (!M256.w[0]) { | |
489 | M256.w[1]++; | |
490 | if (!M256.w[1]) { | |
491 | M256.w[2]++; | |
492 | if (!M256.w[2]) | |
493 | M256.w[3]++; | |
200359e8 | 494 | } |
b2a00c89 L |
495 | } |
496 | ||
497 | if (!CS.w[0]) | |
498 | CS.w[1]--; | |
499 | CS.w[0]--; | |
500 | ||
501 | if (M256.w[3] > C256.w[3] | |
502 | || (M256.w[3] == C256.w[3] | |
503 | && (M256.w[2] > C256.w[2] | |
504 | || (M256.w[2] == C256.w[2] | |
505 | && (M256.w[1] > C256.w[1] | |
506 | || (M256.w[1] == C256.w[1] | |
507 | && M256.w[0] > C256.w[0])))))) { | |
508 | ||
509 | if (!CS.w[0]) | |
510 | CS.w[1]--; | |
511 | CS.w[0]--; | |
512 | } | |
513 | } | |
514 | ||
515 | else { | |
516 | __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
517 | __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
518 | __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
519 | M256.w[3] = M256.w[3] + Carry; | |
520 | M256.w[0]++; | |
521 | if (!M256.w[0]) { | |
522 | M256.w[1]++; | |
523 | if (!M256.w[1]) { | |
524 | M256.w[2]++; | |
525 | if (!M256.w[2]) | |
526 | M256.w[3]++; | |
200359e8 L |
527 | } |
528 | } | |
b2a00c89 L |
529 | if (M256.w[3] < C256.w[3] |
530 | || (M256.w[3] == C256.w[3] | |
531 | && (M256.w[2] < C256.w[2] | |
532 | || (M256.w[2] == C256.w[2] | |
533 | && (M256.w[1] < C256.w[1] | |
534 | || (M256.w[1] == C256.w[1] | |
535 | && M256.w[0] <= C256.w[0])))))) { | |
536 | ||
200359e8 L |
537 | CS.w[0]++; |
538 | if (!CS.w[0]) | |
539 | CS.w[1]++; | |
540 | } | |
200359e8 | 541 | } |
b2a00c89 L |
542 | // RU? |
543 | if ((rnd_mode) == ROUNDING_UP) { | |
544 | CS.w[0]++; | |
545 | if (!CS.w[0]) | |
546 | CS.w[1]++; | |
547 | } | |
548 | ||
549 | } | |
200359e8 L |
550 | #endif |
551 | #endif | |
552 | ||
553 | #ifdef SET_STATUS_FLAGS | |
b2a00c89 | 554 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); |
200359e8 | 555 | #endif |
b2a00c89 L |
556 | get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, |
557 | CS); | |
558 | #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
559 | (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
560 | #endif | |
561 | BID_RETURN (res); | |
562 | ||
563 | ||
200359e8 | 564 | } |