]>
Commit | Line | Data |
---|---|---|
200359e8 L |
1 | /* Copyright (C) 2007 Free Software Foundation, Inc. |
2 | ||
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
7 | Software Foundation; either version 2, or (at your option) any later | |
8 | version. | |
9 | ||
10 | In addition to the permissions in the GNU General Public License, the | |
11 | Free Software Foundation gives you unlimited permission to link the | |
12 | compiled version of this file into combinations with other programs, | |
13 | and to distribute those combinations without any restriction coming | |
14 | from the use of this file. (The General Public License restrictions | |
15 | do apply in other respects; for example, they cover modification of | |
16 | the file, and distribution when not linked into a combine | |
17 | executable.) | |
18 | ||
19 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
20 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
21 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
22 | for more details. | |
23 | ||
24 | You should have received a copy of the GNU General Public License | |
25 | along with GCC; see the file COPYING. If not, write to the Free | |
26 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA | |
27 | 02110-1301, USA. */ | |
28 | ||
29 | #define BID_128RES | |
30 | #include "sqrt_macros.h" | |
31 | ||
32 | BID128_FUNCTION_ARG1(__bid128_sqrt, x) | |
33 | ||
34 | UINT256 M256, C256, C4, C8; | |
35 | UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; | |
36 | UINT64 sign_x, Carry; | |
37 | SINT64 D; | |
38 | int_float fx, f64; | |
39 | int exponent_x = 0, bin_expon_cx; | |
40 | int digits, scale, exponent_q; | |
41 | ||
42 | // unpack arguments, check for NaN or Infinity | |
43 | if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
44 | res.w[1] = x.w[1]; | |
45 | res.w[0] = x.w[0]; | |
46 | // NaN ? | |
47 | if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
48 | #ifdef SET_STATUS_FLAGS | |
49 | if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
50 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
51 | #endif | |
52 | res.w[1] &= QUIET_MASK64; | |
53 | BID_RETURN (res); | |
54 | } | |
55 | // x is Infinity? | |
56 | if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
57 | if (sign_x) { | |
58 | // -Inf, return NaN | |
59 | res.w[1] = 0x7c00000000000000ull; | |
60 | #ifdef SET_STATUS_FLAGS | |
61 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
62 | #endif | |
63 | } | |
64 | BID_RETURN (res); | |
65 | } | |
66 | // x is 0 otherwise | |
67 | ||
68 | res.w[1] = | |
69 | sign_x | | |
70 | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << | |
71 | 49); | |
72 | BID_RETURN (res); | |
73 | } | |
74 | if (sign_x) { | |
75 | res.w[1] = 0x7c00000000000000ull; | |
76 | res.w[0] = 0; | |
77 | #ifdef SET_STATUS_FLAGS | |
78 | __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
79 | #endif | |
80 | BID_RETURN (res); | |
81 | } | |
82 | // 2^64 | |
83 | f64.i = 0x5f800000; | |
84 | ||
85 | // fx ~ CX | |
86 | fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
87 | bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; | |
88 | digits = __bid_estimate_decimal_digits[bin_expon_cx]; | |
89 | ||
90 | A10 = CX; | |
91 | if (exponent_x & 1) { | |
92 | A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); | |
93 | A10.w[0] = CX.w[0] << 3; | |
94 | CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); | |
95 | CX2.w[0] = CX.w[0] << 1; | |
96 | __add_128_128 (A10, A10, CX2); | |
97 | } | |
98 | ||
99 | CS.w[0] = short_sqrt128 (A10); | |
100 | CS.w[1] = 0; | |
101 | // check for exact result | |
102 | if (CS.w[0] * CS.w[0] == A10.w[0]) { | |
103 | __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); | |
104 | if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) | |
105 | { | |
106 | get_BID128_very_fast (&res, 0, | |
107 | (exponent_x + | |
108 | DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
109 | BID_RETURN (res); | |
110 | } | |
111 | } | |
112 | // get number of digits in CX | |
113 | D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1]; | |
114 | if (D > 0 | |
115 | || (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0])) | |
116 | digits++; | |
117 | ||
118 | // if exponent is odd, scale coefficient by 10 | |
119 | scale = 67 - digits; | |
120 | exponent_q = exponent_x - scale; | |
121 | scale += (exponent_q & 1); // exp. bias is even | |
122 | ||
123 | if (scale > 38) { | |
124 | T128 = __bid_power10_table_128[scale - 37]; | |
125 | __mul_128x128_low (CX1, CX, T128); | |
126 | ||
127 | TP128 = __bid_power10_table_128[37]; | |
128 | __mul_128x128_to_256 (C256, CX1, TP128); | |
129 | } else { | |
130 | T128 = __bid_power10_table_128[scale]; | |
131 | __mul_128x128_to_256 (C256, CX, T128); | |
132 | } | |
133 | ||
134 | ||
135 | // 4*C256 | |
136 | C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); | |
137 | C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); | |
138 | C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); | |
139 | C4.w[0] = C256.w[0] << 2; | |
140 | ||
141 | long_sqrt128 (&CS, C256); | |
142 | ||
143 | #ifndef IEEE_ROUND_NEAREST | |
144 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
145 | if (!((rnd_mode) & 3)) { | |
146 | #endif | |
147 | #endif | |
148 | // compare to midpoints | |
149 | CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
150 | CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; | |
151 | // CSM^2 | |
152 | //__mul_128x128_to_256(M256, CSM, CSM); | |
153 | __sqr128_to_256 (M256, CSM); | |
154 | ||
155 | if (C4.w[3] > M256.w[3] | |
156 | || (C4.w[3] == M256.w[3] | |
157 | && (C4.w[2] > M256.w[2] | |
158 | || (C4.w[2] == M256.w[2] | |
159 | && (C4.w[1] > M256.w[1] | |
160 | || (C4.w[1] == M256.w[1] | |
161 | && C4.w[0] > M256.w[0])))))) { | |
162 | // round up | |
163 | CS.w[0]++; | |
164 | if (!CS.w[0]) | |
165 | CS.w[1]++; | |
166 | } else { | |
167 | C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); | |
168 | C8.w[0] = CS.w[0] << 3; | |
169 | // M256 - 8*CSM | |
170 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
171 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
172 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
173 | M256.w[3] = M256.w[3] - Carry; | |
174 | ||
175 | // if CSM' > C256, round up | |
176 | if (M256.w[3] > C4.w[3] | |
177 | || (M256.w[3] == C4.w[3] | |
178 | && (M256.w[2] > C4.w[2] | |
179 | || (M256.w[2] == C4.w[2] | |
180 | && (M256.w[1] > C4.w[1] | |
181 | || (M256.w[1] == C4.w[1] | |
182 | && M256.w[0] > C4.w[0])))))) { | |
183 | // round down | |
184 | if (!CS.w[0]) | |
185 | CS.w[1]--; | |
186 | CS.w[0]--; | |
187 | } | |
188 | } | |
189 | #ifndef IEEE_ROUND_NEAREST | |
190 | #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
191 | } else { | |
192 | __sqr128_to_256 (M256, CS); | |
193 | C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); | |
194 | C8.w[0] = CS.w[0] << 1; | |
195 | if (M256.w[3] > C256.w[3] | |
196 | || (M256.w[3] == C256.w[3] | |
197 | && (M256.w[2] > C256.w[2] | |
198 | || (M256.w[2] == C256.w[2] | |
199 | && (M256.w[1] > C256.w[1] | |
200 | || (M256.w[1] == C256.w[1] | |
201 | && M256.w[0] > C256.w[0])))))) { | |
202 | __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
203 | __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
204 | __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
205 | M256.w[3] = M256.w[3] - Carry; | |
206 | M256.w[0]++; | |
207 | if (!M256.w[0]) { | |
208 | M256.w[1]++; | |
209 | if (!M256.w[1]) { | |
210 | M256.w[2]++; | |
211 | if (!M256.w[2]) | |
212 | M256.w[3]++; | |
213 | } | |
214 | } | |
215 | ||
216 | if (!CS.w[0]) | |
217 | CS.w[1]--; | |
218 | CS.w[0]--; | |
219 | ||
220 | if (M256.w[3] > C256.w[3] | |
221 | || (M256.w[3] == C256.w[3] | |
222 | && (M256.w[2] > C256.w[2] | |
223 | || (M256.w[2] == C256.w[2] | |
224 | && (M256.w[1] > C256.w[1] | |
225 | || (M256.w[1] == C256.w[1] | |
226 | && M256.w[0] > C256.w[0])))))) { | |
227 | ||
228 | if (!CS.w[0]) | |
229 | CS.w[1]--; | |
230 | CS.w[0]--; | |
231 | } | |
232 | } | |
233 | ||
234 | else { | |
235 | __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); | |
236 | __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); | |
237 | __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); | |
238 | M256.w[3] = M256.w[3] + Carry; | |
239 | M256.w[0]++; | |
240 | if (!M256.w[0]) { | |
241 | M256.w[1]++; | |
242 | if (!M256.w[1]) { | |
243 | M256.w[2]++; | |
244 | if (!M256.w[2]) | |
245 | M256.w[3]++; | |
246 | } | |
247 | } | |
248 | if (M256.w[3] < C256.w[3] | |
249 | || (M256.w[3] == C256.w[3] | |
250 | && (M256.w[2] < C256.w[2] | |
251 | || (M256.w[2] == C256.w[2] | |
252 | && (M256.w[1] < C256.w[1] | |
253 | || (M256.w[1] == C256.w[1] | |
254 | && M256.w[0] <= C256.w[0])))))) { | |
255 | ||
256 | CS.w[0]++; | |
257 | if (!CS.w[0]) | |
258 | CS.w[1]++; | |
259 | } | |
260 | } | |
261 | // RU? | |
262 | if ((rnd_mode) == ROUNDING_UP) { | |
263 | CS.w[0]++; | |
264 | if (!CS.w[0]) | |
265 | CS.w[1]++; | |
266 | } | |
267 | ||
268 | } | |
269 | #endif | |
270 | #endif | |
271 | ||
272 | #ifdef SET_STATUS_FLAGS | |
273 | __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
274 | #endif | |
275 | get_BID128_fast (&res, 0, | |
276 | (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS); | |
277 | BID_RETURN (res); | |
278 | } |