/* Copyright (C) 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */

#define BID_128RES
#include "sqrt_macros.h"

BID128_FUNCTION_ARG1(__bid128_sqrt, x)

  UINT256 M256, C256, C4, C8;
  UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
  UINT64 sign_x, Carry;
  SINT64 D;
  int_float fx, f64;
  int exponent_x = 0, bin_expon_cx;
  int digits, scale, exponent_q;

  // unpack arguments, check for NaN or Infinity
  if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
    res.w[1] = x.w[1];
    res.w[0] = x.w[0];
    // NaN ?
    if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
      if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res.w[1] &= QUIET_MASK64;
      BID_RETURN (res);
    }
    // x is Infinity?
    if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
      if (sign_x) {
        // -Inf, return NaN
        res.w[1] = 0x7c00000000000000ull;
#ifdef SET_STATUS_FLAGS
        __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      }
      BID_RETURN (res);
    }
    // x is 0 otherwise
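    // sqrt(+/-0) is +/-0, with the exponent halved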
    res.w[1] = sign_x |
      ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
    BID_RETURN (res);
  }
  if (sign_x) {
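    // sqrt of a negative, non-zero operand is invalid: return quiet NaN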
    res.w[1] = 0x7c00000000000000ull;
    res.w[0] = 0;
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    BID_RETURN (res);
  }
  // 2^64
  f64.i = 0x5f800000;

  // fx ~ CX
  fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
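  // extract the unbiased binary exponent of fx and use it to estimate
  // how many decimal digits CX has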
  bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
  digits = __bid_estimate_decimal_digits[bin_expon_cx];

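  // if the exponent is odd, pre-multiply the coefficient by 10
  // (A10 = 8*CX + 2*CX) so that sqrt(A10) pairs with the halved exponent
  // in the exact-result check below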
  A10 = CX;
  if (exponent_x & 1) {
    A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
    A10.w[0] = CX.w[0] << 3;
    CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
    CX2.w[0] = CX.w[0] << 1;
    __add_128_128 (A10, A10, CX2);
  }

  CS.w[0] = short_sqrt128 (A10);
  CS.w[1] = 0;
  // check for exact result
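  // (the low 64 bits act as a cheap filter; the full 128-bit square confirms)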
  if (CS.w[0] * CS.w[0] == A10.w[0]) {
    __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
    if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
      {
        get_BID128_very_fast (&res, 0,
                              (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
                              CS);
        BID_RETURN (res);
      }
  }
  // get number of digits in CX
  D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1];
  if (D > 0
      || (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0]))
    digits++;

  // if exponent is odd, scale coefficient by 10
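  // bring the coefficient up to 67 digits (68 when one more factor of 10
  // is needed to keep exponent_x - scale even)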
  scale = 67 - digits;
  exponent_q = exponent_x - scale;
  scale += (exponent_q & 1); // exp. bias is even

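  // 10^scale no longer fits in 128 bits once scale > 38, so the
  // scaling is applied in two multiplications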
  if (scale > 38) {
    T128 = __bid_power10_table_128[scale - 37];
    __mul_128x128_low (CX1, CX, T128);

    TP128 = __bid_power10_table_128[37];
    __mul_128x128_to_256 (C256, CX1, TP128);
  } else {
    T128 = __bid_power10_table_128[scale];
    __mul_128x128_to_256 (C256, CX, T128);
  }

  // 4*C256
  C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
  C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
  C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
  C4.w[0] = C256.w[0] << 2;

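  // CS ~ sqrt(C256); the rounding of the last digit is corrected below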
  long_sqrt128 (&CS, C256);

#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
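  // (rnd_mode & 3) == 0 selects the two round-to-nearest modes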
  if (!((rnd_mode) & 3)) {
#endif
#endif
    // compare to midpoints
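    // CSM = 2*CS + 1, so comparing CSM^2 with 4*C256 compares
    // (CS + 1/2)^2 with C256 using integers only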
    CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
    CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
    // CSM^2
    //__mul_128x128_to_256(M256, CSM, CSM);
    __sqr128_to_256 (M256, CSM);

    if (C4.w[3] > M256.w[3]
        || (C4.w[3] == M256.w[3]
            && (C4.w[2] > M256.w[2]
                || (C4.w[2] == M256.w[2]
                    && (C4.w[1] > M256.w[1]
                        || (C4.w[1] == M256.w[1]
                            && C4.w[0] > M256.w[0])))))) {
      // round up
      CS.w[0]++;
      if (!CS.w[0])
        CS.w[1]++;
    } else {
      C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
      C8.w[0] = CS.w[0] << 3;
      // M256 - 8*CS = (2*CS+1)^2 - 8*CS = (2*CS-1)^2, the lower midpoint squared
      __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] - Carry;

      // if (2*CS-1)^2 > 4*C256, the root lies below CS - 1/2: round down
      if (M256.w[3] > C4.w[3]
          || (M256.w[3] == C4.w[3]
              && (M256.w[2] > C4.w[2]
                  || (M256.w[2] == C4.w[2]
                      && (M256.w[1] > C4.w[1]
                          || (M256.w[1] == C4.w[1]
                              && M256.w[0] > C4.w[0])))))) {
        // round down
        if (!CS.w[0])
          CS.w[1]--;
        CS.w[0]--;
      }
    }
#ifndef IEEE_ROUND_NEAREST
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  } else {
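    // directed rounding modes: make CS the truncated (floor) square root of
    // C256; C8 = 2*CS below steps M256 = CS^2 to (CS-1)^2 or (CS+1)^2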
    __sqr128_to_256 (M256, CS);
    C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
    C8.w[0] = CS.w[0] << 1;
    if (M256.w[3] > C256.w[3]
        || (M256.w[3] == C256.w[3]
            && (M256.w[2] > C256.w[2]
                || (M256.w[2] == C256.w[2]
                    && (M256.w[1] > C256.w[1]
                        || (M256.w[1] == C256.w[1]
                            && M256.w[0] > C256.w[0])))))) {
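      // CS^2 > C256: CS is too large; form (CS-1)^2 = CS^2 - 2*CS + 1
      // and decrement CS (possibly twice)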
      __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] - Carry;
      M256.w[0]++;
      if (!M256.w[0]) {
        M256.w[1]++;
        if (!M256.w[1]) {
          M256.w[2]++;
          if (!M256.w[2])
            M256.w[3]++;
        }
      }

      if (!CS.w[0])
        CS.w[1]--;
      CS.w[0]--;

      if (M256.w[3] > C256.w[3]
          || (M256.w[3] == C256.w[3]
              && (M256.w[2] > C256.w[2]
                  || (M256.w[2] == C256.w[2]
                      && (M256.w[1] > C256.w[1]
                          || (M256.w[1] == C256.w[1]
                              && M256.w[0] > C256.w[0])))))) {
        if (!CS.w[0])
          CS.w[1]--;
        CS.w[0]--;
      }
    } else {
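      // CS^2 <= C256: form (CS+1)^2 = CS^2 + 2*CS + 1; if it still does not
      // exceed C256, CS was one too small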
      __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
      __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
      __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
      M256.w[3] = M256.w[3] + Carry;
      M256.w[0]++;
      if (!M256.w[0]) {
        M256.w[1]++;
        if (!M256.w[1]) {
          M256.w[2]++;
          if (!M256.w[2])
            M256.w[3]++;
        }
      }
      if (M256.w[3] < C256.w[3]
          || (M256.w[3] == C256.w[3]
              && (M256.w[2] < C256.w[2]
                  || (M256.w[2] == C256.w[2]
                      && (M256.w[1] < C256.w[1]
                          || (M256.w[1] == C256.w[1]
                              && M256.w[0] <= C256.w[0])))))) {
        CS.w[0]++;
        if (!CS.w[0])
          CS.w[1]++;
      }
    }
    // round up if the rounding mode is ROUNDING_UP
    if ((rnd_mode) == ROUNDING_UP) {
      CS.w[0]++;
      if (!CS.w[0])
        CS.w[1]++;
    }

  }
#endif
#endif

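  // only inexact results reach this point (the exact case returned earlier)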
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, INEXACT_EXCEPTION);
#endif
  get_BID128_fast (&res, 0,
                   (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
  BID_RETURN (res);
}