]>
Commit | Line | Data |
---|---|---|
a945c346 | 1 | /* Copyright (C) 2007-2024 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | #define BID_128RES | |
25 | ||
26 | #include "bid_internal.h" | |
27 | ||
28 | /***************************************************************************** | |
29 | * BID128_round_integral_exact | |
30 | ****************************************************************************/ | |
31 | ||
b2a00c89 | 32 | BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x) |
200359e8 | 33 | |
b2a00c89 L |
34 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} |
35 | }; | |
36 | UINT64 x_sign; | |
37 | UINT64 x_exp; | |
38 | int exp; // unbiased exponent | |
200359e8 | 39 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) |
b2a00c89 L |
40 | UINT64 tmp64; |
41 | BID_UI64DOUBLE tmp1; | |
42 | unsigned int x_nr_bits; | |
43 | int q, ind, shift; | |
44 | UINT128 C1; | |
45 | UINT256 fstar; | |
46 | UINT256 P256; | |
200359e8 L |
47 | |
48 | // check for NaN or Infinity | |
b2a00c89 L |
49 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
50 | // x is special | |
51 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
52 | // if x = NaN, then res = Q (x) | |
53 | // check first for non-canonical NaN payload | |
54 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
55 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
56 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
57 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
58 | x.w[0] = 0x0ull; | |
59 | } | |
60 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
61 | // set invalid flag | |
62 | *pfpsf |= INVALID_EXCEPTION; | |
63 | // return quiet (x) | |
64 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
65 | res.w[0] = x.w[0]; | |
66 | } else { // x is QNaN | |
67 | // return x | |
68 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
69 | res.w[0] = x.w[0]; | |
200359e8 | 70 | } |
b2a00c89 L |
71 | BID_RETURN (res) |
72 | } else { // x is not a NaN, so it must be infinity | |
73 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
74 | // return +inf | |
75 | res.w[1] = 0x7800000000000000ull; | |
76 | res.w[0] = 0x0000000000000000ull; | |
77 | } else { // x is -inf | |
78 | // return -inf | |
79 | res.w[1] = 0xf800000000000000ull; | |
80 | res.w[0] = 0x0000000000000000ull; | |
81 | } | |
82 | BID_RETURN (res); | |
200359e8 | 83 | } |
b2a00c89 | 84 | } |
200359e8 | 85 | // unpack x |
b2a00c89 L |
86 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
87 | C1.w[1] = x.w[1] & MASK_COEFF; | |
88 | C1.w[0] = x.w[0]; | |
200359e8 | 89 | |
b2a00c89 L |
90 | // check for non-canonical values (treated as zero) |
91 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
92 | // non-canonical | |
93 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
94 | C1.w[1] = 0; // significand high | |
95 | C1.w[0] = 0; // significand low | |
96 | } else { // G0_G1 != 11 | |
97 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
98 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
99 | (C1.w[1] == 0x0001ed09bead87c0ull | |
100 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
101 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
200359e8 L |
102 | C1.w[1] = 0; |
103 | C1.w[0] = 0; | |
b2a00c89 L |
104 | } else { // canonical |
105 | ; | |
200359e8 | 106 | } |
b2a00c89 L |
107 | } |
108 | ||
200359e8 | 109 | // test for input equal to zero |
b2a00c89 L |
110 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
111 | // x is 0 | |
112 | // return 0 preserving the sign bit and the preferred exponent | |
113 | // of MAX(Q(x), 0) | |
114 | if (x_exp <= (0x1820ull << 49)) { | |
115 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
116 | } else { | |
117 | res.w[1] = x_sign | x_exp; | |
200359e8 | 118 | } |
b2a00c89 L |
119 | res.w[0] = 0x0000000000000000ull; |
120 | BID_RETURN (res); | |
121 | } | |
200359e8 L |
122 | // x is not special and is not zero |
123 | ||
b2a00c89 L |
124 | switch (rnd_mode) { |
125 | case ROUNDING_TO_NEAREST: | |
126 | case ROUNDING_TIES_AWAY: | |
127 | // if (exp <= -(p+1)) return 0.0 | |
128 | if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 | |
129 | res.w[1] = x_sign | 0x3040000000000000ull; | |
130 | res.w[0] = 0x0000000000000000ull; | |
131 | *pfpsf |= INEXACT_EXCEPTION; | |
200359e8 L |
132 | BID_RETURN (res); |
133 | } | |
b2a00c89 L |
134 | break; |
135 | case ROUNDING_DOWN: | |
136 | // if (exp <= -p) return -1.0 or +0.0 | |
137 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 | |
138 | if (x_sign) { | |
139 | // if negative, return negative 1, because we know coefficient | |
140 | // is non-zero (would have been caught above) | |
141 | res.w[1] = 0xb040000000000000ull; | |
142 | res.w[0] = 0x0000000000000001ull; | |
143 | } else { | |
144 | // if positive, return positive 0, because we know coefficient is | |
145 | // non-zero (would have been caught above) | |
146 | res.w[1] = 0x3040000000000000ull; | |
200359e8 | 147 | res.w[0] = 0x0000000000000000ull; |
200359e8 | 148 | } |
b2a00c89 L |
149 | *pfpsf |= INEXACT_EXCEPTION; |
150 | BID_RETURN (res); | |
200359e8 | 151 | } |
b2a00c89 L |
152 | break; |
153 | case ROUNDING_UP: | |
154 | // if (exp <= -p) return -0.0 or +1.0 | |
155 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 | |
156 | if (x_sign) { | |
157 | // if negative, return negative 0, because we know the coefficient | |
158 | // is non-zero (would have been caught above) | |
159 | res.w[1] = 0xb040000000000000ull; | |
160 | res.w[0] = 0x0000000000000000ull; | |
200359e8 | 161 | } else { |
b2a00c89 L |
162 | // if positive, return positive 1, because we know coefficient is |
163 | // non-zero (would have been caught above) | |
164 | res.w[1] = 0x3040000000000000ull; | |
165 | res.w[0] = 0x0000000000000001ull; | |
200359e8 | 166 | } |
b2a00c89 | 167 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
168 | BID_RETURN (res); |
169 | } | |
b2a00c89 L |
170 | break; |
171 | case ROUNDING_TO_ZERO: | |
172 | // if (exp <= -p) return -0.0 or +0.0 | |
173 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 | |
200359e8 L |
174 | res.w[1] = x_sign | 0x3040000000000000ull; |
175 | res.w[0] = 0x0000000000000000ull; | |
b2a00c89 | 176 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
177 | BID_RETURN (res); |
178 | } | |
b2a00c89 | 179 | break; |
affd77d3 | 180 | default: break; // default added to avoid compiler warning |
b2a00c89 L |
181 | } |
182 | ||
200359e8 L |
183 | // q = nr. of decimal digits in x |
184 | // determine first the nr. of bits in x | |
b2a00c89 L |
185 | if (C1.w[1] == 0) { |
186 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
187 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
188 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
189 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
190 | x_nr_bits = | |
191 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
192 | } else { // x < 2^32 | |
193 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
200359e8 L |
194 | x_nr_bits = |
195 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
196 | } | |
b2a00c89 L |
197 | } else { // if x < 2^53 |
198 | tmp1.d = (double) C1.w[0]; // exact conversion | |
200359e8 | 199 | x_nr_bits = |
b2a00c89 | 200 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
200359e8 | 201 | } |
b2a00c89 L |
202 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
203 | tmp1.d = (double) C1.w[1]; // exact conversion | |
204 | x_nr_bits = | |
205 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
206 | } | |
207 | ||
208 | q = nr_digits[x_nr_bits - 1].digits; | |
209 | if (q == 0) { | |
210 | q = nr_digits[x_nr_bits - 1].digits1; | |
211 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
212 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
213 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
214 | q++; | |
215 | } | |
216 | exp = (x_exp >> 49) - 6176; | |
217 | if (exp >= 0) { // -exp <= 0 | |
218 | // the argument is an integer already | |
219 | res.w[1] = x.w[1]; | |
220 | res.w[0] = x.w[0]; | |
221 | BID_RETURN (res); | |
222 | } | |
223 | // exp < 0 | |
224 | switch (rnd_mode) { | |
225 | case ROUNDING_TO_NEAREST: | |
226 | if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q | |
227 | // need to shift right -exp digits from the coefficient; exp will be 0 | |
200359e8 L |
228 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
229 | // chop off ind digits from the lower part of C1 | |
230 | // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits | |
231 | tmp64 = C1.w[0]; | |
232 | if (ind <= 19) { | |
b2a00c89 | 233 | C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
200359e8 | 234 | } else { |
b2a00c89 L |
235 | C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
236 | C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
200359e8 L |
237 | } |
238 | if (C1.w[0] < tmp64) | |
239 | C1.w[1]++; | |
240 | // calculate C* and f* | |
241 | // C* is actually floor(C*) in this case | |
242 | // C* and f* need shifting and masking, as shown by | |
b2a00c89 | 243 | // shiftright128[] and maskhigh128[] |
200359e8 | 244 | // 1 <= x <= 34 |
b2a00c89 | 245 | // kx = 10^(-x) = ten2mk128[ind - 1] |
200359e8 L |
246 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
247 | // the approximation of 10^(-x) was rounded up to 118 bits | |
b2a00c89 | 248 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
200359e8 | 249 | // determine the value of res and fstar |
b2a00c89 L |
250 | |
251 | // determine inexactness of the rounding of C* | |
252 | // if (0 < f* - 1/2 < 10^(-x)) then | |
253 | // the result is exact | |
254 | // else // if (f* - 1/2 > T*) then | |
255 | // the result is inexact | |
256 | // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] | |
257 | ||
200359e8 | 258 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
b2a00c89 | 259 | // redundant shift = shiftright128[ind - 1]; // shift = 0 |
200359e8 L |
260 | res.w[1] = P256.w[3]; |
261 | res.w[0] = P256.w[2]; | |
262 | // redundant fstar.w[3] = 0; | |
263 | // redundant fstar.w[2] = 0; | |
b2a00c89 L |
264 | fstar.w[1] = P256.w[1]; |
265 | fstar.w[0] = P256.w[0]; | |
200359e8 L |
266 | // fraction f* < 10^(-x) <=> midpoint |
267 | // f* is in the right position to be compared with | |
b2a00c89 | 268 | // 10^(-x) from ten2mk128[] |
200359e8 L |
269 | // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) |
270 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? | |
b2a00c89 L |
271 | ((fstar.w[1] < (ten2mk128[ind - 1].w[1])) |
272 | || ((fstar.w[1] == ten2mk128[ind - 1].w[1]) | |
273 | && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) { | |
200359e8 L |
274 | // subract 1 to make even |
275 | if (res.w[0]-- == 0) { | |
276 | res.w[1]--; | |
277 | } | |
278 | } | |
b2a00c89 L |
279 | if (fstar.w[1] > 0x8000000000000000ull || |
280 | (fstar.w[1] == 0x8000000000000000ull | |
281 | && fstar.w[0] > 0x0ull)) { | |
282 | // f* > 1/2 and the result may be exact | |
283 | tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
284 | if (tmp64 > ten2mk128[ind - 1].w[1] || | |
285 | (tmp64 == ten2mk128[ind - 1].w[1] && | |
286 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
287 | // set the inexact flag | |
288 | *pfpsf |= INEXACT_EXCEPTION; | |
289 | } // else the result is exact | |
290 | } else { // the result is inexact; f2* <= 1/2 | |
291 | // set the inexact flag | |
292 | *pfpsf |= INEXACT_EXCEPTION; | |
293 | } | |
200359e8 | 294 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
b2a00c89 | 295 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
200359e8 L |
296 | res.w[1] = (P256.w[3] >> shift); |
297 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
298 | // redundant fstar.w[3] = 0; | |
b2a00c89 | 299 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; |
200359e8 L |
300 | fstar.w[1] = P256.w[1]; |
301 | fstar.w[0] = P256.w[0]; | |
302 | // fraction f* < 10^(-x) <=> midpoint | |
303 | // f* is in the right position to be compared with | |
b2a00c89 | 304 | // 10^(-x) from ten2mk128[] |
200359e8 | 305 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
b2a00c89 L |
306 | fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || |
307 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
308 | fstar.w[0] < ten2mk128[ind - 1].w[0]))) { | |
200359e8 L |
309 | // subract 1 to make even |
310 | if (res.w[0]-- == 0) { | |
311 | res.w[1]--; | |
312 | } | |
313 | } | |
b2a00c89 L |
314 | if (fstar.w[2] > onehalf128[ind - 1] || |
315 | (fstar.w[2] == onehalf128[ind - 1] | |
316 | && (fstar.w[1] || fstar.w[0]))) { | |
317 | // f2* > 1/2 and the result may be exact | |
318 | // Calculate f2* - 1/2 | |
319 | tmp64 = fstar.w[2] - onehalf128[ind - 1]; | |
320 | if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
321 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
322 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
323 | // set the inexact flag | |
324 | *pfpsf |= INEXACT_EXCEPTION; | |
325 | } // else the result is exact | |
326 | } else { // the result is inexact; f2* <= 1/2 | |
327 | // set the inexact flag | |
328 | *pfpsf |= INEXACT_EXCEPTION; | |
329 | } | |
200359e8 | 330 | } else { // 22 <= ind - 1 <= 33 |
b2a00c89 | 331 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
200359e8 L |
332 | res.w[1] = 0; |
333 | res.w[0] = P256.w[3] >> shift; | |
b2a00c89 | 334 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
200359e8 L |
335 | fstar.w[2] = P256.w[2]; |
336 | fstar.w[1] = P256.w[1]; | |
337 | fstar.w[0] = P256.w[0]; | |
338 | // fraction f* < 10^(-x) <=> midpoint | |
339 | // f* is in the right position to be compared with | |
b2a00c89 | 340 | // 10^(-x) from ten2mk128[] |
200359e8 | 341 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? |
b2a00c89 L |
342 | fstar.w[3] == 0 && fstar.w[2] == 0 && |
343 | (fstar.w[1] < ten2mk128[ind - 1].w[1] || | |
344 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
345 | fstar.w[0] < ten2mk128[ind - 1].w[0]))) { | |
200359e8 L |
346 | // subract 1 to make even |
347 | if (res.w[0]-- == 0) { | |
348 | res.w[1]--; | |
349 | } | |
350 | } | |
b2a00c89 L |
351 | if (fstar.w[3] > onehalf128[ind - 1] || |
352 | (fstar.w[3] == onehalf128[ind - 1] && | |
353 | (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { | |
354 | // f2* > 1/2 and the result may be exact | |
355 | // Calculate f2* - 1/2 | |
356 | tmp64 = fstar.w[3] - onehalf128[ind - 1]; | |
357 | if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
358 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
359 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
360 | // set the inexact flag | |
361 | *pfpsf |= INEXACT_EXCEPTION; | |
362 | } // else the result is exact | |
363 | } else { // the result is inexact; f2* <= 1/2 | |
364 | // set the inexact flag | |
365 | *pfpsf |= INEXACT_EXCEPTION; | |
366 | } | |
200359e8 L |
367 | } |
368 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
369 | BID_RETURN (res); | |
370 | } else { // if ((q + exp) < 0) <=> q < -exp | |
371 | // the result is +0 or -0 | |
372 | res.w[1] = x_sign | 0x3040000000000000ull; | |
373 | res.w[0] = 0x0000000000000000ull; | |
b2a00c89 | 374 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
375 | BID_RETURN (res); |
376 | } | |
b2a00c89 L |
377 | break; |
378 | case ROUNDING_TIES_AWAY: | |
379 | if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q | |
380 | // need to shift right -exp digits from the coefficient; exp will be 0 | |
381 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
382 | // chop off ind digits from the lower part of C1 | |
383 | // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits | |
384 | tmp64 = C1.w[0]; | |
385 | if (ind <= 19) { | |
386 | C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
387 | } else { | |
388 | C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
389 | C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
390 | } | |
391 | if (C1.w[0] < tmp64) | |
392 | C1.w[1]++; | |
393 | // calculate C* and f* | |
394 | // C* is actually floor(C*) in this case | |
395 | // C* and f* need shifting and masking, as shown by | |
396 | // shiftright128[] and maskhigh128[] | |
397 | // 1 <= x <= 34 | |
398 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
399 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) | |
400 | // the approximation of 10^(-x) was rounded up to 118 bits | |
401 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
402 | // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. | |
403 | // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 | |
404 | // if (0 < f* < 10^(-x)) then the result is a midpoint | |
405 | // if floor(C*) is even then C* = floor(C*) - logical right | |
406 | // shift; C* has p decimal digits, correct by Prop. 1) | |
407 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
408 | // shift; C* has p decimal digits, correct by Pr. 1) | |
409 | // else | |
410 | // C* = floor(C*) (logical right shift; C has p decimal digits, | |
411 | // correct by Property 1) | |
412 | // n = C* * 10^(e+x) | |
200359e8 | 413 | |
b2a00c89 L |
414 | // determine also the inexactness of the rounding of C* |
415 | // if (0 < f* - 1/2 < 10^(-x)) then | |
416 | // the result is exact | |
417 | // else // if (f* - 1/2 > T*) then | |
418 | // the result is inexact | |
419 | // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] | |
420 | // shift right C* by Ex-128 = shiftright128[ind] | |
421 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
422 | // redundant shift = shiftright128[ind - 1]; // shift = 0 | |
423 | res.w[1] = P256.w[3]; | |
424 | res.w[0] = P256.w[2]; | |
425 | // redundant fstar.w[3] = 0; | |
426 | // redundant fstar.w[2] = 0; | |
427 | fstar.w[1] = P256.w[1]; | |
428 | fstar.w[0] = P256.w[0]; | |
429 | if (fstar.w[1] > 0x8000000000000000ull || | |
430 | (fstar.w[1] == 0x8000000000000000ull | |
431 | && fstar.w[0] > 0x0ull)) { | |
432 | // f* > 1/2 and the result may be exact | |
433 | tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
434 | if ((tmp64 > ten2mk128[ind - 1].w[1] || | |
435 | (tmp64 == ten2mk128[ind - 1].w[1] && | |
436 | fstar.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
437 | // set the inexact flag | |
438 | *pfpsf |= INEXACT_EXCEPTION; | |
439 | } // else the result is exact | |
440 | } else { // the result is inexact; f2* <= 1/2 | |
441 | // set the inexact flag | |
442 | *pfpsf |= INEXACT_EXCEPTION; | |
443 | } | |
444 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
445 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 | |
446 | res.w[1] = (P256.w[3] >> shift); | |
447 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
448 | // redundant fstar.w[3] = 0; | |
449 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
450 | fstar.w[1] = P256.w[1]; | |
451 | fstar.w[0] = P256.w[0]; | |
452 | if (fstar.w[2] > onehalf128[ind - 1] || | |
453 | (fstar.w[2] == onehalf128[ind - 1] | |
454 | && (fstar.w[1] || fstar.w[0]))) { | |
455 | // f2* > 1/2 and the result may be exact | |
456 | // Calculate f2* - 1/2 | |
457 | tmp64 = fstar.w[2] - onehalf128[ind - 1]; | |
458 | if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
459 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
460 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
461 | // set the inexact flag | |
462 | *pfpsf |= INEXACT_EXCEPTION; | |
463 | } // else the result is exact | |
464 | } else { // the result is inexact; f2* <= 1/2 | |
465 | // set the inexact flag | |
466 | *pfpsf |= INEXACT_EXCEPTION; | |
200359e8 | 467 | } |
b2a00c89 L |
468 | } else { // 22 <= ind - 1 <= 33 |
469 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 | |
470 | res.w[1] = 0; | |
471 | res.w[0] = P256.w[3] >> shift; | |
472 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; | |
473 | fstar.w[2] = P256.w[2]; | |
474 | fstar.w[1] = P256.w[1]; | |
475 | fstar.w[0] = P256.w[0]; | |
476 | if (fstar.w[3] > onehalf128[ind - 1] || | |
477 | (fstar.w[3] == onehalf128[ind - 1] && | |
478 | (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { | |
479 | // f2* > 1/2 and the result may be exact | |
480 | // Calculate f2* - 1/2 | |
481 | tmp64 = fstar.w[3] - onehalf128[ind - 1]; | |
482 | if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
483 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
484 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
485 | // set the inexact flag | |
486 | *pfpsf |= INEXACT_EXCEPTION; | |
487 | } // else the result is exact | |
488 | } else { // the result is inexact; f2* <= 1/2 | |
489 | // set the inexact flag | |
490 | *pfpsf |= INEXACT_EXCEPTION; | |
200359e8 | 491 | } |
200359e8 | 492 | } |
b2a00c89 L |
493 | // if the result was a midpoint, it was already rounded away from zero |
494 | res.w[1] |= x_sign | 0x3040000000000000ull; | |
200359e8 | 495 | BID_RETURN (res); |
b2a00c89 L |
496 | } else { // if ((q + exp) < 0) <=> q < -exp |
497 | // the result is +0 or -0 | |
498 | res.w[1] = x_sign | 0x3040000000000000ull; | |
499 | res.w[0] = 0x0000000000000000ull; | |
500 | *pfpsf |= INEXACT_EXCEPTION; | |
200359e8 L |
501 | BID_RETURN (res); |
502 | } | |
b2a00c89 L |
503 | break; |
504 | case ROUNDING_DOWN: | |
505 | if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
506 | // need to shift right -exp digits from the coefficient; exp will be 0 | |
200359e8 L |
507 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
508 | // (number of digits to be chopped off) | |
509 | // chop off ind digits from the lower part of C1 | |
510 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
511 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
512 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
513 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
b2a00c89 | 514 | // tmp64 = C1.w[0]; |
200359e8 | 515 | // if (ind <= 19) { |
b2a00c89 | 516 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
200359e8 | 517 | // } else { |
b2a00c89 L |
518 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
519 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
200359e8 L |
520 | // } |
521 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
522 | // if carry-out from C1.w[0], increment C1.w[1] | |
523 | // calculate C* and f* | |
524 | // C* is actually floor(C*) in this case | |
525 | // C* and f* need shifting and masking, as shown by | |
b2a00c89 | 526 | // shiftright128[] and maskhigh128[] |
200359e8 | 527 | // 1 <= x <= 34 |
b2a00c89 | 528 | // kx = 10^(-x) = ten2mk128[ind - 1] |
200359e8 L |
529 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
530 | // the approximation of 10^(-x) was rounded up to 118 bits | |
b2a00c89 | 531 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
200359e8 L |
532 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
533 | res.w[1] = P256.w[3]; | |
534 | res.w[0] = P256.w[2]; | |
b2a00c89 L |
535 | // redundant fstar.w[3] = 0; |
536 | // redundant fstar.w[2] = 0; | |
537 | // redundant fstar.w[1] = P256.w[1]; | |
538 | // redundant fstar.w[0] = P256.w[0]; | |
539 | // fraction f* > 10^(-x) <=> inexact | |
540 | // f* is in the right position to be compared with | |
541 | // 10^(-x) from ten2mk128[] | |
542 | if ((P256.w[1] > ten2mk128[ind - 1].w[1]) | |
543 | || (P256.w[1] == ten2mk128[ind - 1].w[1] | |
544 | && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
545 | *pfpsf |= INEXACT_EXCEPTION; | |
546 | // if positive, the truncated value is already the correct result | |
547 | if (x_sign) { // if negative | |
200359e8 L |
548 | if (++res.w[0] == 0) { |
549 | res.w[1]++; | |
550 | } | |
551 | } | |
552 | } | |
553 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
b2a00c89 | 554 | shift = shiftright128[ind - 1]; // 0 <= shift <= 102 |
200359e8 L |
555 | res.w[1] = (P256.w[3] >> shift); |
556 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
b2a00c89 L |
557 | // redundant fstar.w[3] = 0; |
558 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
559 | fstar.w[1] = P256.w[1]; | |
560 | fstar.w[0] = P256.w[0]; | |
561 | // fraction f* > 10^(-x) <=> inexact | |
562 | // f* is in the right position to be compared with | |
563 | // 10^(-x) from ten2mk128[] | |
564 | if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
565 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
566 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
567 | *pfpsf |= INEXACT_EXCEPTION; | |
568 | // if positive, the truncated value is already the correct result | |
569 | if (x_sign) { // if negative | |
200359e8 L |
570 | if (++res.w[0] == 0) { |
571 | res.w[1]++; | |
572 | } | |
573 | } | |
574 | } | |
575 | } else { // 22 <= ind - 1 <= 33 | |
b2a00c89 | 576 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
200359e8 L |
577 | res.w[1] = 0; |
578 | res.w[0] = P256.w[3] >> shift; | |
b2a00c89 L |
579 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
580 | fstar.w[2] = P256.w[2]; | |
581 | fstar.w[1] = P256.w[1]; | |
582 | fstar.w[0] = P256.w[0]; | |
583 | // fraction f* > 10^(-x) <=> inexact | |
584 | // f* is in the right position to be compared with | |
585 | // 10^(-x) from ten2mk128[] | |
586 | if (fstar.w[3] || fstar.w[2] | |
587 | || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
588 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
589 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
590 | *pfpsf |= INEXACT_EXCEPTION; | |
591 | // if positive, the truncated value is already the correct result | |
592 | if (x_sign) { // if negative | |
200359e8 L |
593 | if (++res.w[0] == 0) { |
594 | res.w[1]++; | |
595 | } | |
596 | } | |
597 | } | |
598 | } | |
599 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
600 | BID_RETURN (res); | |
601 | } else { // if exp < 0 and q + exp <= 0 | |
602 | if (x_sign) { // negative rounds down to -1.0 | |
603 | res.w[1] = 0xb040000000000000ull; | |
604 | res.w[0] = 0x0000000000000001ull; | |
605 | } else { // positive rpunds down to +0.0 | |
606 | res.w[1] = 0x3040000000000000ull; | |
607 | res.w[0] = 0x0000000000000000ull; | |
608 | } | |
b2a00c89 | 609 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
610 | BID_RETURN (res); |
611 | } | |
b2a00c89 L |
612 | break; |
613 | case ROUNDING_UP: | |
614 | if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
200359e8 L |
615 | // need to shift right -exp digits from the coefficient; exp will be 0 |
616 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
617 | // (number of digits to be chopped off) | |
618 | // chop off ind digits from the lower part of C1 | |
619 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
620 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
621 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
622 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
623 | // tmp64 = C1.w[0]; | |
624 | // if (ind <= 19) { | |
b2a00c89 | 625 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
200359e8 | 626 | // } else { |
b2a00c89 L |
627 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
628 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
200359e8 L |
629 | // } |
630 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
631 | // if carry-out from C1.w[0], increment C1.w[1] | |
632 | // calculate C* and f* | |
633 | // C* is actually floor(C*) in this case | |
634 | // C* and f* need shifting and masking, as shown by | |
b2a00c89 | 635 | // shiftright128[] and maskhigh128[] |
200359e8 | 636 | // 1 <= x <= 34 |
b2a00c89 | 637 | // kx = 10^(-x) = ten2mk128[ind - 1] |
200359e8 L |
638 | // C* = C1 * 10^(-x) |
639 | // the approximation of 10^(-x) was rounded up to 118 bits | |
b2a00c89 | 640 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
200359e8 L |
641 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
642 | res.w[1] = P256.w[3]; | |
643 | res.w[0] = P256.w[2]; | |
b2a00c89 L |
644 | // redundant fstar.w[3] = 0; |
645 | // redundant fstar.w[2] = 0; | |
646 | // redundant fstar.w[1] = P256.w[1]; | |
647 | // redundant fstar.w[0] = P256.w[0]; | |
648 | // fraction f* > 10^(-x) <=> inexact | |
649 | // f* is in the right position to be compared with | |
650 | // 10^(-x) from ten2mk128[] | |
651 | if ((P256.w[1] > ten2mk128[ind - 1].w[1]) | |
652 | || (P256.w[1] == ten2mk128[ind - 1].w[1] | |
653 | && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
654 | *pfpsf |= INEXACT_EXCEPTION; | |
655 | // if negative, the truncated value is already the correct result | |
656 | if (!x_sign) { // if positive | |
200359e8 L |
657 | if (++res.w[0] == 0) { |
658 | res.w[1]++; | |
659 | } | |
660 | } | |
661 | } | |
662 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
b2a00c89 | 663 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
200359e8 L |
664 | res.w[1] = (P256.w[3] >> shift); |
665 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
b2a00c89 L |
666 | // redundant fstar.w[3] = 0; |
667 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
668 | fstar.w[1] = P256.w[1]; | |
669 | fstar.w[0] = P256.w[0]; | |
670 | // fraction f* > 10^(-x) <=> inexact | |
671 | // f* is in the right position to be compared with | |
672 | // 10^(-x) from ten2mk128[] | |
673 | if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
674 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
675 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
676 | *pfpsf |= INEXACT_EXCEPTION; | |
677 | // if negative, the truncated value is already the correct result | |
678 | if (!x_sign) { // if positive | |
200359e8 L |
679 | if (++res.w[0] == 0) { |
680 | res.w[1]++; | |
681 | } | |
682 | } | |
683 | } | |
684 | } else { // 22 <= ind - 1 <= 33 | |
b2a00c89 | 685 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
200359e8 L |
686 | res.w[1] = 0; |
687 | res.w[0] = P256.w[3] >> shift; | |
b2a00c89 L |
688 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
689 | fstar.w[2] = P256.w[2]; | |
690 | fstar.w[1] = P256.w[1]; | |
691 | fstar.w[0] = P256.w[0]; | |
692 | // fraction f* > 10^(-x) <=> inexact | |
693 | // f* is in the right position to be compared with | |
694 | // 10^(-x) from ten2mk128[] | |
695 | if (fstar.w[3] || fstar.w[2] | |
696 | || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
697 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
698 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
699 | *pfpsf |= INEXACT_EXCEPTION; | |
700 | // if negative, the truncated value is already the correct result | |
701 | if (!x_sign) { // if positive | |
200359e8 L |
702 | if (++res.w[0] == 0) { |
703 | res.w[1]++; | |
704 | } | |
705 | } | |
706 | } | |
707 | } | |
708 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
709 | BID_RETURN (res); | |
710 | } else { // if exp < 0 and q + exp <= 0 | |
711 | if (x_sign) { // negative rounds up to -0.0 | |
712 | res.w[1] = 0xb040000000000000ull; | |
713 | res.w[0] = 0x0000000000000000ull; | |
714 | } else { // positive rpunds up to +1.0 | |
715 | res.w[1] = 0x3040000000000000ull; | |
716 | res.w[0] = 0x0000000000000001ull; | |
717 | } | |
b2a00c89 | 718 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
719 | BID_RETURN (res); |
720 | } | |
b2a00c89 L |
721 | break; |
722 | case ROUNDING_TO_ZERO: | |
723 | if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
724 | // need to shift right -exp digits from the coefficient; exp will be 0 | |
200359e8 L |
725 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' |
726 | // (number of digits to be chopped off) | |
727 | // chop off ind digits from the lower part of C1 | |
728 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
729 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
730 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
731 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
732 | //tmp64 = C1.w[0]; | |
733 | // if (ind <= 19) { | |
b2a00c89 | 734 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; |
200359e8 | 735 | // } else { |
b2a00c89 L |
736 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; |
737 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
200359e8 L |
738 | // } |
739 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
740 | // if carry-out from C1.w[0], increment C1.w[1] | |
741 | // calculate C* and f* | |
742 | // C* is actually floor(C*) in this case | |
743 | // C* and f* need shifting and masking, as shown by | |
b2a00c89 | 744 | // shiftright128[] and maskhigh128[] |
200359e8 | 745 | // 1 <= x <= 34 |
b2a00c89 | 746 | // kx = 10^(-x) = ten2mk128[ind - 1] |
200359e8 L |
747 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) |
748 | // the approximation of 10^(-x) was rounded up to 118 bits | |
b2a00c89 | 749 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); |
200359e8 L |
750 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 |
751 | res.w[1] = P256.w[3]; | |
752 | res.w[0] = P256.w[2]; | |
b2a00c89 L |
753 | // redundant fstar.w[3] = 0; |
754 | // redundant fstar.w[2] = 0; | |
755 | // redundant fstar.w[1] = P256.w[1]; | |
756 | // redundant fstar.w[0] = P256.w[0]; | |
757 | // fraction f* > 10^(-x) <=> inexact | |
758 | // f* is in the right position to be compared with | |
759 | // 10^(-x) from ten2mk128[] | |
760 | if ((P256.w[1] > ten2mk128[ind - 1].w[1]) | |
761 | || (P256.w[1] == ten2mk128[ind - 1].w[1] | |
762 | && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
763 | *pfpsf |= INEXACT_EXCEPTION; | |
764 | } | |
200359e8 | 765 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
b2a00c89 | 766 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 |
200359e8 L |
767 | res.w[1] = (P256.w[3] >> shift); |
768 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
b2a00c89 L |
769 | // redundant fstar.w[3] = 0; |
770 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
771 | fstar.w[1] = P256.w[1]; | |
772 | fstar.w[0] = P256.w[0]; | |
773 | // fraction f* > 10^(-x) <=> inexact | |
774 | // f* is in the right position to be compared with | |
775 | // 10^(-x) from ten2mk128[] | |
776 | if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
777 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
778 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
779 | *pfpsf |= INEXACT_EXCEPTION; | |
780 | } | |
200359e8 | 781 | } else { // 22 <= ind - 1 <= 33 |
b2a00c89 | 782 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 |
200359e8 L |
783 | res.w[1] = 0; |
784 | res.w[0] = P256.w[3] >> shift; | |
b2a00c89 L |
785 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; |
786 | fstar.w[2] = P256.w[2]; | |
787 | fstar.w[1] = P256.w[1]; | |
788 | fstar.w[0] = P256.w[0]; | |
789 | // fraction f* > 10^(-x) <=> inexact | |
790 | // f* is in the right position to be compared with | |
791 | // 10^(-x) from ten2mk128[] | |
792 | if (fstar.w[3] || fstar.w[2] | |
793 | || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
794 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
795 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
796 | *pfpsf |= INEXACT_EXCEPTION; | |
797 | } | |
200359e8 L |
798 | } |
799 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
800 | BID_RETURN (res); | |
801 | } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 | |
802 | res.w[1] = x_sign | 0x3040000000000000ull; | |
803 | res.w[0] = 0x0000000000000000ull; | |
b2a00c89 | 804 | *pfpsf |= INEXACT_EXCEPTION; |
200359e8 L |
805 | BID_RETURN (res); |
806 | } | |
b2a00c89 | 807 | break; |
affd77d3 | 808 | default: break; // default added to avoid compiler warning |
b2a00c89 L |
809 | } |
810 | ||
811 | BID_RETURN (res); | |
200359e8 L |
812 | } |
813 | ||
814 | /***************************************************************************** | |
b2a00c89 | 815 | * BID128_round_integral_nearest_even |
200359e8 L |
816 | ****************************************************************************/ |
817 | ||
b2a00c89 | 818 | BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x) |
200359e8 | 819 | |
b2a00c89 L |
820 | UINT128 res; |
821 | UINT64 x_sign; | |
822 | UINT64 x_exp; | |
823 | int exp; // unbiased exponent | |
824 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) | |
825 | UINT64 tmp64; | |
826 | BID_UI64DOUBLE tmp1; | |
827 | unsigned int x_nr_bits; | |
828 | int q, ind, shift; | |
829 | UINT128 C1; | |
830 | // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits | |
831 | UINT256 fstar; | |
832 | UINT256 P256; | |
200359e8 L |
833 | |
834 | // check for NaN or Infinity | |
b2a00c89 | 835 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { |
200359e8 | 836 | // x is special |
b2a00c89 L |
837 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN |
838 | // if x = NaN, then res = Q (x) | |
839 | // check first for non-canonical NaN payload | |
840 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
841 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
842 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
843 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
844 | x.w[0] = 0x0ull; | |
845 | } | |
846 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
847 | // set invalid flag | |
848 | *pfpsf |= INVALID_EXCEPTION; | |
849 | // return quiet (x) | |
850 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
851 | res.w[0] = x.w[0]; | |
852 | } else { // x is QNaN | |
853 | // return x | |
854 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
855 | res.w[0] = x.w[0]; | |
856 | } | |
857 | BID_RETURN (res) | |
858 | } else { // x is not a NaN, so it must be infinity | |
859 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
860 | // return +inf | |
861 | res.w[1] = 0x7800000000000000ull; | |
862 | res.w[0] = 0x0000000000000000ull; | |
863 | } else { // x is -inf | |
864 | // return -inf | |
865 | res.w[1] = 0xf800000000000000ull; | |
866 | res.w[0] = 0x0000000000000000ull; | |
200359e8 | 867 | } |
b2a00c89 L |
868 | BID_RETURN (res); |
869 | } | |
870 | } | |
200359e8 | 871 | // unpack x |
b2a00c89 L |
872 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative |
873 | C1.w[1] = x.w[1] & MASK_COEFF; | |
874 | C1.w[0] = x.w[0]; | |
200359e8 | 875 | |
b2a00c89 L |
876 | // check for non-canonical values (treated as zero) |
877 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
878 | // non-canonical | |
879 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
880 | C1.w[1] = 0; // significand high | |
881 | C1.w[0] = 0; // significand low | |
882 | } else { // G0_G1 != 11 | |
883 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
884 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
885 | (C1.w[1] == 0x0001ed09bead87c0ull | |
886 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
887 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
200359e8 L |
888 | C1.w[1] = 0; |
889 | C1.w[0] = 0; | |
b2a00c89 L |
890 | } else { // canonical |
891 | ; | |
200359e8 | 892 | } |
b2a00c89 L |
893 | } |
894 | ||
200359e8 | 895 | // test for input equal to zero |
b2a00c89 L |
896 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { |
897 | // x is 0 | |
898 | // return 0 preserving the sign bit and the preferred exponent | |
899 | // of MAX(Q(x), 0) | |
900 | if (x_exp <= (0x1820ull << 49)) { | |
901 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
902 | } else { | |
903 | res.w[1] = x_sign | x_exp; | |
200359e8 | 904 | } |
b2a00c89 L |
905 | res.w[0] = 0x0000000000000000ull; |
906 | BID_RETURN (res); | |
907 | } | |
200359e8 L |
908 | // x is not special and is not zero |
909 | ||
b2a00c89 L |
910 | // if (exp <= -(p+1)) return 0 |
911 | if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 | |
912 | res.w[1] = x_sign | 0x3040000000000000ull; | |
913 | res.w[0] = 0x0000000000000000ull; | |
914 | BID_RETURN (res); | |
915 | } | |
200359e8 L |
916 | // q = nr. of decimal digits in x |
917 | // determine first the nr. of bits in x | |
b2a00c89 L |
918 | if (C1.w[1] == 0) { |
919 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
920 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
921 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
922 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
923 | x_nr_bits = | |
924 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
925 | } else { // x < 2^32 | |
926 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
200359e8 L |
927 | x_nr_bits = |
928 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
929 | } | |
b2a00c89 L |
930 | } else { // if x < 2^53 |
931 | tmp1.d = (double) C1.w[0]; // exact conversion | |
200359e8 | 932 | x_nr_bits = |
b2a00c89 | 933 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); |
200359e8 | 934 | } |
b2a00c89 L |
935 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) |
936 | tmp1.d = (double) C1.w[1]; // exact conversion | |
937 | x_nr_bits = | |
938 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
939 | } | |
940 | ||
941 | q = nr_digits[x_nr_bits - 1].digits; | |
942 | if (q == 0) { | |
943 | q = nr_digits[x_nr_bits - 1].digits1; | |
944 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi | |
945 | || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
946 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
947 | q++; | |
948 | } | |
949 | exp = (x_exp >> 49) - 6176; | |
950 | if (exp >= 0) { // -exp <= 0 | |
951 | // the argument is an integer already | |
952 | res.w[1] = x.w[1]; | |
953 | res.w[0] = x.w[0]; | |
954 | BID_RETURN (res); | |
955 | } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q | |
956 | // need to shift right -exp digits from the coefficient; the exp will be 0 | |
957 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
958 | // chop off ind digits from the lower part of C1 | |
959 | // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits | |
960 | tmp64 = C1.w[0]; | |
961 | if (ind <= 19) { | |
962 | C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
963 | } else { | |
964 | C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
965 | C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
200359e8 | 966 | } |
b2a00c89 L |
967 | if (C1.w[0] < tmp64) |
968 | C1.w[1]++; | |
969 | // calculate C* and f* | |
970 | // C* is actually floor(C*) in this case | |
971 | // C* and f* need shifting and masking, as shown by | |
972 | // shiftright128[] and maskhigh128[] | |
973 | // 1 <= x <= 34 | |
974 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
975 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) | |
976 | // the approximation of 10^(-x) was rounded up to 118 bits | |
977 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
978 | // determine the value of res and fstar | |
979 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
980 | // redundant shift = shiftright128[ind - 1]; // shift = 0 | |
981 | res.w[1] = P256.w[3]; | |
982 | res.w[0] = P256.w[2]; | |
983 | // redundant fstar.w[3] = 0; | |
984 | // redundant fstar.w[2] = 0; | |
985 | // redundant fstar.w[1] = P256.w[1]; | |
986 | // redundant fstar.w[0] = P256.w[0]; | |
987 | // fraction f* < 10^(-x) <=> midpoint | |
988 | // f* is in the right position to be compared with | |
989 | // 10^(-x) from ten2mk128[] | |
990 | // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) | |
991 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? | |
992 | ((P256.w[1] < (ten2mk128[ind - 1].w[1])) | |
993 | || ((P256.w[1] == ten2mk128[ind - 1].w[1]) | |
994 | && (P256.w[0] < ten2mk128[ind - 1].w[0])))) { | |
995 | // subract 1 to make even | |
996 | if (res.w[0]-- == 0) { | |
997 | res.w[1]--; | |
998 | } | |
200359e8 | 999 | } |
b2a00c89 L |
1000 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 |
1001 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 | |
1002 | res.w[1] = (P256.w[3] >> shift); | |
1003 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
1004 | // redundant fstar.w[3] = 0; | |
1005 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
1006 | fstar.w[1] = P256.w[1]; | |
1007 | fstar.w[0] = P256.w[0]; | |
1008 | // fraction f* < 10^(-x) <=> midpoint | |
1009 | // f* is in the right position to be compared with | |
1010 | // 10^(-x) from ten2mk128[] | |
1011 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? | |
1012 | fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || | |
1013 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
1014 | fstar.w[0] < ten2mk128[ind - 1].w[0]))) { | |
1015 | // subract 1 to make even | |
1016 | if (res.w[0]-- == 0) { | |
1017 | res.w[1]--; | |
1018 | } | |
200359e8 | 1019 | } |
b2a00c89 L |
1020 | } else { // 22 <= ind - 1 <= 33 |
1021 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 | |
1022 | res.w[1] = 0; | |
1023 | res.w[0] = P256.w[3] >> shift; | |
1024 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; | |
1025 | fstar.w[2] = P256.w[2]; | |
1026 | fstar.w[1] = P256.w[1]; | |
1027 | fstar.w[0] = P256.w[0]; | |
1028 | // fraction f* < 10^(-x) <=> midpoint | |
1029 | // f* is in the right position to be compared with | |
1030 | // 10^(-x) from ten2mk128[] | |
1031 | if ((res.w[0] & 0x0000000000000001ull) && // is result odd? | |
1032 | fstar.w[3] == 0 && fstar.w[2] == 0 | |
1033 | && (fstar.w[1] < ten2mk128[ind - 1].w[1] | |
1034 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
1035 | && fstar.w[0] < ten2mk128[ind - 1].w[0]))) { | |
1036 | // subract 1 to make even | |
1037 | if (res.w[0]-- == 0) { | |
1038 | res.w[1]--; | |
1039 | } | |
1040 | } | |
1041 | } | |
1042 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
1043 | BID_RETURN (res); | |
1044 | } else { // if ((q + exp) < 0) <=> q < -exp | |
1045 | // the result is +0 or -0 | |
1046 | res.w[1] = x_sign | 0x3040000000000000ull; | |
1047 | res.w[0] = 0x0000000000000000ull; | |
1048 | BID_RETURN (res); | |
1049 | } | |
1050 | } | |
1051 | ||
1052 | /***************************************************************************** | |
1053 | * BID128_round_integral_negative | |
1054 | ****************************************************************************/ | |
1055 | ||
1056 | BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x) | |
1057 | ||
1058 | UINT128 res; | |
1059 | UINT64 x_sign; | |
1060 | UINT64 x_exp; | |
1061 | int exp; // unbiased exponent | |
1062 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo | |
1063 | // (all are UINT64) | |
1064 | BID_UI64DOUBLE tmp1; | |
1065 | unsigned int x_nr_bits; | |
1066 | int q, ind, shift; | |
1067 | UINT128 C1; | |
1068 | // UINT128 res is C* at first - represents up to 34 decimal digits ~ | |
1069 | // 113 bits | |
1070 | UINT256 fstar; | |
1071 | UINT256 P256; | |
1072 | ||
1073 | // check for NaN or Infinity | |
1074 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { | |
1075 | // x is special | |
1076 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
1077 | // if x = NaN, then res = Q (x) | |
1078 | // check first for non-canonical NaN payload | |
1079 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
1080 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
1081 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
1082 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
1083 | x.w[0] = 0x0ull; | |
1084 | } | |
1085 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
1086 | // set invalid flag | |
1087 | *pfpsf |= INVALID_EXCEPTION; | |
1088 | // return quiet (x) | |
1089 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
1090 | res.w[0] = x.w[0]; | |
1091 | } else { // x is QNaN | |
1092 | // return x | |
1093 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
1094 | res.w[0] = x.w[0]; | |
1095 | } | |
1096 | BID_RETURN (res) | |
1097 | } else { // x is not a NaN, so it must be infinity | |
1098 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
1099 | // return +inf | |
1100 | res.w[1] = 0x7800000000000000ull; | |
200359e8 | 1101 | res.w[0] = 0x0000000000000000ull; |
b2a00c89 L |
1102 | } else { // x is -inf |
1103 | // return -inf | |
1104 | res.w[1] = 0xf800000000000000ull; | |
1105 | res.w[0] = 0x0000000000000000ull; | |
1106 | } | |
1107 | BID_RETURN (res); | |
1108 | } | |
1109 | } | |
1110 | // unpack x | |
1111 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
1112 | C1.w[1] = x.w[1] & MASK_COEFF; | |
1113 | C1.w[0] = x.w[0]; | |
1114 | ||
1115 | // check for non-canonical values (treated as zero) | |
1116 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
1117 | // non-canonical | |
1118 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
1119 | C1.w[1] = 0; // significand high | |
1120 | C1.w[0] = 0; // significand low | |
1121 | } else { // G0_G1 != 11 | |
1122 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
1123 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
1124 | (C1.w[1] == 0x0001ed09bead87c0ull | |
1125 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
1126 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
1127 | C1.w[1] = 0; | |
1128 | C1.w[0] = 0; | |
1129 | } else { // canonical | |
1130 | ; | |
1131 | } | |
1132 | } | |
1133 | ||
1134 | // test for input equal to zero | |
1135 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { | |
1136 | // x is 0 | |
1137 | // return 0 preserving the sign bit and the preferred exponent | |
1138 | // of MAX(Q(x), 0) | |
1139 | if (x_exp <= (0x1820ull << 49)) { | |
1140 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
1141 | } else { | |
1142 | res.w[1] = x_sign | x_exp; | |
1143 | } | |
1144 | res.w[0] = 0x0000000000000000ull; | |
1145 | BID_RETURN (res); | |
1146 | } | |
1147 | // x is not special and is not zero | |
1148 | ||
1149 | // if (exp <= -p) return -1.0 or +0.0 | |
1150 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 | |
1151 | if (x_sign) { | |
1152 | // if negative, return negative 1, because we know the coefficient | |
1153 | // is non-zero (would have been caught above) | |
1154 | res.w[1] = 0xb040000000000000ull; | |
1155 | res.w[0] = 0x0000000000000001ull; | |
1156 | } else { | |
1157 | // if positive, return positive 0, because we know coefficient is | |
1158 | // non-zero (would have been caught above) | |
1159 | res.w[1] = 0x3040000000000000ull; | |
1160 | res.w[0] = 0x0000000000000000ull; | |
1161 | } | |
1162 | BID_RETURN (res); | |
1163 | } | |
1164 | // q = nr. of decimal digits in x | |
1165 | // determine first the nr. of bits in x | |
1166 | if (C1.w[1] == 0) { | |
1167 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
1168 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1169 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
1170 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
1171 | x_nr_bits = | |
1172 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1173 | } else { // x < 2^32 | |
1174 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
1175 | x_nr_bits = | |
1176 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1177 | } | |
1178 | } else { // if x < 2^53 | |
1179 | tmp1.d = (double) C1.w[0]; // exact conversion | |
1180 | x_nr_bits = | |
1181 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1182 | } | |
1183 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1184 | tmp1.d = (double) C1.w[1]; // exact conversion | |
1185 | x_nr_bits = | |
1186 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1187 | } | |
1188 | ||
1189 | q = nr_digits[x_nr_bits - 1].digits; | |
1190 | if (q == 0) { | |
1191 | q = nr_digits[x_nr_bits - 1].digits1; | |
1192 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1193 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1194 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
1195 | q++; | |
1196 | } | |
1197 | exp = (x_exp >> 49) - 6176; | |
1198 | if (exp >= 0) { // -exp <= 0 | |
1199 | // the argument is an integer already | |
1200 | res.w[1] = x.w[1]; | |
1201 | res.w[0] = x.w[0]; | |
1202 | BID_RETURN (res); | |
1203 | } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
1204 | // need to shift right -exp digits from the coefficient; the exp will be 0 | |
1205 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
1206 | // (number of digits to be chopped off) | |
1207 | // chop off ind digits from the lower part of C1 | |
1208 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
1209 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
1210 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
1211 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
1212 | //tmp64 = C1.w[0]; | |
1213 | // if (ind <= 19) { | |
1214 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
1215 | // } else { | |
1216 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
1217 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
1218 | // } | |
1219 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
1220 | // if carry-out from C1.w[0], increment C1.w[1] | |
1221 | // calculate C* and f* | |
1222 | // C* is actually floor(C*) in this case | |
1223 | // C* and f* need shifting and masking, as shown by | |
1224 | // shiftright128[] and maskhigh128[] | |
1225 | // 1 <= x <= 34 | |
1226 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
1227 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) | |
1228 | // the approximation of 10^(-x) was rounded up to 118 bits | |
1229 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
1230 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
1231 | res.w[1] = P256.w[3]; | |
1232 | res.w[0] = P256.w[2]; | |
1233 | // if positive, the truncated value is already the correct result | |
1234 | if (x_sign) { // if negative | |
1235 | // redundant fstar.w[3] = 0; | |
1236 | // redundant fstar.w[2] = 0; | |
1237 | // redundant fstar.w[1] = P256.w[1]; | |
1238 | // redundant fstar.w[0] = P256.w[0]; | |
1239 | // fraction f* > 10^(-x) <=> inexact | |
1240 | // f* is in the right position to be compared with | |
1241 | // 10^(-x) from ten2mk128[] | |
1242 | if ((P256.w[1] > ten2mk128[ind - 1].w[1]) | |
1243 | || (P256.w[1] == ten2mk128[ind - 1].w[1] | |
1244 | && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
1245 | if (++res.w[0] == 0) { | |
1246 | res.w[1]++; | |
1247 | } | |
1248 | } | |
1249 | } | |
1250 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
1251 | shift = shiftright128[ind - 1]; // 0 <= shift <= 102 | |
1252 | res.w[1] = (P256.w[3] >> shift); | |
1253 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
1254 | // if positive, the truncated value is already the correct result | |
1255 | if (x_sign) { // if negative | |
1256 | // redundant fstar.w[3] = 0; | |
1257 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
1258 | fstar.w[1] = P256.w[1]; | |
1259 | fstar.w[0] = P256.w[0]; | |
1260 | // fraction f* > 10^(-x) <=> inexact | |
1261 | // f* is in the right position to be compared with | |
1262 | // 10^(-x) from ten2mk128[] | |
1263 | if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
1264 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
1265 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
1266 | if (++res.w[0] == 0) { | |
1267 | res.w[1]++; | |
1268 | } | |
1269 | } | |
1270 | } | |
1271 | } else { // 22 <= ind - 1 <= 33 | |
1272 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 | |
1273 | res.w[1] = 0; | |
1274 | res.w[0] = P256.w[3] >> shift; | |
1275 | // if positive, the truncated value is already the correct result | |
1276 | if (x_sign) { // if negative | |
1277 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; | |
1278 | fstar.w[2] = P256.w[2]; | |
1279 | fstar.w[1] = P256.w[1]; | |
1280 | fstar.w[0] = P256.w[0]; | |
1281 | // fraction f* > 10^(-x) <=> inexact | |
1282 | // f* is in the right position to be compared with | |
1283 | // 10^(-x) from ten2mk128[] | |
1284 | if (fstar.w[3] || fstar.w[2] | |
1285 | || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
1286 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
1287 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
1288 | if (++res.w[0] == 0) { | |
1289 | res.w[1]++; | |
1290 | } | |
1291 | } | |
1292 | } | |
1293 | } | |
1294 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
1295 | BID_RETURN (res); | |
1296 | } else { // if exp < 0 and q + exp <= 0 | |
1297 | if (x_sign) { // negative rounds down to -1.0 | |
1298 | res.w[1] = 0xb040000000000000ull; | |
1299 | res.w[0] = 0x0000000000000001ull; | |
1300 | } else { // positive rpunds down to +0.0 | |
1301 | res.w[1] = 0x3040000000000000ull; | |
1302 | res.w[0] = 0x0000000000000000ull; | |
1303 | } | |
1304 | BID_RETURN (res); | |
1305 | } | |
1306 | } | |
1307 | ||
1308 | /***************************************************************************** | |
1309 | * BID128_round_integral_positive | |
1310 | ****************************************************************************/ | |
1311 | ||
1312 | BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x) | |
1313 | ||
1314 | UINT128 res; | |
1315 | UINT64 x_sign; | |
1316 | UINT64 x_exp; | |
1317 | int exp; // unbiased exponent | |
1318 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo | |
1319 | // (all are UINT64) | |
1320 | BID_UI64DOUBLE tmp1; | |
1321 | unsigned int x_nr_bits; | |
1322 | int q, ind, shift; | |
1323 | UINT128 C1; | |
1324 | // UINT128 res is C* at first - represents up to 34 decimal digits ~ | |
1325 | // 113 bits | |
1326 | UINT256 fstar; | |
1327 | UINT256 P256; | |
1328 | ||
1329 | // check for NaN or Infinity | |
1330 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { | |
1331 | // x is special | |
1332 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
1333 | // if x = NaN, then res = Q (x) | |
1334 | // check first for non-canonical NaN payload | |
1335 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
1336 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
1337 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
1338 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
1339 | x.w[0] = 0x0ull; | |
1340 | } | |
1341 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
1342 | // set invalid flag | |
1343 | *pfpsf |= INVALID_EXCEPTION; | |
1344 | // return quiet (x) | |
1345 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
1346 | res.w[0] = x.w[0]; | |
1347 | } else { // x is QNaN | |
1348 | // return x | |
1349 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
1350 | res.w[0] = x.w[0]; | |
1351 | } | |
1352 | BID_RETURN (res) | |
1353 | } else { // x is not a NaN, so it must be infinity | |
1354 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
1355 | // return +inf | |
1356 | res.w[1] = 0x7800000000000000ull; | |
1357 | res.w[0] = 0x0000000000000000ull; | |
1358 | } else { // x is -inf | |
1359 | // return -inf | |
1360 | res.w[1] = 0xf800000000000000ull; | |
1361 | res.w[0] = 0x0000000000000000ull; | |
1362 | } | |
1363 | BID_RETURN (res); | |
1364 | } | |
1365 | } | |
1366 | // unpack x | |
1367 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
1368 | C1.w[1] = x.w[1] & MASK_COEFF; | |
1369 | C1.w[0] = x.w[0]; | |
1370 | ||
1371 | // check for non-canonical values (treated as zero) | |
1372 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
1373 | // non-canonical | |
1374 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
1375 | C1.w[1] = 0; // significand high | |
1376 | C1.w[0] = 0; // significand low | |
1377 | } else { // G0_G1 != 11 | |
1378 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
1379 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
1380 | (C1.w[1] == 0x0001ed09bead87c0ull | |
1381 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
1382 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
1383 | C1.w[1] = 0; | |
1384 | C1.w[0] = 0; | |
1385 | } else { // canonical | |
1386 | ; | |
1387 | } | |
1388 | } | |
1389 | ||
1390 | // test for input equal to zero | |
1391 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { | |
1392 | // x is 0 | |
1393 | // return 0 preserving the sign bit and the preferred exponent | |
1394 | // of MAX(Q(x), 0) | |
1395 | if (x_exp <= (0x1820ull << 49)) { | |
1396 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
1397 | } else { | |
1398 | res.w[1] = x_sign | x_exp; | |
1399 | } | |
1400 | res.w[0] = 0x0000000000000000ull; | |
1401 | BID_RETURN (res); | |
1402 | } | |
1403 | // x is not special and is not zero | |
1404 | ||
1405 | // if (exp <= -p) return -0.0 or +1.0 | |
1406 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 | |
1407 | if (x_sign) { | |
1408 | // if negative, return negative 0, because we know the coefficient | |
1409 | // is non-zero (would have been caught above) | |
1410 | res.w[1] = 0xb040000000000000ull; | |
1411 | res.w[0] = 0x0000000000000000ull; | |
1412 | } else { | |
1413 | // if positive, return positive 1, because we know coefficient is | |
1414 | // non-zero (would have been caught above) | |
1415 | res.w[1] = 0x3040000000000000ull; | |
1416 | res.w[0] = 0x0000000000000001ull; | |
1417 | } | |
1418 | BID_RETURN (res); | |
1419 | } | |
1420 | // q = nr. of decimal digits in x | |
1421 | // determine first the nr. of bits in x | |
1422 | if (C1.w[1] == 0) { | |
1423 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
1424 | // split 64-bit value in two 32-bit halves to avoid rounding errors | |
1425 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
1426 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
1427 | x_nr_bits = | |
1428 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1429 | } else { // x < 2^32 | |
1430 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
1431 | x_nr_bits = | |
1432 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1433 | } | |
1434 | } else { // if x < 2^53 | |
1435 | tmp1.d = (double) C1.w[0]; // exact conversion | |
1436 | x_nr_bits = | |
1437 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1438 | } | |
1439 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1440 | tmp1.d = (double) C1.w[1]; // exact conversion | |
1441 | x_nr_bits = | |
1442 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1443 | } | |
1444 | ||
1445 | q = nr_digits[x_nr_bits - 1].digits; | |
1446 | if (q == 0) { | |
1447 | q = nr_digits[x_nr_bits - 1].digits1; | |
1448 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1449 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1450 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
1451 | q++; | |
1452 | } | |
1453 | exp = (x_exp >> 49) - 6176; | |
1454 | if (exp >= 0) { // -exp <= 0 | |
1455 | // the argument is an integer already | |
1456 | res.w[1] = x.w[1]; | |
1457 | res.w[0] = x.w[0]; | |
1458 | BID_RETURN (res); | |
1459 | } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
1460 | // need to shift right -exp digits from the coefficient; exp will be 0 | |
1461 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
1462 | // (number of digits to be chopped off) | |
1463 | // chop off ind digits from the lower part of C1 | |
1464 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
1465 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
1466 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
1467 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
1468 | // tmp64 = C1.w[0]; | |
1469 | // if (ind <= 19) { | |
1470 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
1471 | // } else { | |
1472 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
1473 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
1474 | // } | |
1475 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
1476 | // if carry-out from C1.w[0], increment C1.w[1] | |
1477 | // calculate C* and f* | |
1478 | // C* is actually floor(C*) in this case | |
1479 | // C* and f* need shifting and masking, as shown by | |
1480 | // shiftright128[] and maskhigh128[] | |
1481 | // 1 <= x <= 34 | |
1482 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
1483 | // C* = C1 * 10^(-x) | |
1484 | // the approximation of 10^(-x) was rounded up to 118 bits | |
1485 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
1486 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
1487 | res.w[1] = P256.w[3]; | |
1488 | res.w[0] = P256.w[2]; | |
1489 | // if negative, the truncated value is already the correct result | |
1490 | if (!x_sign) { // if positive | |
1491 | // redundant fstar.w[3] = 0; | |
1492 | // redundant fstar.w[2] = 0; | |
1493 | // redundant fstar.w[1] = P256.w[1]; | |
1494 | // redundant fstar.w[0] = P256.w[0]; | |
1495 | // fraction f* > 10^(-x) <=> inexact | |
1496 | // f* is in the right position to be compared with | |
1497 | // 10^(-x) from ten2mk128[] | |
1498 | if ((P256.w[1] > ten2mk128[ind - 1].w[1]) | |
1499 | || (P256.w[1] == ten2mk128[ind - 1].w[1] | |
1500 | && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { | |
1501 | if (++res.w[0] == 0) { | |
1502 | res.w[1]++; | |
1503 | } | |
1504 | } | |
1505 | } | |
1506 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
1507 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 | |
1508 | res.w[1] = (P256.w[3] >> shift); | |
1509 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
1510 | // if negative, the truncated value is already the correct result | |
1511 | if (!x_sign) { // if positive | |
1512 | // redundant fstar.w[3] = 0; | |
1513 | fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; | |
1514 | fstar.w[1] = P256.w[1]; | |
1515 | fstar.w[0] = P256.w[0]; | |
1516 | // fraction f* > 10^(-x) <=> inexact | |
1517 | // f* is in the right position to be compared with | |
1518 | // 10^(-x) from ten2mk128[] | |
1519 | if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || | |
1520 | (fstar.w[1] == ten2mk128[ind - 1].w[1] && | |
1521 | fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
1522 | if (++res.w[0] == 0) { | |
1523 | res.w[1]++; | |
1524 | } | |
1525 | } | |
1526 | } | |
1527 | } else { // 22 <= ind - 1 <= 33 | |
1528 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 | |
1529 | res.w[1] = 0; | |
1530 | res.w[0] = P256.w[3] >> shift; | |
1531 | // if negative, the truncated value is already the correct result | |
1532 | if (!x_sign) { // if positive | |
1533 | fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; | |
1534 | fstar.w[2] = P256.w[2]; | |
1535 | fstar.w[1] = P256.w[1]; | |
1536 | fstar.w[0] = P256.w[0]; | |
1537 | // fraction f* > 10^(-x) <=> inexact | |
1538 | // f* is in the right position to be compared with | |
1539 | // 10^(-x) from ten2mk128[] | |
1540 | if (fstar.w[3] || fstar.w[2] | |
1541 | || fstar.w[1] > ten2mk128[ind - 1].w[1] | |
1542 | || (fstar.w[1] == ten2mk128[ind - 1].w[1] | |
1543 | && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { | |
1544 | if (++res.w[0] == 0) { | |
1545 | res.w[1]++; | |
1546 | } | |
1547 | } | |
1548 | } | |
1549 | } | |
1550 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
1551 | BID_RETURN (res); | |
1552 | } else { // if exp < 0 and q + exp <= 0 | |
1553 | if (x_sign) { // negative rounds up to -0.0 | |
1554 | res.w[1] = 0xb040000000000000ull; | |
1555 | res.w[0] = 0x0000000000000000ull; | |
1556 | } else { // positive rpunds up to +1.0 | |
1557 | res.w[1] = 0x3040000000000000ull; | |
1558 | res.w[0] = 0x0000000000000001ull; | |
1559 | } | |
1560 | BID_RETURN (res); | |
1561 | } | |
1562 | } | |
1563 | ||
1564 | /***************************************************************************** | |
1565 | * BID128_round_integral_zero | |
1566 | ****************************************************************************/ | |
1567 | ||
1568 | BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x) | |
1569 | ||
1570 | UINT128 res; | |
1571 | UINT64 x_sign; | |
1572 | UINT64 x_exp; | |
1573 | int exp; // unbiased exponent | |
1574 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo | |
1575 | // (all are UINT64) | |
1576 | BID_UI64DOUBLE tmp1; | |
1577 | unsigned int x_nr_bits; | |
1578 | int q, ind, shift; | |
1579 | UINT128 C1; | |
1580 | // UINT128 res is C* at first - represents up to 34 decimal digits ~ | |
1581 | // 113 bits | |
1582 | UINT256 P256; | |
1583 | ||
1584 | // check for NaN or Infinity | |
1585 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { | |
1586 | // x is special | |
1587 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
1588 | // if x = NaN, then res = Q (x) | |
1589 | // check first for non-canonical NaN payload | |
1590 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
1591 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
1592 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
1593 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
1594 | x.w[0] = 0x0ull; | |
200359e8 | 1595 | } |
b2a00c89 L |
1596 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN |
1597 | // set invalid flag | |
1598 | *pfpsf |= INVALID_EXCEPTION; | |
1599 | // return quiet (x) | |
1600 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
1601 | res.w[0] = x.w[0]; | |
1602 | } else { // x is QNaN | |
1603 | // return x | |
1604 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
1605 | res.w[0] = x.w[0]; | |
1606 | } | |
1607 | BID_RETURN (res) | |
1608 | } else { // x is not a NaN, so it must be infinity | |
1609 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
1610 | // return +inf | |
1611 | res.w[1] = 0x7800000000000000ull; | |
1612 | res.w[0] = 0x0000000000000000ull; | |
1613 | } else { // x is -inf | |
1614 | // return -inf | |
1615 | res.w[1] = 0xf800000000000000ull; | |
1616 | res.w[0] = 0x0000000000000000ull; | |
1617 | } | |
1618 | BID_RETURN (res); | |
1619 | } | |
1620 | } | |
1621 | // unpack x | |
1622 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
1623 | C1.w[1] = x.w[1] & MASK_COEFF; | |
1624 | C1.w[0] = x.w[0]; | |
1625 | ||
1626 | // check for non-canonical values (treated as zero) | |
1627 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
1628 | // non-canonical | |
1629 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
1630 | C1.w[1] = 0; // significand high | |
1631 | C1.w[0] = 0; // significand low | |
1632 | } else { // G0_G1 != 11 | |
1633 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
1634 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
1635 | (C1.w[1] == 0x0001ed09bead87c0ull | |
1636 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
1637 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
1638 | C1.w[1] = 0; | |
1639 | C1.w[0] = 0; | |
1640 | } else { // canonical | |
1641 | ; | |
1642 | } | |
1643 | } | |
1644 | ||
1645 | // test for input equal to zero | |
1646 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { | |
1647 | // x is 0 | |
1648 | // return 0 preserving the sign bit and the preferred exponent | |
1649 | // of MAX(Q(x), 0) | |
1650 | if (x_exp <= (0x1820ull << 49)) { | |
1651 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
1652 | } else { | |
1653 | res.w[1] = x_sign | x_exp; | |
1654 | } | |
1655 | res.w[0] = 0x0000000000000000ull; | |
1656 | BID_RETURN (res); | |
1657 | } | |
1658 | // x is not special and is not zero | |
1659 | ||
1660 | // if (exp <= -p) return -0.0 or +0.0 | |
1661 | if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 | |
1662 | res.w[1] = x_sign | 0x3040000000000000ull; | |
1663 | res.w[0] = 0x0000000000000000ull; | |
1664 | BID_RETURN (res); | |
1665 | } | |
1666 | // q = nr. of decimal digits in x | |
1667 | // determine first the nr. of bits in x | |
1668 | if (C1.w[1] == 0) { | |
1669 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
1670 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1671 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
1672 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
1673 | x_nr_bits = | |
1674 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1675 | } else { // x < 2^32 | |
1676 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
1677 | x_nr_bits = | |
1678 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1679 | } | |
1680 | } else { // if x < 2^53 | |
1681 | tmp1.d = (double) C1.w[0]; // exact conversion | |
1682 | x_nr_bits = | |
1683 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1684 | } | |
1685 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1686 | tmp1.d = (double) C1.w[1]; // exact conversion | |
1687 | x_nr_bits = | |
1688 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1689 | } | |
1690 | ||
1691 | q = nr_digits[x_nr_bits - 1].digits; | |
1692 | if (q == 0) { | |
1693 | q = nr_digits[x_nr_bits - 1].digits1; | |
1694 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1695 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1696 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
1697 | q++; | |
1698 | } | |
1699 | exp = (x_exp >> 49) - 6176; | |
1700 | if (exp >= 0) { // -exp <= 0 | |
1701 | // the argument is an integer already | |
1702 | res.w[1] = x.w[1]; | |
1703 | res.w[0] = x.w[0]; | |
1704 | BID_RETURN (res); | |
1705 | } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q | |
1706 | // need to shift right -exp digits from the coefficient; the exp will be 0 | |
1707 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
1708 | // (number of digits to be chopped off) | |
1709 | // chop off ind digits from the lower part of C1 | |
1710 | // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate | |
1711 | // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP | |
1712 | // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE | |
1713 | // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE | |
1714 | //tmp64 = C1.w[0]; | |
1715 | // if (ind <= 19) { | |
1716 | // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
1717 | // } else { | |
1718 | // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
1719 | // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
1720 | // } | |
1721 | // if (C1.w[0] < tmp64) C1.w[1]++; | |
1722 | // if carry-out from C1.w[0], increment C1.w[1] | |
1723 | // calculate C* and f* | |
1724 | // C* is actually floor(C*) in this case | |
1725 | // C* and f* need shifting and masking, as shown by | |
1726 | // shiftright128[] and maskhigh128[] | |
1727 | // 1 <= x <= 34 | |
1728 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
1729 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) | |
1730 | // the approximation of 10^(-x) was rounded up to 118 bits | |
1731 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
1732 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
1733 | res.w[1] = P256.w[3]; | |
1734 | res.w[0] = P256.w[2]; | |
1735 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
1736 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 | |
1737 | res.w[1] = (P256.w[3] >> shift); | |
1738 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
1739 | } else { // 22 <= ind - 1 <= 33 | |
1740 | shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 | |
1741 | res.w[1] = 0; | |
1742 | res.w[0] = P256.w[3] >> shift; | |
1743 | } | |
1744 | res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; | |
1745 | BID_RETURN (res); | |
1746 | } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 | |
1747 | res.w[1] = x_sign | 0x3040000000000000ull; | |
1748 | res.w[0] = 0x0000000000000000ull; | |
1749 | BID_RETURN (res); | |
1750 | } | |
1751 | } | |
1752 | ||
1753 | /***************************************************************************** | |
1754 | * BID128_round_integral_nearest_away | |
1755 | ****************************************************************************/ | |
1756 | ||
1757 | BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x) | |
1758 | ||
1759 | UINT128 res; | |
1760 | UINT64 x_sign; | |
1761 | UINT64 x_exp; | |
1762 | int exp; // unbiased exponent | |
1763 | // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo | |
1764 | // (all are UINT64) | |
1765 | UINT64 tmp64; | |
1766 | BID_UI64DOUBLE tmp1; | |
1767 | unsigned int x_nr_bits; | |
1768 | int q, ind, shift; | |
1769 | UINT128 C1; | |
1770 | // UINT128 res is C* at first - represents up to 34 decimal digits ~ | |
1771 | // 113 bits | |
1772 | // UINT256 fstar; | |
1773 | UINT256 P256; | |
1774 | ||
1775 | // check for NaN or Infinity | |
1776 | if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { | |
1777 | // x is special | |
1778 | if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
1779 | // if x = NaN, then res = Q (x) | |
1780 | // check first for non-canonical NaN payload | |
1781 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
1782 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
1783 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
1784 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
1785 | x.w[0] = 0x0ull; | |
1786 | } | |
1787 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
1788 | // set invalid flag | |
1789 | *pfpsf |= INVALID_EXCEPTION; | |
1790 | // return quiet (x) | |
1791 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
1792 | res.w[0] = x.w[0]; | |
1793 | } else { // x is QNaN | |
1794 | // return x | |
1795 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
1796 | res.w[0] = x.w[0]; | |
1797 | } | |
1798 | BID_RETURN (res) | |
1799 | } else { // x is not a NaN, so it must be infinity | |
1800 | if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf | |
1801 | // return +inf | |
1802 | res.w[1] = 0x7800000000000000ull; | |
1803 | res.w[0] = 0x0000000000000000ull; | |
1804 | } else { // x is -inf | |
1805 | // return -inf | |
1806 | res.w[1] = 0xf800000000000000ull; | |
1807 | res.w[0] = 0x0000000000000000ull; | |
1808 | } | |
1809 | BID_RETURN (res); | |
1810 | } | |
1811 | } | |
1812 | // unpack x | |
1813 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
1814 | C1.w[1] = x.w[1] & MASK_COEFF; | |
1815 | C1.w[0] = x.w[0]; | |
1816 | ||
1817 | // check for non-canonical values (treated as zero) | |
1818 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
1819 | // non-canonical | |
1820 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
1821 | C1.w[1] = 0; // significand high | |
1822 | C1.w[0] = 0; // significand low | |
1823 | } else { // G0_G1 != 11 | |
1824 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
1825 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
1826 | (C1.w[1] == 0x0001ed09bead87c0ull | |
1827 | && C1.w[0] > 0x378d8e63ffffffffull)) { | |
1828 | // x is non-canonical if coefficient is larger than 10^34 -1 | |
1829 | C1.w[1] = 0; | |
1830 | C1.w[0] = 0; | |
1831 | } else { // canonical | |
1832 | ; | |
1833 | } | |
1834 | } | |
1835 | ||
1836 | // test for input equal to zero | |
1837 | if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { | |
1838 | // x is 0 | |
1839 | // return 0 preserving the sign bit and the preferred exponent | |
1840 | // of MAX(Q(x), 0) | |
1841 | if (x_exp <= (0x1820ull << 49)) { | |
1842 | res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; | |
1843 | } else { | |
1844 | res.w[1] = x_sign | x_exp; | |
1845 | } | |
1846 | res.w[0] = 0x0000000000000000ull; | |
1847 | BID_RETURN (res); | |
1848 | } | |
1849 | // x is not special and is not zero | |
1850 | ||
1851 | // if (exp <= -(p+1)) return 0.0 | |
1852 | if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 | |
1853 | res.w[1] = x_sign | 0x3040000000000000ull; | |
1854 | res.w[0] = 0x0000000000000000ull; | |
1855 | BID_RETURN (res); | |
1856 | } | |
1857 | // q = nr. of decimal digits in x | |
1858 | // determine first the nr. of bits in x | |
1859 | if (C1.w[1] == 0) { | |
1860 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
1861 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1862 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
1863 | tmp1.d = (double) (C1.w[0] >> 32); // exact conversion | |
1864 | x_nr_bits = | |
1865 | 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1866 | } else { // x < 2^32 | |
1867 | tmp1.d = (double) (C1.w[0]); // exact conversion | |
1868 | x_nr_bits = | |
1869 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1870 | } | |
1871 | } else { // if x < 2^53 | |
1872 | tmp1.d = (double) C1.w[0]; // exact conversion | |
1873 | x_nr_bits = | |
1874 | 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1875 | } | |
1876 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1877 | tmp1.d = (double) C1.w[1]; // exact conversion | |
1878 | x_nr_bits = | |
1879 | 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1880 | } | |
1881 | ||
1882 | q = nr_digits[x_nr_bits - 1].digits; | |
1883 | if (q == 0) { | |
1884 | q = nr_digits[x_nr_bits - 1].digits1; | |
1885 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1886 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1887 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
1888 | q++; | |
1889 | } | |
1890 | exp = (x_exp >> 49) - 6176; | |
1891 | if (exp >= 0) { // -exp <= 0 | |
1892 | // the argument is an integer already | |
1893 | res.w[1] = x.w[1]; | |
1894 | res.w[0] = x.w[0]; | |
1895 | BID_RETURN (res); | |
1896 | } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q | |
1897 | // need to shift right -exp digits from the coefficient; the exp will be 0 | |
1898 | ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' | |
1899 | // chop off ind digits from the lower part of C1 | |
1900 | // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits | |
1901 | tmp64 = C1.w[0]; | |
1902 | if (ind <= 19) { | |
1903 | C1.w[0] = C1.w[0] + midpoint64[ind - 1]; | |
1904 | } else { | |
1905 | C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; | |
1906 | C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; | |
1907 | } | |
1908 | if (C1.w[0] < tmp64) | |
1909 | C1.w[1]++; | |
1910 | // calculate C* and f* | |
1911 | // C* is actually floor(C*) in this case | |
1912 | // C* and f* need shifting and masking, as shown by | |
1913 | // shiftright128[] and maskhigh128[] | |
1914 | // 1 <= x <= 34 | |
1915 | // kx = 10^(-x) = ten2mk128[ind - 1] | |
1916 | // C* = (C1 + 1/2 * 10^x) * 10^(-x) | |
1917 | // the approximation of 10^(-x) was rounded up to 118 bits | |
1918 | __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); | |
1919 | // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. | |
1920 | // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 | |
1921 | // if (0 < f* < 10^(-x)) then the result is a midpoint | |
1922 | // if floor(C*) is even then C* = floor(C*) - logical right | |
1923 | // shift; C* has p decimal digits, correct by Prop. 1) | |
1924 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
1925 | // shift; C* has p decimal digits, correct by Pr. 1) | |
1926 | // else | |
1927 | // C* = floor(C*) (logical right shift; C has p decimal digits, | |
1928 | // correct by Property 1) | |
1929 | // n = C* * 10^(e+x) | |
1930 | ||
1931 | // shift right C* by Ex-128 = shiftright128[ind] | |
1932 | if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 | |
1933 | res.w[1] = P256.w[3]; | |
1934 | res.w[0] = P256.w[2]; | |
1935 | } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 | |
1936 | shift = shiftright128[ind - 1]; // 3 <= shift <= 63 | |
1937 | res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); | |
1938 | res.w[1] = (P256.w[3] >> shift); | |
1939 | } else { // 22 <= ind - 1 <= 33 | |
1940 | shift = shiftright128[ind - 1]; // 2 <= shift <= 38 | |
1941 | res.w[1] = 0; | |
1942 | res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 | |
1943 | } | |
1944 | // if the result was a midpoint, it was already rounded away from zero | |
1945 | res.w[1] |= x_sign | 0x3040000000000000ull; | |
1946 | BID_RETURN (res); | |
1947 | } else { // if ((q + exp) < 0) <=> q < -exp | |
1948 | // the result is +0 or -0 | |
1949 | res.w[1] = x_sign | 0x3040000000000000ull; | |
1950 | res.w[0] = 0x0000000000000000ull; | |
1951 | BID_RETURN (res); | |
1952 | } | |
200359e8 | 1953 | } |