]>
Commit | Line | Data |
---|---|---|
f1717362 | 1 | /* Copyright (C) 2007-2016 Free Software Foundation, Inc. |
9b6b0236 | 2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
6bc9506f | 7 | Software Foundation; either version 3, or (at your option) any later |
9b6b0236 | 8 | version. |
9 | ||
9b6b0236 | 10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
6bc9506f | 15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
9b6b0236 | 23 | |
24 | /***************************************************************************** | |
25 | * | |
26 | * BID64 encoding: | |
27 | * **************************************** | |
28 | * 63 62 53 52 0 | |
29 | * |---|------------------|--------------| | |
30 | * | S | Biased Exp (E) | Coeff (c) | | |
31 | * |---|------------------|--------------| | |
32 | * | |
33 | * bias = 398 | |
34 | * number = (-1)^s * 10^(E-398) * c | |
35 | * coefficient range - 0 to (2^53)-1 | |
36 | * COEFF_MAX = 2^53-1 = 9007199254740991 | |
37 | * | |
38 | ***************************************************************************** | |
39 | * | |
40 | * BID128 encoding: | |
41 | * 1-bit sign | |
42 | * 14-bit biased exponent in [0x21, 0x3020] = [33, 12320] | |
43 | * unbiased exponent in [-6176, 6111]; exponent bias = 6176 | |
44 | * 113-bit unsigned binary integer coefficient (49-bit high + 64-bit low) | |
45 | * Note: 10^34-1 ~ 2^112.945555... < 2^113 => coefficient fits in 113 bits | |
46 | * | |
47 | * Note: assume invalid encodings are not passed to this function | |
48 | * | |
49 | * Round a number C with q decimal digits, represented as a binary integer | |
50 | * to q - x digits. Six different routines are provided for different values | |
51 | * of q. The maximum value of q used in the library is q = 3 * P - 1 where | |
52 | * P = 16 or P = 34 (so q <= 111 decimal digits). | |
53 | * The partitioning is based on the following, where Kx is the scaled | |
54 | * integer representing the value of 10^(-x) rounded up to a number of bits | |
55 | * sufficient to ensure correct rounding: | |
56 | * | |
57 | * -------------------------------------------------------------------------- | |
58 | * q x max. value of a max number min. number | |
59 | * of bits in C of bits in Kx | |
60 | * -------------------------------------------------------------------------- | |
61 | * | |
62 | * GROUP 1: 64 bits | |
63 | * round64_2_18 () | |
64 | * | |
65 | * 2 [1,1] 10^1 - 1 < 2^3.33 4 4 | |
66 | * ... ... ... ... ... | |
67 | * 18 [1,17] 10^18 - 1 < 2^59.80 60 61 | |
68 | * | |
69 | * GROUP 2: 128 bits | |
70 | * round128_19_38 () | |
71 | * | |
72 | * 19 [1,18] 10^19 - 1 < 2^63.11 64 65 | |
73 | * 20 [1,19] 10^20 - 1 < 2^66.44 67 68 | |
74 | * ... ... ... ... ... | |
75 | * 38 [1,37] 10^38 - 1 < 2^126.24 127 128 | |
76 | * | |
77 | * GROUP 3: 192 bits | |
78 | * round192_39_57 () | |
79 | * | |
80 | * 39 [1,38] 10^39 - 1 < 2^129.56 130 131 | |
81 | * ... ... ... ... ... | |
82 | * 57 [1,56] 10^57 - 1 < 2^189.35 190 191 | |
83 | * | |
84 | * GROUP 4: 256 bits | |
85 | * round256_58_76 () | |
86 | * | |
87 | * 58 [1,57] 10^58 - 1 < 2^192.68 193 194 | |
88 | * ... ... ... ... ... | |
89 | * 76 [1,75] 10^76 - 1 < 2^252.47 253 254 | |
90 | * | |
91 | * GROUP 5: 320 bits | |
92 | * round320_77_96 () | |
93 | * | |
94 | * 77 [1,76] 10^77 - 1 < 2^255.79 256 257 | |
95 | * 78 [1,77] 10^78 - 1 < 2^259.12 260 261 | |
96 | * ... ... ... ... ... | |
97 | * 96 [1,95] 10^96 - 1 < 2^318.91 319 320 | |
98 | * | |
99 | * GROUP 6: 384 bits | |
100 | * round384_97_115 () | |
101 | * | |
102 | * 97 [1,96] 10^97 - 1 < 2^322.23 323 324 | |
103 | * ... ... ... ... ... | |
104 | * 115 [1,114] 10^115 - 1 < 2^382.03 383 384 | |
105 | * | |
106 | ****************************************************************************/ | |
107 | ||
108 | #include "bid_internal.h" | |
109 | ||
110 | void | |
84d1fc49 | 111 | round64_2_18 (int q, |
9b6b0236 | 112 | int x, |
113 | UINT64 C, | |
114 | UINT64 * ptr_Cstar, | |
115 | int *incr_exp, | |
116 | int *ptr_is_midpoint_lt_even, | |
117 | int *ptr_is_midpoint_gt_even, | |
118 | int *ptr_is_inexact_lt_midpoint, | |
119 | int *ptr_is_inexact_gt_midpoint) { | |
120 | ||
121 | UINT128 P128; | |
122 | UINT128 fstar; | |
123 | UINT64 Cstar; | |
124 | UINT64 tmp64; | |
125 | int shift; | |
126 | int ind; | |
127 | ||
128 | // Note: | |
129 | // In round128_2_18() positive numbers with 2 <= q <= 18 will be | |
130 | // rounded to nearest only for 1 <= x <= 3: | |
131 | // x = 1 or x = 2 when q = 17 | |
132 | // x = 2 or x = 3 when q = 18 | |
133 | // However, for generality and possible uses outside the frame of IEEE 754R | |
134 | // this implementation works for 1 <= x <= q - 1 | |
135 | ||
136 | // assume *ptr_is_midpoint_lt_even, *ptr_is_midpoint_gt_even, | |
137 | // *ptr_is_inexact_lt_midpoint, and *ptr_is_inexact_gt_midpoint are | |
138 | // initialized to 0 by the caller | |
139 | ||
140 | // round a number C with q decimal digits, 2 <= q <= 18 | |
141 | // to q - x digits, 1 <= x <= 17 | |
142 | // C = C + 1/2 * 10^x where the result C fits in 64 bits | |
143 | // (because the largest value is 999999999999999999 + 50000000000000000 = | |
144 | // 0x0e92596fd628ffff, which fits in 60 bits) | |
84d1fc49 | 145 | ind = x - 1; // 0 <= ind <= 16 |
146 | C = C + midpoint64[ind]; | |
147 | // kx ~= 10^(-x), kx = Kx64[ind] * 2^(-Ex), 0 <= ind <= 16 | |
9b6b0236 | 148 | // P128 = (C + 1/2 * 10^x) * kx * 2^Ex = (C + 1/2 * 10^x) * Kx |
149 | // the approximation kx of 10^(-x) was rounded up to 64 bits | |
84d1fc49 | 150 | __mul_64x64_to_128MACH (P128, C, Kx64[ind]); |
9b6b0236 | 151 | // calculate C* = floor (P128) and f* |
152 | // Cstar = P128 >> Ex | |
153 | // fstar = low Ex bits of P128 | |
84d1fc49 | 154 | shift = Ex64m64[ind]; // in [3, 56] |
9b6b0236 | 155 | Cstar = P128.w[1] >> shift; |
84d1fc49 | 156 | fstar.w[1] = P128.w[1] & mask64[ind]; |
9b6b0236 | 157 | fstar.w[0] = P128.w[0]; |
84d1fc49 | 158 | // the top Ex bits of 10^(-x) are T* = ten2mxtrunc64[ind], e.g. |
159 | // if x=1, T*=ten2mxtrunc64[0]=0xcccccccccccccccc | |
9b6b0236 | 160 | // if (0 < f* < 10^(-x)) then the result is a midpoint |
161 | // if floor(C*) is even then C* = floor(C*) - logical right | |
162 | // shift; C* has q - x decimal digits, correct by Prop. 1) | |
163 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
164 | // shift; C* has q - x decimal digits, correct by Pr. 1) | |
165 | // else | |
166 | // C* = floor(C*) (logical right shift; C has q - x decimal digits, | |
167 | // correct by Property 1) | |
168 | // in the caling function n = C* * 10^(e+x) | |
169 | ||
170 | // determine inexactness of the rounding of C* | |
171 | // if (0 < f* - 1/2 < 10^(-x)) then | |
172 | // the result is exact | |
173 | // else // if (f* - 1/2 > T*) then | |
174 | // the result is inexact | |
84d1fc49 | 175 | if (fstar.w[1] > half64[ind] || |
176 | (fstar.w[1] == half64[ind] && fstar.w[0])) { | |
9b6b0236 | 177 | // f* > 1/2 and the result may be exact |
178 | // Calculate f* - 1/2 | |
84d1fc49 | 179 | tmp64 = fstar.w[1] - half64[ind]; |
180 | if (tmp64 || fstar.w[0] > ten2mxtrunc64[ind]) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 181 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 182 | } // else the result is exact |
183 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 184 | *ptr_is_inexact_gt_midpoint = 1; |
185 | } | |
186 | // check for midpoints (could do this before determining inexactness) | |
84d1fc49 | 187 | if (fstar.w[1] == 0 && fstar.w[0] <= ten2mxtrunc64[ind]) { |
9b6b0236 | 188 | // the result is a midpoint |
84d1fc49 | 189 | if (Cstar & 0x01) { // Cstar is odd; MP in [EVEN, ODD] |
9b6b0236 | 190 | // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 |
84d1fc49 | 191 | Cstar--; // Cstar is now even |
9b6b0236 | 192 | *ptr_is_midpoint_gt_even = 1; |
193 | *ptr_is_inexact_lt_midpoint = 0; | |
194 | *ptr_is_inexact_gt_midpoint = 0; | |
84d1fc49 | 195 | } else { // else MP in [ODD, EVEN] |
9b6b0236 | 196 | *ptr_is_midpoint_lt_even = 1; |
197 | *ptr_is_inexact_lt_midpoint = 0; | |
198 | *ptr_is_inexact_gt_midpoint = 0; | |
199 | } | |
200 | } | |
201 | // check for rounding overflow, which occurs if Cstar = 10^(q-x) | |
84d1fc49 | 202 | ind = q - x; // 1 <= ind <= q - 1 |
203 | if (Cstar == ten2k64[ind]) { // if Cstar = 10^(q-x) | |
204 | Cstar = ten2k64[ind - 1]; // Cstar = 10^(q-x-1) | |
9b6b0236 | 205 | *incr_exp = 1; |
84d1fc49 | 206 | } else { // 10^33 <= Cstar <= 10^34 - 1 |
9b6b0236 | 207 | *incr_exp = 0; |
208 | } | |
209 | *ptr_Cstar = Cstar; | |
210 | } | |
211 | ||
212 | ||
213 | void | |
84d1fc49 | 214 | round128_19_38 (int q, |
9b6b0236 | 215 | int x, |
216 | UINT128 C, | |
217 | UINT128 * ptr_Cstar, | |
218 | int *incr_exp, | |
219 | int *ptr_is_midpoint_lt_even, | |
220 | int *ptr_is_midpoint_gt_even, | |
221 | int *ptr_is_inexact_lt_midpoint, | |
222 | int *ptr_is_inexact_gt_midpoint) { | |
223 | ||
224 | UINT256 P256; | |
225 | UINT256 fstar; | |
226 | UINT128 Cstar; | |
227 | UINT64 tmp64; | |
228 | int shift; | |
229 | int ind; | |
230 | ||
231 | // Note: | |
232 | // In round128_19_38() positive numbers with 19 <= q <= 38 will be | |
233 | // rounded to nearest only for 1 <= x <= 23: | |
234 | // x = 3 or x = 4 when q = 19 | |
235 | // x = 4 or x = 5 when q = 20 | |
236 | // ... | |
237 | // x = 18 or x = 19 when q = 34 | |
238 | // x = 1 or x = 2 or x = 19 or x = 20 when q = 35 | |
239 | // x = 2 or x = 3 or x = 20 or x = 21 when q = 36 | |
240 | // x = 3 or x = 4 or x = 21 or x = 22 when q = 37 | |
241 | // x = 4 or x = 5 or x = 22 or x = 23 when q = 38 | |
242 | // However, for generality and possible uses outside the frame of IEEE 754R | |
243 | // this implementation works for 1 <= x <= q - 1 | |
244 | ||
245 | // assume *ptr_is_midpoint_lt_even, *ptr_is_midpoint_gt_even, | |
246 | // *ptr_is_inexact_lt_midpoint, and *ptr_is_inexact_gt_midpoint are | |
247 | // initialized to 0 by the caller | |
248 | ||
249 | // round a number C with q decimal digits, 19 <= q <= 38 | |
250 | // to q - x digits, 1 <= x <= 37 | |
251 | // C = C + 1/2 * 10^x where the result C fits in 128 bits | |
252 | // (because the largest value is 99999999999999999999999999999999999999 + | |
253 | // 5000000000000000000000000000000000000 = | |
254 | // 0x4efe43b0c573e7e68a043d8fffffffff, which fits is 127 bits) | |
255 | ||
84d1fc49 | 256 | ind = x - 1; // 0 <= ind <= 36 |
257 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 258 | tmp64 = C.w[0]; |
84d1fc49 | 259 | C.w[0] = C.w[0] + midpoint64[ind]; |
9b6b0236 | 260 | if (C.w[0] < tmp64) |
261 | C.w[1]++; | |
84d1fc49 | 262 | } else { // if 19 <= ind <= 37 |
9b6b0236 | 263 | tmp64 = C.w[0]; |
84d1fc49 | 264 | C.w[0] = C.w[0] + midpoint128[ind - 19].w[0]; |
9b6b0236 | 265 | if (C.w[0] < tmp64) { |
266 | C.w[1]++; | |
267 | } | |
84d1fc49 | 268 | C.w[1] = C.w[1] + midpoint128[ind - 19].w[1]; |
9b6b0236 | 269 | } |
84d1fc49 | 270 | // kx ~= 10^(-x), kx = Kx128[ind] * 2^(-Ex), 0 <= ind <= 36 |
9b6b0236 | 271 | // P256 = (C + 1/2 * 10^x) * kx * 2^Ex = (C + 1/2 * 10^x) * Kx |
272 | // the approximation kx of 10^(-x) was rounded up to 128 bits | |
84d1fc49 | 273 | __mul_128x128_to_256 (P256, C, Kx128[ind]); |
9b6b0236 | 274 | // calculate C* = floor (P256) and f* |
275 | // Cstar = P256 >> Ex | |
276 | // fstar = low Ex bits of P256 | |
84d1fc49 | 277 | shift = Ex128m128[ind]; // in [2, 63] but have to consider two cases |
278 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 279 | Cstar.w[0] = (P256.w[2] >> shift) | (P256.w[3] << (64 - shift)); |
280 | Cstar.w[1] = (P256.w[3] >> shift); | |
281 | fstar.w[0] = P256.w[0]; | |
282 | fstar.w[1] = P256.w[1]; | |
84d1fc49 | 283 | fstar.w[2] = P256.w[2] & mask128[ind]; |
9b6b0236 | 284 | fstar.w[3] = 0x0ULL; |
84d1fc49 | 285 | } else { // if 19 <= ind <= 37 |
9b6b0236 | 286 | Cstar.w[0] = P256.w[3] >> shift; |
287 | Cstar.w[1] = 0x0ULL; | |
288 | fstar.w[0] = P256.w[0]; | |
289 | fstar.w[1] = P256.w[1]; | |
290 | fstar.w[2] = P256.w[2]; | |
84d1fc49 | 291 | fstar.w[3] = P256.w[3] & mask128[ind]; |
9b6b0236 | 292 | } |
84d1fc49 | 293 | // the top Ex bits of 10^(-x) are T* = ten2mxtrunc64[ind], e.g. |
294 | // if x=1, T*=ten2mxtrunc128[0]=0xcccccccccccccccccccccccccccccccc | |
9b6b0236 | 295 | // if (0 < f* < 10^(-x)) then the result is a midpoint |
296 | // if floor(C*) is even then C* = floor(C*) - logical right | |
297 | // shift; C* has q - x decimal digits, correct by Prop. 1) | |
298 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
299 | // shift; C* has q - x decimal digits, correct by Pr. 1) | |
300 | // else | |
301 | // C* = floor(C*) (logical right shift; C has q - x decimal digits, | |
302 | // correct by Property 1) | |
303 | // in the caling function n = C* * 10^(e+x) | |
304 | ||
305 | // determine inexactness of the rounding of C* | |
306 | // if (0 < f* - 1/2 < 10^(-x)) then | |
307 | // the result is exact | |
308 | // else // if (f* - 1/2 > T*) then | |
309 | // the result is inexact | |
84d1fc49 | 310 | if (ind <= 18) { // if 0 <= ind <= 18 |
311 | if (fstar.w[2] > half128[ind] || | |
312 | (fstar.w[2] == half128[ind] && (fstar.w[1] || fstar.w[0]))) { | |
9b6b0236 | 313 | // f* > 1/2 and the result may be exact |
314 | // Calculate f* - 1/2 | |
84d1fc49 | 315 | tmp64 = fstar.w[2] - half128[ind]; |
316 | if (tmp64 || fstar.w[1] > ten2mxtrunc128[ind].w[1] || (fstar.w[1] == ten2mxtrunc128[ind].w[1] && fstar.w[0] > ten2mxtrunc128[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 317 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 318 | } // else the result is exact |
319 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 320 | *ptr_is_inexact_gt_midpoint = 1; |
321 | } | |
84d1fc49 | 322 | } else { // if 19 <= ind <= 37 |
323 | if (fstar.w[3] > half128[ind] || (fstar.w[3] == half128[ind] && | |
324 | (fstar.w[2] || fstar.w[1] | |
325 | || fstar.w[0]))) { | |
9b6b0236 | 326 | // f* > 1/2 and the result may be exact |
327 | // Calculate f* - 1/2 | |
84d1fc49 | 328 | tmp64 = fstar.w[3] - half128[ind]; |
329 | if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mxtrunc128[ind].w[1] || (fstar.w[1] == ten2mxtrunc128[ind].w[1] && fstar.w[0] > ten2mxtrunc128[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 330 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 331 | } // else the result is exact |
332 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 333 | *ptr_is_inexact_gt_midpoint = 1; |
334 | } | |
335 | } | |
336 | // check for midpoints (could do this before determining inexactness) | |
337 | if (fstar.w[3] == 0 && fstar.w[2] == 0 && | |
84d1fc49 | 338 | (fstar.w[1] < ten2mxtrunc128[ind].w[1] || |
339 | (fstar.w[1] == ten2mxtrunc128[ind].w[1] && | |
340 | fstar.w[0] <= ten2mxtrunc128[ind].w[0]))) { | |
9b6b0236 | 341 | // the result is a midpoint |
84d1fc49 | 342 | if (Cstar.w[0] & 0x01) { // Cstar is odd; MP in [EVEN, ODD] |
9b6b0236 | 343 | // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 |
84d1fc49 | 344 | Cstar.w[0]--; // Cstar is now even |
9b6b0236 | 345 | if (Cstar.w[0] == 0xffffffffffffffffULL) { |
346 | Cstar.w[1]--; | |
347 | } | |
348 | *ptr_is_midpoint_gt_even = 1; | |
349 | *ptr_is_inexact_lt_midpoint = 0; | |
350 | *ptr_is_inexact_gt_midpoint = 0; | |
84d1fc49 | 351 | } else { // else MP in [ODD, EVEN] |
9b6b0236 | 352 | *ptr_is_midpoint_lt_even = 1; |
353 | *ptr_is_inexact_lt_midpoint = 0; | |
354 | *ptr_is_inexact_gt_midpoint = 0; | |
355 | } | |
356 | } | |
357 | // check for rounding overflow, which occurs if Cstar = 10^(q-x) | |
84d1fc49 | 358 | ind = q - x; // 1 <= ind <= q - 1 |
9b6b0236 | 359 | if (ind <= 19) { |
84d1fc49 | 360 | if (Cstar.w[1] == 0x0ULL && Cstar.w[0] == ten2k64[ind]) { |
9b6b0236 | 361 | // if Cstar = 10^(q-x) |
84d1fc49 | 362 | Cstar.w[0] = ten2k64[ind - 1]; // Cstar = 10^(q-x-1) |
9b6b0236 | 363 | *incr_exp = 1; |
364 | } else { | |
365 | *incr_exp = 0; | |
366 | } | |
367 | } else if (ind == 20) { | |
368 | // if ind = 20 | |
84d1fc49 | 369 | if (Cstar.w[1] == ten2k128[0].w[1] |
370 | && Cstar.w[0] == ten2k128[0].w[0]) { | |
9b6b0236 | 371 | // if Cstar = 10^(q-x) |
84d1fc49 | 372 | Cstar.w[0] = ten2k64[19]; // Cstar = 10^(q-x-1) |
9b6b0236 | 373 | Cstar.w[1] = 0x0ULL; |
374 | *incr_exp = 1; | |
375 | } else { | |
376 | *incr_exp = 0; | |
377 | } | |
84d1fc49 | 378 | } else { // if 21 <= ind <= 37 |
379 | if (Cstar.w[1] == ten2k128[ind - 20].w[1] && | |
380 | Cstar.w[0] == ten2k128[ind - 20].w[0]) { | |
9b6b0236 | 381 | // if Cstar = 10^(q-x) |
84d1fc49 | 382 | Cstar.w[0] = ten2k128[ind - 21].w[0]; // Cstar = 10^(q-x-1) |
383 | Cstar.w[1] = ten2k128[ind - 21].w[1]; | |
9b6b0236 | 384 | *incr_exp = 1; |
385 | } else { | |
386 | *incr_exp = 0; | |
387 | } | |
388 | } | |
389 | ptr_Cstar->w[1] = Cstar.w[1]; | |
390 | ptr_Cstar->w[0] = Cstar.w[0]; | |
391 | } | |
392 | ||
393 | ||
394 | void | |
84d1fc49 | 395 | round192_39_57 (int q, |
9b6b0236 | 396 | int x, |
397 | UINT192 C, | |
398 | UINT192 * ptr_Cstar, | |
399 | int *incr_exp, | |
400 | int *ptr_is_midpoint_lt_even, | |
401 | int *ptr_is_midpoint_gt_even, | |
402 | int *ptr_is_inexact_lt_midpoint, | |
403 | int *ptr_is_inexact_gt_midpoint) { | |
404 | ||
405 | UINT384 P384; | |
406 | UINT384 fstar; | |
407 | UINT192 Cstar; | |
408 | UINT64 tmp64; | |
409 | int shift; | |
410 | int ind; | |
411 | ||
412 | // Note: | |
413 | // In round192_39_57() positive numbers with 39 <= q <= 57 will be | |
414 | // rounded to nearest only for 5 <= x <= 42: | |
415 | // x = 23 or x = 24 or x = 5 or x = 6 when q = 39 | |
416 | // x = 24 or x = 25 or x = 6 or x = 7 when q = 40 | |
417 | // ... | |
418 | // x = 41 or x = 42 or x = 23 or x = 24 when q = 57 | |
419 | // However, for generality and possible uses outside the frame of IEEE 754R | |
420 | // this implementation works for 1 <= x <= q - 1 | |
421 | ||
422 | // assume *ptr_is_midpoint_lt_even, *ptr_is_midpoint_gt_even, | |
423 | // *ptr_is_inexact_lt_midpoint, and *ptr_is_inexact_gt_midpoint are | |
424 | // initialized to 0 by the caller | |
425 | ||
426 | // round a number C with q decimal digits, 39 <= q <= 57 | |
427 | // to q - x digits, 1 <= x <= 56 | |
428 | // C = C + 1/2 * 10^x where the result C fits in 192 bits | |
429 | // (because the largest value is | |
430 | // 999999999999999999999999999999999999999999999999999999999 + | |
431 | // 50000000000000000000000000000000000000000000000000000000 = | |
432 | // 0x2ad282f212a1da846afdaf18c034ff09da7fffffffffffff, which fits in 190 bits) | |
84d1fc49 | 433 | ind = x - 1; // 0 <= ind <= 55 |
434 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 435 | tmp64 = C.w[0]; |
84d1fc49 | 436 | C.w[0] = C.w[0] + midpoint64[ind]; |
9b6b0236 | 437 | if (C.w[0] < tmp64) { |
438 | C.w[1]++; | |
439 | if (C.w[1] == 0x0) { | |
440 | C.w[2]++; | |
441 | } | |
442 | } | |
84d1fc49 | 443 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
9b6b0236 | 444 | tmp64 = C.w[0]; |
84d1fc49 | 445 | C.w[0] = C.w[0] + midpoint128[ind - 19].w[0]; |
9b6b0236 | 446 | if (C.w[0] < tmp64) { |
447 | C.w[1]++; | |
448 | if (C.w[1] == 0x0) { | |
449 | C.w[2]++; | |
450 | } | |
451 | } | |
452 | tmp64 = C.w[1]; | |
84d1fc49 | 453 | C.w[1] = C.w[1] + midpoint128[ind - 19].w[1]; |
9b6b0236 | 454 | if (C.w[1] < tmp64) { |
455 | C.w[2]++; | |
456 | } | |
84d1fc49 | 457 | } else { // if 38 <= ind <= 57 (actually ind <= 55) |
9b6b0236 | 458 | tmp64 = C.w[0]; |
84d1fc49 | 459 | C.w[0] = C.w[0] + midpoint192[ind - 38].w[0]; |
9b6b0236 | 460 | if (C.w[0] < tmp64) { |
461 | C.w[1]++; | |
462 | if (C.w[1] == 0x0ull) { | |
463 | C.w[2]++; | |
464 | } | |
465 | } | |
466 | tmp64 = C.w[1]; | |
84d1fc49 | 467 | C.w[1] = C.w[1] + midpoint192[ind - 38].w[1]; |
9b6b0236 | 468 | if (C.w[1] < tmp64) { |
469 | C.w[2]++; | |
470 | } | |
84d1fc49 | 471 | C.w[2] = C.w[2] + midpoint192[ind - 38].w[2]; |
9b6b0236 | 472 | } |
84d1fc49 | 473 | // kx ~= 10^(-x), kx = Kx192[ind] * 2^(-Ex), 0 <= ind <= 55 |
9b6b0236 | 474 | // P384 = (C + 1/2 * 10^x) * kx * 2^Ex = (C + 1/2 * 10^x) * Kx |
475 | // the approximation kx of 10^(-x) was rounded up to 192 bits | |
84d1fc49 | 476 | __mul_192x192_to_384 (P384, C, Kx192[ind]); |
9b6b0236 | 477 | // calculate C* = floor (P384) and f* |
478 | // Cstar = P384 >> Ex | |
479 | // fstar = low Ex bits of P384 | |
84d1fc49 | 480 | shift = Ex192m192[ind]; // in [1, 63] but have to consider three cases |
481 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 482 | Cstar.w[2] = (P384.w[5] >> shift); |
483 | Cstar.w[1] = (P384.w[5] << (64 - shift)) | (P384.w[4] >> shift); | |
484 | Cstar.w[0] = (P384.w[4] << (64 - shift)) | (P384.w[3] >> shift); | |
485 | fstar.w[5] = 0x0ULL; | |
486 | fstar.w[4] = 0x0ULL; | |
84d1fc49 | 487 | fstar.w[3] = P384.w[3] & mask192[ind]; |
9b6b0236 | 488 | fstar.w[2] = P384.w[2]; |
489 | fstar.w[1] = P384.w[1]; | |
490 | fstar.w[0] = P384.w[0]; | |
84d1fc49 | 491 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
9b6b0236 | 492 | Cstar.w[2] = 0x0ULL; |
493 | Cstar.w[1] = P384.w[5] >> shift; | |
494 | Cstar.w[0] = (P384.w[5] << (64 - shift)) | (P384.w[4] >> shift); | |
495 | fstar.w[5] = 0x0ULL; | |
84d1fc49 | 496 | fstar.w[4] = P384.w[4] & mask192[ind]; |
9b6b0236 | 497 | fstar.w[3] = P384.w[3]; |
498 | fstar.w[2] = P384.w[2]; | |
499 | fstar.w[1] = P384.w[1]; | |
500 | fstar.w[0] = P384.w[0]; | |
84d1fc49 | 501 | } else { // if 38 <= ind <= 57 |
9b6b0236 | 502 | Cstar.w[2] = 0x0ULL; |
503 | Cstar.w[1] = 0x0ULL; | |
504 | Cstar.w[0] = P384.w[5] >> shift; | |
84d1fc49 | 505 | fstar.w[5] = P384.w[5] & mask192[ind]; |
9b6b0236 | 506 | fstar.w[4] = P384.w[4]; |
507 | fstar.w[3] = P384.w[3]; | |
508 | fstar.w[2] = P384.w[2]; | |
509 | fstar.w[1] = P384.w[1]; | |
510 | fstar.w[0] = P384.w[0]; | |
511 | } | |
512 | ||
84d1fc49 | 513 | // the top Ex bits of 10^(-x) are T* = ten2mxtrunc192[ind], e.g. if x=1, |
514 | // T*=ten2mxtrunc192[0]=0xcccccccccccccccccccccccccccccccccccccccccccccccc | |
9b6b0236 | 515 | // if (0 < f* < 10^(-x)) then the result is a midpoint |
516 | // if floor(C*) is even then C* = floor(C*) - logical right | |
517 | // shift; C* has q - x decimal digits, correct by Prop. 1) | |
518 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
519 | // shift; C* has q - x decimal digits, correct by Pr. 1) | |
520 | // else | |
521 | // C* = floor(C*) (logical right shift; C has q - x decimal digits, | |
522 | // correct by Property 1) | |
523 | // in the caling function n = C* * 10^(e+x) | |
524 | ||
525 | // determine inexactness of the rounding of C* | |
526 | // if (0 < f* - 1/2 < 10^(-x)) then | |
527 | // the result is exact | |
528 | // else // if (f* - 1/2 > T*) then | |
529 | // the result is inexact | |
84d1fc49 | 530 | if (ind <= 18) { // if 0 <= ind <= 18 |
531 | if (fstar.w[3] > half192[ind] || (fstar.w[3] == half192[ind] && | |
532 | (fstar.w[2] || fstar.w[1] | |
533 | || fstar.w[0]))) { | |
9b6b0236 | 534 | // f* > 1/2 and the result may be exact |
535 | // Calculate f* - 1/2 | |
84d1fc49 | 536 | tmp64 = fstar.w[3] - half192[ind]; |
537 | if (tmp64 || fstar.w[2] > ten2mxtrunc192[ind].w[2] || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] > ten2mxtrunc192[ind].w[1]) || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] == ten2mxtrunc192[ind].w[1] && fstar.w[0] > ten2mxtrunc192[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 538 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 539 | } // else the result is exact |
540 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 541 | *ptr_is_inexact_gt_midpoint = 1; |
542 | } | |
84d1fc49 | 543 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
544 | if (fstar.w[4] > half192[ind] || (fstar.w[4] == half192[ind] && | |
545 | (fstar.w[3] || fstar.w[2] | |
546 | || fstar.w[1] || fstar.w[0]))) { | |
9b6b0236 | 547 | // f* > 1/2 and the result may be exact |
548 | // Calculate f* - 1/2 | |
84d1fc49 | 549 | tmp64 = fstar.w[4] - half192[ind]; |
550 | if (tmp64 || fstar.w[3] || fstar.w[2] > ten2mxtrunc192[ind].w[2] || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] > ten2mxtrunc192[ind].w[1]) || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] == ten2mxtrunc192[ind].w[1] && fstar.w[0] > ten2mxtrunc192[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 551 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 552 | } // else the result is exact |
553 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 554 | *ptr_is_inexact_gt_midpoint = 1; |
555 | } | |
84d1fc49 | 556 | } else { // if 38 <= ind <= 55 |
557 | if (fstar.w[5] > half192[ind] || (fstar.w[5] == half192[ind] && | |
558 | (fstar.w[4] || fstar.w[3] | |
559 | || fstar.w[2] || fstar.w[1] | |
560 | || fstar.w[0]))) { | |
9b6b0236 | 561 | // f* > 1/2 and the result may be exact |
562 | // Calculate f* - 1/2 | |
84d1fc49 | 563 | tmp64 = fstar.w[5] - half192[ind]; |
564 | if (tmp64 || fstar.w[4] || fstar.w[3] || fstar.w[2] > ten2mxtrunc192[ind].w[2] || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] > ten2mxtrunc192[ind].w[1]) || (fstar.w[2] == ten2mxtrunc192[ind].w[2] && fstar.w[1] == ten2mxtrunc192[ind].w[1] && fstar.w[0] > ten2mxtrunc192[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 565 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 566 | } // else the result is exact |
567 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 568 | *ptr_is_inexact_gt_midpoint = 1; |
569 | } | |
570 | } | |
571 | // check for midpoints (could do this before determining inexactness) | |
572 | if (fstar.w[5] == 0 && fstar.w[4] == 0 && fstar.w[3] == 0 && | |
84d1fc49 | 573 | (fstar.w[2] < ten2mxtrunc192[ind].w[2] || |
574 | (fstar.w[2] == ten2mxtrunc192[ind].w[2] && | |
575 | fstar.w[1] < ten2mxtrunc192[ind].w[1]) || | |
576 | (fstar.w[2] == ten2mxtrunc192[ind].w[2] && | |
577 | fstar.w[1] == ten2mxtrunc192[ind].w[1] && | |
578 | fstar.w[0] <= ten2mxtrunc192[ind].w[0]))) { | |
9b6b0236 | 579 | // the result is a midpoint |
84d1fc49 | 580 | if (Cstar.w[0] & 0x01) { // Cstar is odd; MP in [EVEN, ODD] |
9b6b0236 | 581 | // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 |
84d1fc49 | 582 | Cstar.w[0]--; // Cstar is now even |
9b6b0236 | 583 | if (Cstar.w[0] == 0xffffffffffffffffULL) { |
584 | Cstar.w[1]--; | |
585 | if (Cstar.w[1] == 0xffffffffffffffffULL) { | |
586 | Cstar.w[2]--; | |
587 | } | |
588 | } | |
589 | *ptr_is_midpoint_gt_even = 1; | |
590 | *ptr_is_inexact_lt_midpoint = 0; | |
591 | *ptr_is_inexact_gt_midpoint = 0; | |
84d1fc49 | 592 | } else { // else MP in [ODD, EVEN] |
9b6b0236 | 593 | *ptr_is_midpoint_lt_even = 1; |
594 | *ptr_is_inexact_lt_midpoint = 0; | |
595 | *ptr_is_inexact_gt_midpoint = 0; | |
596 | } | |
597 | } | |
598 | // check for rounding overflow, which occurs if Cstar = 10^(q-x) | |
84d1fc49 | 599 | ind = q - x; // 1 <= ind <= q - 1 |
9b6b0236 | 600 | if (ind <= 19) { |
601 | if (Cstar.w[2] == 0x0ULL && Cstar.w[1] == 0x0ULL && | |
84d1fc49 | 602 | Cstar.w[0] == ten2k64[ind]) { |
9b6b0236 | 603 | // if Cstar = 10^(q-x) |
84d1fc49 | 604 | Cstar.w[0] = ten2k64[ind - 1]; // Cstar = 10^(q-x-1) |
9b6b0236 | 605 | *incr_exp = 1; |
606 | } else { | |
607 | *incr_exp = 0; | |
608 | } | |
609 | } else if (ind == 20) { | |
610 | // if ind = 20 | |
84d1fc49 | 611 | if (Cstar.w[2] == 0x0ULL && Cstar.w[1] == ten2k128[0].w[1] && |
612 | Cstar.w[0] == ten2k128[0].w[0]) { | |
9b6b0236 | 613 | // if Cstar = 10^(q-x) |
84d1fc49 | 614 | Cstar.w[0] = ten2k64[19]; // Cstar = 10^(q-x-1) |
9b6b0236 | 615 | Cstar.w[1] = 0x0ULL; |
616 | *incr_exp = 1; | |
617 | } else { | |
618 | *incr_exp = 0; | |
619 | } | |
84d1fc49 | 620 | } else if (ind <= 38) { // if 21 <= ind <= 38 |
621 | if (Cstar.w[2] == 0x0ULL && Cstar.w[1] == ten2k128[ind - 20].w[1] && | |
622 | Cstar.w[0] == ten2k128[ind - 20].w[0]) { | |
9b6b0236 | 623 | // if Cstar = 10^(q-x) |
84d1fc49 | 624 | Cstar.w[0] = ten2k128[ind - 21].w[0]; // Cstar = 10^(q-x-1) |
625 | Cstar.w[1] = ten2k128[ind - 21].w[1]; | |
9b6b0236 | 626 | *incr_exp = 1; |
627 | } else { | |
628 | *incr_exp = 0; | |
629 | } | |
630 | } else if (ind == 39) { | |
84d1fc49 | 631 | if (Cstar.w[2] == ten2k256[0].w[2] && Cstar.w[1] == ten2k256[0].w[1] |
632 | && Cstar.w[0] == ten2k256[0].w[0]) { | |
9b6b0236 | 633 | // if Cstar = 10^(q-x) |
84d1fc49 | 634 | Cstar.w[0] = ten2k128[18].w[0]; // Cstar = 10^(q-x-1) |
635 | Cstar.w[1] = ten2k128[18].w[1]; | |
9b6b0236 | 636 | Cstar.w[2] = 0x0ULL; |
637 | *incr_exp = 1; | |
638 | } else { | |
639 | *incr_exp = 0; | |
640 | } | |
84d1fc49 | 641 | } else { // if 40 <= ind <= 56 |
642 | if (Cstar.w[2] == ten2k256[ind - 39].w[2] && | |
643 | Cstar.w[1] == ten2k256[ind - 39].w[1] && | |
644 | Cstar.w[0] == ten2k256[ind - 39].w[0]) { | |
9b6b0236 | 645 | // if Cstar = 10^(q-x) |
84d1fc49 | 646 | Cstar.w[0] = ten2k256[ind - 40].w[0]; // Cstar = 10^(q-x-1) |
647 | Cstar.w[1] = ten2k256[ind - 40].w[1]; | |
648 | Cstar.w[2] = ten2k256[ind - 40].w[2]; | |
9b6b0236 | 649 | *incr_exp = 1; |
650 | } else { | |
651 | *incr_exp = 0; | |
652 | } | |
653 | } | |
654 | ptr_Cstar->w[2] = Cstar.w[2]; | |
655 | ptr_Cstar->w[1] = Cstar.w[1]; | |
656 | ptr_Cstar->w[0] = Cstar.w[0]; | |
657 | } | |
658 | ||
659 | ||
660 | void | |
84d1fc49 | 661 | round256_58_76 (int q, |
9b6b0236 | 662 | int x, |
663 | UINT256 C, | |
664 | UINT256 * ptr_Cstar, | |
665 | int *incr_exp, | |
666 | int *ptr_is_midpoint_lt_even, | |
667 | int *ptr_is_midpoint_gt_even, | |
668 | int *ptr_is_inexact_lt_midpoint, | |
669 | int *ptr_is_inexact_gt_midpoint) { | |
670 | ||
671 | UINT512 P512; | |
672 | UINT512 fstar; | |
673 | UINT256 Cstar; | |
674 | UINT64 tmp64; | |
675 | int shift; | |
676 | int ind; | |
677 | ||
678 | // Note: | |
679 | // In round256_58_76() positive numbers with 58 <= q <= 76 will be | |
680 | // rounded to nearest only for 24 <= x <= 61: | |
681 | // x = 42 or x = 43 or x = 24 or x = 25 when q = 58 | |
682 | // x = 43 or x = 44 or x = 25 or x = 26 when q = 59 | |
683 | // ... | |
684 | // x = 60 or x = 61 or x = 42 or x = 43 when q = 76 | |
685 | // However, for generality and possible uses outside the frame of IEEE 754R | |
686 | // this implementation works for 1 <= x <= q - 1 | |
687 | ||
688 | // assume *ptr_is_midpoint_lt_even, *ptr_is_midpoint_gt_even, | |
689 | // *ptr_is_inexact_lt_midpoint, and *ptr_is_inexact_gt_midpoint are | |
690 | // initialized to 0 by the caller | |
691 | ||
692 | // round a number C with q decimal digits, 58 <= q <= 76 | |
693 | // to q - x digits, 1 <= x <= 75 | |
694 | // C = C + 1/2 * 10^x where the result C fits in 256 bits | |
695 | // (because the largest value is 9999999999999999999999999999999999999999 | |
696 | // 999999999999999999999999999999999999 + 500000000000000000000000000 | |
697 | // 000000000000000000000000000000000000000000000000 = | |
698 | // 0x1736ca15d27a56cae15cf0e7b403d1f2bd6ebb0a50dc83ffffffffffffffffff, | |
699 | // which fits in 253 bits) | |
84d1fc49 | 700 | ind = x - 1; // 0 <= ind <= 74 |
701 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 702 | tmp64 = C.w[0]; |
84d1fc49 | 703 | C.w[0] = C.w[0] + midpoint64[ind]; |
9b6b0236 | 704 | if (C.w[0] < tmp64) { |
705 | C.w[1]++; | |
706 | if (C.w[1] == 0x0) { | |
707 | C.w[2]++; | |
708 | if (C.w[2] == 0x0) { | |
709 | C.w[3]++; | |
710 | } | |
711 | } | |
712 | } | |
84d1fc49 | 713 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
9b6b0236 | 714 | tmp64 = C.w[0]; |
84d1fc49 | 715 | C.w[0] = C.w[0] + midpoint128[ind - 19].w[0]; |
9b6b0236 | 716 | if (C.w[0] < tmp64) { |
717 | C.w[1]++; | |
718 | if (C.w[1] == 0x0) { | |
719 | C.w[2]++; | |
720 | if (C.w[2] == 0x0) { | |
721 | C.w[3]++; | |
722 | } | |
723 | } | |
724 | } | |
725 | tmp64 = C.w[1]; | |
84d1fc49 | 726 | C.w[1] = C.w[1] + midpoint128[ind - 19].w[1]; |
9b6b0236 | 727 | if (C.w[1] < tmp64) { |
728 | C.w[2]++; | |
729 | if (C.w[2] == 0x0) { | |
730 | C.w[3]++; | |
731 | } | |
732 | } | |
84d1fc49 | 733 | } else if (ind <= 57) { // if 38 <= ind <= 57 |
9b6b0236 | 734 | tmp64 = C.w[0]; |
84d1fc49 | 735 | C.w[0] = C.w[0] + midpoint192[ind - 38].w[0]; |
9b6b0236 | 736 | if (C.w[0] < tmp64) { |
737 | C.w[1]++; | |
738 | if (C.w[1] == 0x0ull) { | |
739 | C.w[2]++; | |
740 | if (C.w[2] == 0x0) { | |
741 | C.w[3]++; | |
742 | } | |
743 | } | |
744 | } | |
745 | tmp64 = C.w[1]; | |
84d1fc49 | 746 | C.w[1] = C.w[1] + midpoint192[ind - 38].w[1]; |
9b6b0236 | 747 | if (C.w[1] < tmp64) { |
748 | C.w[2]++; | |
749 | if (C.w[2] == 0x0) { | |
750 | C.w[3]++; | |
751 | } | |
752 | } | |
753 | tmp64 = C.w[2]; | |
84d1fc49 | 754 | C.w[2] = C.w[2] + midpoint192[ind - 38].w[2]; |
9b6b0236 | 755 | if (C.w[2] < tmp64) { |
756 | C.w[3]++; | |
757 | } | |
84d1fc49 | 758 | } else { // if 58 <= ind <= 76 (actually 58 <= ind <= 74) |
9b6b0236 | 759 | tmp64 = C.w[0]; |
84d1fc49 | 760 | C.w[0] = C.w[0] + midpoint256[ind - 58].w[0]; |
9b6b0236 | 761 | if (C.w[0] < tmp64) { |
762 | C.w[1]++; | |
763 | if (C.w[1] == 0x0ull) { | |
764 | C.w[2]++; | |
765 | if (C.w[2] == 0x0) { | |
766 | C.w[3]++; | |
767 | } | |
768 | } | |
769 | } | |
770 | tmp64 = C.w[1]; | |
84d1fc49 | 771 | C.w[1] = C.w[1] + midpoint256[ind - 58].w[1]; |
9b6b0236 | 772 | if (C.w[1] < tmp64) { |
773 | C.w[2]++; | |
774 | if (C.w[2] == 0x0) { | |
775 | C.w[3]++; | |
776 | } | |
777 | } | |
778 | tmp64 = C.w[2]; | |
84d1fc49 | 779 | C.w[2] = C.w[2] + midpoint256[ind - 58].w[2]; |
9b6b0236 | 780 | if (C.w[2] < tmp64) { |
781 | C.w[3]++; | |
782 | } | |
84d1fc49 | 783 | C.w[3] = C.w[3] + midpoint256[ind - 58].w[3]; |
9b6b0236 | 784 | } |
84d1fc49 | 785 | // kx ~= 10^(-x), kx = Kx256[ind] * 2^(-Ex), 0 <= ind <= 74 |
9b6b0236 | 786 | // P512 = (C + 1/2 * 10^x) * kx * 2^Ex = (C + 1/2 * 10^x) * Kx |
787 | // the approximation kx of 10^(-x) was rounded up to 192 bits | |
84d1fc49 | 788 | __mul_256x256_to_512 (P512, C, Kx256[ind]); |
9b6b0236 | 789 | // calculate C* = floor (P512) and f* |
790 | // Cstar = P512 >> Ex | |
791 | // fstar = low Ex bits of P512 | |
84d1fc49 | 792 | shift = Ex256m256[ind]; // in [0, 63] but have to consider four cases |
793 | if (ind <= 18) { // if 0 <= ind <= 18 | |
9b6b0236 | 794 | Cstar.w[3] = (P512.w[7] >> shift); |
795 | Cstar.w[2] = (P512.w[7] << (64 - shift)) | (P512.w[6] >> shift); | |
796 | Cstar.w[1] = (P512.w[6] << (64 - shift)) | (P512.w[5] >> shift); | |
797 | Cstar.w[0] = (P512.w[5] << (64 - shift)) | (P512.w[4] >> shift); | |
798 | fstar.w[7] = 0x0ULL; | |
799 | fstar.w[6] = 0x0ULL; | |
800 | fstar.w[5] = 0x0ULL; | |
84d1fc49 | 801 | fstar.w[4] = P512.w[4] & mask256[ind]; |
9b6b0236 | 802 | fstar.w[3] = P512.w[3]; |
803 | fstar.w[2] = P512.w[2]; | |
804 | fstar.w[1] = P512.w[1]; | |
805 | fstar.w[0] = P512.w[0]; | |
84d1fc49 | 806 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
9b6b0236 | 807 | Cstar.w[3] = 0x0ULL; |
808 | Cstar.w[2] = P512.w[7] >> shift; | |
809 | Cstar.w[1] = (P512.w[7] << (64 - shift)) | (P512.w[6] >> shift); | |
810 | Cstar.w[0] = (P512.w[6] << (64 - shift)) | (P512.w[5] >> shift); | |
811 | fstar.w[7] = 0x0ULL; | |
812 | fstar.w[6] = 0x0ULL; | |
84d1fc49 | 813 | fstar.w[5] = P512.w[5] & mask256[ind]; |
9b6b0236 | 814 | fstar.w[4] = P512.w[4]; |
815 | fstar.w[3] = P512.w[3]; | |
816 | fstar.w[2] = P512.w[2]; | |
817 | fstar.w[1] = P512.w[1]; | |
818 | fstar.w[0] = P512.w[0]; | |
84d1fc49 | 819 | } else if (ind <= 56) { // if 38 <= ind <= 56 |
9b6b0236 | 820 | Cstar.w[3] = 0x0ULL; |
821 | Cstar.w[2] = 0x0ULL; | |
822 | Cstar.w[1] = P512.w[7] >> shift; | |
823 | Cstar.w[0] = (P512.w[7] << (64 - shift)) | (P512.w[6] >> shift); | |
824 | fstar.w[7] = 0x0ULL; | |
84d1fc49 | 825 | fstar.w[6] = P512.w[6] & mask256[ind]; |
9b6b0236 | 826 | fstar.w[5] = P512.w[5]; |
827 | fstar.w[4] = P512.w[4]; | |
828 | fstar.w[3] = P512.w[3]; | |
829 | fstar.w[2] = P512.w[2]; | |
830 | fstar.w[1] = P512.w[1]; | |
831 | fstar.w[0] = P512.w[0]; | |
832 | } else if (ind == 57) { | |
833 | Cstar.w[3] = 0x0ULL; | |
834 | Cstar.w[2] = 0x0ULL; | |
835 | Cstar.w[1] = 0x0ULL; | |
836 | Cstar.w[0] = P512.w[7]; | |
837 | fstar.w[7] = 0x0ULL; | |
838 | fstar.w[6] = P512.w[6]; | |
839 | fstar.w[5] = P512.w[5]; | |
840 | fstar.w[4] = P512.w[4]; | |
841 | fstar.w[3] = P512.w[3]; | |
842 | fstar.w[2] = P512.w[2]; | |
843 | fstar.w[1] = P512.w[1]; | |
844 | fstar.w[0] = P512.w[0]; | |
84d1fc49 | 845 | } else { // if 58 <= ind <= 74 |
9b6b0236 | 846 | Cstar.w[3] = 0x0ULL; |
847 | Cstar.w[2] = 0x0ULL; | |
848 | Cstar.w[1] = 0x0ULL; | |
849 | Cstar.w[0] = P512.w[7] >> shift; | |
84d1fc49 | 850 | fstar.w[7] = P512.w[7] & mask256[ind]; |
9b6b0236 | 851 | fstar.w[6] = P512.w[6]; |
852 | fstar.w[5] = P512.w[5]; | |
853 | fstar.w[4] = P512.w[4]; | |
854 | fstar.w[3] = P512.w[3]; | |
855 | fstar.w[2] = P512.w[2]; | |
856 | fstar.w[1] = P512.w[1]; | |
857 | fstar.w[0] = P512.w[0]; | |
858 | } | |
859 | ||
84d1fc49 | 860 | // the top Ex bits of 10^(-x) are T* = ten2mxtrunc256[ind], e.g. if x=1, |
861 | // T*=ten2mxtrunc256[0]= | |
9b6b0236 | 862 | // 0xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc |
863 | // if (0 < f* < 10^(-x)) then the result is a midpoint | |
864 | // if floor(C*) is even then C* = floor(C*) - logical right | |
865 | // shift; C* has q - x decimal digits, correct by Prop. 1) | |
866 | // else if floor(C*) is odd C* = floor(C*)-1 (logical right | |
867 | // shift; C* has q - x decimal digits, correct by Pr. 1) | |
868 | // else | |
869 | // C* = floor(C*) (logical right shift; C has q - x decimal digits, | |
870 | // correct by Property 1) | |
871 | // in the caling function n = C* * 10^(e+x) | |
872 | ||
873 | // determine inexactness of the rounding of C* | |
874 | // if (0 < f* - 1/2 < 10^(-x)) then | |
875 | // the result is exact | |
876 | // else // if (f* - 1/2 > T*) then | |
877 | // the result is inexact | |
84d1fc49 | 878 | if (ind <= 18) { // if 0 <= ind <= 18 |
879 | if (fstar.w[4] > half256[ind] || (fstar.w[4] == half256[ind] && | |
880 | (fstar.w[3] || fstar.w[2] | |
881 | || fstar.w[1] || fstar.w[0]))) { | |
9b6b0236 | 882 | // f* > 1/2 and the result may be exact |
883 | // Calculate f* - 1/2 | |
84d1fc49 | 884 | tmp64 = fstar.w[4] - half256[ind]; |
885 | if (tmp64 || fstar.w[3] > ten2mxtrunc256[ind].w[2] || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] > ten2mxtrunc256[ind].w[2]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] > ten2mxtrunc256[ind].w[1]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] == ten2mxtrunc256[ind].w[1] && fstar.w[0] > ten2mxtrunc256[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 886 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 887 | } // else the result is exact |
888 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 889 | *ptr_is_inexact_gt_midpoint = 1; |
890 | } | |
84d1fc49 | 891 | } else if (ind <= 37) { // if 19 <= ind <= 37 |
892 | if (fstar.w[5] > half256[ind] || (fstar.w[5] == half256[ind] && | |
893 | (fstar.w[4] || fstar.w[3] | |
894 | || fstar.w[2] || fstar.w[1] | |
895 | || fstar.w[0]))) { | |
9b6b0236 | 896 | // f* > 1/2 and the result may be exact |
897 | // Calculate f* - 1/2 | |
84d1fc49 | 898 | tmp64 = fstar.w[5] - half256[ind]; |
899 | if (tmp64 || fstar.w[4] || fstar.w[3] > ten2mxtrunc256[ind].w[3] || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] > ten2mxtrunc256[ind].w[2]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] > ten2mxtrunc256[ind].w[1]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] == ten2mxtrunc256[ind].w[1] && fstar.w[0] > ten2mxtrunc256[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 900 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 901 | } // else the result is exact |
902 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 903 | *ptr_is_inexact_gt_midpoint = 1; |
904 | } | |
84d1fc49 | 905 | } else if (ind <= 57) { // if 38 <= ind <= 57 |
906 | if (fstar.w[6] > half256[ind] || (fstar.w[6] == half256[ind] && | |
907 | (fstar.w[5] || fstar.w[4] | |
908 | || fstar.w[3] || fstar.w[2] | |
909 | || fstar.w[1] || fstar.w[0]))) { | |
9b6b0236 | 910 | // f* > 1/2 and the result may be exact |
911 | // Calculate f* - 1/2 | |
84d1fc49 | 912 | tmp64 = fstar.w[6] - half256[ind]; |
913 | if (tmp64 || fstar.w[5] || fstar.w[4] || fstar.w[3] > ten2mxtrunc256[ind].w[3] || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] > ten2mxtrunc256[ind].w[2]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] > ten2mxtrunc256[ind].w[1]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] == ten2mxtrunc256[ind].w[1] && fstar.w[0] > ten2mxtrunc256[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 914 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 915 | } // else the result is exact |
916 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 917 | *ptr_is_inexact_gt_midpoint = 1; |
918 | } | |
84d1fc49 | 919 | } else { // if 58 <= ind <= 74 |
920 | if (fstar.w[7] > half256[ind] || (fstar.w[7] == half256[ind] && | |
921 | (fstar.w[6] || fstar.w[5] | |
922 | || fstar.w[4] || fstar.w[3] | |
923 | || fstar.w[2] || fstar.w[1] | |
924 | || fstar.w[0]))) { | |
9b6b0236 | 925 | // f* > 1/2 and the result may be exact |
926 | // Calculate f* - 1/2 | |
84d1fc49 | 927 | tmp64 = fstar.w[7] - half256[ind]; |
928 | if (tmp64 || fstar.w[6] || fstar.w[5] || fstar.w[4] || fstar.w[3] > ten2mxtrunc256[ind].w[3] || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] > ten2mxtrunc256[ind].w[2]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] > ten2mxtrunc256[ind].w[1]) || (fstar.w[3] == ten2mxtrunc256[ind].w[3] && fstar.w[2] == ten2mxtrunc256[ind].w[2] && fstar.w[1] == ten2mxtrunc256[ind].w[1] && fstar.w[0] > ten2mxtrunc256[ind].w[0])) { // f* - 1/2 > 10^(-x) | |
9b6b0236 | 929 | *ptr_is_inexact_lt_midpoint = 1; |
84d1fc49 | 930 | } // else the result is exact |
931 | } else { // the result is inexact; f2* <= 1/2 | |
9b6b0236 | 932 | *ptr_is_inexact_gt_midpoint = 1; |
933 | } | |
934 | } | |
935 | // check for midpoints (could do this before determining inexactness) | |
936 | if (fstar.w[7] == 0 && fstar.w[6] == 0 && | |
937 | fstar.w[5] == 0 && fstar.w[4] == 0 && | |
84d1fc49 | 938 | (fstar.w[3] < ten2mxtrunc256[ind].w[3] || |
939 | (fstar.w[3] == ten2mxtrunc256[ind].w[3] && | |
940 | fstar.w[2] < ten2mxtrunc256[ind].w[2]) || | |
941 | (fstar.w[3] == ten2mxtrunc256[ind].w[3] && | |
942 | fstar.w[2] == ten2mxtrunc256[ind].w[2] && | |
943 | fstar.w[1] < ten2mxtrunc256[ind].w[1]) || | |
944 | (fstar.w[3] == ten2mxtrunc256[ind].w[3] && | |
945 | fstar.w[2] == ten2mxtrunc256[ind].w[2] && | |
946 | fstar.w[1] == ten2mxtrunc256[ind].w[1] && | |
947 | fstar.w[0] <= ten2mxtrunc256[ind].w[0]))) { | |
9b6b0236 | 948 | // the result is a midpoint |
84d1fc49 | 949 | if (Cstar.w[0] & 0x01) { // Cstar is odd; MP in [EVEN, ODD] |
9b6b0236 | 950 | // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 |
84d1fc49 | 951 | Cstar.w[0]--; // Cstar is now even |
9b6b0236 | 952 | if (Cstar.w[0] == 0xffffffffffffffffULL) { |
953 | Cstar.w[1]--; | |
954 | if (Cstar.w[1] == 0xffffffffffffffffULL) { | |
955 | Cstar.w[2]--; | |
956 | if (Cstar.w[2] == 0xffffffffffffffffULL) { | |
957 | Cstar.w[3]--; | |
958 | } | |
959 | } | |
960 | } | |
961 | *ptr_is_midpoint_gt_even = 1; | |
962 | *ptr_is_inexact_lt_midpoint = 0; | |
963 | *ptr_is_inexact_gt_midpoint = 0; | |
84d1fc49 | 964 | } else { // else MP in [ODD, EVEN] |
9b6b0236 | 965 | *ptr_is_midpoint_lt_even = 1; |
966 | *ptr_is_inexact_lt_midpoint = 0; | |
967 | *ptr_is_inexact_gt_midpoint = 0; | |
968 | } | |
969 | } | |
970 | // check for rounding overflow, which occurs if Cstar = 10^(q-x) | |
84d1fc49 | 971 | ind = q - x; // 1 <= ind <= q - 1 |
9b6b0236 | 972 | if (ind <= 19) { |
973 | if (Cstar.w[3] == 0x0ULL && Cstar.w[2] == 0x0ULL && | |
84d1fc49 | 974 | Cstar.w[1] == 0x0ULL && Cstar.w[0] == ten2k64[ind]) { |
9b6b0236 | 975 | // if Cstar = 10^(q-x) |
84d1fc49 | 976 | Cstar.w[0] = ten2k64[ind - 1]; // Cstar = 10^(q-x-1) |
9b6b0236 | 977 | *incr_exp = 1; |
978 | } else { | |
979 | *incr_exp = 0; | |
980 | } | |
981 | } else if (ind == 20) { | |
982 | // if ind = 20 | |
983 | if (Cstar.w[3] == 0x0ULL && Cstar.w[2] == 0x0ULL && | |
84d1fc49 | 984 | Cstar.w[1] == ten2k128[0].w[1] |
985 | && Cstar.w[0] == ten2k128[0].w[0]) { | |
9b6b0236 | 986 | // if Cstar = 10^(q-x) |
84d1fc49 | 987 | Cstar.w[0] = ten2k64[19]; // Cstar = 10^(q-x-1) |
9b6b0236 | 988 | Cstar.w[1] = 0x0ULL; |
989 | *incr_exp = 1; | |
990 | } else { | |
991 | *incr_exp = 0; | |
992 | } | |
84d1fc49 | 993 | } else if (ind <= 38) { // if 21 <= ind <= 38 |
9b6b0236 | 994 | if (Cstar.w[3] == 0x0ULL && Cstar.w[2] == 0x0ULL && |
84d1fc49 | 995 | Cstar.w[1] == ten2k128[ind - 20].w[1] && |
996 | Cstar.w[0] == ten2k128[ind - 20].w[0]) { | |
9b6b0236 | 997 | // if Cstar = 10^(q-x) |
84d1fc49 | 998 | Cstar.w[0] = ten2k128[ind - 21].w[0]; // Cstar = 10^(q-x-1) |
999 | Cstar.w[1] = ten2k128[ind - 21].w[1]; | |
9b6b0236 | 1000 | *incr_exp = 1; |
1001 | } else { | |
1002 | *incr_exp = 0; | |
1003 | } | |
1004 | } else if (ind == 39) { | |
84d1fc49 | 1005 | if (Cstar.w[3] == 0x0ULL && Cstar.w[2] == ten2k256[0].w[2] && |
1006 | Cstar.w[1] == ten2k256[0].w[1] | |
1007 | && Cstar.w[0] == ten2k256[0].w[0]) { | |
9b6b0236 | 1008 | // if Cstar = 10^(q-x) |
84d1fc49 | 1009 | Cstar.w[0] = ten2k128[18].w[0]; // Cstar = 10^(q-x-1) |
1010 | Cstar.w[1] = ten2k128[18].w[1]; | |
9b6b0236 | 1011 | Cstar.w[2] = 0x0ULL; |
1012 | *incr_exp = 1; | |
1013 | } else { | |
1014 | *incr_exp = 0; | |
1015 | } | |
84d1fc49 | 1016 | } else if (ind <= 57) { // if 40 <= ind <= 57 |
1017 | if (Cstar.w[3] == 0x0ULL && Cstar.w[2] == ten2k256[ind - 39].w[2] && | |
1018 | Cstar.w[1] == ten2k256[ind - 39].w[1] && | |
1019 | Cstar.w[0] == ten2k256[ind - 39].w[0]) { | |
9b6b0236 | 1020 | // if Cstar = 10^(q-x) |
84d1fc49 | 1021 | Cstar.w[0] = ten2k256[ind - 40].w[0]; // Cstar = 10^(q-x-1) |
1022 | Cstar.w[1] = ten2k256[ind - 40].w[1]; | |
1023 | Cstar.w[2] = ten2k256[ind - 40].w[2]; | |
9b6b0236 | 1024 | *incr_exp = 1; |
1025 | } else { | |
1026 | *incr_exp = 0; | |
1027 | } | |
84d1fc49 | 1028 | // else if (ind == 58) is not needed becauae we do not have ten2k192[] yet |
1029 | } else { // if 58 <= ind <= 77 (actually 58 <= ind <= 74) | |
1030 | if (Cstar.w[3] == ten2k256[ind - 39].w[3] && | |
1031 | Cstar.w[2] == ten2k256[ind - 39].w[2] && | |
1032 | Cstar.w[1] == ten2k256[ind - 39].w[1] && | |
1033 | Cstar.w[0] == ten2k256[ind - 39].w[0]) { | |
9b6b0236 | 1034 | // if Cstar = 10^(q-x) |
84d1fc49 | 1035 | Cstar.w[0] = ten2k256[ind - 40].w[0]; // Cstar = 10^(q-x-1) |
1036 | Cstar.w[1] = ten2k256[ind - 40].w[1]; | |
1037 | Cstar.w[2] = ten2k256[ind - 40].w[2]; | |
1038 | Cstar.w[3] = ten2k256[ind - 40].w[3]; | |
9b6b0236 | 1039 | *incr_exp = 1; |
1040 | } else { | |
1041 | *incr_exp = 0; | |
1042 | } | |
1043 | } | |
1044 | ptr_Cstar->w[3] = Cstar.w[3]; | |
1045 | ptr_Cstar->w[2] = Cstar.w[2]; | |
1046 | ptr_Cstar->w[1] = Cstar.w[1]; | |
1047 | ptr_Cstar->w[0] = Cstar.w[0]; | |
1048 | ||
1049 | } |