]>
Commit | Line | Data |
---|---|---|
8d9254fc | 1 | /* Copyright (C) 2007-2020 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | /***************************************************************************** | |
25 | * | |
26 | * BID128 fma x * y + z | |
27 | * | |
28 | ****************************************************************************/ | |
29 | ||
200359e8 L |
30 | #include "bid_internal.h" |
31 | ||
32 | static void | |
b2a00c89 L |
33 | rounding_correction (unsigned int rnd_mode, |
34 | unsigned int is_inexact_lt_midpoint, | |
35 | unsigned int is_inexact_gt_midpoint, | |
36 | unsigned int is_midpoint_lt_even, | |
37 | unsigned int is_midpoint_gt_even, | |
38 | int unbexp, | |
39 | UINT128 * ptrres, _IDEC_flags * ptrfpsf) { | |
40 | // unbiased true exponent unbexp may be larger than emax | |
200359e8 L |
41 | |
42 | UINT128 res = *ptrres; // expected to have the correct sign and coefficient | |
b2a00c89 | 43 | // (the exponent field is ignored, as unbexp is used instead) |
200359e8 L |
44 | UINT64 sign, exp; |
45 | UINT64 C_hi, C_lo; | |
46 | ||
47 | // general correction from RN to RA, RM, RP, RZ | |
48 | // Note: if the result is negative, then is_inexact_lt_midpoint, | |
49 | // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even | |
50 | // have to be considered as if determined for the absolute value of the | |
51 | // result (so they seem to be reversed) | |
52 | ||
53 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
54 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
55 | *ptrfpsf |= INEXACT_EXCEPTION; | |
56 | } | |
57 | // apply correction to result calculated with unbounded exponent | |
58 | sign = res.w[1] & MASK_SIGN; | |
59 | exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax | |
60 | C_hi = res.w[1] & MASK_COEFF; | |
61 | C_lo = res.w[0]; | |
b2a00c89 | 62 | if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || |
200359e8 | 63 | ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && |
b2a00c89 L |
64 | is_midpoint_gt_even))) || |
65 | (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || | |
200359e8 L |
66 | ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && |
67 | is_midpoint_gt_even)))) { | |
68 | // C = C + 1 | |
69 | C_lo = C_lo + 1; | |
70 | if (C_lo == 0) | |
71 | C_hi = C_hi + 1; | |
72 | if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { | |
73 | // C = 10^34 => rounding overflow | |
74 | C_hi = 0x0000314dc6448d93ull; | |
75 | C_lo = 0x38c15b0a00000000ull; // 10^33 | |
76 | // exp = exp + EXP_P1; | |
77 | unbexp = unbexp + 1; | |
78 | exp = (UINT64) (unbexp + 6176) << 49; | |
79 | } | |
80 | } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && | |
b2a00c89 | 81 | ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || |
200359e8 L |
82 | (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { |
83 | // C = C - 1 | |
84 | C_lo = C_lo - 1; | |
85 | if (C_lo == 0xffffffffffffffffull) | |
86 | C_hi--; | |
87 | // check if we crossed into the lower decade | |
b2a00c89 L |
88 | if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { |
89 | // C = 10^33 - 1 | |
90 | if (exp > 0) { | |
91 | C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
92 | C_lo = 0x378d8e63ffffffffull; | |
93 | // exp = exp - EXP_P1; | |
94 | unbexp = unbexp - 1; | |
95 | exp = (UINT64) (unbexp + 6176) << 49; | |
0f8f303b MC |
96 | } else { // if exp = 0 the result is tiny & inexact |
97 | *ptrfpsf |= UNDERFLOW_EXCEPTION; | |
b2a00c89 | 98 | } |
200359e8 L |
99 | } |
100 | } else { | |
101 | ; // the result is already correct | |
102 | } | |
103 | if (unbexp > expmax) { // 6111 | |
104 | *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
105 | exp = 0; | |
106 | if (!sign) { // result is positive | |
107 | if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf | |
108 | C_hi = 0x7800000000000000ull; | |
109 | C_lo = 0x0000000000000000ull; | |
110 | } else { // res = +MAXFP = (10^34-1) * 10^emax | |
111 | C_hi = 0x5fffed09bead87c0ull; | |
112 | C_lo = 0x378d8e63ffffffffull; | |
113 | } | |
114 | } else { // result is negative | |
115 | if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf | |
116 | C_hi = 0xf800000000000000ull; | |
117 | C_lo = 0x0000000000000000ull; | |
118 | } else { // res = -MAXFP = -(10^34-1) * 10^emax | |
119 | C_hi = 0xdfffed09bead87c0ull; | |
120 | C_lo = 0x378d8e63ffffffffull; | |
121 | } | |
122 | } | |
123 | } | |
124 | // assemble the result | |
125 | res.w[1] = sign | exp | C_hi; | |
126 | res.w[0] = C_lo; | |
127 | *ptrres = res; | |
128 | } | |
129 | ||
130 | static void | |
131 | add256 (UINT256 x, UINT256 y, UINT256 * pz) { | |
132 | // *z = x + yl assume the sum fits in 256 bits | |
133 | UINT256 z; | |
134 | z.w[0] = x.w[0] + y.w[0]; | |
135 | if (z.w[0] < x.w[0]) { | |
136 | x.w[1]++; | |
137 | if (x.w[1] == 0x0000000000000000ull) { | |
138 | x.w[2]++; | |
139 | if (x.w[2] == 0x0000000000000000ull) { | |
140 | x.w[3]++; | |
141 | } | |
142 | } | |
143 | } | |
144 | z.w[1] = x.w[1] + y.w[1]; | |
145 | if (z.w[1] < x.w[1]) { | |
146 | x.w[2]++; | |
147 | if (x.w[2] == 0x0000000000000000ull) { | |
148 | x.w[3]++; | |
149 | } | |
150 | } | |
151 | z.w[2] = x.w[2] + y.w[2]; | |
152 | if (z.w[2] < x.w[2]) { | |
153 | x.w[3]++; | |
154 | } | |
155 | z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible | |
156 | *pz = z; | |
157 | } | |
158 | ||
159 | static void | |
160 | sub256 (UINT256 x, UINT256 y, UINT256 * pz) { | |
161 | // *z = x - y; assume x >= y | |
162 | UINT256 z; | |
163 | z.w[0] = x.w[0] - y.w[0]; | |
164 | if (z.w[0] > x.w[0]) { | |
165 | x.w[1]--; | |
166 | if (x.w[1] == 0xffffffffffffffffull) { | |
167 | x.w[2]--; | |
168 | if (x.w[2] == 0xffffffffffffffffull) { | |
169 | x.w[3]--; | |
170 | } | |
171 | } | |
172 | } | |
173 | z.w[1] = x.w[1] - y.w[1]; | |
174 | if (z.w[1] > x.w[1]) { | |
175 | x.w[2]--; | |
176 | if (x.w[2] == 0xffffffffffffffffull) { | |
177 | x.w[3]--; | |
178 | } | |
179 | } | |
180 | z.w[2] = x.w[2] - y.w[2]; | |
181 | if (z.w[2] > x.w[2]) { | |
182 | x.w[3]--; | |
183 | } | |
184 | z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y | |
185 | *pz = z; | |
186 | } | |
187 | ||
188 | ||
189 | static int | |
b2a00c89 | 190 | nr_digits256 (UINT256 R256) { |
200359e8 L |
191 | int ind; |
192 | // determine the number of decimal digits in R256 | |
193 | if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { | |
194 | // between 1 and 19 digits | |
195 | for (ind = 1; ind <= 19; ind++) { | |
b2a00c89 | 196 | if (R256.w[0] < ten2k64[ind]) { |
200359e8 L |
197 | break; |
198 | } | |
199 | } | |
200 | // ind digits | |
201 | } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && | |
b2a00c89 L |
202 | (R256.w[1] < ten2k128[0].w[1] || |
203 | (R256.w[1] == ten2k128[0].w[1] | |
204 | && R256.w[0] < ten2k128[0].w[0]))) { | |
200359e8 L |
205 | // 20 digits |
206 | ind = 20; | |
207 | } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { | |
208 | // between 21 and 38 digits | |
209 | for (ind = 1; ind <= 18; ind++) { | |
b2a00c89 L |
210 | if (R256.w[1] < ten2k128[ind].w[1] || |
211 | (R256.w[1] == ten2k128[ind].w[1] && | |
212 | R256.w[0] < ten2k128[ind].w[0])) { | |
200359e8 L |
213 | break; |
214 | } | |
215 | } | |
216 | // ind + 20 digits | |
217 | ind = ind + 20; | |
b2a00c89 L |
218 | } else if (R256.w[3] == 0x0 && |
219 | (R256.w[2] < ten2k256[0].w[2] || | |
220 | (R256.w[2] == ten2k256[0].w[2] && | |
221 | R256.w[1] < ten2k256[0].w[1]) || | |
222 | (R256.w[2] == ten2k256[0].w[2] && | |
223 | R256.w[1] == ten2k256[0].w[1] && | |
224 | R256.w[0] < ten2k256[0].w[0]))) { | |
200359e8 L |
225 | // 39 digits |
226 | ind = 39; | |
227 | } else { | |
228 | // between 40 and 68 digits | |
229 | for (ind = 1; ind <= 29; ind++) { | |
b2a00c89 L |
230 | if (R256.w[3] < ten2k256[ind].w[3] || |
231 | (R256.w[3] == ten2k256[ind].w[3] && | |
232 | R256.w[2] < ten2k256[ind].w[2]) || | |
233 | (R256.w[3] == ten2k256[ind].w[3] && | |
234 | R256.w[2] == ten2k256[ind].w[2] && | |
235 | R256.w[1] < ten2k256[ind].w[1]) || | |
236 | (R256.w[3] == ten2k256[ind].w[3] && | |
237 | R256.w[2] == ten2k256[ind].w[2] && | |
238 | R256.w[1] == ten2k256[ind].w[1] && | |
239 | R256.w[0] < ten2k256[ind].w[0])) { | |
200359e8 L |
240 | break; |
241 | } | |
242 | } | |
243 | // ind + 39 digits | |
244 | ind = ind + 39; | |
245 | } | |
246 | return (ind); | |
247 | } | |
248 | ||
249 | // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so | |
250 | // use the rounding information from ptr_is_* to avoid a double rounding error | |
251 | static void | |
252 | add_and_round (int q3, | |
253 | int q4, | |
254 | int e4, | |
255 | int delta, | |
256 | int p34, | |
257 | UINT64 z_sign, | |
258 | UINT64 p_sign, | |
259 | UINT128 C3, | |
260 | UINT256 C4, | |
261 | int rnd_mode, | |
262 | int *ptr_is_midpoint_lt_even, | |
263 | int *ptr_is_midpoint_gt_even, | |
264 | int *ptr_is_inexact_lt_midpoint, | |
265 | int *ptr_is_inexact_gt_midpoint, | |
266 | _IDEC_flags * ptrfpsf, UINT128 * ptrres) { | |
267 | ||
268 | int scale; | |
269 | int x0; | |
270 | int ind; | |
271 | UINT64 R64; | |
272 | UINT128 P128, R128; | |
273 | UINT192 P192, R192; | |
274 | UINT256 R256; | |
275 | int is_midpoint_lt_even = 0; | |
276 | int is_midpoint_gt_even = 0; | |
277 | int is_inexact_lt_midpoint = 0; | |
278 | int is_inexact_gt_midpoint = 0; | |
279 | int is_midpoint_lt_even0 = 0; | |
280 | int is_midpoint_gt_even0 = 0; | |
281 | int is_inexact_lt_midpoint0 = 0; | |
282 | int is_inexact_gt_midpoint0 = 0; | |
283 | int incr_exp = 0; | |
284 | int is_tiny = 0; | |
285 | int lt_half_ulp = 0; | |
286 | int eq_half_ulp = 0; | |
287 | // int gt_half_ulp = 0; | |
288 | UINT128 res = *ptrres; | |
289 | ||
290 | // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 | |
291 | scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this | |
292 | // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 | |
293 | ||
294 | // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for | |
295 | // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) | |
296 | if (scale == 0) { | |
297 | R256.w[3] = 0x0ull; | |
298 | R256.w[2] = 0x0ull; | |
299 | R256.w[1] = C3.w[1]; | |
300 | R256.w[0] = C3.w[0]; | |
301 | } else if (scale <= 19) { // 10^scale fits in 64 bits | |
302 | P128.w[1] = 0; | |
b2a00c89 | 303 | P128.w[0] = ten2k64[scale]; |
200359e8 L |
304 | __mul_128x128_to_256 (R256, P128, C3); |
305 | } else if (scale <= 38) { // 10^scale fits in 128 bits | |
b2a00c89 | 306 | __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); |
200359e8 L |
307 | } else if (scale <= 57) { // 39 <= scale <= 57 |
308 | // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits | |
309 | // (10^67 has 223 bits; 10^69 has 230 bits); | |
310 | // must split the computation: | |
311 | // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 | |
312 | // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty | |
313 | // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits | |
b2a00c89 | 314 | __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); |
200359e8 | 315 | // now multiply R128 by 10^38 |
b2a00c89 | 316 | __mul_128x128_to_256 (R256, R128, ten2k128[18]); |
200359e8 L |
317 | } else { // 58 <= scale <= 66 |
318 | // 10^scale takes between 193 and 220 bits, | |
319 | // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) | |
320 | // must split the computation: | |
321 | // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 | |
322 | // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty | |
323 | // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits | |
324 | // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because | |
325 | // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 | |
b2a00c89 | 326 | __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); |
200359e8 | 327 | // now calculate 10*38 * 10^(scale-38) * C3 |
b2a00c89 | 328 | __mul_128x128_to_256 (R256, R128, ten2k128[18]); |
200359e8 L |
329 | } |
330 | // C3 * 10^scale is now in R256 | |
331 | ||
332 | // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least | |
333 | // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is | |
334 | // possible | |
335 | // add/subtract C4 and C3 * 10^scale; the exponent is e4 | |
336 | if (p_sign == z_sign) { // R256 = C4 + R256 | |
337 | // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, | |
338 | // but may require rounding | |
339 | add256 (C4, R256, &R256); | |
340 | } else { // if (p_sign != z_sign) { // R256 = C4 - R256 | |
341 | // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or | |
342 | // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, | |
343 | // but may require rounding | |
344 | ||
345 | // compare first R256 = C3 * 10^scale and C4 | |
b2a00c89 | 346 | if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || |
200359e8 L |
347 | (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || |
348 | (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && | |
349 | R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 | |
350 | // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, | |
351 | // but may require rounding | |
352 | sub256 (R256, C4, &R256); | |
353 | // flip p_sign too, because the result has the sign of z | |
354 | p_sign = z_sign; | |
355 | } else { // if C4 > C3 * 10^scale | |
356 | // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, | |
357 | // but may require rounding | |
358 | sub256 (C4, R256, &R256); | |
359 | } | |
360 | // if the result is pure zero, the sign depends on the rounding mode | |
361 | // (x*y and z had opposite signs) | |
362 | if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && | |
363 | R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { | |
364 | if (rnd_mode != ROUNDING_DOWN) | |
365 | p_sign = 0x0000000000000000ull; | |
366 | else | |
367 | p_sign = 0x8000000000000000ull; | |
368 | // the exponent is max (e4, expmin) | |
369 | if (e4 < -6176) | |
370 | e4 = expmin; | |
371 | // assemble result | |
372 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); | |
373 | res.w[0] = 0x0; | |
374 | *ptrres = res; | |
375 | return; | |
376 | } | |
377 | } | |
378 | ||
379 | // determine the number of decimal digits in R256 | |
b2a00c89 | 380 | ind = nr_digits256 (R256); |
200359e8 L |
381 | |
382 | // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; | |
383 | // round to the destination precision, with unbounded exponent | |
384 | ||
385 | if (ind <= p34) { | |
386 | // result rounded to the destination precision with unbounded exponent | |
387 | // is exact | |
388 | if (ind + e4 < p34 + expmin) { | |
389 | is_tiny = 1; // applies to all rounding modes | |
390 | } | |
391 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; | |
392 | res.w[0] = R256.w[0]; | |
393 | // Note: res is correct only if expmin <= e4 <= expmax | |
394 | } else { // if (ind > p34) | |
395 | // if more than P digits, round to nearest to P digits | |
396 | // round R256 to p34 digits | |
397 | x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 | |
398 | if (ind <= 38) { | |
399 | P128.w[1] = R256.w[1]; | |
400 | P128.w[0] = R256.w[0]; | |
b2a00c89 L |
401 | round128_19_38 (ind, x0, P128, &R128, &incr_exp, |
402 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
403 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
404 | } else if (ind <= 57) { |
405 | P192.w[2] = R256.w[2]; | |
406 | P192.w[1] = R256.w[1]; | |
407 | P192.w[0] = R256.w[0]; | |
b2a00c89 L |
408 | round192_39_57 (ind, x0, P192, &R192, &incr_exp, |
409 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
410 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
411 | R128.w[1] = R192.w[1]; |
412 | R128.w[0] = R192.w[0]; | |
413 | } else { // if (ind <= 68) | |
b2a00c89 L |
414 | round256_58_76 (ind, x0, R256, &R256, &incr_exp, |
415 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
416 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
417 | R128.w[1] = R256.w[1]; |
418 | R128.w[0] = R256.w[0]; | |
419 | } | |
420 | // the rounded result has p34 = 34 digits | |
421 | e4 = e4 + x0 + incr_exp; | |
422 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
423 | if (e4 < expmin) { | |
424 | is_tiny = 1; // for other rounding modes apply correction | |
425 | } | |
426 | } else { | |
427 | // for RM, RP, RZ, RA apply correction in order to determine tininess | |
428 | // but do not save the result; apply the correction to | |
429 | // (-1)^p_sign * significand * 10^0 | |
430 | P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; | |
431 | P128.w[0] = R128.w[0]; | |
432 | rounding_correction (rnd_mode, | |
b2a00c89 L |
433 | is_inexact_lt_midpoint, |
434 | is_inexact_gt_midpoint, is_midpoint_lt_even, | |
435 | is_midpoint_gt_even, 0, &P128, ptrfpsf); | |
200359e8 L |
436 | scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 |
437 | // the number of digits in the significand is p34 = 34 | |
438 | if (e4 + scale < expmin) { | |
439 | is_tiny = 1; | |
440 | } | |
441 | } | |
442 | ind = p34; // the number of decimal digits in the signifcand of res | |
443 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN | |
444 | res.w[0] = R128.w[0]; | |
445 | // Note: res is correct only if expmin <= e4 <= expmax | |
446 | // set the inexact flag after rounding with bounded exponent, if any | |
447 | } | |
448 | // at this point we have the result rounded with unbounded exponent in | |
449 | // res and we know its tininess: | |
450 | // res = (-1)^p_sign * significand * 10^e4, | |
451 | // where q (significand) = ind <= p34 | |
452 | // Note: res is correct only if expmin <= e4 <= expmax | |
453 | ||
454 | // check for overflow if RN | |
455 | if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { | |
456 | res.w[1] = p_sign | 0x7800000000000000ull; | |
457 | res.w[0] = 0x0000000000000000ull; | |
458 | *ptrres = res; | |
459 | *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
460 | return; // BID_RETURN (res) | |
461 | } // else not overflow or not RN, so continue | |
462 | ||
463 | // if (e4 >= expmin) we have the result rounded with bounded exponent | |
464 | if (e4 < expmin) { | |
465 | x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res | |
466 | // where the result rounded [at most] once is | |
467 | // (-1)^p_sign * significand_res * 10^e4 | |
468 | ||
469 | // avoid double rounding error | |
470 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
471 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
472 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
473 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
474 | is_inexact_lt_midpoint = 0; | |
475 | is_inexact_gt_midpoint = 0; | |
476 | is_midpoint_lt_even = 0; | |
477 | is_midpoint_gt_even = 0; | |
478 | ||
479 | if (x0 > ind) { | |
480 | // nothing is left of res when moving the decimal point left x0 digits | |
481 | is_inexact_lt_midpoint = 1; | |
482 | res.w[1] = p_sign | 0x0000000000000000ull; | |
483 | res.w[0] = 0x0000000000000000ull; | |
484 | e4 = expmin; | |
485 | } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 | |
486 | // this is <, =, or > 1/2 ulp | |
487 | // compare the ind-digit value in the significand of res with | |
488 | // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is | |
489 | // less than, equal to, or greater than 1/2 ulp (significand of res) | |
490 | R128.w[1] = res.w[1] & MASK_COEFF; | |
491 | R128.w[0] = res.w[0]; | |
492 | if (ind <= 19) { | |
b2a00c89 | 493 | if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp |
200359e8 L |
494 | lt_half_ulp = 1; |
495 | is_inexact_lt_midpoint = 1; | |
b2a00c89 | 496 | } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp |
200359e8 L |
497 | eq_half_ulp = 1; |
498 | is_midpoint_gt_even = 1; | |
499 | } else { // > 1/2 ulp | |
500 | // gt_half_ulp = 1; | |
501 | is_inexact_gt_midpoint = 1; | |
502 | } | |
503 | } else { // if (ind <= 38) { | |
b2a00c89 L |
504 | if (R128.w[1] < midpoint128[ind - 20].w[1] || |
505 | (R128.w[1] == midpoint128[ind - 20].w[1] && | |
506 | R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp | |
200359e8 L |
507 | lt_half_ulp = 1; |
508 | is_inexact_lt_midpoint = 1; | |
b2a00c89 L |
509 | } else if (R128.w[1] == midpoint128[ind - 20].w[1] && |
510 | R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp | |
200359e8 L |
511 | eq_half_ulp = 1; |
512 | is_midpoint_gt_even = 1; | |
513 | } else { // > 1/2 ulp | |
514 | // gt_half_ulp = 1; | |
515 | is_inexact_gt_midpoint = 1; | |
516 | } | |
517 | } | |
518 | if (lt_half_ulp || eq_half_ulp) { | |
519 | // res = +0.0 * 10^expmin | |
520 | res.w[1] = 0x0000000000000000ull; | |
521 | res.w[0] = 0x0000000000000000ull; | |
522 | } else { // if (gt_half_ulp) | |
523 | // res = +1 * 10^expmin | |
524 | res.w[1] = 0x0000000000000000ull; | |
525 | res.w[0] = 0x0000000000000001ull; | |
526 | } | |
527 | res.w[1] = p_sign | res.w[1]; | |
528 | e4 = expmin; | |
529 | } else { // if (1 <= x0 <= ind - 1 <= 33) | |
530 | // round the ind-digit result to ind - x0 digits | |
531 | ||
532 | if (ind <= 18) { // 2 <= ind <= 18 | |
b2a00c89 L |
533 | round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, |
534 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
535 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
536 | res.w[1] = 0x0; |
537 | res.w[0] = R64; | |
538 | } else if (ind <= 38) { | |
539 | P128.w[1] = res.w[1] & MASK_COEFF; | |
540 | P128.w[0] = res.w[0]; | |
b2a00c89 L |
541 | round128_19_38 (ind, x0, P128, &res, &incr_exp, |
542 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
543 | &is_inexact_lt_midpoint, | |
544 | &is_inexact_gt_midpoint); | |
200359e8 L |
545 | } |
546 | e4 = e4 + x0; // expmin | |
547 | // we want the exponent to be expmin, so if incr_exp = 1 then | |
548 | // multiply the rounded result by 10 - it will still fit in 113 bits | |
549 | if (incr_exp) { | |
550 | // 64 x 128 -> 128 | |
551 | P128.w[1] = res.w[1] & MASK_COEFF; | |
552 | P128.w[0] = res.w[0]; | |
b2a00c89 | 553 | __mul_64x128_to_128 (res, ten2k64[1], P128); |
200359e8 L |
554 | } |
555 | res.w[1] = | |
556 | p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); | |
557 | // avoid a double rounding error | |
558 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
559 | is_midpoint_lt_even) { // double rounding error upward | |
560 | // res = res - 1 | |
561 | res.w[0]--; | |
562 | if (res.w[0] == 0xffffffffffffffffull) | |
563 | res.w[1]--; | |
564 | // Note: a double rounding error upward is not possible; for this | |
565 | // the result after the first rounding would have to be 99...95 | |
566 | // (35 digits in all), possibly followed by a number of zeros; this | |
567 | // is not possible in Cases (2)-(6) or (15)-(17) which may get here | |
568 | is_midpoint_lt_even = 0; | |
569 | is_inexact_lt_midpoint = 1; | |
570 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
571 | is_midpoint_gt_even) { // double rounding error downward | |
572 | // res = res + 1 | |
573 | res.w[0]++; | |
574 | if (res.w[0] == 0) | |
575 | res.w[1]++; | |
576 | is_midpoint_gt_even = 0; | |
577 | is_inexact_gt_midpoint = 1; | |
578 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 | 579 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
200359e8 L |
580 | // if this second rounding was exact the result may still be |
581 | // inexact because of the first rounding | |
582 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
583 | is_inexact_gt_midpoint = 1; | |
584 | } | |
585 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
586 | is_inexact_lt_midpoint = 1; | |
587 | } | |
588 | } else if (is_midpoint_gt_even && | |
b2a00c89 | 589 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
200359e8 L |
590 | // pulled up to a midpoint |
591 | is_inexact_lt_midpoint = 1; | |
592 | is_inexact_gt_midpoint = 0; | |
593 | is_midpoint_lt_even = 0; | |
594 | is_midpoint_gt_even = 0; | |
595 | } else if (is_midpoint_lt_even && | |
b2a00c89 | 596 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
200359e8 L |
597 | // pulled down to a midpoint |
598 | is_inexact_lt_midpoint = 0; | |
599 | is_inexact_gt_midpoint = 1; | |
600 | is_midpoint_lt_even = 0; | |
601 | is_midpoint_gt_even = 0; | |
602 | } else { | |
603 | ; | |
604 | } | |
605 | } | |
606 | } | |
607 | // res contains the correct result | |
608 | // apply correction if not rounding to nearest | |
609 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
610 | rounding_correction (rnd_mode, | |
b2a00c89 L |
611 | is_inexact_lt_midpoint, is_inexact_gt_midpoint, |
612 | is_midpoint_lt_even, is_midpoint_gt_even, | |
613 | e4, &res, ptrfpsf); | |
200359e8 L |
614 | } |
615 | if (is_midpoint_lt_even || is_midpoint_gt_even || | |
616 | is_inexact_lt_midpoint || is_inexact_gt_midpoint) { | |
617 | // set the inexact flag | |
618 | *ptrfpsf |= INEXACT_EXCEPTION; | |
619 | if (is_tiny) | |
620 | *ptrfpsf |= UNDERFLOW_EXCEPTION; | |
621 | } | |
622 | ||
623 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
624 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
625 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
626 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
627 | *ptrres = res; | |
628 | return; | |
629 | } | |
630 | ||
631 | ||
632 | #if DECIMAL_CALL_BY_REFERENCE | |
b2a00c89 L |
633 | static void |
634 | bid128_ext_fma (int *ptr_is_midpoint_lt_even, | |
635 | int *ptr_is_midpoint_gt_even, | |
636 | int *ptr_is_inexact_lt_midpoint, | |
637 | int *ptr_is_inexact_gt_midpoint, UINT128 * pres, | |
638 | UINT128 * px, UINT128 * py, | |
639 | UINT128 * | |
640 | pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
641 | _EXC_INFO_PARAM) { | |
200359e8 L |
642 | UINT128 x = *px, y = *py, z = *pz; |
643 | #if !DECIMAL_GLOBAL_ROUNDING | |
644 | unsigned int rnd_mode = *prnd_mode; | |
645 | #endif | |
646 | #else | |
b2a00c89 L |
647 | static UINT128 |
648 | bid128_ext_fma (int *ptr_is_midpoint_lt_even, | |
649 | int *ptr_is_midpoint_gt_even, | |
650 | int *ptr_is_inexact_lt_midpoint, | |
651 | int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, | |
652 | UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM | |
653 | _EXC_MASKS_PARAM _EXC_INFO_PARAM) { | |
200359e8 L |
654 | #endif |
655 | ||
b2a00c89 L |
656 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; |
657 | UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; | |
200359e8 L |
658 | UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; |
659 | int true_p_exp; | |
660 | UINT128 C1, C2, C3; | |
661 | UINT256 C4; | |
662 | int q1 = 0, q2 = 0, q3 = 0, q4; | |
663 | int e1, e2, e3, e4; | |
664 | int scale, ind, delta, x0; | |
665 | int p34 = P34; // used to modify the limit on the number of digits | |
666 | BID_UI64DOUBLE tmp; | |
667 | int x_nr_bits, y_nr_bits, z_nr_bits; | |
668 | unsigned int save_fpsf; | |
669 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; | |
670 | int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
671 | int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; | |
672 | int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; | |
673 | int incr_exp = 0; | |
674 | int lsb; | |
675 | int lt_half_ulp = 0; | |
676 | int eq_half_ulp = 0; | |
677 | int gt_half_ulp = 0; | |
678 | int is_tiny = 0; | |
679 | UINT64 R64, tmp64; | |
680 | UINT128 P128, R128; | |
681 | UINT192 P192, R192; | |
682 | UINT256 R256; | |
683 | ||
684 | // the following are based on the table of special cases for fma; the NaN | |
b2a00c89 | 685 | // behavior is similar to that of the IA-64 Architecture fma |
200359e8 L |
686 | |
687 | // identify cases where at least one operand is NaN | |
688 | ||
b2a00c89 L |
689 | BID_SWAP128 (x); |
690 | BID_SWAP128 (y); | |
691 | BID_SWAP128 (z); | |
200359e8 L |
692 | if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN |
693 | // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) | |
694 | // check first for non-canonical NaN payload | |
695 | if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
696 | (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
697 | (y.w[0] > 0x38c15b09ffffffffull))) { | |
698 | y.w[1] = y.w[1] & 0xffffc00000000000ull; | |
699 | y.w[0] = 0x0ull; | |
700 | } | |
701 | if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN | |
702 | // set invalid flag | |
703 | *pfpsf |= INVALID_EXCEPTION; | |
704 | // return quiet (y) | |
705 | res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
706 | res.w[0] = y.w[0]; | |
707 | } else { // y is QNaN | |
708 | // return y | |
709 | res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
710 | res.w[0] = y.w[0]; | |
711 | // if z = SNaN or x = SNaN signal invalid exception | |
712 | if ((z.w[1] & MASK_SNAN) == MASK_SNAN || | |
713 | (x.w[1] & MASK_SNAN) == MASK_SNAN) { | |
714 | // set invalid flag | |
715 | *pfpsf |= INVALID_EXCEPTION; | |
716 | } | |
717 | } | |
b2a00c89 L |
718 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
719 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
720 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
721 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
722 | BID_SWAP128 (res); | |
200359e8 L |
723 | BID_RETURN (res) |
724 | } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN | |
725 | // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) | |
726 | // check first for non-canonical NaN payload | |
727 | if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
728 | (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
729 | (z.w[0] > 0x38c15b09ffffffffull))) { | |
730 | z.w[1] = z.w[1] & 0xffffc00000000000ull; | |
731 | z.w[0] = 0x0ull; | |
732 | } | |
733 | if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN | |
734 | // set invalid flag | |
735 | *pfpsf |= INVALID_EXCEPTION; | |
736 | // return quiet (z) | |
737 | res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
738 | res.w[0] = z.w[0]; | |
739 | } else { // z is QNaN | |
740 | // return z | |
741 | res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
742 | res.w[0] = z.w[0]; | |
743 | // if x = SNaN signal invalid exception | |
744 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { | |
745 | // set invalid flag | |
746 | *pfpsf |= INVALID_EXCEPTION; | |
747 | } | |
748 | } | |
b2a00c89 L |
749 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
750 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
751 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
752 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
753 | BID_SWAP128 (res); | |
200359e8 L |
754 | BID_RETURN (res) |
755 | } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
756 | // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) | |
757 | // check first for non-canonical NaN payload | |
758 | if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
759 | (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
760 | (x.w[0] > 0x38c15b09ffffffffull))) { | |
761 | x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
762 | x.w[0] = 0x0ull; | |
763 | } | |
764 | if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
765 | // set invalid flag | |
766 | *pfpsf |= INVALID_EXCEPTION; | |
767 | // return quiet (x) | |
768 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
769 | res.w[0] = x.w[0]; | |
770 | } else { // x is QNaN | |
771 | // return x | |
772 | res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
773 | res.w[0] = x.w[0]; | |
774 | } | |
b2a00c89 L |
775 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
776 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
777 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
778 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
779 | BID_SWAP128 (res); | |
200359e8 L |
780 | BID_RETURN (res) |
781 | } | |
782 | // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check | |
783 | // for non-canonical values | |
784 | ||
785 | x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
786 | C1.w[1] = x.w[1] & MASK_COEFF; | |
787 | C1.w[0] = x.w[0]; | |
788 | if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf | |
789 | // if x is not infinity check for non-canonical values - treated as zero | |
790 | if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
791 | // non-canonical | |
792 | x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
793 | C1.w[1] = 0; // significand high | |
794 | C1.w[0] = 0; // significand low | |
795 | } else { // G0_G1 != 11 | |
796 | x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
797 | if (C1.w[1] > 0x0001ed09bead87c0ull || | |
b2a00c89 L |
798 | (C1.w[1] == 0x0001ed09bead87c0ull && |
799 | C1.w[0] > 0x378d8e63ffffffffull)) { | |
200359e8 L |
800 | // x is non-canonical if coefficient is larger than 10^34 -1 |
801 | C1.w[1] = 0; | |
802 | C1.w[0] = 0; | |
803 | } else { // canonical | |
804 | ; | |
805 | } | |
806 | } | |
807 | } | |
808 | y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
809 | C2.w[1] = y.w[1] & MASK_COEFF; | |
810 | C2.w[0] = y.w[0]; | |
811 | if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf | |
812 | // if y is not infinity check for non-canonical values - treated as zero | |
813 | if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
814 | // non-canonical | |
815 | y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
816 | C2.w[1] = 0; // significand high | |
817 | C2.w[0] = 0; // significand low | |
818 | } else { // G0_G1 != 11 | |
819 | y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
820 | if (C2.w[1] > 0x0001ed09bead87c0ull || | |
b2a00c89 L |
821 | (C2.w[1] == 0x0001ed09bead87c0ull && |
822 | C2.w[0] > 0x378d8e63ffffffffull)) { | |
200359e8 L |
823 | // y is non-canonical if coefficient is larger than 10^34 -1 |
824 | C2.w[1] = 0; | |
825 | C2.w[0] = 0; | |
826 | } else { // canonical | |
827 | ; | |
828 | } | |
829 | } | |
830 | } | |
831 | z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
832 | C3.w[1] = z.w[1] & MASK_COEFF; | |
833 | C3.w[0] = z.w[0]; | |
834 | if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf | |
835 | // if z is not infinity check for non-canonical values - treated as zero | |
836 | if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
837 | // non-canonical | |
838 | z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
839 | C3.w[1] = 0; // significand high | |
840 | C3.w[0] = 0; // significand low | |
841 | } else { // G0_G1 != 11 | |
842 | z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
843 | if (C3.w[1] > 0x0001ed09bead87c0ull || | |
b2a00c89 L |
844 | (C3.w[1] == 0x0001ed09bead87c0ull && |
845 | C3.w[0] > 0x378d8e63ffffffffull)) { | |
200359e8 L |
846 | // z is non-canonical if coefficient is larger than 10^34 -1 |
847 | C3.w[1] = 0; | |
848 | C3.w[0] = 0; | |
849 | } else { // canonical | |
850 | ; | |
851 | } | |
852 | } | |
853 | } | |
854 | ||
855 | p_sign = x_sign ^ y_sign; // sign of the product | |
856 | ||
857 | // identify cases where at least one operand is infinity | |
858 | ||
859 | if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf | |
860 | if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf | |
861 | if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
862 | if (p_sign == z_sign) { | |
863 | res.w[1] = z_sign | MASK_INF; | |
864 | res.w[0] = 0x0; | |
865 | } else { | |
866 | // return QNaN Indefinite | |
867 | res.w[1] = 0x7c00000000000000ull; | |
868 | res.w[0] = 0x0000000000000000ull; | |
869 | // set invalid flag | |
870 | *pfpsf |= INVALID_EXCEPTION; | |
871 | } | |
872 | } else { // z = 0 or z = f | |
873 | res.w[1] = p_sign | MASK_INF; | |
874 | res.w[0] = 0x0; | |
875 | } | |
876 | } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f | |
877 | if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
878 | if (p_sign == z_sign) { | |
879 | res.w[1] = z_sign | MASK_INF; | |
880 | res.w[0] = 0x0; | |
881 | } else { | |
882 | // return QNaN Indefinite | |
883 | res.w[1] = 0x7c00000000000000ull; | |
884 | res.w[0] = 0x0000000000000000ull; | |
885 | // set invalid flag | |
886 | *pfpsf |= INVALID_EXCEPTION; | |
887 | } | |
888 | } else { // z = 0 or z = f | |
889 | res.w[1] = p_sign | MASK_INF; | |
890 | res.w[0] = 0x0; | |
891 | } | |
892 | } else { // y = 0 | |
893 | // return QNaN Indefinite | |
894 | res.w[1] = 0x7c00000000000000ull; | |
895 | res.w[0] = 0x0000000000000000ull; | |
896 | // set invalid flag | |
897 | *pfpsf |= INVALID_EXCEPTION; | |
898 | } | |
b2a00c89 L |
899 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
900 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
901 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
902 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
903 | BID_SWAP128 (res); | |
200359e8 L |
904 | BID_RETURN (res) |
905 | } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf | |
906 | if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
907 | // x = f, necessarily | |
908 | if ((p_sign != z_sign) | |
909 | || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { | |
910 | // return QNaN Indefinite | |
911 | res.w[1] = 0x7c00000000000000ull; | |
912 | res.w[0] = 0x0000000000000000ull; | |
913 | // set invalid flag | |
914 | *pfpsf |= INVALID_EXCEPTION; | |
915 | } else { | |
b2a00c89 L |
916 | res.w[1] = z_sign | MASK_INF; |
917 | res.w[0] = 0x0; | |
200359e8 L |
918 | } |
919 | } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 | |
920 | // z = 0, f, inf | |
921 | // return QNaN Indefinite | |
922 | res.w[1] = 0x7c00000000000000ull; | |
923 | res.w[0] = 0x0000000000000000ull; | |
924 | // set invalid flag | |
925 | *pfpsf |= INVALID_EXCEPTION; | |
926 | } else { | |
927 | // x = f and z = 0, f, necessarily | |
928 | res.w[1] = p_sign | MASK_INF; | |
929 | res.w[0] = 0x0; | |
930 | } | |
b2a00c89 L |
931 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
932 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
933 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
934 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
935 | BID_SWAP128 (res); | |
200359e8 L |
936 | BID_RETURN (res) |
937 | } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
938 | // x = 0, f and y = 0, f, necessarily | |
939 | res.w[1] = z_sign | MASK_INF; | |
940 | res.w[0] = 0x0; | |
b2a00c89 L |
941 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
942 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
943 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
944 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
945 | BID_SWAP128 (res); | |
200359e8 L |
946 | BID_RETURN (res) |
947 | } | |
948 | ||
949 | true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; | |
950 | if (true_p_exp < -6176) | |
951 | p_exp = 0; // cannot be less than EXP_MIN | |
200359e8 L |
952 | else |
953 | p_exp = (UINT64) (true_p_exp + 6176) << 49; | |
954 | ||
b2a00c89 | 955 | if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 |
200359e8 L |
956 | // the result is 0 |
957 | if (p_exp < z_exp) | |
958 | res.w[1] = p_exp; // preferred exponent | |
959 | else | |
960 | res.w[1] = z_exp; // preferred exponent | |
961 | if (p_sign == z_sign) { | |
962 | res.w[1] |= z_sign; | |
963 | res.w[0] = 0x0; | |
964 | } else { // x * y and z have opposite signs | |
965 | if (rnd_mode == ROUNDING_DOWN) { | |
966 | // res = -0.0 | |
967 | res.w[1] |= MASK_SIGN; | |
968 | res.w[0] = 0x0; | |
969 | } else { | |
970 | // res = +0.0 | |
971 | // res.w[1] |= 0x0; | |
972 | res.w[0] = 0x0; | |
973 | } | |
974 | } | |
b2a00c89 L |
975 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
976 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
977 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
978 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
979 | BID_SWAP128 (res); | |
200359e8 L |
980 | BID_RETURN (res) |
981 | } | |
982 | // from this point on, we may need to know the number of decimal digits | |
983 | // in the significands of x, y, z when x, y, z != 0 | |
984 | ||
985 | if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) | |
986 | // q1 = nr. of decimal digits in x | |
987 | // determine first the nr. of bits in x | |
988 | if (C1.w[1] == 0) { | |
989 | if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
990 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
991 | if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
992 | tmp.d = (double) (C1.w[0] >> 32); // exact conversion | |
993 | x_nr_bits = | |
994 | 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
995 | } else { // x < 2^32 | |
996 | tmp.d = (double) (C1.w[0]); // exact conversion | |
997 | x_nr_bits = | |
998 | 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
999 | } | |
1000 | } else { // if x < 2^53 | |
1001 | tmp.d = (double) C1.w[0]; // exact conversion | |
1002 | x_nr_bits = | |
1003 | 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1004 | } | |
1005 | } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1006 | tmp.d = (double) C1.w[1]; // exact conversion | |
1007 | x_nr_bits = | |
1008 | 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1009 | } | |
b2a00c89 | 1010 | q1 = nr_digits[x_nr_bits - 1].digits; |
200359e8 | 1011 | if (q1 == 0) { |
b2a00c89 L |
1012 | q1 = nr_digits[x_nr_bits - 1].digits1; |
1013 | if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1014 | (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1015 | C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
200359e8 L |
1016 | q1++; |
1017 | } | |
1018 | } | |
1019 | ||
1020 | if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) | |
1021 | if (C2.w[1] == 0) { | |
1022 | if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 | |
1023 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1024 | if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 | |
1025 | tmp.d = (double) (C2.w[0] >> 32); // exact conversion | |
1026 | y_nr_bits = | |
1027 | 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1028 | } else { // y < 2^32 | |
1029 | tmp.d = (double) C2.w[0]; // exact conversion | |
1030 | y_nr_bits = | |
1031 | ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1032 | } | |
1033 | } else { // if y < 2^53 | |
1034 | tmp.d = (double) C2.w[0]; // exact conversion | |
1035 | y_nr_bits = | |
1036 | ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1037 | } | |
1038 | } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) | |
1039 | tmp.d = (double) C2.w[1]; // exact conversion | |
1040 | y_nr_bits = | |
1041 | 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1042 | } | |
1043 | ||
b2a00c89 | 1044 | q2 = nr_digits[y_nr_bits].digits; |
200359e8 | 1045 | if (q2 == 0) { |
b2a00c89 L |
1046 | q2 = nr_digits[y_nr_bits].digits1; |
1047 | if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || | |
1048 | (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && | |
1049 | C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) | |
200359e8 L |
1050 | q2++; |
1051 | } | |
1052 | } | |
1053 | ||
1054 | if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) | |
1055 | if (C3.w[1] == 0) { | |
1056 | if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 | |
1057 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1058 | if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 | |
1059 | tmp.d = (double) (C3.w[0] >> 32); // exact conversion | |
1060 | z_nr_bits = | |
1061 | 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1062 | } else { // z < 2^32 | |
1063 | tmp.d = (double) C3.w[0]; // exact conversion | |
1064 | z_nr_bits = | |
1065 | ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1066 | } | |
1067 | } else { // if z < 2^53 | |
1068 | tmp.d = (double) C3.w[0]; // exact conversion | |
1069 | z_nr_bits = | |
1070 | ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1071 | } | |
1072 | } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1]) | |
1073 | tmp.d = (double) C3.w[1]; // exact conversion | |
1074 | z_nr_bits = | |
1075 | 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1076 | } | |
1077 | ||
b2a00c89 | 1078 | q3 = nr_digits[z_nr_bits].digits; |
200359e8 | 1079 | if (q3 == 0) { |
b2a00c89 L |
1080 | q3 = nr_digits[z_nr_bits].digits1; |
1081 | if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || | |
1082 | (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && | |
1083 | C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) | |
200359e8 L |
1084 | q3++; |
1085 | } | |
1086 | } | |
1087 | ||
b2a00c89 | 1088 | if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || |
200359e8 | 1089 | (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { |
b2a00c89 | 1090 | // x = 0 or y = 0 |
200359e8 L |
1091 | // z = f, necessarily; for 0 + z return z, with the preferred exponent |
1092 | // the result is z, but need to get the preferred exponent | |
1093 | if (z_exp <= p_exp) { // the preferred exponent is z_exp | |
1094 | res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; | |
1095 | res.w[0] = C3.w[0]; | |
1096 | } else { // if (p_exp < z_exp) the preferred exponent is p_exp | |
1097 | // return (C3 * 10^scale) * 10^(z_exp - scale) | |
1098 | // where scale = min (p34-q3, (z_exp-p_exp) >> 49) | |
1099 | scale = p34 - q3; | |
1100 | ind = (z_exp - p_exp) >> 49; | |
1101 | if (ind < scale) | |
1102 | scale = ind; | |
1103 | if (scale == 0) { | |
1104 | res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant | |
1105 | res.w[0] = z.w[0]; | |
1106 | } else if (q3 <= 19) { // z fits in 64 bits | |
1107 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1108 | // 64 x 64 C3.w[0] * ten2k64[scale] |
1109 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 1110 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1111 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
1112 | __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1113 | } |
1114 | } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1115 | // 64 x 128 ten2k64[scale] * C3 |
1116 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
1117 | } |
1118 | // subtract scale from the exponent | |
1119 | z_exp = z_exp - ((UINT64) scale << 49); | |
1120 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1121 | } | |
b2a00c89 L |
1122 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1123 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1124 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1125 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1126 | BID_SWAP128 (res); | |
200359e8 L |
1127 | BID_RETURN (res) |
1128 | } else { | |
1129 | ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f | |
1130 | } | |
1131 | ||
1132 | e1 = (x_exp >> 49) - 6176; // unbiased exponent of x | |
1133 | e2 = (y_exp >> 49) - 6176; // unbiased exponent of y | |
1134 | e3 = (z_exp >> 49) - 6176; // unbiased exponent of z | |
1135 | e4 = e1 + e2; // unbiased exponent of the exact x * y | |
1136 | ||
1137 | // calculate C1 * C2 and its number of decimal digits, q4 | |
1138 | ||
1139 | // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits | |
1140 | // where 2 <= q1 + q2 <= 68 | |
1141 | // calculate C4 = C1 * C2 and determine q | |
1142 | C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; | |
1143 | if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits | |
1144 | C4.w[0] = C1.w[0] * C2.w[0]; | |
1145 | // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
b2a00c89 | 1146 | if (C4.w[0] < ten2k64[q1 + q2 - 1]) |
200359e8 L |
1147 | q4 = q1 + q2 - 1; // q4 in [1, 18] |
1148 | else | |
1149 | q4 = q1 + q2; // q4 in [2, 19] | |
1150 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1151 | } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits | |
1152 | // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits | |
1153 | __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); | |
1154 | // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 | |
b2a00c89 | 1155 | if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 |
200359e8 L |
1156 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; |
1157 | q4 = 19; // 19 = q1 + q2 - 1 | |
1158 | } else { | |
1159 | // if (C4.w[1] == 0) | |
1160 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1161 | // else | |
1162 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1163 | q4 = 20; // 20 = q1 + q2 | |
1164 | } | |
1165 | } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 | |
1166 | // C4 = C1 * C2 fits in 64 or 128 bits | |
1167 | // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) | |
1168 | // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits | |
1169 | if (q1 <= 19) { | |
1170 | __mul_128x64_to_128 (C4, C1.w[0], C2); | |
1171 | } else { // q2 <= 19 | |
1172 | __mul_128x64_to_128 (C4, C2.w[0], C1); | |
1173 | } | |
1174 | // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
b2a00c89 L |
1175 | if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || |
1176 | (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && | |
1177 | C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { | |
200359e8 L |
1178 | // if (C4.w[1] == 0) // q4 = 20, necessarily |
1179 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1180 | // else | |
1181 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1182 | q4 = q1 + q2 - 1; // q4 in [20, 37] | |
1183 | } else { | |
1184 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1185 | q4 = q1 + q2; // q4 in [21, 38] | |
1186 | } | |
1187 | } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits | |
1188 | // both C1 and C2 fit in 128 bits (actually in 113 bits) | |
1189 | // may replace this by 128x128_to192 | |
1190 | __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 | |
1191 | // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 | |
b2a00c89 L |
1192 | if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || |
1193 | (C4.w[1] == ten2k128[18].w[1] | |
1194 | && C4.w[0] < ten2k128[18].w[0]))) { | |
200359e8 L |
1195 | // 18 = 38 - 20 = q1+q2-1 - 20 |
1196 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1197 | q4 = 38; // 38 = q1 + q2 - 1 | |
1198 | } else { | |
1199 | // if (C4.w[2] == 0) | |
1200 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1201 | // else | |
1202 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1203 | q4 = 39; // 39 = q1 + q2 | |
1204 | } | |
1205 | } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 | |
1206 | // C4 = C1 * C2 fits in 128 or 192 bits | |
1207 | // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) | |
1208 | // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one | |
1209 | // may fit in 64 bits | |
1210 | if (C1.w[1] == 0) { // C1 fits in 64 bits | |
1211 | // __mul_64x128_full (REShi64, RESlo128, A64, B128) | |
1212 | __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); | |
1213 | } else if (C2.w[1] == 0) { // C2 fits in 64 bits | |
1214 | // __mul_64x128_full (REShi64, RESlo128, A64, B128) | |
1215 | __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); | |
1216 | } else { // both C1 and C2 require 128 bits | |
1217 | // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); | |
1218 | __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 | |
1219 | } | |
1220 | // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
b2a00c89 L |
1221 | if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || |
1222 | (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && | |
1223 | (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || | |
1224 | (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && | |
1225 | C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { | |
200359e8 L |
1226 | // if (C4.w[2] == 0) // q4 = 39, necessarily |
1227 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1228 | // else | |
1229 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1230 | q4 = q1 + q2 - 1; // q4 in [39, 56] | |
1231 | } else { | |
1232 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1233 | q4 = q1 + q2; // q4 in [40, 57] | |
1234 | } | |
1235 | } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits | |
1236 | // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one | |
1237 | // may fit in 64 bits | |
1238 | if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits | |
1239 | __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 | |
1240 | } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits | |
1241 | __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 | |
1242 | } else { // C1 * C2 will fit in 192 bits or in 256 bits | |
1243 | __mul_128x128_to_256 (C4, C1, C2); | |
1244 | } | |
1245 | // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 | |
b2a00c89 L |
1246 | if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || |
1247 | (C4.w[2] == ten2k256[18].w[2] | |
1248 | && (C4.w[1] < ten2k256[18].w[1] | |
1249 | || (C4.w[1] == ten2k256[18].w[1] | |
1250 | && C4.w[0] < ten2k256[18].w[0]))))) { | |
1251 | // 18 = 57 - 39 = q1+q2-1 - 39 | |
200359e8 L |
1252 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; |
1253 | q4 = 57; // 57 = q1 + q2 - 1 | |
1254 | } else { | |
1255 | // if (C4.w[3] == 0) | |
1256 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1257 | // else | |
1258 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1259 | q4 = 58; // 58 = q1 + q2 | |
1260 | } | |
1261 | } else { // if 59 <= q1 + q2 <= 68 | |
1262 | // C4 = C1 * C2 fits in 192 or 256 bits | |
1263 | // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) | |
1264 | // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in | |
1265 | // 64 bits | |
1266 | // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); | |
1267 | __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 | |
1268 | // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
b2a00c89 L |
1269 | if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || |
1270 | (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && | |
1271 | (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || | |
1272 | (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && | |
1273 | (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || | |
1274 | (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && | |
1275 | C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { | |
200359e8 L |
1276 | // if (C4.w[3] == 0) // q4 = 58, necessarily |
1277 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1278 | // else | |
1279 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1280 | q4 = q1 + q2 - 1; // q4 in [58, 67] | |
1281 | } else { | |
1282 | // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1283 | q4 = q1 + q2; // q4 in [59, 68] | |
1284 | } | |
1285 | } | |
1286 | ||
1287 | if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 | |
1288 | save_fpsf = *pfpsf; // sticky bits - caller value must be preserved | |
1289 | *pfpsf = 0; | |
1290 | ||
1291 | if (q4 > p34) { | |
1292 | ||
1293 | // truncate C4 to p34 digits into res | |
1294 | // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 | |
1295 | x0 = q4 - p34; | |
1296 | if (q4 <= 38) { | |
1297 | P128.w[1] = C4.w[1]; | |
1298 | P128.w[0] = C4.w[0]; | |
b2a00c89 L |
1299 | round128_19_38 (q4, x0, P128, &res, &incr_exp, |
1300 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1301 | &is_inexact_lt_midpoint, | |
1302 | &is_inexact_gt_midpoint); | |
200359e8 L |
1303 | } else if (q4 <= 57) { // 35 <= q4 <= 57 |
1304 | P192.w[2] = C4.w[2]; | |
1305 | P192.w[1] = C4.w[1]; | |
1306 | P192.w[0] = C4.w[0]; | |
b2a00c89 L |
1307 | round192_39_57 (q4, x0, P192, &R192, &incr_exp, |
1308 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1309 | &is_inexact_lt_midpoint, | |
1310 | &is_inexact_gt_midpoint); | |
200359e8 L |
1311 | res.w[0] = R192.w[0]; |
1312 | res.w[1] = R192.w[1]; | |
1313 | } else { // if (q4 <= 68) | |
b2a00c89 L |
1314 | round256_58_76 (q4, x0, C4, &R256, &incr_exp, |
1315 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1316 | &is_inexact_lt_midpoint, | |
1317 | &is_inexact_gt_midpoint); | |
200359e8 L |
1318 | res.w[0] = R256.w[0]; |
1319 | res.w[1] = R256.w[1]; | |
1320 | } | |
1321 | e4 = e4 + x0; | |
1322 | if (incr_exp) { | |
1323 | e4 = e4 + 1; | |
1324 | } | |
1325 | q4 = p34; | |
1326 | // res is now the coefficient of the result rounded to the destination | |
1327 | // precision, with unbounded exponent; the exponent is e4; q4=digits(res) | |
1328 | } else { // if (q4 <= p34) | |
1329 | // C4 * 10^e4 is the result rounded to the destination precision, with | |
1330 | // unbounded exponent (which is exact) | |
1331 | ||
1332 | if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { | |
1333 | // e4 is too large, but can be brought within range by scaling up C4 | |
1334 | scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 | |
1335 | // res = (C4 * 10^scale) * 10^expmax | |
1336 | if (q4 <= 19) { // C4 fits in 64 bits | |
1337 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1338 | // 64 x 64 C4.w[0] * ten2k64[scale] |
1339 | __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); | |
200359e8 | 1340 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1341 | // 64 x 128 C4.w[0] * ten2k128[scale - 20] |
1342 | __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1343 | } |
1344 | } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1345 | // 64 x 128 ten2k64[scale] * CC43 |
1346 | __mul_128x64_to_128 (res, ten2k64[scale], C4); | |
200359e8 L |
1347 | } |
1348 | e4 = e4 - scale; // expmax | |
1349 | q4 = q4 + scale; | |
1350 | } else { | |
1351 | res.w[1] = C4.w[1]; | |
1352 | res.w[0] = C4.w[0]; | |
1353 | } | |
1354 | // res is the coefficient of the result rounded to the destination | |
1355 | // precision, with unbounded exponent (it has q4 digits); the exponent | |
1356 | // is e4 (exact result) | |
1357 | } | |
1358 | ||
1359 | // check for overflow | |
1360 | if (q4 + e4 > p34 + expmax) { | |
1361 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
1362 | res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf | |
1363 | res.w[0] = 0x0000000000000000ull; | |
1364 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
1365 | } else { | |
1366 | res.w[1] = p_sign | res.w[1]; | |
1367 | rounding_correction (rnd_mode, | |
b2a00c89 L |
1368 | is_inexact_lt_midpoint, |
1369 | is_inexact_gt_midpoint, | |
1370 | is_midpoint_lt_even, is_midpoint_gt_even, | |
1371 | e4, &res, pfpsf); | |
200359e8 L |
1372 | } |
1373 | *pfpsf |= save_fpsf; | |
b2a00c89 L |
1374 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1375 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1376 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1377 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1378 | BID_SWAP128 (res); | |
200359e8 L |
1379 | BID_RETURN (res) |
1380 | } | |
1381 | // check for underflow | |
1382 | if (q4 + e4 < expmin + P34) { | |
1383 | is_tiny = 1; // the result is tiny | |
1384 | if (e4 < expmin) { | |
1385 | // if e4 < expmin, we must truncate more of res | |
1386 | x0 = expmin - e4; // x0 >= 1 | |
1387 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
1388 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
1389 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
1390 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
1391 | is_inexact_lt_midpoint = 0; | |
1392 | is_inexact_gt_midpoint = 0; | |
1393 | is_midpoint_lt_even = 0; | |
1394 | is_midpoint_gt_even = 0; | |
1395 | // the number of decimal digits in res is q4 | |
1396 | if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits | |
1397 | if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 | |
b2a00c89 L |
1398 | round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, |
1399 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1400 | &is_inexact_lt_midpoint, | |
1401 | &is_inexact_gt_midpoint); | |
200359e8 L |
1402 | if (incr_exp) { |
1403 | // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 | |
b2a00c89 | 1404 | R64 = ten2k64[q4 - x0]; |
200359e8 L |
1405 | } |
1406 | // res.w[1] = 0; (from above) | |
1407 | res.w[0] = R64; | |
1408 | } else { // if (q4 <= 34) | |
1409 | // 19 <= q4 <= 38 | |
1410 | P128.w[1] = res.w[1]; | |
1411 | P128.w[0] = res.w[0]; | |
b2a00c89 L |
1412 | round128_19_38 (q4, x0, P128, &res, &incr_exp, |
1413 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1414 | &is_inexact_lt_midpoint, | |
1415 | &is_inexact_gt_midpoint); | |
200359e8 L |
1416 | if (incr_exp) { |
1417 | // increase coefficient by a factor of 10; this will be <= 10^33 | |
1418 | // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 | |
1419 | if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
b2a00c89 L |
1420 | // res.w[1] = 0; |
1421 | res.w[0] = ten2k64[q4 - x0]; | |
200359e8 | 1422 | } else { // 20 <= q4 - x0 <= 37 |
b2a00c89 L |
1423 | res.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
1424 | res.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
200359e8 L |
1425 | } |
1426 | } | |
1427 | } | |
1428 | e4 = e4 + x0; // expmin | |
1429 | } else if (x0 == q4) { | |
1430 | // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin | |
1431 | // determine relationship with 1/2 ulp | |
1432 | if (q4 <= 19) { | |
b2a00c89 | 1433 | if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp |
200359e8 L |
1434 | lt_half_ulp = 1; |
1435 | is_inexact_lt_midpoint = 1; | |
b2a00c89 | 1436 | } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp |
200359e8 L |
1437 | eq_half_ulp = 1; |
1438 | is_midpoint_gt_even = 1; | |
1439 | } else { // > 1/2 ulp | |
1440 | // gt_half_ulp = 1; | |
1441 | is_inexact_gt_midpoint = 1; | |
1442 | } | |
1443 | } else { // if (q4 <= 34) | |
b2a00c89 L |
1444 | if (res.w[1] < midpoint128[q4 - 20].w[1] || |
1445 | (res.w[1] == midpoint128[q4 - 20].w[1] && | |
1446 | res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp | |
200359e8 L |
1447 | lt_half_ulp = 1; |
1448 | is_inexact_lt_midpoint = 1; | |
b2a00c89 L |
1449 | } else if (res.w[1] == midpoint128[q4 - 20].w[1] && |
1450 | res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp | |
200359e8 L |
1451 | eq_half_ulp = 1; |
1452 | is_midpoint_gt_even = 1; | |
1453 | } else { // > 1/2 ulp | |
1454 | // gt_half_ulp = 1; | |
1455 | is_inexact_gt_midpoint = 1; | |
1456 | } | |
1457 | } | |
1458 | if (lt_half_ulp || eq_half_ulp) { | |
1459 | // res = +0.0 * 10^expmin | |
1460 | res.w[1] = 0x0000000000000000ull; | |
1461 | res.w[0] = 0x0000000000000000ull; | |
1462 | } else { // if (gt_half_ulp) | |
1463 | // res = +1 * 10^expmin | |
1464 | res.w[1] = 0x0000000000000000ull; | |
1465 | res.w[0] = 0x0000000000000001ull; | |
1466 | } | |
1467 | e4 = expmin; | |
1468 | } else { // if (x0 > q4) | |
1469 | // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin | |
1470 | res.w[1] = 0; | |
1471 | res.w[0] = 0; | |
1472 | e4 = expmin; | |
1473 | is_inexact_lt_midpoint = 1; | |
1474 | } | |
1475 | // avoid a double rounding error | |
1476 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
b2a00c89 | 1477 | is_midpoint_lt_even) { // double rounding error upward |
200359e8 L |
1478 | // res = res - 1 |
1479 | res.w[0]--; | |
1480 | if (res.w[0] == 0xffffffffffffffffull) | |
1481 | res.w[1]--; | |
1482 | // Note: a double rounding error upward is not possible; for this | |
1483 | // the result after the first rounding would have to be 99...95 | |
1484 | // (35 digits in all), possibly followed by a number of zeros; this | |
1485 | // not possible for f * f + 0 | |
1486 | is_midpoint_lt_even = 0; | |
1487 | is_inexact_lt_midpoint = 1; | |
1488 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
1489 | is_midpoint_gt_even) { // double rounding error downward | |
1490 | // res = res + 1 | |
1491 | res.w[0]++; | |
1492 | if (res.w[0] == 0) | |
1493 | res.w[1]++; | |
1494 | is_midpoint_gt_even = 0; | |
1495 | is_inexact_gt_midpoint = 1; | |
1496 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 | 1497 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
200359e8 L |
1498 | // if this second rounding was exact the result may still be |
1499 | // inexact because of the first rounding | |
1500 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
1501 | is_inexact_gt_midpoint = 1; | |
1502 | } | |
1503 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
1504 | is_inexact_lt_midpoint = 1; | |
1505 | } | |
1506 | } else if (is_midpoint_gt_even && | |
b2a00c89 | 1507 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
200359e8 L |
1508 | // pulled up to a midpoint |
1509 | is_inexact_lt_midpoint = 1; | |
1510 | is_inexact_gt_midpoint = 0; | |
1511 | is_midpoint_lt_even = 0; | |
1512 | is_midpoint_gt_even = 0; | |
1513 | } else if (is_midpoint_lt_even && | |
b2a00c89 | 1514 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
200359e8 L |
1515 | // pulled down to a midpoint |
1516 | is_inexact_lt_midpoint = 0; | |
1517 | is_inexact_gt_midpoint = 1; | |
1518 | is_midpoint_lt_even = 0; | |
1519 | is_midpoint_gt_even = 0; | |
1520 | } else { | |
1521 | ; | |
1522 | } | |
1523 | } else { // if e4 >= emin then q4 < P and the result is tiny and exact | |
1524 | if (e3 < e4) { | |
1525 | // if (e3 < e4) the preferred exponent is e3 | |
1526 | // return (C4 * 10^scale) * 10^(e4 - scale) | |
1527 | // where scale = min (p34-q4, (e4 - e3)) | |
1528 | scale = p34 - q4; | |
1529 | ind = e4 - e3; | |
1530 | if (ind < scale) | |
1531 | scale = ind; | |
1532 | if (scale == 0) { | |
1533 | ; // res and e4 are unchanged | |
1534 | } else if (q4 <= 19) { // C4 fits in 64 bits | |
1535 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1536 | // 64 x 64 res.w[0] * ten2k64[scale] |
1537 | __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); | |
200359e8 | 1538 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1539 | // 64 x 128 res.w[0] * ten2k128[scale - 20] |
1540 | __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1541 | } |
1542 | } else { // res fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1543 | // 64 x 128 ten2k64[scale] * C3 |
1544 | __mul_128x64_to_128 (res, ten2k64[scale], res); | |
200359e8 L |
1545 | } |
1546 | // subtract scale from the exponent | |
1547 | e4 = e4 - scale; | |
1548 | } | |
1549 | } | |
b2a00c89 | 1550 | |
200359e8 L |
1551 | // check for inexact result |
1552 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
1553 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
1554 | // set the inexact flag and the underflow flag | |
1555 | *pfpsf |= INEXACT_EXCEPTION; | |
1556 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
1557 | } | |
1558 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; | |
1559 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1560 | rounding_correction (rnd_mode, | |
b2a00c89 L |
1561 | is_inexact_lt_midpoint, |
1562 | is_inexact_gt_midpoint, | |
1563 | is_midpoint_lt_even, is_midpoint_gt_even, | |
1564 | e4, &res, pfpsf); | |
200359e8 L |
1565 | } |
1566 | *pfpsf |= save_fpsf; | |
b2a00c89 L |
1567 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1568 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1569 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1570 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1571 | BID_SWAP128 (res); | |
200359e8 L |
1572 | BID_RETURN (res) |
1573 | } | |
b2a00c89 L |
1574 | // no overflow, and no underflow for rounding to nearest |
1575 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; | |
1576 | ||
1577 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1578 | rounding_correction (rnd_mode, | |
1579 | is_inexact_lt_midpoint, | |
1580 | is_inexact_gt_midpoint, | |
1581 | is_midpoint_lt_even, is_midpoint_gt_even, | |
1582 | e4, &res, pfpsf); | |
1583 | // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) | |
1584 | if (e4 == expmin) { | |
1585 | if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || | |
1586 | ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && | |
1587 | res.w[0] < 0x38c15b0a00000000ull)) { | |
1588 | is_tiny = 1; | |
1589 | } | |
1590 | } | |
1591 | } | |
200359e8 | 1592 | |
200359e8 L |
1593 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || |
1594 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
1595 | // set the inexact flag | |
1596 | *pfpsf |= INEXACT_EXCEPTION; | |
b2a00c89 L |
1597 | if (is_tiny) |
1598 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
200359e8 L |
1599 | } |
1600 | ||
1601 | if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact | |
1602 | // need to ensure that the result has the preferred exponent | |
1603 | p_exp = res.w[1] & MASK_EXP; | |
1604 | if (z_exp < p_exp) { // the preferred exponent is z_exp | |
1605 | // signficand of res in C3 | |
1606 | C3.w[1] = res.w[1] & MASK_COEFF; | |
1607 | C3.w[0] = res.w[0]; | |
1608 | // the number of decimal digits of x * y is q4 <= 34 | |
1609 | // Note: the coefficient fits in 128 bits | |
1610 | ||
1611 | // return (C3 * 10^scale) * 10^(p_exp - scale) | |
1612 | // where scale = min (p34-q4, (p_exp-z_exp) >> 49) | |
1613 | scale = p34 - q4; | |
1614 | ind = (p_exp - z_exp) >> 49; | |
1615 | if (ind < scale) | |
1616 | scale = ind; | |
1617 | // subtract scale from the exponent | |
1618 | p_exp = p_exp - ((UINT64) scale << 49); | |
1619 | if (scale == 0) { | |
1620 | ; // leave res unchanged | |
1621 | } else if (q4 <= 19) { // x * y fits in 64 bits | |
1622 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1623 | // 64 x 64 C3.w[0] * ten2k64[scale] |
1624 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 1625 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1626 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
1627 | __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1628 | } |
1629 | res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; | |
1630 | } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1631 | // 64 x 128 ten2k64[scale] * C3 |
1632 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
1633 | res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; |
1634 | } | |
1635 | } // else leave the result as it is, because p_exp <= z_exp | |
1636 | } | |
1637 | *pfpsf |= save_fpsf; | |
b2a00c89 L |
1638 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1639 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1640 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1641 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1642 | BID_SWAP128 (res); | |
200359e8 L |
1643 | BID_RETURN (res) |
1644 | } // else we have f * f + f | |
1645 | ||
1646 | // continue with x = f, y = f, z = f | |
1647 | ||
1648 | delta = q3 + e3 - q4 - e4; | |
1649 | delta_ge_zero: | |
1650 | if (delta >= 0) { | |
1651 | ||
b2a00c89 | 1652 | if (p34 <= delta - 1 || // Case (1') |
200359e8 L |
1653 | (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) |
1654 | // check for overflow, which can occur only in Case (1') | |
1655 | if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { | |
1656 | // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary | |
1657 | // condition for (q3 + e3) > (p34 + expmax) | |
1658 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
1659 | res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
1660 | res.w[0] = 0x0000000000000000ull; | |
1661 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
1662 | } else { | |
1663 | if (p_sign == z_sign) { | |
1664 | is_inexact_lt_midpoint = 1; | |
1665 | } else { | |
1666 | is_inexact_gt_midpoint = 1; | |
1667 | } | |
1668 | // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) | |
1669 | scale = p34 - q3; | |
1670 | if (scale == 0) { | |
1671 | res.w[1] = z_sign | C3.w[1]; | |
1672 | res.w[0] = C3.w[0]; | |
1673 | } else { | |
1674 | if (q3 <= 19) { // C3 fits in 64 bits | |
1675 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1676 | // 64 x 64 C3.w[0] * ten2k64[scale] |
1677 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 1678 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1679 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
1680 | __mul_128x64_to_128 (res, C3.w[0], | |
1681 | ten2k128[scale - 20]); | |
200359e8 L |
1682 | } |
1683 | } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1684 | // 64 x 128 ten2k64[scale] * C3 |
1685 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
1686 | } |
1687 | // the coefficient in res has q3 + scale = p34 digits | |
1688 | } | |
1689 | e3 = e3 - scale; | |
1690 | res.w[1] = z_sign | res.w[1]; | |
1691 | rounding_correction (rnd_mode, | |
b2a00c89 L |
1692 | is_inexact_lt_midpoint, |
1693 | is_inexact_gt_midpoint, | |
1694 | is_midpoint_lt_even, is_midpoint_gt_even, | |
1695 | e3, &res, pfpsf); | |
200359e8 | 1696 | } |
b2a00c89 L |
1697 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1698 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1699 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1700 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1701 | BID_SWAP128 (res); | |
200359e8 L |
1702 | BID_RETURN (res) |
1703 | } | |
1704 | // res = z | |
1705 | if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) | |
1706 | // return (C3 * 10^scale) * 10^(z_exp - scale) | |
1707 | // where scale = min (p34-q3, z_exp-EMIN) | |
1708 | scale = p34 - q3; | |
1709 | ind = e3 + 6176; | |
1710 | if (ind < scale) | |
1711 | scale = ind; | |
1712 | if (scale == 0) { | |
1713 | res.w[1] = C3.w[1]; | |
1714 | res.w[0] = C3.w[0]; | |
1715 | } else if (q3 <= 19) { // z fits in 64 bits | |
1716 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1717 | // 64 x 64 C3.w[0] * ten2k64[scale] |
1718 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 1719 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1720 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
1721 | __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1722 | } |
1723 | } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1724 | // 64 x 128 ten2k64[scale] * C3 |
1725 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
1726 | } |
1727 | // the coefficient in res has q3 + scale digits | |
1728 | // subtract scale from the exponent | |
1729 | z_exp = z_exp - ((UINT64) scale << 49); | |
1730 | e3 = e3 - scale; | |
1731 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1732 | if (scale + q3 < p34) | |
1733 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
1734 | } else { | |
1735 | scale = 0; | |
1736 | res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; | |
1737 | res.w[0] = C3.w[0]; | |
1738 | } | |
1739 | ||
1740 | // use the following to avoid double rounding errors when operating on | |
1741 | // mixed formats in rounding to nearest, and for correcting the result | |
1742 | // if not rounding to nearest | |
1743 | if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { | |
1744 | // there is a gap of exactly one digit between the scaled C3 and C4 | |
1745 | // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case | |
b2a00c89 L |
1746 | if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || |
1747 | (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || | |
1748 | (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || | |
1749 | C3.w[0] != ten2k128[q3 - 21].w[0]))) { | |
1750 | // C3 * 10^ scale != 10^(q3-1) | |
200359e8 L |
1751 | // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || |
1752 | // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 | |
1753 | is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value | |
1754 | } else { // if C3 * 10^scale = 10^(q3+scale-1) | |
1755 | // ok from above e3 = (z_exp >> 49) - 6176; | |
1756 | // the result is always inexact | |
1757 | if (q4 == 1) { | |
1758 | R64 = C4.w[0]; | |
1759 | } else { | |
1760 | // if q4 > 1 then truncate C4 from q4 digits to 1 digit; | |
1761 | // x = q4-1, 1 <= x <= 67 and check if this operation is exact | |
1762 | if (q4 <= 18) { // 2 <= q4 <= 18 | |
b2a00c89 L |
1763 | round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, |
1764 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1765 | &is_inexact_lt_midpoint, | |
1766 | &is_inexact_gt_midpoint); | |
200359e8 L |
1767 | } else if (q4 <= 38) { |
1768 | P128.w[1] = C4.w[1]; | |
1769 | P128.w[0] = C4.w[0]; | |
b2a00c89 L |
1770 | round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, |
1771 | &is_midpoint_lt_even, | |
1772 | &is_midpoint_gt_even, | |
1773 | &is_inexact_lt_midpoint, | |
1774 | &is_inexact_gt_midpoint); | |
200359e8 L |
1775 | R64 = R128.w[0]; // one decimal digit |
1776 | } else if (q4 <= 57) { | |
1777 | P192.w[2] = C4.w[2]; | |
1778 | P192.w[1] = C4.w[1]; | |
1779 | P192.w[0] = C4.w[0]; | |
b2a00c89 L |
1780 | round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, |
1781 | &is_midpoint_lt_even, | |
1782 | &is_midpoint_gt_even, | |
1783 | &is_inexact_lt_midpoint, | |
1784 | &is_inexact_gt_midpoint); | |
200359e8 L |
1785 | R64 = R192.w[0]; // one decimal digit |
1786 | } else { // if (q4 <= 68) | |
b2a00c89 L |
1787 | round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, |
1788 | &is_midpoint_lt_even, | |
1789 | &is_midpoint_gt_even, | |
1790 | &is_inexact_lt_midpoint, | |
1791 | &is_inexact_gt_midpoint); | |
200359e8 L |
1792 | R64 = R256.w[0]; // one decimal digit |
1793 | } | |
1794 | if (incr_exp) { | |
1795 | R64 = 10; | |
1796 | } | |
1797 | } | |
1798 | if (q4 == 1 && C4.w[0] == 5) { | |
1799 | is_inexact_lt_midpoint = 0; | |
1800 | is_inexact_gt_midpoint = 0; | |
1801 | is_midpoint_lt_even = 1; | |
1802 | is_midpoint_gt_even = 0; | |
1803 | } else if ((e3 == expmin) || | |
b2a00c89 | 1804 | R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { |
200359e8 L |
1805 | // result does not change |
1806 | is_inexact_lt_midpoint = 0; | |
1807 | is_inexact_gt_midpoint = 1; | |
1808 | is_midpoint_lt_even = 0; | |
1809 | is_midpoint_gt_even = 0; | |
1810 | } else { | |
1811 | is_inexact_lt_midpoint = 1; | |
1812 | is_inexact_gt_midpoint = 0; | |
1813 | is_midpoint_lt_even = 0; | |
1814 | is_midpoint_gt_even = 0; | |
1815 | // result decremented is 10^(q3+scale) - 1 | |
1816 | if ((q3 + scale) <= 19) { | |
1817 | res.w[1] = 0; | |
b2a00c89 | 1818 | res.w[0] = ten2k64[q3 + scale]; |
200359e8 | 1819 | } else { // if ((q3 + scale + 1) <= 35) |
b2a00c89 L |
1820 | res.w[1] = ten2k128[q3 + scale - 20].w[1]; |
1821 | res.w[0] = ten2k128[q3 + scale - 20].w[0]; | |
200359e8 L |
1822 | } |
1823 | res.w[0] = res.w[0] - 1; // borrow never occurs | |
1824 | z_exp = z_exp - EXP_P1; | |
1825 | e3 = e3 - 1; | |
1826 | res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
1827 | } | |
1828 | if (e3 == expmin) { | |
1829 | if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { | |
1830 | ; // result not tiny (in round-to-nearest mode) | |
1831 | } else { | |
1832 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
1833 | } | |
1834 | } | |
b2a00c89 | 1835 | } // end 10^(q3+scale-1) |
200359e8 L |
1836 | // set the inexact flag |
1837 | *pfpsf |= INEXACT_EXCEPTION; | |
1838 | } else { | |
1839 | if (p_sign == z_sign) { | |
1840 | // if (z_sign), set as if for absolute value | |
1841 | is_inexact_lt_midpoint = 1; | |
1842 | } else { // if (p_sign != z_sign) | |
1843 | // if (z_sign), set as if for absolute value | |
1844 | is_inexact_gt_midpoint = 1; | |
1845 | } | |
1846 | *pfpsf |= INEXACT_EXCEPTION; | |
1847 | } | |
1848 | // the result is always inexact => set the inexact flag | |
1849 | // Determine tininess: | |
1850 | // if (exp > expmin) | |
1851 | // the result is not tiny | |
1852 | // else // if exp = emin | |
1853 | // if (q3 + scale < p34) | |
1854 | // the result is tiny | |
1855 | // else // if (q3 + scale = p34) | |
1856 | // if (C3 * 10^scale > 10^33) | |
1857 | // the result is not tiny | |
1858 | // else // if C3 * 10^scale = 10^33 | |
1859 | // if (xy * z > 0) | |
1860 | // the result is not tiny | |
1861 | // else // if (xy * z < 0) | |
1862 | // if (z > 0) | |
1863 | // if rnd_mode != RP | |
1864 | // the result is tiny | |
1865 | // else // if RP | |
1866 | // the result is not tiny | |
1867 | // else // if (z < 0) | |
1868 | // if rnd_mode != RM | |
1869 | // the result is tiny | |
1870 | // else // if RM | |
1871 | // the result is not tiny | |
1872 | // endif | |
1873 | // endif | |
1874 | // endif | |
1875 | // endif | |
1876 | // endif | |
1877 | // endif | |
b2a00c89 L |
1878 | if ((e3 == expmin && (q3 + scale) < p34) || |
1879 | (e3 == expmin && (q3 + scale) == p34 && | |
1880 | (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high | |
1881 | res.w[0] == 0x38c15b0a00000000ull && // 10^33_low | |
1882 | z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || | |
200359e8 L |
1883 | (z_sign && rnd_mode != ROUNDING_DOWN)))) { |
1884 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
1885 | } | |
1886 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1887 | rounding_correction (rnd_mode, | |
b2a00c89 L |
1888 | is_inexact_lt_midpoint, |
1889 | is_inexact_gt_midpoint, | |
1890 | is_midpoint_lt_even, is_midpoint_gt_even, | |
1891 | e3, &res, pfpsf); | |
200359e8 | 1892 | } |
b2a00c89 L |
1893 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
1894 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1895 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1896 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1897 | BID_SWAP128 (res); | |
200359e8 L |
1898 | BID_RETURN (res) |
1899 | ||
1900 | } else if (p34 == delta) { // Case (1''B) | |
1901 | ||
1902 | // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 | |
1903 | // and C3 can be scaled up to p34 digits if needed | |
1904 | ||
1905 | // scale C3 to p34 digits if needed | |
1906 | scale = p34 - q3; // 0 <= scale <= p34 - 1 | |
1907 | if (scale == 0) { | |
1908 | res.w[1] = C3.w[1]; | |
1909 | res.w[0] = C3.w[0]; | |
1910 | } else if (q3 <= 19) { // z fits in 64 bits | |
1911 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
1912 | // 64 x 64 C3.w[0] * ten2k64[scale] |
1913 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 1914 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
1915 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
1916 | __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
200359e8 L |
1917 | } |
1918 | } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
1919 | // 64 x 128 ten2k64[scale] * C3 |
1920 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
1921 | } |
1922 | // subtract scale from the exponent | |
1923 | z_exp = z_exp - ((UINT64) scale << 49); | |
1924 | e3 = e3 - scale; | |
1925 | // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits | |
1926 | ||
1927 | // determine whether x * y is less than, equal to, or greater than | |
1928 | // 1/2 ulp (z) | |
1929 | if (q4 <= 19) { | |
b2a00c89 | 1930 | if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp |
200359e8 | 1931 | lt_half_ulp = 1; |
b2a00c89 | 1932 | } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp |
200359e8 L |
1933 | eq_half_ulp = 1; |
1934 | } else { // > 1/2 ulp | |
1935 | gt_half_ulp = 1; | |
1936 | } | |
1937 | } else if (q4 <= 38) { | |
b2a00c89 L |
1938 | if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || |
1939 | (C4.w[1] == midpoint128[q4 - 20].w[1] && | |
1940 | C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp | |
200359e8 | 1941 | lt_half_ulp = 1; |
b2a00c89 L |
1942 | } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && |
1943 | C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp | |
200359e8 L |
1944 | eq_half_ulp = 1; |
1945 | } else { // > 1/2 ulp | |
1946 | gt_half_ulp = 1; | |
1947 | } | |
1948 | } else if (q4 <= 58) { | |
b2a00c89 L |
1949 | if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || |
1950 | (C4.w[2] == midpoint192[q4 - 39].w[2] && | |
1951 | C4.w[1] < midpoint192[q4 - 39].w[1]) || | |
1952 | (C4.w[2] == midpoint192[q4 - 39].w[2] && | |
1953 | C4.w[1] == midpoint192[q4 - 39].w[1] && | |
1954 | C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp | |
200359e8 | 1955 | lt_half_ulp = 1; |
b2a00c89 L |
1956 | } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && |
1957 | C4.w[1] == midpoint192[q4 - 39].w[1] && | |
1958 | C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp | |
200359e8 L |
1959 | eq_half_ulp = 1; |
1960 | } else { // > 1/2 ulp | |
1961 | gt_half_ulp = 1; | |
1962 | } | |
1963 | } else { | |
b2a00c89 L |
1964 | if (C4.w[3] < midpoint256[q4 - 59].w[3] || |
1965 | (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1966 | C4.w[2] < midpoint256[q4 - 59].w[2]) || | |
1967 | (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1968 | C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1969 | C4.w[1] < midpoint256[q4 - 59].w[1]) || | |
1970 | (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1971 | C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1972 | C4.w[1] == midpoint256[q4 - 59].w[1] && | |
1973 | C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp | |
200359e8 | 1974 | lt_half_ulp = 1; |
b2a00c89 L |
1975 | } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && |
1976 | C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1977 | C4.w[1] == midpoint256[q4 - 59].w[1] && | |
1978 | C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp | |
200359e8 L |
1979 | eq_half_ulp = 1; |
1980 | } else { // > 1/2 ulp | |
1981 | gt_half_ulp = 1; | |
1982 | } | |
1983 | } | |
1984 | ||
1985 | if (p_sign == z_sign) { | |
1986 | if (lt_half_ulp) { | |
1987 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1988 | // use the following to avoid double rounding errors when operating on | |
1989 | // mixed formats in rounding to nearest | |
1990 | is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value | |
1991 | } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { | |
1992 | // add 1 ulp to the significand | |
1993 | res.w[0]++; | |
1994 | if (res.w[0] == 0x0ull) | |
1995 | res.w[1]++; | |
1996 | // check for rounding overflow, when coeff == 10^34 | |
1997 | if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && | |
1998 | res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 | |
1999 | e3 = e3 + 1; | |
2000 | // coeff = 10^33 | |
2001 | z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; | |
2002 | res.w[1] = 0x0000314dc6448d93ull; | |
2003 | res.w[0] = 0x38c15b0a00000000ull; | |
2004 | } | |
2005 | // end add 1 ulp | |
2006 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2007 | if (eq_half_ulp) { | |
2008 | is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2009 | } else { | |
2010 | is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value | |
2011 | } | |
2012 | } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) | |
2013 | // leave unchanged | |
2014 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2015 | is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value | |
2016 | } | |
2017 | // the result is always inexact, and never tiny | |
2018 | // set the inexact flag | |
2019 | *pfpsf |= INEXACT_EXCEPTION; | |
2020 | // check for overflow | |
2021 | if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { | |
2022 | res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2023 | res.w[0] = 0x0000000000000000ull; | |
2024 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
b2a00c89 L |
2025 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
2026 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2027 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2028 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2029 | BID_SWAP128 (res); | |
200359e8 L |
2030 | BID_RETURN (res) |
2031 | } | |
2032 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2033 | rounding_correction (rnd_mode, | |
b2a00c89 L |
2034 | is_inexact_lt_midpoint, |
2035 | is_inexact_gt_midpoint, | |
2036 | is_midpoint_lt_even, is_midpoint_gt_even, | |
2037 | e3, &res, pfpsf); | |
2038 | z_exp = res.w[1] & MASK_EXP; | |
200359e8 L |
2039 | } |
2040 | } else { // if (p_sign != z_sign) | |
2041 | // consider two cases, because C3 * 10^scale = 10^33 is a special case | |
2042 | if (res.w[1] != 0x0000314dc6448d93ull || | |
2043 | res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 | |
2044 | if (lt_half_ulp) { | |
2045 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2046 | // use the following to avoid double rounding errors when operating | |
2047 | // on mixed formats in rounding to nearest | |
2048 | is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value | |
2049 | } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { | |
2050 | // subtract 1 ulp from the significand | |
2051 | res.w[0]--; | |
2052 | if (res.w[0] == 0xffffffffffffffffull) | |
2053 | res.w[1]--; | |
2054 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2055 | if (eq_half_ulp) { | |
2056 | is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value | |
2057 | } else { | |
2058 | is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value | |
2059 | } | |
2060 | } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) | |
2061 | // leave unchanged | |
2062 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2063 | is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2064 | } | |
2065 | // the result is always inexact, and never tiny | |
2066 | // check for overflow for RN | |
2067 | if (e3 > expmax) { | |
2068 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
2069 | res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2070 | res.w[0] = 0x0000000000000000ull; | |
2071 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2072 | } else { | |
2073 | rounding_correction (rnd_mode, | |
b2a00c89 L |
2074 | is_inexact_lt_midpoint, |
2075 | is_inexact_gt_midpoint, | |
2076 | is_midpoint_lt_even, | |
2077 | is_midpoint_gt_even, e3, &res, | |
2078 | pfpsf); | |
200359e8 | 2079 | } |
b2a00c89 L |
2080 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
2081 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2082 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2083 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2084 | BID_SWAP128 (res); | |
200359e8 L |
2085 | BID_RETURN (res) |
2086 | } | |
2087 | // set the inexact flag | |
2088 | *pfpsf |= INEXACT_EXCEPTION; | |
2089 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2090 | rounding_correction (rnd_mode, | |
b2a00c89 L |
2091 | is_inexact_lt_midpoint, |
2092 | is_inexact_gt_midpoint, | |
2093 | is_midpoint_lt_even, | |
2094 | is_midpoint_gt_even, e3, &res, pfpsf); | |
200359e8 | 2095 | } |
b2a00c89 | 2096 | z_exp = res.w[1] & MASK_EXP; |
200359e8 L |
2097 | } else { // if C3 * 10^scale = 10^33 |
2098 | e3 = (z_exp >> 49) - 6176; | |
2099 | if (e3 > expmin) { | |
2100 | // the result is exact if exp > expmin and C4 = d*10^(q4-1), | |
2101 | // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact | |
2102 | if (q4 == 1) { | |
2103 | // if q4 = 1 the result is exact | |
2104 | // result coefficient = 10^34 - C4 | |
2105 | res.w[1] = 0x0001ed09bead87c0ull; | |
2106 | res.w[0] = 0x378d8e6400000000ull - C4.w[0]; | |
2107 | z_exp = z_exp - EXP_P1; | |
2108 | e3 = e3 - 1; | |
2109 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2110 | } else { | |
2111 | // if q4 > 1 then truncate C4 from q4 digits to 1 digit; | |
2112 | // x = q4-1, 1 <= x <= 67 and check if this operation is exact | |
2113 | if (q4 <= 18) { // 2 <= q4 <= 18 | |
b2a00c89 L |
2114 | round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, |
2115 | &is_midpoint_lt_even, | |
2116 | &is_midpoint_gt_even, | |
2117 | &is_inexact_lt_midpoint, | |
2118 | &is_inexact_gt_midpoint); | |
200359e8 | 2119 | } else if (q4 <= 38) { |
b2a00c89 L |
2120 | P128.w[1] = C4.w[1]; |
2121 | P128.w[0] = C4.w[0]; | |
2122 | round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, | |
2123 | &is_midpoint_lt_even, | |
2124 | &is_midpoint_gt_even, | |
2125 | &is_inexact_lt_midpoint, | |
2126 | &is_inexact_gt_midpoint); | |
2127 | R64 = R128.w[0]; // one decimal digit | |
200359e8 | 2128 | } else if (q4 <= 57) { |
b2a00c89 L |
2129 | P192.w[2] = C4.w[2]; |
2130 | P192.w[1] = C4.w[1]; | |
2131 | P192.w[0] = C4.w[0]; | |
2132 | round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, | |
2133 | &is_midpoint_lt_even, | |
2134 | &is_midpoint_gt_even, | |
2135 | &is_inexact_lt_midpoint, | |
2136 | &is_inexact_gt_midpoint); | |
2137 | R64 = R192.w[0]; // one decimal digit | |
200359e8 | 2138 | } else { // if (q4 <= 68) |
b2a00c89 L |
2139 | round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, |
2140 | &is_midpoint_lt_even, | |
2141 | &is_midpoint_gt_even, | |
2142 | &is_inexact_lt_midpoint, | |
2143 | &is_inexact_gt_midpoint); | |
2144 | R64 = R256.w[0]; // one decimal digit | |
200359e8 L |
2145 | } |
2146 | if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 L |
2147 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
2148 | // the result is exact: 10^34 - R64 | |
2149 | // incr_exp = 0 with certainty | |
2150 | z_exp = z_exp - EXP_P1; | |
2151 | e3 = e3 - 1; | |
2152 | res.w[1] = | |
2153 | z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; | |
2154 | res.w[0] = 0x378d8e6400000000ull - R64; | |
200359e8 | 2155 | } else { |
b2a00c89 L |
2156 | // We want R64 to be the top digit of C4, but we actually |
2157 | // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, | |
2158 | // because the top digit is (C4 * 10^(-q4+1))RZ | |
2159 | // however, if incr_exp = 1 then R64 = 10 with certainty | |
2160 | if (incr_exp) { | |
2161 | R64 = 10; | |
2162 | } | |
2163 | // the result is inexact as C4 has more than 1 significant digit | |
2164 | // and C3 * 10^scale = 10^33 | |
2165 | // example of case that is treated here: | |
2166 | // 100...0 * 10^e3 - 0.41 * 10^e3 = | |
2167 | // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) | |
2168 | // note that (e3 > expmin} | |
2169 | // in order to round, subtract R64 from 10^34 and then compare | |
2170 | // C4 - R64 * 10^(q4-1) with 1/2 ulp | |
2171 | // calculate 10^34 - R64 | |
2172 | res.w[1] = 0x0001ed09bead87c0ull; | |
2173 | res.w[0] = 0x378d8e6400000000ull - R64; | |
2174 | z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand | |
2175 | // calculate C4 - R64 * 10^(q4-1); this is a rare case and | |
2176 | // R64 is small, 1 <= R64 <= 9 | |
2177 | e3 = e3 - 1; | |
2178 | if (is_inexact_lt_midpoint) { | |
2179 | is_inexact_lt_midpoint = 0; | |
2180 | is_inexact_gt_midpoint = 1; | |
2181 | } else if (is_inexact_gt_midpoint) { | |
2182 | is_inexact_gt_midpoint = 0; | |
2183 | is_inexact_lt_midpoint = 1; | |
2184 | } else if (is_midpoint_lt_even) { | |
2185 | is_midpoint_lt_even = 0; | |
2186 | is_midpoint_gt_even = 1; | |
2187 | } else if (is_midpoint_gt_even) { | |
2188 | is_midpoint_gt_even = 0; | |
2189 | is_midpoint_lt_even = 1; | |
2190 | } else { | |
2191 | ; | |
2192 | } | |
2193 | // the result is always inexact, and never tiny | |
2194 | // check for overflow for RN | |
2195 | if (e3 > expmax) { | |
2196 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
2197 | res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2198 | res.w[0] = 0x0000000000000000ull; | |
2199 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2200 | } else { | |
2201 | rounding_correction (rnd_mode, | |
2202 | is_inexact_lt_midpoint, | |
2203 | is_inexact_gt_midpoint, | |
2204 | is_midpoint_lt_even, | |
2205 | is_midpoint_gt_even, e3, &res, | |
2206 | pfpsf); | |
2207 | } | |
2208 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2209 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2210 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2211 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2212 | BID_SWAP128 (res); | |
2213 | BID_RETURN (res) | |
2214 | } | |
2215 | // set the inexact flag | |
2216 | *pfpsf |= INEXACT_EXCEPTION; | |
2217 | res.w[1] = | |
2218 | z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
2219 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2220 | rounding_correction (rnd_mode, | |
2221 | is_inexact_lt_midpoint, | |
2222 | is_inexact_gt_midpoint, | |
2223 | is_midpoint_lt_even, | |
2224 | is_midpoint_gt_even, e3, &res, | |
2225 | pfpsf); | |
2226 | } | |
2227 | z_exp = res.w[1] & MASK_EXP; | |
2228 | } // end result is inexact | |
2229 | } // end q4 > 1 | |
200359e8 L |
2230 | } else { // if (e3 = emin) |
2231 | // if e3 = expmin the result is also tiny (the condition for | |
2232 | // tininess is C4 > 050...0 [q4 digits] which is met because | |
2233 | // the msd of C4 is not zero) | |
2234 | // the result is tiny and inexact in all rounding modes; | |
2235 | // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, | |
2236 | // gt_half_ulp to calculate) | |
2237 | // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged | |
2238 | ||
2239 | // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp | |
2240 | if (gt_half_ulp) { // res = 10^33 - 1 | |
2241 | res.w[1] = 0x0000314dc6448d93ull; | |
2242 | res.w[0] = 0x38c15b09ffffffffull; | |
2243 | } else { | |
2244 | res.w[1] = 0x0000314dc6448d93ull; | |
2245 | res.w[0] = 0x38c15b0a00000000ull; | |
2246 | } | |
2247 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2248 | *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later | |
2249 | ||
2250 | if (eq_half_ulp) { | |
2251 | is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2252 | } else if (lt_half_ulp) { | |
2253 | is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value | |
2254 | } else { // if (gt_half_ulp) | |
2255 | is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value | |
2256 | } | |
2257 | ||
2258 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2259 | rounding_correction (rnd_mode, | |
b2a00c89 L |
2260 | is_inexact_lt_midpoint, |
2261 | is_inexact_gt_midpoint, | |
2262 | is_midpoint_lt_even, | |
2263 | is_midpoint_gt_even, e3, &res, | |
2264 | pfpsf); | |
2265 | z_exp = res.w[1] & MASK_EXP; | |
200359e8 | 2266 | } |
b2a00c89 | 2267 | } // end e3 = emin |
200359e8 L |
2268 | // set the inexact flag (if the result was not exact) |
2269 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
2270 | is_midpoint_lt_even || is_midpoint_gt_even) | |
2271 | *pfpsf |= INEXACT_EXCEPTION; | |
b2a00c89 L |
2272 | } // end 10^33 |
2273 | } // end if (p_sign != z_sign) | |
200359e8 | 2274 | res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; |
b2a00c89 L |
2275 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
2276 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2277 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2278 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2279 | BID_SWAP128 (res); | |
200359e8 L |
2280 | BID_RETURN (res) |
2281 | ||
2282 | } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) | |
2283 | (q3 <= delta && delta + q4 <= p34) || // Case (3) | |
2284 | (delta < q3 && p34 < delta + q4) || // Case (4) | |
2285 | (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) | |
b2a00c89 | 2286 | (delta + q4 < q3)) && // Case (6) |
200359e8 L |
2287 | !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) |
2288 | ||
2289 | // the result has the sign of z | |
2290 | ||
2291 | if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) | |
2292 | (delta < q3 && p34 < delta + q4)) { // Case (4) | |
2293 | // round first the sum x * y + z with unbounded exponent | |
2294 | // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, | |
2295 | // 1 <= scale <= 33 | |
2296 | // calculate res = C3 * 10^scale | |
2297 | scale = p34 - q3; | |
2298 | x0 = delta + q4 - p34; | |
2299 | } else if (delta + q4 < q3) { // Case (6) | |
2300 | // make Case (6) look like Case (3) or Case (5) with scale = 0 | |
2301 | // by scaling up C4 by 10^(q3 - delta - q4) | |
2302 | scale = q3 - delta - q4; // 1 <= scale <= 33 | |
2303 | if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits | |
2304 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
2305 | // 64 x 64 C4.w[0] * ten2k64[scale] |
2306 | __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); | |
200359e8 | 2307 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
2308 | // 64 x 128 C4.w[0] * ten2k128[scale - 20] |
2309 | __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); | |
200359e8 L |
2310 | } |
2311 | } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
2312 | // 64 x 128 ten2k64[scale] * C4 |
2313 | __mul_128x64_to_128 (P128, ten2k64[scale], C4); | |
200359e8 L |
2314 | } |
2315 | C4.w[0] = P128.w[0]; | |
2316 | C4.w[1] = P128.w[1]; | |
2317 | // e4 does not need adjustment, as it is not used from this point on | |
2318 | scale = 0; | |
2319 | x0 = 0; | |
2320 | // now Case (6) looks like Case (3) or Case (5) with scale = 0 | |
2321 | } else { // if Case (3) or Case (5) | |
2322 | // Note: Case (3) is similar to Case (2), but scale differs and the | |
2323 | // result is exact, unless it is tiny (so x0 = 0 when calculating the | |
2324 | // result with unbounded exponent) | |
2325 | ||
2326 | // calculate first the sum x * y + z with unbounded exponent (exact) | |
2327 | // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, | |
2328 | // 1 <= scale <= 33 | |
2329 | // calculate res = C3 * 10^scale | |
2330 | scale = delta + q4 - q3; | |
2331 | x0 = 0; | |
2332 | // Note: the comments which follow refer [mainly] to Case (2)] | |
2333 | } | |
2334 | ||
2335 | case2_repeat: | |
2336 | if (scale == 0) { // this could happen e.g. if we return to case2_repeat | |
2337 | // or in Case (4) | |
2338 | res.w[1] = C3.w[1]; | |
2339 | res.w[0] = C3.w[0]; | |
2340 | } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits | |
2341 | if (scale <= 19) { // 10^scale fits in 64 bits | |
b2a00c89 L |
2342 | // 64 x 64 C3.w[0] * ten2k64[scale] |
2343 | __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
200359e8 | 2344 | } else { // 10^scale fits in 128 bits |
b2a00c89 L |
2345 | // 64 x 128 C3.w[0] * ten2k128[scale - 20] |
2346 | __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
200359e8 L |
2347 | } |
2348 | } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
b2a00c89 L |
2349 | // 64 x 128 ten2k64[scale] * C3 |
2350 | __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
200359e8 L |
2351 | } |
2352 | // e3 is already calculated | |
2353 | e3 = e3 - scale; | |
2354 | // now res = C3 * 10^scale and e3 = e3 - scale | |
2355 | // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat | |
2356 | // because the result was too small | |
2357 | ||
2358 | // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, | |
2359 | // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) | |
2360 | // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of | |
2361 | // the rounding fits in 128 bits!) | |
2362 | // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) | |
2363 | // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 | |
2364 | if (x0 == 0) { // this could happen only if we return to case2_repeat, or | |
b2a00c89 | 2365 | // for Case (3) or Case (6) |
200359e8 L |
2366 | R128.w[1] = C4.w[1]; |
2367 | R128.w[0] = C4.w[0]; | |
2368 | } else if (q4 <= 18) { | |
2369 | // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 | |
b2a00c89 L |
2370 | round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, |
2371 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2372 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
2373 | if (incr_exp) { |
2374 | // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 | |
b2a00c89 | 2375 | R64 = ten2k64[q4 - x0]; |
200359e8 L |
2376 | } |
2377 | R128.w[1] = 0; | |
2378 | R128.w[0] = R64; | |
2379 | } else if (q4 <= 38) { | |
2380 | // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 | |
2381 | P128.w[1] = C4.w[1]; | |
2382 | P128.w[0] = C4.w[0]; | |
b2a00c89 L |
2383 | round128_19_38 (q4, x0, P128, &R128, &incr_exp, |
2384 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2385 | &is_inexact_lt_midpoint, | |
2386 | &is_inexact_gt_midpoint); | |
200359e8 L |
2387 | if (incr_exp) { |
2388 | // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 | |
2389 | if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
b2a00c89 | 2390 | R128.w[0] = ten2k64[q4 - x0]; |
200359e8 L |
2391 | // R128.w[1] stays 0 |
2392 | } else { // 20 <= q4 - x0 <= 37 | |
b2a00c89 L |
2393 | R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
2394 | R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
200359e8 L |
2395 | } |
2396 | } | |
2397 | } else if (q4 <= 57) { | |
2398 | // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 | |
2399 | P192.w[2] = C4.w[2]; | |
2400 | P192.w[1] = C4.w[1]; | |
2401 | P192.w[0] = C4.w[0]; | |
b2a00c89 L |
2402 | round192_39_57 (q4, x0, P192, &R192, &incr_exp, |
2403 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2404 | &is_inexact_lt_midpoint, | |
2405 | &is_inexact_gt_midpoint); | |
200359e8 L |
2406 | // R192.w[2] is always 0 |
2407 | if (incr_exp) { | |
2408 | // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 | |
2409 | if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
b2a00c89 | 2410 | R192.w[0] = ten2k64[q4 - x0]; |
200359e8 L |
2411 | // R192.w[1] stays 0 |
2412 | // R192.w[2] stays 0 | |
2413 | } else { // 20 <= q4 - x0 <= 33 | |
b2a00c89 L |
2414 | R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
2415 | R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
200359e8 L |
2416 | // R192.w[2] stays 0 |
2417 | } | |
2418 | } | |
2419 | R128.w[1] = R192.w[1]; | |
2420 | R128.w[0] = R192.w[0]; | |
2421 | } else { | |
2422 | // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 | |
b2a00c89 L |
2423 | round256_58_76 (q4, x0, C4, &R256, &incr_exp, |
2424 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2425 | &is_inexact_lt_midpoint, | |
2426 | &is_inexact_gt_midpoint); | |
200359e8 L |
2427 | // R256.w[3] and R256.w[2] are always 0 |
2428 | if (incr_exp) { | |
2429 | // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 | |
2430 | if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
b2a00c89 | 2431 | R256.w[0] = ten2k64[q4 - x0]; |
200359e8 L |
2432 | // R256.w[1] stays 0 |
2433 | // R256.w[2] stays 0 | |
2434 | // R256.w[3] stays 0 | |
2435 | } else { // 20 <= q4 - x0 <= 33 | |
b2a00c89 L |
2436 | R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; |
2437 | R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
200359e8 L |
2438 | // R256.w[2] stays 0 |
2439 | // R256.w[3] stays 0 | |
2440 | } | |
2441 | } | |
2442 | R128.w[1] = R256.w[1]; | |
2443 | R128.w[0] = R256.w[0]; | |
2444 | } | |
2445 | // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, | |
2446 | // rounded to nearest, which were copied into R128 | |
2447 | if (z_sign == p_sign) { | |
2448 | lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale | |
2449 | // the sum can result in [up to] p34 or p34 + 1 digits | |
2450 | res.w[0] = res.w[0] + R128.w[0]; | |
2451 | res.w[1] = res.w[1] + R128.w[1]; | |
2452 | if (res.w[0] < R128.w[0]) | |
2453 | res.w[1]++; // carry | |
2454 | // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 | |
2455 | if (res.w[1] > 0x0001ed09bead87c0ull || | |
2456 | (res.w[1] == 0x0001ed09bead87c0ull && | |
b2a00c89 | 2457 | res.w[0] > 0x378d8e63ffffffffull)) { |
200359e8 L |
2458 | // avoid double rounding error |
2459 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
2460 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
2461 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
2462 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
2463 | is_inexact_lt_midpoint = 0; | |
2464 | is_inexact_gt_midpoint = 0; | |
2465 | is_midpoint_lt_even = 0; | |
2466 | is_midpoint_gt_even = 0; | |
2467 | P128.w[1] = res.w[1]; | |
2468 | P128.w[0] = res.w[0]; | |
b2a00c89 L |
2469 | round128_19_38 (35, 1, P128, &res, &incr_exp, |
2470 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2471 | &is_inexact_lt_midpoint, | |
2472 | &is_inexact_gt_midpoint); | |
200359e8 L |
2473 | // incr_exp is 0 with certainty in this case |
2474 | // avoid a double rounding error | |
2475 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
2476 | is_midpoint_lt_even) { // double rounding error upward | |
2477 | // res = res - 1 | |
2478 | res.w[0]--; | |
2479 | if (res.w[0] == 0xffffffffffffffffull) | |
2480 | res.w[1]--; | |
2481 | // Note: a double rounding error upward is not possible; for this | |
2482 | // the result after the first rounding would have to be 99...95 | |
2483 | // (35 digits in all), possibly followed by a number of zeros; this | |
2484 | // not possible in Cases (2)-(6) or (15)-(17) which may get here | |
2485 | is_midpoint_lt_even = 0; | |
2486 | is_inexact_lt_midpoint = 1; | |
2487 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
2488 | is_midpoint_gt_even) { // double rounding error downward | |
2489 | // res = res + 1 | |
2490 | res.w[0]++; | |
2491 | if (res.w[0] == 0) | |
2492 | res.w[1]++; | |
2493 | is_midpoint_gt_even = 0; | |
2494 | is_inexact_gt_midpoint = 1; | |
2495 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 L |
2496 | !is_inexact_lt_midpoint |
2497 | && !is_inexact_gt_midpoint) { | |
200359e8 L |
2498 | // if this second rounding was exact the result may still be |
2499 | // inexact because of the first rounding | |
2500 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
2501 | is_inexact_gt_midpoint = 1; | |
2502 | } | |
2503 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
2504 | is_inexact_lt_midpoint = 1; | |
2505 | } | |
2506 | } else if (is_midpoint_gt_even && | |
b2a00c89 L |
2507 | (is_inexact_gt_midpoint0 |
2508 | || is_midpoint_lt_even0)) { | |
200359e8 L |
2509 | // pulled up to a midpoint |
2510 | is_inexact_lt_midpoint = 1; | |
2511 | is_inexact_gt_midpoint = 0; | |
2512 | is_midpoint_lt_even = 0; | |
2513 | is_midpoint_gt_even = 0; | |
2514 | } else if (is_midpoint_lt_even && | |
b2a00c89 L |
2515 | (is_inexact_lt_midpoint0 |
2516 | || is_midpoint_gt_even0)) { | |
200359e8 L |
2517 | // pulled down to a midpoint |
2518 | is_inexact_lt_midpoint = 0; | |
2519 | is_inexact_gt_midpoint = 1; | |
2520 | is_midpoint_lt_even = 0; | |
2521 | is_midpoint_gt_even = 0; | |
2522 | } else { | |
2523 | ; | |
2524 | } | |
2525 | // adjust exponent | |
2526 | e3 = e3 + 1; | |
2527 | if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2528 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2529 | if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || | |
b2a00c89 | 2530 | is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { |
200359e8 L |
2531 | is_inexact_lt_midpoint = 1; |
2532 | } | |
2533 | } | |
2534 | } else { | |
2535 | // this is the result rounded with unbounded exponent, unless a | |
2536 | // correction is needed | |
2537 | res.w[1] = res.w[1] & MASK_COEFF; | |
2538 | if (lsb == 1) { | |
2539 | if (is_midpoint_gt_even) { | |
2540 | // res = res + 1 | |
2541 | is_midpoint_gt_even = 0; | |
2542 | is_midpoint_lt_even = 1; | |
2543 | res.w[0]++; | |
2544 | if (res.w[0] == 0x0) | |
b2a00c89 | 2545 | res.w[1]++; |
200359e8 L |
2546 | // check for rounding overflow |
2547 | if (res.w[1] == 0x0001ed09bead87c0ull && | |
b2a00c89 L |
2548 | res.w[0] == 0x378d8e6400000000ull) { |
2549 | // res = 10^34 => rounding overflow | |
2550 | res.w[1] = 0x0000314dc6448d93ull; | |
2551 | res.w[0] = 0x38c15b0a00000000ull; // 10^33 | |
2552 | e3++; | |
200359e8 L |
2553 | } |
2554 | } else if (is_midpoint_lt_even) { | |
2555 | // res = res - 1 | |
2556 | is_midpoint_lt_even = 0; | |
2557 | is_midpoint_gt_even = 1; | |
2558 | res.w[0]--; | |
2559 | if (res.w[0] == 0xffffffffffffffffull) | |
b2a00c89 | 2560 | res.w[1]--; |
200359e8 L |
2561 | // if the result is pure zero, the sign depends on the rounding |
2562 | // mode (x*y and z had opposite signs) | |
2563 | if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { | |
b2a00c89 L |
2564 | if (rnd_mode != ROUNDING_DOWN) |
2565 | z_sign = 0x0000000000000000ull; | |
2566 | else | |
2567 | z_sign = 0x8000000000000000ull; | |
2568 | // the exponent is max (e3, expmin) | |
2569 | res.w[1] = 0x0; | |
2570 | res.w[0] = 0x0; | |
2571 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2572 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2573 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2574 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2575 | BID_SWAP128 (res); | |
2576 | BID_RETURN (res) | |
200359e8 L |
2577 | } |
2578 | } else { | |
2579 | ; | |
2580 | } | |
2581 | } | |
2582 | } | |
2583 | } else { // if (z_sign != p_sign) | |
2584 | lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 | |
2585 | // used to swap rounding indicators if p_sign != z_sign | |
2586 | // the sum can result in [up to] p34 or p34 - 1 digits | |
2587 | tmp64 = res.w[0]; | |
2588 | res.w[0] = res.w[0] - R128.w[0]; | |
2589 | res.w[1] = res.w[1] - R128.w[1]; | |
2590 | if (res.w[0] > tmp64) | |
2591 | res.w[1]--; // borrow | |
2592 | // if res < 10^33 and exp > expmin need to decrease x0 and | |
2593 | // increase scale by 1 | |
b2a00c89 L |
2594 | if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || |
2595 | (res.w[1] == 0x0000314dc6448d93ull && | |
2596 | res.w[0] < 0x38c15b0a00000000ull)) || | |
2597 | (is_inexact_lt_midpoint | |
2598 | && res.w[1] == 0x0000314dc6448d93ull | |
2599 | && res.w[0] == 0x38c15b0a00000000ull)) | |
2600 | && x0 >= 1) { | |
200359e8 L |
2601 | x0 = x0 - 1; |
2602 | // first restore e3, otherwise it will be too small | |
2603 | e3 = e3 + scale; | |
2604 | scale = scale + 1; | |
2605 | is_inexact_lt_midpoint = 0; | |
2606 | is_inexact_gt_midpoint = 0; | |
2607 | is_midpoint_lt_even = 0; | |
2608 | is_midpoint_gt_even = 0; | |
2609 | incr_exp = 0; | |
2610 | goto case2_repeat; | |
2611 | } | |
b2a00c89 | 2612 | // else this is the result rounded with unbounded exponent; |
200359e8 L |
2613 | // because the result has opposite sign to that of C4 which was |
2614 | // rounded, need to change the rounding indicators | |
2615 | if (is_inexact_lt_midpoint) { | |
2616 | is_inexact_lt_midpoint = 0; | |
2617 | is_inexact_gt_midpoint = 1; | |
2618 | } else if (is_inexact_gt_midpoint) { | |
2619 | is_inexact_gt_midpoint = 0; | |
2620 | is_inexact_lt_midpoint = 1; | |
2621 | } else if (lsb == 0) { | |
2622 | if (is_midpoint_lt_even) { | |
2623 | is_midpoint_lt_even = 0; | |
2624 | is_midpoint_gt_even = 1; | |
2625 | } else if (is_midpoint_gt_even) { | |
2626 | is_midpoint_gt_even = 0; | |
2627 | is_midpoint_lt_even = 1; | |
2628 | } else { | |
2629 | ; | |
2630 | } | |
2631 | } else if (lsb == 1) { | |
2632 | if (is_midpoint_lt_even) { | |
2633 | // res = res + 1 | |
2634 | res.w[0]++; | |
2635 | if (res.w[0] == 0x0) | |
2636 | res.w[1]++; | |
2637 | // check for rounding overflow | |
2638 | if (res.w[1] == 0x0001ed09bead87c0ull && | |
b2a00c89 | 2639 | res.w[0] == 0x378d8e6400000000ull) { |
200359e8 L |
2640 | // res = 10^34 => rounding overflow |
2641 | res.w[1] = 0x0000314dc6448d93ull; | |
2642 | res.w[0] = 0x38c15b0a00000000ull; // 10^33 | |
2643 | e3++; | |
2644 | } | |
2645 | } else if (is_midpoint_gt_even) { | |
2646 | // res = res - 1 | |
2647 | res.w[0]--; | |
2648 | if (res.w[0] == 0xffffffffffffffffull) | |
2649 | res.w[1]--; | |
2650 | // if the result is pure zero, the sign depends on the rounding | |
2651 | // mode (x*y and z had opposite signs) | |
2652 | if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { | |
2653 | if (rnd_mode != ROUNDING_DOWN) | |
b2a00c89 | 2654 | z_sign = 0x0000000000000000ull; |
200359e8 | 2655 | else |
b2a00c89 | 2656 | z_sign = 0x8000000000000000ull; |
200359e8 L |
2657 | // the exponent is max (e3, expmin) |
2658 | res.w[1] = 0x0; | |
2659 | res.w[0] = 0x0; | |
b2a00c89 L |
2660 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
2661 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2662 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2663 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2664 | BID_SWAP128 (res); | |
200359e8 L |
2665 | BID_RETURN (res) |
2666 | } | |
2667 | } else { | |
2668 | ; | |
2669 | } | |
2670 | } else { | |
2671 | ; | |
2672 | } | |
2673 | } | |
2674 | // check for underflow | |
b2a00c89 L |
2675 | if (e3 == expmin) { // and if significand < 10^33 => result is tiny |
2676 | if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || | |
2677 | ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && | |
2678 | res.w[0] < 0x38c15b0a00000000ull)) { | |
2679 | is_tiny = 1; | |
2680 | } | |
2681 | } else if (e3 < expmin) { | |
200359e8 L |
2682 | // the result is tiny, so we must truncate more of res |
2683 | is_tiny = 1; | |
2684 | x0 = expmin - e3; | |
2685 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
2686 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
2687 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
2688 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
2689 | is_inexact_lt_midpoint = 0; | |
2690 | is_inexact_gt_midpoint = 0; | |
2691 | is_midpoint_lt_even = 0; | |
2692 | is_midpoint_gt_even = 0; | |
2693 | // determine the number of decimal digits in res | |
2694 | if (res.w[1] == 0x0) { | |
2695 | // between 1 and 19 digits | |
2696 | for (ind = 1; ind <= 19; ind++) { | |
b2a00c89 | 2697 | if (res.w[0] < ten2k64[ind]) { |
200359e8 L |
2698 | break; |
2699 | } | |
2700 | } | |
2701 | // ind digits | |
b2a00c89 L |
2702 | } else if (res.w[1] < ten2k128[0].w[1] || |
2703 | (res.w[1] == ten2k128[0].w[1] | |
2704 | && res.w[0] < ten2k128[0].w[0])) { | |
200359e8 L |
2705 | // 20 digits |
2706 | ind = 20; | |
2707 | } else { // between 21 and 38 digits | |
2708 | for (ind = 1; ind <= 18; ind++) { | |
b2a00c89 L |
2709 | if (res.w[1] < ten2k128[ind].w[1] || |
2710 | (res.w[1] == ten2k128[ind].w[1] && | |
2711 | res.w[0] < ten2k128[ind].w[0])) { | |
200359e8 L |
2712 | break; |
2713 | } | |
2714 | } | |
2715 | // ind + 20 digits | |
2716 | ind = ind + 20; | |
2717 | } | |
2718 | ||
2719 | // at this point ind >= x0; because delta >= 2 on this path, the case | |
b2a00c89 | 2720 | // ind = x0 can occur only in Case (2) or case (3), when C3 has one |
200359e8 L |
2721 | // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), |
2722 | // the signs of x * y and z are opposite, and through cancellation | |
2723 | // the most significant decimal digit in res has the weight | |
2724 | // 10^(emin-1); however, it is clear that in this case the most | |
2725 | // significant digit is 9, so the result before rounding is | |
2726 | // 0.9... * 10^emin | |
2727 | // Otherwise, ind > x0 because there are non-zero decimal digits in the | |
2728 | // result with weight of at least 10^emin, and correction for underflow | |
2729 | // can be carried out using the round*_*_2_* () routines | |
2730 | if (x0 == ind) { // the result before rounding is 0.9... * 10^emin | |
2731 | res.w[1] = 0x0; | |
2732 | res.w[0] = 0x1; | |
2733 | is_inexact_gt_midpoint = 1; | |
2734 | } else if (ind <= 18) { // check that 2 <= ind | |
2735 | // 2 <= ind <= 18, 1 <= x0 <= 17 | |
b2a00c89 L |
2736 | round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, |
2737 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2738 | &is_inexact_lt_midpoint, | |
2739 | &is_inexact_gt_midpoint); | |
200359e8 | 2740 | if (incr_exp) { |
b2a00c89 L |
2741 | // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 |
2742 | R64 = ten2k64[ind - x0]; | |
200359e8 L |
2743 | } |
2744 | res.w[1] = 0; | |
2745 | res.w[0] = R64; | |
2746 | } else if (ind <= 38) { | |
2747 | // 19 <= ind <= 38 | |
2748 | P128.w[1] = res.w[1]; | |
2749 | P128.w[0] = res.w[0]; | |
b2a00c89 L |
2750 | round128_19_38 (ind, x0, P128, &res, &incr_exp, |
2751 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2752 | &is_inexact_lt_midpoint, | |
2753 | &is_inexact_gt_midpoint); | |
200359e8 | 2754 | if (incr_exp) { |
b2a00c89 | 2755 | // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 |
200359e8 | 2756 | if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 |
b2a00c89 | 2757 | res.w[0] = ten2k64[ind - x0]; |
200359e8 L |
2758 | // res.w[1] stays 0 |
2759 | } else { // 20 <= ind - x0 <= 37 | |
b2a00c89 L |
2760 | res.w[0] = ten2k128[ind - x0 - 20].w[0]; |
2761 | res.w[1] = ten2k128[ind - x0 - 20].w[1]; | |
200359e8 L |
2762 | } |
2763 | } | |
2764 | } | |
2765 | // avoid a double rounding error | |
2766 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
2767 | is_midpoint_lt_even) { // double rounding error upward | |
2768 | // res = res - 1 | |
2769 | res.w[0]--; | |
2770 | if (res.w[0] == 0xffffffffffffffffull) | |
2771 | res.w[1]--; | |
2772 | // Note: a double rounding error upward is not possible; for this | |
2773 | // the result after the first rounding would have to be 99...95 | |
2774 | // (35 digits in all), possibly followed by a number of zeros; this | |
2775 | // not possible in Cases (2)-(6) which may get here | |
2776 | is_midpoint_lt_even = 0; | |
2777 | is_inexact_lt_midpoint = 1; | |
2778 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
2779 | is_midpoint_gt_even) { // double rounding error downward | |
2780 | // res = res + 1 | |
2781 | res.w[0]++; | |
2782 | if (res.w[0] == 0) | |
2783 | res.w[1]++; | |
2784 | is_midpoint_gt_even = 0; | |
2785 | is_inexact_gt_midpoint = 1; | |
2786 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 | 2787 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
200359e8 L |
2788 | // if this second rounding was exact the result may still be |
2789 | // inexact because of the first rounding | |
2790 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
2791 | is_inexact_gt_midpoint = 1; | |
2792 | } | |
2793 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
2794 | is_inexact_lt_midpoint = 1; | |
2795 | } | |
2796 | } else if (is_midpoint_gt_even && | |
b2a00c89 | 2797 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
200359e8 L |
2798 | // pulled up to a midpoint |
2799 | is_inexact_lt_midpoint = 1; | |
2800 | is_inexact_gt_midpoint = 0; | |
2801 | is_midpoint_lt_even = 0; | |
2802 | is_midpoint_gt_even = 0; | |
2803 | } else if (is_midpoint_lt_even && | |
b2a00c89 | 2804 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
200359e8 L |
2805 | // pulled down to a midpoint |
2806 | is_inexact_lt_midpoint = 0; | |
2807 | is_inexact_gt_midpoint = 1; | |
2808 | is_midpoint_lt_even = 0; | |
2809 | is_midpoint_gt_even = 0; | |
2810 | } else { | |
2811 | ; | |
2812 | } | |
2813 | // adjust exponent | |
2814 | e3 = e3 + x0; | |
2815 | if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2816 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2817 | if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || | |
2818 | is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { | |
2819 | is_inexact_lt_midpoint = 1; | |
2820 | } | |
2821 | } | |
b2a00c89 L |
2822 | } else { |
2823 | ; // not underflow | |
200359e8 L |
2824 | } |
2825 | // check for inexact result | |
2826 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
2827 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
2828 | // set the inexact flag | |
2829 | *pfpsf |= INEXACT_EXCEPTION; | |
2830 | if (is_tiny) | |
2831 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
2832 | } | |
2833 | // now check for significand = 10^34 (may have resulted from going | |
2834 | // back to case2_repeat) | |
2835 | if (res.w[1] == 0x0001ed09bead87c0ull && | |
2836 | res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 | |
2837 | res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 | |
2838 | res.w[0] = 0x38c15b0a00000000ull; | |
2839 | e3 = e3 + 1; | |
2840 | } | |
2841 | res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
2842 | // check for overflow | |
2843 | if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { | |
2844 | res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2845 | res.w[0] = 0x0000000000000000ull; | |
2846 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2847 | } | |
2848 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2849 | rounding_correction (rnd_mode, | |
b2a00c89 L |
2850 | is_inexact_lt_midpoint, |
2851 | is_inexact_gt_midpoint, | |
2852 | is_midpoint_lt_even, is_midpoint_gt_even, | |
2853 | e3, &res, pfpsf); | |
200359e8 | 2854 | } |
b2a00c89 L |
2855 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
2856 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2857 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2858 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2859 | BID_SWAP128 (res); | |
200359e8 L |
2860 | BID_RETURN (res) |
2861 | ||
2862 | } else { | |
2863 | ||
2864 | // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and | |
2865 | // the signs of x*y and z are opposite; in these cases massive | |
2866 | // cancellation can occur, so it is better to scale either C3 or C4 and | |
2867 | // to perform the subtraction before rounding; rounding is performed | |
2868 | // next, depending on the number of decimal digits in the result and on | |
2869 | // the exponent value | |
2870 | // Note: overlow is not possible in this case | |
2871 | // this is similar to Cases (15), (16), and (17) | |
2872 | ||
2873 | if (delta + q4 < q3) { // from Case (6) | |
2874 | // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and | |
2875 | // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) | |
2876 | // and call add_and_round; delta stays positive | |
2877 | // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 | |
2878 | P128.w[1] = C3.w[1]; | |
2879 | P128.w[0] = C3.w[0]; | |
2880 | C3.w[1] = C4.w[1]; | |
2881 | C3.w[0] = C4.w[0]; | |
2882 | C4.w[1] = P128.w[1]; | |
2883 | C4.w[0] = P128.w[0]; | |
2884 | ind = q3; | |
2885 | q3 = q4; | |
2886 | q4 = ind; | |
2887 | ind = e3; | |
2888 | e3 = e4; | |
2889 | e4 = ind; | |
b2a00c89 | 2890 | tmp_sign = z_sign; |
200359e8 | 2891 | z_sign = p_sign; |
b2a00c89 | 2892 | p_sign = tmp_sign; |
200359e8 L |
2893 | } else { // from Cases (2), (3), (4), (5) |
2894 | // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be | |
2895 | // scaled up by q4 + delta - q3; this is the same as in Cases (15), | |
2896 | // (16), and (17) if we just change the sign of delta | |
2897 | delta = -delta; | |
2898 | } | |
2899 | add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, | |
b2a00c89 L |
2900 | rnd_mode, &is_midpoint_lt_even, |
2901 | &is_midpoint_gt_even, &is_inexact_lt_midpoint, | |
2902 | &is_inexact_gt_midpoint, pfpsf, &res); | |
2903 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2904 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2905 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2906 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2907 | BID_SWAP128 (res); | |
200359e8 L |
2908 | BID_RETURN (res) |
2909 | ||
2910 | } | |
2911 | ||
2912 | } else { // if delta < 0 | |
2913 | ||
2914 | delta = -delta; | |
2915 | ||
2916 | if (p34 < q4 && q4 <= delta) { // Case (7) | |
2917 | ||
2918 | // truncate C4 to p34 digits into res | |
2919 | // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 | |
2920 | x0 = q4 - p34; | |
2921 | if (q4 <= 38) { | |
2922 | P128.w[1] = C4.w[1]; | |
2923 | P128.w[0] = C4.w[0]; | |
b2a00c89 L |
2924 | round128_19_38 (q4, x0, P128, &res, &incr_exp, |
2925 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2926 | &is_inexact_lt_midpoint, | |
2927 | &is_inexact_gt_midpoint); | |
200359e8 L |
2928 | } else if (q4 <= 57) { // 35 <= q4 <= 57 |
2929 | P192.w[2] = C4.w[2]; | |
2930 | P192.w[1] = C4.w[1]; | |
2931 | P192.w[0] = C4.w[0]; | |
b2a00c89 L |
2932 | round192_39_57 (q4, x0, P192, &R192, &incr_exp, |
2933 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2934 | &is_inexact_lt_midpoint, | |
2935 | &is_inexact_gt_midpoint); | |
200359e8 L |
2936 | res.w[0] = R192.w[0]; |
2937 | res.w[1] = R192.w[1]; | |
2938 | } else { // if (q4 <= 68) | |
b2a00c89 L |
2939 | round256_58_76 (q4, x0, C4, &R256, &incr_exp, |
2940 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2941 | &is_inexact_lt_midpoint, | |
2942 | &is_inexact_gt_midpoint); | |
200359e8 L |
2943 | res.w[0] = R256.w[0]; |
2944 | res.w[1] = R256.w[1]; | |
2945 | } | |
2946 | e4 = e4 + x0; | |
2947 | if (incr_exp) { | |
2948 | e4 = e4 + 1; | |
2949 | } | |
2950 | if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2951 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2952 | // if C4 rounded to p34 digits is exact then the result is inexact, | |
2953 | // in a way that depends on the signs of x * y and z | |
2954 | if (p_sign == z_sign) { | |
2955 | is_inexact_lt_midpoint = 1; | |
2956 | } else { // if (p_sign != z_sign) | |
2957 | if (res.w[1] != 0x0000314dc6448d93ull || | |
2958 | res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 | |
2959 | is_inexact_gt_midpoint = 1; | |
2960 | } else { // res = 10^33 and exact is a special case | |
2961 | // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 | |
2962 | // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 | |
2963 | // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 | |
2964 | // Note: ulp is really ulp/10 (after borrow which propagates to msd) | |
2965 | if (delta > p34 + 1) { // C3 < 1/2 | |
2966 | // res = 10^33, unchanged | |
2967 | is_inexact_gt_midpoint = 1; | |
2968 | } else { // if (delta == p34 + 1) | |
2969 | if (q3 <= 19) { | |
b2a00c89 L |
2970 | if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp |
2971 | // res = 10^33, unchanged | |
2972 | is_inexact_gt_midpoint = 1; | |
2973 | } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp | |
2974 | // res = 10^33, unchanged | |
2975 | is_midpoint_lt_even = 1; | |
2976 | } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp | |
2977 | res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2978 | res.w[0] = 0x378d8e63ffffffffull; | |
2979 | e4 = e4 - 1; | |
2980 | is_inexact_lt_midpoint = 1; | |
2981 | } | |
200359e8 | 2982 | } else { // if (20 <= q3 <=34) |
b2a00c89 L |
2983 | if (C3.w[1] < midpoint128[q3 - 20].w[1] || |
2984 | (C3.w[1] == midpoint128[q3 - 20].w[1] && | |
2985 | C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp | |
2986 | // res = 10^33, unchanged | |
2987 | is_inexact_gt_midpoint = 1; | |
2988 | } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && | |
2989 | C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp | |
2990 | // res = 10^33, unchanged | |
2991 | is_midpoint_lt_even = 1; | |
2992 | } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp | |
2993 | res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2994 | res.w[0] = 0x378d8e63ffffffffull; | |
2995 | e4 = e4 - 1; | |
2996 | is_inexact_lt_midpoint = 1; | |
2997 | } | |
200359e8 L |
2998 | } |
2999 | } | |
3000 | } | |
3001 | } | |
3002 | } else if (is_midpoint_lt_even) { | |
3003 | if (z_sign != p_sign) { | |
3004 | // needs correction: res = res - 1 | |
3005 | res.w[0] = res.w[0] - 1; | |
3006 | if (res.w[0] == 0xffffffffffffffffull) | |
3007 | res.w[1]--; | |
3008 | // if it is (10^33-1)*10^e4 then the corect result is | |
3009 | // (10^34-1)*10(e4-1) | |
3010 | if (res.w[1] == 0x0000314dc6448d93ull && | |
3011 | res.w[0] == 0x38c15b09ffffffffull) { | |
3012 | res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
3013 | res.w[0] = 0x378d8e63ffffffffull; | |
3014 | e4 = e4 - 1; | |
3015 | } | |
3016 | is_midpoint_lt_even = 0; | |
3017 | is_inexact_lt_midpoint = 1; | |
3018 | } else { // if (z_sign == p_sign) | |
3019 | is_midpoint_lt_even = 0; | |
3020 | is_inexact_gt_midpoint = 1; | |
3021 | } | |
3022 | } else if (is_midpoint_gt_even) { | |
3023 | if (z_sign == p_sign) { | |
3024 | // needs correction: res = res + 1 (cannot cross in the next binade) | |
3025 | res.w[0] = res.w[0] + 1; | |
3026 | if (res.w[0] == 0x0000000000000000ull) | |
3027 | res.w[1]++; | |
3028 | is_midpoint_gt_even = 0; | |
3029 | is_inexact_gt_midpoint = 1; | |
3030 | } else { // if (z_sign != p_sign) | |
3031 | is_midpoint_gt_even = 0; | |
3032 | is_inexact_lt_midpoint = 1; | |
3033 | } | |
3034 | } else { | |
3035 | ; // the rounded result is already correct | |
3036 | } | |
3037 | // check for overflow | |
3038 | if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { | |
3039 | res.w[1] = p_sign | 0x7800000000000000ull; | |
3040 | res.w[0] = 0x0000000000000000ull; | |
3041 | *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); | |
3042 | } else { // no overflow or not RN | |
3043 | p_exp = ((UINT64) (e4 + 6176) << 49); | |
3044 | res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; | |
3045 | } | |
3046 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
3047 | rounding_correction (rnd_mode, | |
b2a00c89 L |
3048 | is_inexact_lt_midpoint, |
3049 | is_inexact_gt_midpoint, | |
3050 | is_midpoint_lt_even, is_midpoint_gt_even, | |
3051 | e4, &res, pfpsf); | |
200359e8 L |
3052 | } |
3053 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
3054 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
3055 | // set the inexact flag | |
3056 | *pfpsf |= INEXACT_EXCEPTION; | |
3057 | } | |
b2a00c89 L |
3058 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
3059 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3060 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3061 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3062 | BID_SWAP128 (res); | |
200359e8 L |
3063 | BID_RETURN (res) |
3064 | ||
3065 | } else if ((q4 <= p34 && p34 <= delta) || // Case (8) | |
3066 | (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) | |
3067 | (q4 <= delta && delta + q3 <= p34) || // Case (10) | |
3068 | (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) | |
3069 | (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) | |
3070 | (delta + q3 < q4 && q4 <= p34)) { // Case (18) | |
3071 | ||
3072 | // Case (8) is similar to Case (1), with C3 and C4 swapped | |
3073 | // Case (9) is similar to Case (2), with C3 and C4 swapped | |
3074 | // Case (10) is similar to Case (3), with C3 and C4 swapped | |
3075 | // Case (13) is similar to Case (4), with C3 and C4 swapped | |
3076 | // Case (14) is similar to Case (5), with C3 and C4 swapped | |
3077 | // Case (18) is similar to Case (6), with C3 and C4 swapped | |
3078 | ||
3079 | // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) | |
3080 | // and go back to delta_ge_zero | |
3081 | // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 | |
3082 | P128.w[1] = C3.w[1]; | |
3083 | P128.w[0] = C3.w[0]; | |
3084 | C3.w[1] = C4.w[1]; | |
3085 | C3.w[0] = C4.w[0]; | |
3086 | C4.w[1] = P128.w[1]; | |
3087 | C4.w[0] = P128.w[0]; | |
3088 | ind = q3; | |
3089 | q3 = q4; | |
3090 | q4 = ind; | |
3091 | ind = e3; | |
3092 | e3 = e4; | |
3093 | e4 = ind; | |
b2a00c89 | 3094 | tmp_sign = z_sign; |
200359e8 | 3095 | z_sign = p_sign; |
b2a00c89 L |
3096 | p_sign = tmp_sign; |
3097 | tmp.ui64 = z_exp; | |
200359e8 | 3098 | z_exp = p_exp; |
b2a00c89 | 3099 | p_exp = tmp.ui64; |
200359e8 L |
3100 | goto delta_ge_zero; |
3101 | ||
3102 | } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) | |
3103 | (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) | |
3104 | ||
3105 | // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, | |
3106 | // 1 <= x0 <= q3 - 1 <= p34 - 1 | |
3107 | x0 = e4 - e3; // or x0 = delta + q3 - q4 | |
3108 | if (q3 <= 18) { // 2 <= q3 <= 18 | |
b2a00c89 L |
3109 | round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, |
3110 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3111 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
200359e8 L |
3112 | // C3.w[1] = 0; |
3113 | C3.w[0] = R64; | |
3114 | } else if (q3 <= 38) { | |
b2a00c89 L |
3115 | round128_19_38 (q3, x0, C3, &R128, &incr_exp, |
3116 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3117 | &is_inexact_lt_midpoint, | |
3118 | &is_inexact_gt_midpoint); | |
200359e8 L |
3119 | C3.w[1] = R128.w[1]; |
3120 | C3.w[0] = R128.w[0]; | |
3121 | } | |
3122 | // the rounded result has q3 - x0 digits | |
3123 | // we want the exponent to be e4, so if incr_exp = 1 then | |
3124 | // multiply the rounded result by 10 - it will still fit in 113 bits | |
3125 | if (incr_exp) { | |
3126 | // 64 x 128 -> 128 | |
3127 | P128.w[1] = C3.w[1]; | |
3128 | P128.w[0] = C3.w[0]; | |
b2a00c89 | 3129 | __mul_64x128_to_128 (C3, ten2k64[1], P128); |
200359e8 L |
3130 | } |
3131 | e3 = e3 + x0; // this is e4 | |
3132 | // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; | |
3133 | // the result will have the sign of x * y; the exponent is e4 | |
3134 | R256.w[3] = 0; | |
3135 | R256.w[2] = 0; | |
3136 | R256.w[1] = C3.w[1]; | |
3137 | R256.w[0] = C3.w[0]; | |
3138 | if (p_sign == z_sign) { // R256 = C4 + R256 | |
3139 | add256 (C4, R256, &R256); | |
3140 | } else { // if (p_sign != z_sign) { // R256 = C4 - R256 | |
3141 | sub256 (C4, R256, &R256); // the result cannot be pure zero | |
3142 | // because the result has opposite sign to that of R256 which was | |
3143 | // rounded, need to change the rounding indicators | |
3144 | lsb = C4.w[0] & 0x01; | |
3145 | if (is_inexact_lt_midpoint) { | |
3146 | is_inexact_lt_midpoint = 0; | |
3147 | is_inexact_gt_midpoint = 1; | |
3148 | } else if (is_inexact_gt_midpoint) { | |
3149 | is_inexact_gt_midpoint = 0; | |
3150 | is_inexact_lt_midpoint = 1; | |
3151 | } else if (lsb == 0) { | |
3152 | if (is_midpoint_lt_even) { | |
3153 | is_midpoint_lt_even = 0; | |
3154 | is_midpoint_gt_even = 1; | |
3155 | } else if (is_midpoint_gt_even) { | |
3156 | is_midpoint_gt_even = 0; | |
3157 | is_midpoint_lt_even = 1; | |
3158 | } else { | |
3159 | ; | |
3160 | } | |
3161 | } else if (lsb == 1) { | |
3162 | if (is_midpoint_lt_even) { | |
3163 | // res = res + 1 | |
3164 | R256.w[0]++; | |
3165 | if (R256.w[0] == 0x0) { | |
3166 | R256.w[1]++; | |
3167 | if (R256.w[1] == 0x0) { | |
b2a00c89 L |
3168 | R256.w[2]++; |
3169 | if (R256.w[2] == 0x0) { | |
3170 | R256.w[3]++; | |
3171 | } | |
200359e8 L |
3172 | } |
3173 | } | |
3174 | // no check for rounding overflow - R256 was a difference | |
3175 | } else if (is_midpoint_gt_even) { | |
3176 | // res = res - 1 | |
3177 | R256.w[0]--; | |
3178 | if (R256.w[0] == 0xffffffffffffffffull) { | |
3179 | R256.w[1]--; | |
3180 | if (R256.w[1] == 0xffffffffffffffffull) { | |
b2a00c89 L |
3181 | R256.w[2]--; |
3182 | if (R256.w[2] == 0xffffffffffffffffull) { | |
3183 | R256.w[3]--; | |
3184 | } | |
200359e8 L |
3185 | } |
3186 | } | |
3187 | } else { | |
3188 | ; | |
3189 | } | |
3190 | } else { | |
3191 | ; | |
3192 | } | |
3193 | } | |
3194 | // determine the number of decimal digits in R256 | |
b2a00c89 | 3195 | ind = nr_digits256 (R256); // ind >= p34 |
200359e8 L |
3196 | // if R256 is sum, then ind > p34; if R256 is a difference, then |
3197 | // ind >= p34; this means that we can calculate the result rounded to | |
3198 | // the destination precision, with unbounded exponent, starting from R256 | |
3199 | // and using the indicators from the rounding of C3 to avoid a double | |
3200 | // rounding error | |
3201 | ||
3202 | if (ind < p34) { | |
3203 | ; | |
3204 | } else if (ind == p34) { | |
3205 | // the result rounded to the destination precision with | |
3206 | // unbounded exponent | |
3207 | // is (-1)^p_sign * R256 * 10^e4 | |
3208 | res.w[1] = R256.w[1]; | |
3209 | res.w[0] = R256.w[0]; | |
3210 | } else { // if (ind > p34) | |
3211 | // if more than P digits, round to nearest to P digits | |
3212 | // round R256 to p34 digits | |
3213 | x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 | |
3214 | // save C3 rounding indicators to help avoid double rounding error | |
3215 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
3216 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
3217 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
3218 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
3219 | // initialize rounding indicators | |
3220 | is_inexact_lt_midpoint = 0; | |
3221 | is_inexact_gt_midpoint = 0; | |
3222 | is_midpoint_lt_even = 0; | |
3223 | is_midpoint_gt_even = 0; | |
3224 | // round to p34 digits; the result fits in 113 bits | |
3225 | if (ind <= 38) { | |
3226 | P128.w[1] = R256.w[1]; | |
3227 | P128.w[0] = R256.w[0]; | |
b2a00c89 L |
3228 | round128_19_38 (ind, x0, P128, &R128, &incr_exp, |
3229 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3230 | &is_inexact_lt_midpoint, | |
3231 | &is_inexact_gt_midpoint); | |
200359e8 L |
3232 | } else if (ind <= 57) { |
3233 | P192.w[2] = R256.w[2]; | |
3234 | P192.w[1] = R256.w[1]; | |
3235 | P192.w[0] = R256.w[0]; | |
b2a00c89 L |
3236 | round192_39_57 (ind, x0, P192, &R192, &incr_exp, |
3237 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3238 | &is_inexact_lt_midpoint, | |
3239 | &is_inexact_gt_midpoint); | |
200359e8 L |
3240 | R128.w[1] = R192.w[1]; |
3241 | R128.w[0] = R192.w[0]; | |
3242 | } else { // if (ind <= 68) | |
b2a00c89 L |
3243 | round256_58_76 (ind, x0, R256, &R256, &incr_exp, |
3244 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3245 | &is_inexact_lt_midpoint, | |
3246 | &is_inexact_gt_midpoint); | |
200359e8 L |
3247 | R128.w[1] = R256.w[1]; |
3248 | R128.w[0] = R256.w[0]; | |
3249 | } | |
3250 | // the rounded result has p34 = 34 digits | |
3251 | e4 = e4 + x0 + incr_exp; | |
3252 | ||
3253 | res.w[1] = R128.w[1]; | |
3254 | res.w[0] = R128.w[0]; | |
3255 | ||
3256 | // avoid a double rounding error | |
3257 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
3258 | is_midpoint_lt_even) { // double rounding error upward | |
3259 | // res = res - 1 | |
3260 | res.w[0]--; | |
3261 | if (res.w[0] == 0xffffffffffffffffull) | |
3262 | res.w[1]--; | |
3263 | is_midpoint_lt_even = 0; | |
3264 | is_inexact_lt_midpoint = 1; | |
3265 | // Note: a double rounding error upward is not possible; for this | |
3266 | // the result after the first rounding would have to be 99...95 | |
3267 | // (35 digits in all), possibly followed by a number of zeros; this | |
3268 | // not possible in Cases (2)-(6) or (15)-(17) which may get here | |
3269 | // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent | |
3270 | if (res.w[1] == 0x0000314dc6448d93ull && | |
b2a00c89 | 3271 | res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 |
200359e8 L |
3272 | res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 |
3273 | res.w[0] = 0x378d8e63ffffffffull; | |
3274 | e4--; | |
3275 | } | |
3276 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
3277 | is_midpoint_gt_even) { // double rounding error downward | |
3278 | // res = res + 1 | |
3279 | res.w[0]++; | |
3280 | if (res.w[0] == 0) | |
3281 | res.w[1]++; | |
3282 | is_midpoint_gt_even = 0; | |
3283 | is_inexact_gt_midpoint = 1; | |
3284 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 | 3285 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { |
200359e8 L |
3286 | // if this second rounding was exact the result may still be |
3287 | // inexact because of the first rounding | |
3288 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
3289 | is_inexact_gt_midpoint = 1; | |
3290 | } | |
3291 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
3292 | is_inexact_lt_midpoint = 1; | |
3293 | } | |
3294 | } else if (is_midpoint_gt_even && | |
b2a00c89 | 3295 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { |
200359e8 L |
3296 | // pulled up to a midpoint |
3297 | is_inexact_lt_midpoint = 1; | |
3298 | is_inexact_gt_midpoint = 0; | |
3299 | is_midpoint_lt_even = 0; | |
3300 | is_midpoint_gt_even = 0; | |
3301 | } else if (is_midpoint_lt_even && | |
b2a00c89 | 3302 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { |
200359e8 L |
3303 | // pulled down to a midpoint |
3304 | is_inexact_lt_midpoint = 0; | |
3305 | is_inexact_gt_midpoint = 1; | |
3306 | is_midpoint_lt_even = 0; | |
3307 | is_midpoint_gt_even = 0; | |
3308 | } else { | |
3309 | ; | |
3310 | } | |
3311 | } | |
3312 | ||
3313 | // determine tininess | |
3314 | if (rnd_mode == ROUNDING_TO_NEAREST) { | |
3315 | if (e4 < expmin) { | |
3316 | is_tiny = 1; // for other rounding modes apply correction | |
3317 | } | |
3318 | } else { | |
3319 | // for RM, RP, RZ, RA apply correction in order to determine tininess | |
3320 | // but do not save the result; apply the correction to | |
3321 | // (-1)^p_sign * res * 10^0 | |
3322 | P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; | |
3323 | P128.w[0] = res.w[0]; | |
3324 | rounding_correction (rnd_mode, | |
b2a00c89 L |
3325 | is_inexact_lt_midpoint, |
3326 | is_inexact_gt_midpoint, | |
3327 | is_midpoint_lt_even, is_midpoint_gt_even, | |
3328 | 0, &P128, pfpsf); | |
200359e8 L |
3329 | scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 |
3330 | // the number of digits in the significand is p34 = 34 | |
3331 | if (e4 + scale < expmin) { | |
3332 | is_tiny = 1; | |
3333 | } | |
3334 | } | |
3335 | ||
3336 | // the result rounded to the destination precision with unbounded exponent | |
3337 | // is (-1)^p_sign * res * 10^e4 | |
3338 | res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN | |
3339 | // res.w[0] unchanged; | |
3340 | // Note: res is correct only if expmin <= e4 <= expmax | |
3341 | ind = p34; // the number of decimal digits in the signifcand of res | |
3342 | ||
3343 | // at this point we have the result rounded with unbounded exponent in | |
3344 | // res and we know its tininess: | |
3345 | // res = (-1)^p_sign * significand * 10^e4, | |
3346 | // where q (significand) = ind = p34 | |
3347 | // Note: res is correct only if expmin <= e4 <= expmax | |
3348 | ||
3349 | // check for overflow if RN | |
3350 | if (rnd_mode == ROUNDING_TO_NEAREST | |
3351 | && (ind + e4) > (p34 + expmax)) { | |
3352 | res.w[1] = p_sign | 0x7800000000000000ull; | |
3353 | res.w[0] = 0x0000000000000000ull; | |
3354 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
b2a00c89 L |
3355 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
3356 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3357 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3358 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3359 | BID_SWAP128 (res); | |
200359e8 | 3360 | BID_RETURN (res) |
b2a00c89 | 3361 | } // else not overflow or not RN, so continue |
200359e8 L |
3362 | |
3363 | // from this point on this is similar to the last part of the computation | |
3364 | // for Cases (15), (16), (17) | |
3365 | ||
3366 | // if (e4 >= expmin) we have the result rounded with bounded exponent | |
3367 | if (e4 < expmin) { | |
3368 | x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res | |
3369 | // where the result rounded [at most] once is | |
3370 | // (-1)^p_sign * significand_res * 10^e4 | |
3371 | ||
3372 | // avoid double rounding error | |
3373 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
3374 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
3375 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
3376 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
3377 | is_inexact_lt_midpoint = 0; | |
3378 | is_inexact_gt_midpoint = 0; | |
3379 | is_midpoint_lt_even = 0; | |
3380 | is_midpoint_gt_even = 0; | |
3381 | ||
3382 | if (x0 > ind) { | |
3383 | // nothing is left of res when moving the decimal point left x0 digits | |
3384 | is_inexact_lt_midpoint = 1; | |
3385 | res.w[1] = p_sign | 0x0000000000000000ull; | |
3386 | res.w[0] = 0x0000000000000000ull; | |
3387 | e4 = expmin; | |
3388 | } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 | |
3389 | // this is <, =, or > 1/2 ulp | |
3390 | // compare the ind-digit value in the significand of res with | |
3391 | // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is | |
3392 | // less than, equal to, or greater than 1/2 ulp (significand of res) | |
3393 | R128.w[1] = res.w[1] & MASK_COEFF; | |
3394 | R128.w[0] = res.w[0]; | |
3395 | if (ind <= 19) { | |
b2a00c89 | 3396 | if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp |
200359e8 L |
3397 | lt_half_ulp = 1; |
3398 | is_inexact_lt_midpoint = 1; | |
b2a00c89 | 3399 | } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp |
200359e8 L |
3400 | eq_half_ulp = 1; |
3401 | is_midpoint_gt_even = 1; | |
3402 | } else { // > 1/2 ulp | |
3403 | gt_half_ulp = 1; | |
3404 | is_inexact_gt_midpoint = 1; | |
3405 | } | |
3406 | } else { // if (ind <= 38) | |
b2a00c89 L |
3407 | if (R128.w[1] < midpoint128[ind - 20].w[1] || |
3408 | (R128.w[1] == midpoint128[ind - 20].w[1] && | |
3409 | R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp | |
200359e8 L |
3410 | lt_half_ulp = 1; |
3411 | is_inexact_lt_midpoint = 1; | |
b2a00c89 L |
3412 | } else if (R128.w[1] == midpoint128[ind - 20].w[1] && |
3413 | R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp | |
200359e8 L |
3414 | eq_half_ulp = 1; |
3415 | is_midpoint_gt_even = 1; | |
3416 | } else { // > 1/2 ulp | |
3417 | gt_half_ulp = 1; | |
3418 | is_inexact_gt_midpoint = 1; | |
3419 | } | |
3420 | } | |
3421 | if (lt_half_ulp || eq_half_ulp) { | |
3422 | // res = +0.0 * 10^expmin | |
3423 | res.w[1] = 0x0000000000000000ull; | |
3424 | res.w[0] = 0x0000000000000000ull; | |
3425 | } else { // if (gt_half_ulp) | |
3426 | // res = +1 * 10^expmin | |
3427 | res.w[1] = 0x0000000000000000ull; | |
3428 | res.w[0] = 0x0000000000000001ull; | |
3429 | } | |
3430 | res.w[1] = p_sign | res.w[1]; | |
3431 | e4 = expmin; | |
3432 | } else { // if (1 <= x0 <= ind - 1 <= 33) | |
3433 | // round the ind-digit result to ind - x0 digits | |
3434 | ||
3435 | if (ind <= 18) { // 2 <= ind <= 18 | |
b2a00c89 L |
3436 | round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, |
3437 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3438 | &is_inexact_lt_midpoint, | |
3439 | &is_inexact_gt_midpoint); | |
200359e8 L |
3440 | res.w[1] = 0x0; |
3441 | res.w[0] = R64; | |
3442 | } else if (ind <= 38) { | |
3443 | P128.w[1] = res.w[1] & MASK_COEFF; | |
3444 | P128.w[0] = res.w[0]; | |
b2a00c89 L |
3445 | round128_19_38 (ind, x0, P128, &res, &incr_exp, |
3446 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3447 | &is_inexact_lt_midpoint, | |
3448 | &is_inexact_gt_midpoint); | |
200359e8 L |
3449 | } |
3450 | e4 = e4 + x0; // expmin | |
3451 | // we want the exponent to be expmin, so if incr_exp = 1 then | |
3452 | // multiply the rounded result by 10 - it will still fit in 113 bits | |
3453 | if (incr_exp) { | |
3454 | // 64 x 128 -> 128 | |
3455 | P128.w[1] = res.w[1] & MASK_COEFF; | |
3456 | P128.w[0] = res.w[0]; | |
b2a00c89 | 3457 | __mul_64x128_to_128 (res, ten2k64[1], P128); |
200359e8 L |
3458 | } |
3459 | res.w[1] = | |
b2a00c89 L |
3460 | p_sign | ((UINT64) (e4 + 6176) << 49) | (res. |
3461 | w[1] & MASK_COEFF); | |
200359e8 L |
3462 | // avoid a double rounding error |
3463 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
b2a00c89 | 3464 | is_midpoint_lt_even) { // double rounding error upward |
200359e8 L |
3465 | // res = res - 1 |
3466 | res.w[0]--; | |
3467 | if (res.w[0] == 0xffffffffffffffffull) | |
3468 | res.w[1]--; | |
3469 | // Note: a double rounding error upward is not possible; for this | |
3470 | // the result after the first rounding would have to be 99...95 | |
3471 | // (35 digits in all), possibly followed by a number of zeros; this | |
3472 | // not possible in this underflow case | |
3473 | is_midpoint_lt_even = 0; | |
3474 | is_inexact_lt_midpoint = 1; | |
3475 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
b2a00c89 | 3476 | is_midpoint_gt_even) { // double rounding error downward |
200359e8 L |
3477 | // res = res + 1 |
3478 | res.w[0]++; | |
3479 | if (res.w[0] == 0) | |
3480 | res.w[1]++; | |
3481 | is_midpoint_gt_even = 0; | |
3482 | is_inexact_gt_midpoint = 1; | |
3483 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
b2a00c89 L |
3484 | !is_inexact_lt_midpoint |
3485 | && !is_inexact_gt_midpoint) { | |
200359e8 L |
3486 | // if this second rounding was exact the result may still be |
3487 | // inexact because of the first rounding | |
3488 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
3489 | is_inexact_gt_midpoint = 1; | |
3490 | } | |
3491 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
3492 | is_inexact_lt_midpoint = 1; | |
3493 | } | |
3494 | } else if (is_midpoint_gt_even && | |
b2a00c89 L |
3495 | (is_inexact_gt_midpoint0 |
3496 | || is_midpoint_lt_even0)) { | |
200359e8 L |
3497 | // pulled up to a midpoint |
3498 | is_inexact_lt_midpoint = 1; | |
3499 | is_inexact_gt_midpoint = 0; | |
3500 | is_midpoint_lt_even = 0; | |
3501 | is_midpoint_gt_even = 0; | |
3502 | } else if (is_midpoint_lt_even && | |
b2a00c89 L |
3503 | (is_inexact_lt_midpoint0 |
3504 | || is_midpoint_gt_even0)) { | |
200359e8 L |
3505 | // pulled down to a midpoint |
3506 | is_inexact_lt_midpoint = 0; | |
3507 | is_inexact_gt_midpoint = 1; | |
3508 | is_midpoint_lt_even = 0; | |
3509 | is_midpoint_gt_even = 0; | |
3510 | } else { | |
3511 | ; | |
3512 | } | |
3513 | } | |
3514 | } | |
3515 | // res contains the correct result | |
3516 | // apply correction if not rounding to nearest | |
3517 | if (rnd_mode != ROUNDING_TO_NEAREST) { | |
3518 | rounding_correction (rnd_mode, | |
b2a00c89 L |
3519 | is_inexact_lt_midpoint, |
3520 | is_inexact_gt_midpoint, | |
3521 | is_midpoint_lt_even, is_midpoint_gt_even, | |
3522 | e4, &res, pfpsf); | |
200359e8 L |
3523 | } |
3524 | if (is_midpoint_lt_even || is_midpoint_gt_even || | |
3525 | is_inexact_lt_midpoint || is_inexact_gt_midpoint) { | |
3526 | // set the inexact flag | |
3527 | *pfpsf |= INEXACT_EXCEPTION; | |
3528 | if (is_tiny) | |
3529 | *pfpsf |= UNDERFLOW_EXCEPTION; | |
3530 | } | |
b2a00c89 L |
3531 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
3532 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3533 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3534 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3535 | BID_SWAP128 (res); | |
200359e8 L |
3536 | BID_RETURN (res) |
3537 | ||
3538 | } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) | |
3539 | (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) | |
3540 | (delta + q3 <= p34 && p34 < q4)) { // Case (17) | |
3541 | ||
3542 | // calculate first the result rounded to the destination precision, with | |
3543 | // unbounded exponent | |
3544 | ||
3545 | add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, | |
b2a00c89 L |
3546 | rnd_mode, &is_midpoint_lt_even, |
3547 | &is_midpoint_gt_even, &is_inexact_lt_midpoint, | |
3548 | &is_inexact_gt_midpoint, pfpsf, &res); | |
3549 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3550 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3551 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3552 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3553 | BID_SWAP128 (res); | |
200359e8 L |
3554 | BID_RETURN (res) |
3555 | ||
3556 | } else { | |
3557 | ; | |
3558 | } | |
3559 | ||
b2a00c89 | 3560 | } // end if delta < 0 |
200359e8 | 3561 | |
b2a00c89 L |
3562 | *ptr_is_midpoint_lt_even = is_midpoint_lt_even; |
3563 | *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3564 | *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3565 | *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3566 | BID_SWAP128 (res); | |
200359e8 L |
3567 | BID_RETURN (res) |
3568 | ||
3569 | } | |
b2a00c89 L |
3570 | |
3571 | ||
3572 | #if DECIMAL_CALL_BY_REFERENCE | |
3573 | void | |
3574 | bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz | |
3575 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3576 | _EXC_INFO_PARAM) { | |
3577 | UINT128 x = *px, y = *py, z = *pz; | |
3578 | #if !DECIMAL_GLOBAL_ROUNDING | |
3579 | unsigned int rnd_mode = *prnd_mode; | |
3580 | #endif | |
3581 | #else | |
3582 | UINT128 | |
3583 | bid128_fma (UINT128 x, UINT128 y, UINT128 z | |
3584 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3585 | _EXC_INFO_PARAM) { | |
3586 | #endif | |
3587 | int is_midpoint_lt_even, is_midpoint_gt_even, | |
3588 | is_inexact_lt_midpoint, is_inexact_gt_midpoint; | |
3589 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3590 | ||
3591 | #if DECIMAL_CALL_BY_REFERENCE | |
3592 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3593 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3594 | &res, &x, &y, &z | |
3595 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3596 | _EXC_INFO_ARG); | |
3597 | #else | |
3598 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3599 | &is_inexact_lt_midpoint, | |
3600 | &is_inexact_gt_midpoint, x, y, | |
3601 | z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3602 | _EXC_INFO_ARG); | |
3603 | #endif | |
3604 | BID_RETURN (res); | |
3605 | } | |
3606 | ||
3607 | ||
3608 | #if DECIMAL_CALL_BY_REFERENCE | |
3609 | void | |
3610 | bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz | |
3611 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3612 | _EXC_INFO_PARAM) { | |
3613 | UINT64 x = *px, y = *py, z = *pz; | |
3614 | #if !DECIMAL_GLOBAL_ROUNDING | |
3615 | unsigned int rnd_mode = *prnd_mode; | |
3616 | #endif | |
3617 | #else | |
3618 | UINT128 | |
3619 | bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z | |
3620 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3621 | _EXC_INFO_PARAM) { | |
3622 | #endif | |
3623 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3624 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3625 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3626 | UINT128 x1, y1, z1; | |
3627 | ||
3628 | #if DECIMAL_CALL_BY_REFERENCE | |
3629 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3630 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3631 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3632 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3633 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3634 | &res, &x1, &y1, &z1 | |
3635 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3636 | _EXC_INFO_ARG); | |
3637 | #else | |
3638 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3639 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3640 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3641 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3642 | &is_inexact_lt_midpoint, | |
3643 | &is_inexact_gt_midpoint, x1, y1, | |
3644 | z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3645 | _EXC_INFO_ARG); | |
3646 | #endif | |
3647 | BID_RETURN (res); | |
3648 | } | |
3649 | ||
3650 | ||
3651 | #if DECIMAL_CALL_BY_REFERENCE | |
3652 | void | |
3653 | bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz | |
3654 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3655 | _EXC_INFO_PARAM) { | |
3656 | UINT64 x = *px, y = *py; | |
3657 | UINT128 z = *pz; | |
3658 | #if !DECIMAL_GLOBAL_ROUNDING | |
3659 | unsigned int rnd_mode = *prnd_mode; | |
3660 | #endif | |
3661 | #else | |
3662 | UINT128 | |
3663 | bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z | |
3664 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3665 | _EXC_INFO_PARAM) { | |
3666 | #endif | |
3667 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3668 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3669 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3670 | UINT128 x1, y1; | |
3671 | ||
3672 | #if DECIMAL_CALL_BY_REFERENCE | |
3673 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3674 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3675 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3676 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3677 | &res, &x1, &y1, &z | |
3678 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3679 | _EXC_INFO_ARG); | |
3680 | #else | |
3681 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3682 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3683 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3684 | &is_inexact_lt_midpoint, | |
3685 | &is_inexact_gt_midpoint, x1, y1, | |
3686 | z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3687 | _EXC_INFO_ARG); | |
3688 | #endif | |
3689 | BID_RETURN (res); | |
3690 | } | |
3691 | ||
3692 | ||
3693 | #if DECIMAL_CALL_BY_REFERENCE | |
3694 | void | |
3695 | bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz | |
3696 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3697 | _EXC_INFO_PARAM) { | |
3698 | UINT64 x = *px, z = *pz; | |
3699 | #if !DECIMAL_GLOBAL_ROUNDING | |
3700 | unsigned int rnd_mode = *prnd_mode; | |
3701 | #endif | |
3702 | #else | |
3703 | UINT128 | |
3704 | bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z | |
3705 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3706 | _EXC_INFO_PARAM) { | |
3707 | #endif | |
3708 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3709 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3710 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3711 | UINT128 x1, z1; | |
3712 | ||
3713 | #if DECIMAL_CALL_BY_REFERENCE | |
3714 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3715 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3716 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3717 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3718 | &res, &x1, py, &z1 | |
3719 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3720 | _EXC_INFO_ARG); | |
3721 | #else | |
3722 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3723 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3724 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3725 | &is_inexact_lt_midpoint, | |
3726 | &is_inexact_gt_midpoint, x1, y, | |
3727 | z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3728 | _EXC_INFO_ARG); | |
3729 | #endif | |
3730 | BID_RETURN (res); | |
3731 | } | |
3732 | ||
3733 | ||
3734 | #if DECIMAL_CALL_BY_REFERENCE | |
3735 | void | |
3736 | bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz | |
3737 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3738 | _EXC_INFO_PARAM) { | |
3739 | UINT64 x = *px; | |
3740 | #if !DECIMAL_GLOBAL_ROUNDING | |
3741 | unsigned int rnd_mode = *prnd_mode; | |
3742 | #endif | |
3743 | #else | |
3744 | UINT128 | |
3745 | bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z | |
3746 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3747 | _EXC_INFO_PARAM) { | |
3748 | #endif | |
3749 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3750 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3751 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3752 | UINT128 x1; | |
3753 | ||
3754 | #if DECIMAL_CALL_BY_REFERENCE | |
3755 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3756 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3757 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3758 | &res, &x1, py, pz | |
3759 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3760 | _EXC_INFO_ARG); | |
3761 | #else | |
3762 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3763 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3764 | &is_inexact_lt_midpoint, | |
3765 | &is_inexact_gt_midpoint, x1, y, | |
3766 | z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3767 | _EXC_INFO_ARG); | |
3768 | #endif | |
3769 | BID_RETURN (res); | |
3770 | } | |
3771 | ||
3772 | ||
3773 | #if DECIMAL_CALL_BY_REFERENCE | |
3774 | void | |
3775 | bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz | |
3776 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3777 | _EXC_INFO_PARAM) { | |
3778 | UINT64 y = *py, z = *pz; | |
3779 | #if !DECIMAL_GLOBAL_ROUNDING | |
3780 | unsigned int rnd_mode = *prnd_mode; | |
3781 | #endif | |
3782 | #else | |
3783 | UINT128 | |
3784 | bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z | |
3785 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3786 | _EXC_INFO_PARAM) { | |
3787 | #endif | |
3788 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3789 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3790 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3791 | UINT128 y1, z1; | |
3792 | ||
3793 | #if DECIMAL_CALL_BY_REFERENCE | |
3794 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3795 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3796 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3797 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3798 | &res, px, &y1, &z1 | |
3799 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3800 | _EXC_INFO_ARG); | |
3801 | #else | |
3802 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3803 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3804 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3805 | &is_inexact_lt_midpoint, | |
3806 | &is_inexact_gt_midpoint, x, y1, | |
3807 | z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3808 | _EXC_INFO_ARG); | |
3809 | #endif | |
3810 | BID_RETURN (res); | |
3811 | } | |
3812 | ||
3813 | ||
3814 | #if DECIMAL_CALL_BY_REFERENCE | |
3815 | void | |
3816 | bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz | |
3817 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3818 | _EXC_INFO_PARAM) { | |
3819 | UINT64 y = *py; | |
3820 | #if !DECIMAL_GLOBAL_ROUNDING | |
3821 | unsigned int rnd_mode = *prnd_mode; | |
3822 | #endif | |
3823 | #else | |
3824 | UINT128 | |
3825 | bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z | |
3826 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3827 | _EXC_INFO_PARAM) { | |
3828 | #endif | |
3829 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3830 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3831 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3832 | UINT128 y1; | |
3833 | ||
3834 | #if DECIMAL_CALL_BY_REFERENCE | |
3835 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3836 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3837 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3838 | &res, px, &y1, pz | |
3839 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3840 | _EXC_INFO_ARG); | |
3841 | #else | |
3842 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3843 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3844 | &is_inexact_lt_midpoint, | |
3845 | &is_inexact_gt_midpoint, x, y1, | |
3846 | z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3847 | _EXC_INFO_ARG); | |
3848 | #endif | |
3849 | BID_RETURN (res); | |
3850 | } | |
3851 | ||
3852 | ||
3853 | #if DECIMAL_CALL_BY_REFERENCE | |
3854 | void | |
3855 | bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz | |
3856 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3857 | _EXC_INFO_PARAM) { | |
3858 | UINT64 z = *pz; | |
3859 | #if !DECIMAL_GLOBAL_ROUNDING | |
3860 | unsigned int rnd_mode = *prnd_mode; | |
3861 | #endif | |
3862 | #else | |
3863 | UINT128 | |
3864 | bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z | |
3865 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3866 | _EXC_INFO_PARAM) { | |
3867 | #endif | |
3868 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3869 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3870 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3871 | UINT128 z1; | |
3872 | ||
3873 | #if DECIMAL_CALL_BY_REFERENCE | |
3874 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3875 | bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3876 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3877 | &res, px, py, &z1 | |
3878 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3879 | _EXC_INFO_ARG); | |
3880 | #else | |
3881 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3882 | res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3883 | &is_inexact_lt_midpoint, | |
3884 | &is_inexact_gt_midpoint, x, y, | |
3885 | z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3886 | _EXC_INFO_ARG); | |
3887 | #endif | |
3888 | BID_RETURN (res); | |
3889 | } | |
3890 | ||
3891 | // Note: bid128qqq_fma is represented by bid128_fma | |
3892 | ||
3893 | // Note: bid64ddd_fma is represented by bid64_fma | |
3894 | ||
3895 | #if DECIMAL_CALL_BY_REFERENCE | |
3896 | void | |
3897 | bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz | |
3898 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3899 | _EXC_INFO_PARAM) { | |
3900 | UINT64 x = *px, y = *py; | |
3901 | #if !DECIMAL_GLOBAL_ROUNDING | |
3902 | unsigned int rnd_mode = *prnd_mode; | |
3903 | #endif | |
3904 | #else | |
3905 | UINT64 | |
3906 | bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z | |
3907 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3908 | _EXC_INFO_PARAM) { | |
3909 | #endif | |
3910 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3911 | UINT128 x1, y1; | |
3912 | ||
3913 | #if DECIMAL_CALL_BY_REFERENCE | |
3914 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3915 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3916 | bid64qqq_fma (&res1, &x1, &y1, pz | |
3917 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3918 | _EXC_INFO_ARG); | |
3919 | #else | |
3920 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3921 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3922 | res1 = bid64qqq_fma (x1, y1, z | |
3923 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3924 | _EXC_INFO_ARG); | |
3925 | #endif | |
3926 | BID_RETURN (res1); | |
3927 | } | |
3928 | ||
3929 | ||
3930 | #if DECIMAL_CALL_BY_REFERENCE | |
3931 | void | |
3932 | bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz | |
3933 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3934 | _EXC_INFO_PARAM) { | |
3935 | UINT64 x = *px, z = *pz; | |
3936 | #if !DECIMAL_GLOBAL_ROUNDING | |
3937 | unsigned int rnd_mode = *prnd_mode; | |
3938 | #endif | |
3939 | #else | |
3940 | UINT64 | |
3941 | bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z | |
3942 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3943 | _EXC_INFO_PARAM) { | |
3944 | #endif | |
3945 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3946 | UINT128 x1, z1; | |
3947 | ||
3948 | #if DECIMAL_CALL_BY_REFERENCE | |
3949 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3950 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3951 | bid64qqq_fma (&res1, &x1, py, &z1 | |
3952 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3953 | _EXC_INFO_ARG); | |
3954 | #else | |
3955 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3956 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3957 | res1 = bid64qqq_fma (x1, y, z1 | |
3958 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3959 | _EXC_INFO_ARG); | |
3960 | #endif | |
3961 | BID_RETURN (res1); | |
3962 | } | |
3963 | ||
3964 | ||
3965 | #if DECIMAL_CALL_BY_REFERENCE | |
3966 | void | |
3967 | bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz | |
3968 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3969 | _EXC_INFO_PARAM) { | |
3970 | UINT64 x = *px; | |
3971 | #if !DECIMAL_GLOBAL_ROUNDING | |
3972 | unsigned int rnd_mode = *prnd_mode; | |
3973 | #endif | |
3974 | #else | |
3975 | UINT64 | |
3976 | bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z | |
3977 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3978 | _EXC_INFO_PARAM) { | |
3979 | #endif | |
3980 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3981 | UINT128 x1; | |
3982 | ||
3983 | #if DECIMAL_CALL_BY_REFERENCE | |
3984 | bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3985 | bid64qqq_fma (&res1, &x1, py, pz | |
3986 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3987 | _EXC_INFO_ARG); | |
3988 | #else | |
3989 | x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3990 | res1 = bid64qqq_fma (x1, y, z | |
3991 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3992 | _EXC_INFO_ARG); | |
3993 | #endif | |
3994 | BID_RETURN (res1); | |
3995 | } | |
3996 | ||
3997 | ||
3998 | #if DECIMAL_CALL_BY_REFERENCE | |
3999 | void | |
4000 | bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz | |
4001 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4002 | _EXC_INFO_PARAM) { | |
4003 | UINT64 y = *py, z = *pz; | |
4004 | #if !DECIMAL_GLOBAL_ROUNDING | |
4005 | unsigned int rnd_mode = *prnd_mode; | |
4006 | #endif | |
4007 | #else | |
4008 | UINT64 | |
4009 | bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z | |
4010 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4011 | _EXC_INFO_PARAM) { | |
4012 | #endif | |
4013 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4014 | UINT128 y1, z1; | |
4015 | ||
4016 | #if DECIMAL_CALL_BY_REFERENCE | |
4017 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4018 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4019 | bid64qqq_fma (&res1, px, &y1, &z1 | |
4020 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4021 | _EXC_INFO_ARG); | |
4022 | #else | |
4023 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4024 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4025 | res1 = bid64qqq_fma (x, y1, z1 | |
4026 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4027 | _EXC_INFO_ARG); | |
4028 | #endif | |
4029 | BID_RETURN (res1); | |
4030 | } | |
4031 | ||
4032 | ||
4033 | #if DECIMAL_CALL_BY_REFERENCE | |
4034 | void | |
4035 | bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz | |
4036 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4037 | _EXC_INFO_PARAM) { | |
4038 | UINT64 y = *py; | |
4039 | #if !DECIMAL_GLOBAL_ROUNDING | |
4040 | unsigned int rnd_mode = *prnd_mode; | |
4041 | #endif | |
4042 | #else | |
4043 | UINT64 | |
4044 | bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z | |
4045 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4046 | _EXC_INFO_PARAM) { | |
4047 | #endif | |
4048 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4049 | UINT128 y1; | |
4050 | ||
4051 | #if DECIMAL_CALL_BY_REFERENCE | |
4052 | bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4053 | bid64qqq_fma (&res1, px, &y1, pz | |
4054 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4055 | _EXC_INFO_ARG); | |
4056 | #else | |
4057 | y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4058 | res1 = bid64qqq_fma (x, y1, z | |
4059 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4060 | _EXC_INFO_ARG); | |
4061 | #endif | |
4062 | BID_RETURN (res1); | |
4063 | } | |
4064 | ||
4065 | ||
4066 | #if DECIMAL_CALL_BY_REFERENCE | |
4067 | void | |
4068 | bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz | |
4069 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4070 | _EXC_INFO_PARAM) { | |
4071 | UINT64 z = *pz; | |
4072 | #if !DECIMAL_GLOBAL_ROUNDING | |
4073 | unsigned int rnd_mode = *prnd_mode; | |
4074 | #endif | |
4075 | #else | |
4076 | UINT64 | |
4077 | bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z | |
4078 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4079 | _EXC_INFO_PARAM) { | |
4080 | #endif | |
4081 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4082 | UINT128 z1; | |
4083 | ||
4084 | #if DECIMAL_CALL_BY_REFERENCE | |
4085 | bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4086 | bid64qqq_fma (&res1, px, py, &z1 | |
4087 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4088 | _EXC_INFO_ARG); | |
4089 | #else | |
4090 | z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4091 | res1 = bid64qqq_fma (x, y, z1 | |
4092 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4093 | _EXC_INFO_ARG); | |
4094 | #endif | |
4095 | BID_RETURN (res1); | |
4096 | } | |
4097 | ||
4098 | ||
4099 | #if DECIMAL_CALL_BY_REFERENCE | |
4100 | void | |
4101 | bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz | |
4102 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4103 | _EXC_INFO_PARAM) { | |
4104 | UINT128 x = *px, y = *py, z = *pz; | |
4105 | #if !DECIMAL_GLOBAL_ROUNDING | |
4106 | unsigned int rnd_mode = *prnd_mode; | |
4107 | #endif | |
4108 | #else | |
4109 | UINT64 | |
4110 | bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z | |
4111 | _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4112 | _EXC_INFO_PARAM) { | |
4113 | #endif | |
4114 | int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0, | |
4115 | is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; | |
4116 | int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
4117 | is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
4118 | int incr_exp; | |
4119 | UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
4120 | UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
4121 | UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4122 | unsigned int save_fpsf; // needed because of the call to bid128_ext_fma | |
4123 | UINT64 sign; | |
4124 | UINT64 exp; | |
4125 | int unbexp; | |
4126 | UINT128 C; | |
4127 | BID_UI64DOUBLE tmp; | |
4128 | int nr_bits; | |
4129 | int q, x0; | |
4130 | int scale; | |
4131 | int lt_half_ulp = 0, eq_half_ulp = 0; | |
4132 | ||
4133 | // Note: for rounding modes other than RN or RA, the result can be obtained | |
4134 | // by rounding first to BID128 and then to BID64 | |
4135 | ||
4136 | save_fpsf = *pfpsf; // sticky bits - caller value must be preserved | |
4137 | *pfpsf = 0; | |
4138 | ||
4139 | #if DECIMAL_CALL_BY_REFERENCE | |
4140 | bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, | |
4141 | &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0, | |
4142 | &res, &x, &y, &z | |
4143 | _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4144 | _EXC_INFO_ARG); | |
4145 | #else | |
4146 | res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, | |
4147 | &is_inexact_lt_midpoint0, | |
4148 | &is_inexact_gt_midpoint0, x, y, | |
4149 | z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4150 | _EXC_INFO_ARG); | |
4151 | #endif | |
4152 | ||
4153 | if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) || | |
4154 | (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible | |
4155 | ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN) | |
4156 | ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity | |
4157 | #if DECIMAL_CALL_BY_REFERENCE | |
4158 | bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4159 | #else | |
4160 | res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4161 | #endif | |
4162 | // determine the unbiased exponent of the result | |
4163 | unbexp = ((res1 >> 53) & 0x3ff) - 398; | |
4164 | ||
4165 | // if subnormal, res1 must have exp = -398 | |
4166 | // if tiny and inexact set underflow and inexact status flags | |
4167 | if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN | |
4168 | (unbexp == -398) | |
4169 | && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull) | |
4170 | && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 | |
4171 | || is_midpoint_lt_even0 || is_midpoint_gt_even0)) { | |
4172 | // set the inexact flag and the underflow flag | |
4173 | *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); | |
4174 | } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || | |
4175 | is_midpoint_lt_even0 || is_midpoint_gt_even0) { | |
4176 | // set the inexact flag and the underflow flag | |
4177 | *pfpsf |= INEXACT_EXCEPTION; | |
4178 | } | |
4179 | ||
4180 | *pfpsf |= save_fpsf; | |
4181 | BID_RETURN (res1); | |
4182 | } // else continue, and use rounding to nearest to round to 16 digits | |
4183 | ||
4184 | // at this point the result is rounded to nearest (even or away) to 34 digits | |
4185 | // (or less if exact), and it is zero or finite non-zero canonical [sub]normal | |
4186 | sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
4187 | exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits | |
4188 | unbexp = (exp >> 49) - 6176; | |
4189 | C.w[1] = res.w[HIGH_128W] & MASK_COEFF; | |
4190 | C.w[0] = res.w[LOW_128W]; | |
4191 | ||
4192 | if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero | |
4193 | (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { | |
4194 | // clear under/overflow | |
4195 | #if DECIMAL_CALL_BY_REFERENCE | |
4196 | bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4197 | #else | |
4198 | res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4199 | #endif | |
4200 | *pfpsf |= save_fpsf; | |
4201 | BID_RETURN (res1); | |
4202 | } // else continue | |
4203 | ||
4204 | // -398 - 34 <= unbexp <= 369 + 15 | |
4205 | if (rnd_mode == ROUNDING_TIES_AWAY) { | |
4206 | // apply correction, if needed, to make the result rounded to nearest-even | |
4207 | if (is_midpoint_gt_even) { | |
4208 | // res = res - 1 | |
4209 | res1--; // res1 is now even | |
4210 | } // else the result is already correctly rounded to nearest-even | |
4211 | } | |
4212 | // at this point the result is finite, non-zero canonical normal or subnormal, | |
4213 | // and in most cases overflow or underflow will not occur | |
4214 | ||
4215 | // determine the number of digits q in the result | |
4216 | // q = nr. of decimal digits in x | |
4217 | // determine first the nr. of bits in x | |
4218 | if (C.w[1] == 0) { | |
4219 | if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
4220 | // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
4221 | if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
4222 | tmp.d = (double) (C.w[0] >> 32); // exact conversion | |
4223 | nr_bits = | |
4224 | 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4225 | } else { // x < 2^32 | |
4226 | tmp.d = (double) (C.w[0]); // exact conversion | |
4227 | nr_bits = | |
4228 | 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4229 | } | |
4230 | } else { // if x < 2^53 | |
4231 | tmp.d = (double) C.w[0]; // exact conversion | |
4232 | nr_bits = | |
4233 | 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4234 | } | |
4235 | } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1]) | |
4236 | tmp.d = (double) C.w[1]; // exact conversion | |
4237 | nr_bits = | |
4238 | 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4239 | } | |
4240 | q = nr_digits[nr_bits - 1].digits; | |
4241 | if (q == 0) { | |
4242 | q = nr_digits[nr_bits - 1].digits1; | |
4243 | if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi || | |
4244 | (C.w[1] == nr_digits[nr_bits - 1].threshold_hi && | |
4245 | C.w[0] >= nr_digits[nr_bits - 1].threshold_lo)) | |
4246 | q++; | |
4247 | } | |
4248 | // if q > 16, round to nearest even to 16 digits (but for underflow it may | |
4249 | // have to be truncated even more) | |
4250 | if (q > 16) { | |
4251 | x0 = q - 16; | |
4252 | if (q <= 18) { | |
4253 | round64_2_18 (q, x0, C.w[0], &res1, &incr_exp, | |
4254 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4255 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4256 | } else { // 19 <= q <= 34 | |
4257 | round128_19_38 (q, x0, C, &res128, &incr_exp, | |
4258 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4259 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4260 | res1 = res128.w[0]; // the result fits in 64 bits | |
4261 | } | |
4262 | unbexp = unbexp + x0; | |
4263 | if (incr_exp) | |
4264 | unbexp++; | |
4265 | q = 16; // need to set in case denormalization is necessary | |
4266 | } else { | |
4267 | // the result does not require a second rounding (and it must have | |
4268 | // been exact in the first rounding, since q <= 16) | |
4269 | res1 = C.w[0]; | |
4270 | } | |
4271 | ||
4272 | // avoid a double rounding error | |
4273 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
4274 | is_midpoint_lt_even) { // double rounding error upward | |
4275 | // res = res - 1 | |
4276 | res1--; // res1 becomes odd | |
4277 | is_midpoint_lt_even = 0; | |
4278 | is_inexact_lt_midpoint = 1; | |
4279 | if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1 | |
4280 | res1 = 0x002386f26fc0ffffull; // 10^16 - 1 | |
4281 | unbexp--; | |
4282 | } | |
4283 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
4284 | is_midpoint_gt_even) { // double rounding error downward | |
4285 | // res = res + 1 | |
4286 | res1++; // res1 becomes odd (so it cannot be 10^16) | |
4287 | is_midpoint_gt_even = 0; | |
4288 | is_inexact_gt_midpoint = 1; | |
4289 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
4290 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
4291 | // if this second rounding was exact the result may still be | |
4292 | // inexact because of the first rounding | |
4293 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
4294 | is_inexact_gt_midpoint = 1; | |
4295 | } | |
4296 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
4297 | is_inexact_lt_midpoint = 1; | |
4298 | } | |
4299 | } else if (is_midpoint_gt_even && | |
4300 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
4301 | // pulled up to a midpoint | |
4302 | is_inexact_lt_midpoint = 1; | |
4303 | is_inexact_gt_midpoint = 0; | |
4304 | is_midpoint_lt_even = 0; | |
4305 | is_midpoint_gt_even = 0; | |
4306 | } else if (is_midpoint_lt_even && | |
4307 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
4308 | // pulled down to a midpoint | |
4309 | is_inexact_lt_midpoint = 0; | |
4310 | is_inexact_gt_midpoint = 1; | |
4311 | is_midpoint_lt_even = 0; | |
4312 | is_midpoint_gt_even = 0; | |
4313 | } else { | |
4314 | ; | |
4315 | } | |
4316 | // this is the result rounded correctly to nearest even, with unbounded exp. | |
4317 | ||
4318 | // check for overflow | |
4319 | if (q + unbexp > P16 + expmax16) { | |
4320 | res1 = sign | 0x7800000000000000ull; | |
4321 | *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
4322 | *pfpsf |= save_fpsf; | |
4323 | BID_RETURN (res1) | |
4324 | } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16 | |
4325 | // not overflow; the result must be exact, and we can multiply res1 by | |
4326 | // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits | |
4327 | scale = unbexp - expmax16; | |
4328 | res1 = res1 * ten2k64[scale]; // res1 * 10^scale | |
4329 | unbexp = expmax16; // unbexp - scale | |
4330 | } else { | |
4331 | ; // continue | |
4332 | } | |
4333 | ||
4334 | // check for underflow | |
4335 | if (q + unbexp < P16 + expmin16) { | |
4336 | if (unbexp < expmin16) { | |
4337 | // we must truncate more of res | |
4338 | x0 = expmin16 - unbexp; // x0 >= 1 | |
4339 | is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
4340 | is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
4341 | is_midpoint_lt_even0 = is_midpoint_lt_even; | |
4342 | is_midpoint_gt_even0 = is_midpoint_gt_even; | |
4343 | is_inexact_lt_midpoint = 0; | |
4344 | is_inexact_gt_midpoint = 0; | |
4345 | is_midpoint_lt_even = 0; | |
4346 | is_midpoint_gt_even = 0; | |
4347 | // the number of decimal digits in res1 is q | |
4348 | if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits | |
4349 | // 2 <= q <= 16, 1 <= x0 <= 15 | |
4350 | round64_2_18 (q, x0, res1, &res1, &incr_exp, | |
4351 | &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4352 | &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4353 | if (incr_exp) { | |
4354 | // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15 | |
4355 | res1 = ten2k64[q - x0]; | |
4356 | } | |
4357 | unbexp = unbexp + x0; // expmin16 | |
4358 | } else if (x0 == q) { | |
4359 | // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin | |
4360 | // determine relationship with 1/2 ulp | |
4361 | // q <= 16 | |
4362 | if (res1 < midpoint64[q - 1]) { // < 1/2 ulp | |
4363 | lt_half_ulp = 1; | |
4364 | is_inexact_lt_midpoint = 1; | |
4365 | } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp | |
4366 | eq_half_ulp = 1; | |
4367 | is_midpoint_gt_even = 1; | |
4368 | } else { // > 1/2 ulp | |
4369 | // gt_half_ulp = 1; | |
4370 | is_inexact_gt_midpoint = 1; | |
4371 | } | |
4372 | if (lt_half_ulp || eq_half_ulp) { | |
4373 | // res = +0.0 * 10^expmin16 | |
4374 | res1 = 0x0000000000000000ull; | |
4375 | } else { // if (gt_half_ulp) | |
4376 | // res = +1 * 10^expmin16 | |
4377 | res1 = 0x0000000000000001ull; | |
4378 | } | |
4379 | unbexp = expmin16; | |
4380 | } else { // if (x0 > q) | |
4381 | // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin | |
4382 | res1 = 0x0000000000000000ull; | |
4383 | unbexp = expmin16; | |
4384 | is_inexact_lt_midpoint = 1; | |
4385 | } | |
4386 | // avoid a double rounding error | |
4387 | if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
4388 | is_midpoint_lt_even) { // double rounding error upward | |
4389 | // res = res - 1 | |
4390 | res1--; // res1 becomes odd | |
4391 | is_midpoint_lt_even = 0; | |
4392 | is_inexact_lt_midpoint = 1; | |
4393 | } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
4394 | is_midpoint_gt_even) { // double rounding error downward | |
4395 | // res = res + 1 | |
4396 | res1++; // res1 becomes odd | |
4397 | is_midpoint_gt_even = 0; | |
4398 | is_inexact_gt_midpoint = 1; | |
4399 | } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
4400 | !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
4401 | // if this rounding was exact the result may still be | |
4402 | // inexact because of the previous roundings | |
4403 | if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
4404 | is_inexact_gt_midpoint = 1; | |
4405 | } | |
4406 | if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
4407 | is_inexact_lt_midpoint = 1; | |
4408 | } | |
4409 | } else if (is_midpoint_gt_even && | |
4410 | (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
4411 | // pulled up to a midpoint | |
4412 | is_inexact_lt_midpoint = 1; | |
4413 | is_inexact_gt_midpoint = 0; | |
4414 | is_midpoint_lt_even = 0; | |
4415 | is_midpoint_gt_even = 0; | |
4416 | } else if (is_midpoint_lt_even && | |
4417 | (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
4418 | // pulled down to a midpoint | |
4419 | is_inexact_lt_midpoint = 0; | |
4420 | is_inexact_gt_midpoint = 1; | |
4421 | is_midpoint_lt_even = 0; | |
4422 | is_midpoint_gt_even = 0; | |
4423 | } else { | |
4424 | ; | |
4425 | } | |
4426 | } | |
4427 | // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16) | |
4428 | // and the result is tiny and exact | |
4429 | ||
4430 | // check for inexact result | |
4431 | if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
4432 | is_midpoint_lt_even || is_midpoint_gt_even || | |
4433 | is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || | |
4434 | is_midpoint_lt_even0 || is_midpoint_gt_even0) { | |
4435 | // set the inexact flag and the underflow flag | |
4436 | *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); | |
4437 | } | |
4438 | } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
4439 | is_midpoint_lt_even || is_midpoint_gt_even) { | |
4440 | *pfpsf |= INEXACT_EXCEPTION; | |
4441 | } | |
4442 | // this is the result rounded correctly to nearest, with bounded exponent | |
4443 | ||
4444 | if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction | |
4445 | // res = res + 1 | |
4446 | res1++; // res1 is now odd | |
4447 | } // else the result is already correct | |
4448 | ||
4449 | // assemble the result | |
4450 | if (res1 < 0x0020000000000000ull) { // res < 2^53 | |
4451 | res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1; | |
4452 | } else { // res1 >= 2^53 | |
4453 | res1 = sign | MASK_STEERING_BITS | | |
4454 | ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2); | |
4455 | } | |
4456 | *pfpsf |= save_fpsf; | |
4457 | BID_RETURN (res1); | |
4458 | } |