]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/libbid/bid128_fma.c
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / libbid / bid128_fma.c
CommitLineData
8d9254fc 1/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
200359e8
L
2
3This file is part of GCC.
4
5GCC is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
748086b7 7Software Foundation; either version 3, or (at your option) any later
200359e8
L
8version.
9
200359e8
L
10GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13for more details.
14
748086b7
JJ
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22<http://www.gnu.org/licenses/>. */
200359e8
L
23
24/*****************************************************************************
25 *
26 * BID128 fma x * y + z
27 *
28 ****************************************************************************/
29
200359e8
L
30#include "bid_internal.h"
31
32static void
b2a00c89
L
33rounding_correction (unsigned int rnd_mode,
34 unsigned int is_inexact_lt_midpoint,
35 unsigned int is_inexact_gt_midpoint,
36 unsigned int is_midpoint_lt_even,
37 unsigned int is_midpoint_gt_even,
38 int unbexp,
39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40 // unbiased true exponent unbexp may be larger than emax
200359e8
L
41
42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient
b2a00c89 43 // (the exponent field is ignored, as unbexp is used instead)
200359e8
L
44 UINT64 sign, exp;
45 UINT64 C_hi, C_lo;
46
47 // general correction from RN to RA, RM, RP, RZ
48 // Note: if the result is negative, then is_inexact_lt_midpoint,
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50 // have to be considered as if determined for the absolute value of the
51 // result (so they seem to be reversed)
52
53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54 is_midpoint_lt_even || is_midpoint_gt_even) {
55 *ptrfpsf |= INEXACT_EXCEPTION;
56 }
57 // apply correction to result calculated with unbounded exponent
58 sign = res.w[1] & MASK_SIGN;
59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60 C_hi = res.w[1] & MASK_COEFF;
61 C_lo = res.w[0];
b2a00c89 62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
200359e8 63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
b2a00c89
L
64 is_midpoint_gt_even))) ||
65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
200359e8
L
66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67 is_midpoint_gt_even)))) {
68 // C = C + 1
69 C_lo = C_lo + 1;
70 if (C_lo == 0)
71 C_hi = C_hi + 1;
72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73 // C = 10^34 => rounding overflow
74 C_hi = 0x0000314dc6448d93ull;
75 C_lo = 0x38c15b0a00000000ull; // 10^33
76 // exp = exp + EXP_P1;
77 unbexp = unbexp + 1;
78 exp = (UINT64) (unbexp + 6176) << 49;
79 }
80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
b2a00c89 81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
200359e8
L
82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83 // C = C - 1
84 C_lo = C_lo - 1;
85 if (C_lo == 0xffffffffffffffffull)
86 C_hi--;
87 // check if we crossed into the lower decade
b2a00c89
L
88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89 // C = 10^33 - 1
90 if (exp > 0) {
91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92 C_lo = 0x378d8e63ffffffffull;
93 // exp = exp - EXP_P1;
94 unbexp = unbexp - 1;
95 exp = (UINT64) (unbexp + 6176) << 49;
0f8f303b
MC
96 } else { // if exp = 0 the result is tiny & inexact
97 *ptrfpsf |= UNDERFLOW_EXCEPTION;
b2a00c89 98 }
200359e8
L
99 }
100 } else {
101 ; // the result is already correct
102 }
103 if (unbexp > expmax) { // 6111
104 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
105 exp = 0;
106 if (!sign) { // result is positive
107 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
108 C_hi = 0x7800000000000000ull;
109 C_lo = 0x0000000000000000ull;
110 } else { // res = +MAXFP = (10^34-1) * 10^emax
111 C_hi = 0x5fffed09bead87c0ull;
112 C_lo = 0x378d8e63ffffffffull;
113 }
114 } else { // result is negative
115 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
116 C_hi = 0xf800000000000000ull;
117 C_lo = 0x0000000000000000ull;
118 } else { // res = -MAXFP = -(10^34-1) * 10^emax
119 C_hi = 0xdfffed09bead87c0ull;
120 C_lo = 0x378d8e63ffffffffull;
121 }
122 }
123 }
124 // assemble the result
125 res.w[1] = sign | exp | C_hi;
126 res.w[0] = C_lo;
127 *ptrres = res;
128}
129
130static void
131add256 (UINT256 x, UINT256 y, UINT256 * pz) {
132 // *z = x + yl assume the sum fits in 256 bits
133 UINT256 z;
134 z.w[0] = x.w[0] + y.w[0];
135 if (z.w[0] < x.w[0]) {
136 x.w[1]++;
137 if (x.w[1] == 0x0000000000000000ull) {
138 x.w[2]++;
139 if (x.w[2] == 0x0000000000000000ull) {
140 x.w[3]++;
141 }
142 }
143 }
144 z.w[1] = x.w[1] + y.w[1];
145 if (z.w[1] < x.w[1]) {
146 x.w[2]++;
147 if (x.w[2] == 0x0000000000000000ull) {
148 x.w[3]++;
149 }
150 }
151 z.w[2] = x.w[2] + y.w[2];
152 if (z.w[2] < x.w[2]) {
153 x.w[3]++;
154 }
155 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
156 *pz = z;
157}
158
159static void
160sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
161 // *z = x - y; assume x >= y
162 UINT256 z;
163 z.w[0] = x.w[0] - y.w[0];
164 if (z.w[0] > x.w[0]) {
165 x.w[1]--;
166 if (x.w[1] == 0xffffffffffffffffull) {
167 x.w[2]--;
168 if (x.w[2] == 0xffffffffffffffffull) {
169 x.w[3]--;
170 }
171 }
172 }
173 z.w[1] = x.w[1] - y.w[1];
174 if (z.w[1] > x.w[1]) {
175 x.w[2]--;
176 if (x.w[2] == 0xffffffffffffffffull) {
177 x.w[3]--;
178 }
179 }
180 z.w[2] = x.w[2] - y.w[2];
181 if (z.w[2] > x.w[2]) {
182 x.w[3]--;
183 }
184 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
185 *pz = z;
186}
187
188
189static int
b2a00c89 190nr_digits256 (UINT256 R256) {
200359e8
L
191 int ind;
192 // determine the number of decimal digits in R256
193 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
194 // between 1 and 19 digits
195 for (ind = 1; ind <= 19; ind++) {
b2a00c89 196 if (R256.w[0] < ten2k64[ind]) {
200359e8
L
197 break;
198 }
199 }
200 // ind digits
201 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
b2a00c89
L
202 (R256.w[1] < ten2k128[0].w[1] ||
203 (R256.w[1] == ten2k128[0].w[1]
204 && R256.w[0] < ten2k128[0].w[0]))) {
200359e8
L
205 // 20 digits
206 ind = 20;
207 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
208 // between 21 and 38 digits
209 for (ind = 1; ind <= 18; ind++) {
b2a00c89
L
210 if (R256.w[1] < ten2k128[ind].w[1] ||
211 (R256.w[1] == ten2k128[ind].w[1] &&
212 R256.w[0] < ten2k128[ind].w[0])) {
200359e8
L
213 break;
214 }
215 }
216 // ind + 20 digits
217 ind = ind + 20;
b2a00c89
L
218 } else if (R256.w[3] == 0x0 &&
219 (R256.w[2] < ten2k256[0].w[2] ||
220 (R256.w[2] == ten2k256[0].w[2] &&
221 R256.w[1] < ten2k256[0].w[1]) ||
222 (R256.w[2] == ten2k256[0].w[2] &&
223 R256.w[1] == ten2k256[0].w[1] &&
224 R256.w[0] < ten2k256[0].w[0]))) {
200359e8
L
225 // 39 digits
226 ind = 39;
227 } else {
228 // between 40 and 68 digits
229 for (ind = 1; ind <= 29; ind++) {
b2a00c89
L
230 if (R256.w[3] < ten2k256[ind].w[3] ||
231 (R256.w[3] == ten2k256[ind].w[3] &&
232 R256.w[2] < ten2k256[ind].w[2]) ||
233 (R256.w[3] == ten2k256[ind].w[3] &&
234 R256.w[2] == ten2k256[ind].w[2] &&
235 R256.w[1] < ten2k256[ind].w[1]) ||
236 (R256.w[3] == ten2k256[ind].w[3] &&
237 R256.w[2] == ten2k256[ind].w[2] &&
238 R256.w[1] == ten2k256[ind].w[1] &&
239 R256.w[0] < ten2k256[ind].w[0])) {
200359e8
L
240 break;
241 }
242 }
243 // ind + 39 digits
244 ind = ind + 39;
245 }
246 return (ind);
247}
248
249// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
250// use the rounding information from ptr_is_* to avoid a double rounding error
251static void
252add_and_round (int q3,
253 int q4,
254 int e4,
255 int delta,
256 int p34,
257 UINT64 z_sign,
258 UINT64 p_sign,
259 UINT128 C3,
260 UINT256 C4,
261 int rnd_mode,
262 int *ptr_is_midpoint_lt_even,
263 int *ptr_is_midpoint_gt_even,
264 int *ptr_is_inexact_lt_midpoint,
265 int *ptr_is_inexact_gt_midpoint,
266 _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
267
268 int scale;
269 int x0;
270 int ind;
271 UINT64 R64;
272 UINT128 P128, R128;
273 UINT192 P192, R192;
274 UINT256 R256;
275 int is_midpoint_lt_even = 0;
276 int is_midpoint_gt_even = 0;
277 int is_inexact_lt_midpoint = 0;
278 int is_inexact_gt_midpoint = 0;
279 int is_midpoint_lt_even0 = 0;
280 int is_midpoint_gt_even0 = 0;
281 int is_inexact_lt_midpoint0 = 0;
282 int is_inexact_gt_midpoint0 = 0;
283 int incr_exp = 0;
284 int is_tiny = 0;
285 int lt_half_ulp = 0;
286 int eq_half_ulp = 0;
287 // int gt_half_ulp = 0;
288 UINT128 res = *ptrres;
289
290 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
291 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
292 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
293
294 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
295 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
296 if (scale == 0) {
297 R256.w[3] = 0x0ull;
298 R256.w[2] = 0x0ull;
299 R256.w[1] = C3.w[1];
300 R256.w[0] = C3.w[0];
301 } else if (scale <= 19) { // 10^scale fits in 64 bits
302 P128.w[1] = 0;
b2a00c89 303 P128.w[0] = ten2k64[scale];
200359e8
L
304 __mul_128x128_to_256 (R256, P128, C3);
305 } else if (scale <= 38) { // 10^scale fits in 128 bits
b2a00c89 306 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
200359e8
L
307 } else if (scale <= 57) { // 39 <= scale <= 57
308 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
309 // (10^67 has 223 bits; 10^69 has 230 bits);
310 // must split the computation:
311 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
312 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
313 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
b2a00c89 314 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
200359e8 315 // now multiply R128 by 10^38
b2a00c89 316 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
200359e8
L
317 } else { // 58 <= scale <= 66
318 // 10^scale takes between 193 and 220 bits,
319 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
320 // must split the computation:
321 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
322 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
323 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
324 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
325 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
b2a00c89 326 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
200359e8 327 // now calculate 10*38 * 10^(scale-38) * C3
b2a00c89 328 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
200359e8
L
329 }
330 // C3 * 10^scale is now in R256
331
332 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
333 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
334 // possible
335 // add/subtract C4 and C3 * 10^scale; the exponent is e4
336 if (p_sign == z_sign) { // R256 = C4 + R256
337 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
338 // but may require rounding
339 add256 (C4, R256, &R256);
340 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
341 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
342 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
343 // but may require rounding
344
345 // compare first R256 = C3 * 10^scale and C4
b2a00c89 346 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
200359e8
L
347 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
348 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
349 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
350 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
351 // but may require rounding
352 sub256 (R256, C4, &R256);
353 // flip p_sign too, because the result has the sign of z
354 p_sign = z_sign;
355 } else { // if C4 > C3 * 10^scale
356 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
357 // but may require rounding
358 sub256 (C4, R256, &R256);
359 }
360 // if the result is pure zero, the sign depends on the rounding mode
361 // (x*y and z had opposite signs)
362 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
363 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
364 if (rnd_mode != ROUNDING_DOWN)
365 p_sign = 0x0000000000000000ull;
366 else
367 p_sign = 0x8000000000000000ull;
368 // the exponent is max (e4, expmin)
369 if (e4 < -6176)
370 e4 = expmin;
371 // assemble result
372 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
373 res.w[0] = 0x0;
374 *ptrres = res;
375 return;
376 }
377 }
378
379 // determine the number of decimal digits in R256
b2a00c89 380 ind = nr_digits256 (R256);
200359e8
L
381
382 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
383 // round to the destination precision, with unbounded exponent
384
385 if (ind <= p34) {
386 // result rounded to the destination precision with unbounded exponent
387 // is exact
388 if (ind + e4 < p34 + expmin) {
389 is_tiny = 1; // applies to all rounding modes
390 }
391 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
392 res.w[0] = R256.w[0];
393 // Note: res is correct only if expmin <= e4 <= expmax
394 } else { // if (ind > p34)
395 // if more than P digits, round to nearest to P digits
396 // round R256 to p34 digits
397 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
398 if (ind <= 38) {
399 P128.w[1] = R256.w[1];
400 P128.w[0] = R256.w[0];
b2a00c89
L
401 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
402 &is_midpoint_lt_even, &is_midpoint_gt_even,
403 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
404 } else if (ind <= 57) {
405 P192.w[2] = R256.w[2];
406 P192.w[1] = R256.w[1];
407 P192.w[0] = R256.w[0];
b2a00c89
L
408 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
409 &is_midpoint_lt_even, &is_midpoint_gt_even,
410 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
411 R128.w[1] = R192.w[1];
412 R128.w[0] = R192.w[0];
413 } else { // if (ind <= 68)
b2a00c89
L
414 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
415 &is_midpoint_lt_even, &is_midpoint_gt_even,
416 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
417 R128.w[1] = R256.w[1];
418 R128.w[0] = R256.w[0];
419 }
420 // the rounded result has p34 = 34 digits
421 e4 = e4 + x0 + incr_exp;
422 if (rnd_mode == ROUNDING_TO_NEAREST) {
423 if (e4 < expmin) {
424 is_tiny = 1; // for other rounding modes apply correction
425 }
426 } else {
427 // for RM, RP, RZ, RA apply correction in order to determine tininess
428 // but do not save the result; apply the correction to
429 // (-1)^p_sign * significand * 10^0
430 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
431 P128.w[0] = R128.w[0];
432 rounding_correction (rnd_mode,
b2a00c89
L
433 is_inexact_lt_midpoint,
434 is_inexact_gt_midpoint, is_midpoint_lt_even,
435 is_midpoint_gt_even, 0, &P128, ptrfpsf);
200359e8
L
436 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
437 // the number of digits in the significand is p34 = 34
438 if (e4 + scale < expmin) {
439 is_tiny = 1;
440 }
441 }
442 ind = p34; // the number of decimal digits in the signifcand of res
443 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
444 res.w[0] = R128.w[0];
445 // Note: res is correct only if expmin <= e4 <= expmax
446 // set the inexact flag after rounding with bounded exponent, if any
447 }
448 // at this point we have the result rounded with unbounded exponent in
449 // res and we know its tininess:
450 // res = (-1)^p_sign * significand * 10^e4,
451 // where q (significand) = ind <= p34
452 // Note: res is correct only if expmin <= e4 <= expmax
453
454 // check for overflow if RN
455 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
456 res.w[1] = p_sign | 0x7800000000000000ull;
457 res.w[0] = 0x0000000000000000ull;
458 *ptrres = res;
459 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
460 return; // BID_RETURN (res)
461 } // else not overflow or not RN, so continue
462
463 // if (e4 >= expmin) we have the result rounded with bounded exponent
464 if (e4 < expmin) {
465 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
466 // where the result rounded [at most] once is
467 // (-1)^p_sign * significand_res * 10^e4
468
469 // avoid double rounding error
470 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
471 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
472 is_midpoint_lt_even0 = is_midpoint_lt_even;
473 is_midpoint_gt_even0 = is_midpoint_gt_even;
474 is_inexact_lt_midpoint = 0;
475 is_inexact_gt_midpoint = 0;
476 is_midpoint_lt_even = 0;
477 is_midpoint_gt_even = 0;
478
479 if (x0 > ind) {
480 // nothing is left of res when moving the decimal point left x0 digits
481 is_inexact_lt_midpoint = 1;
482 res.w[1] = p_sign | 0x0000000000000000ull;
483 res.w[0] = 0x0000000000000000ull;
484 e4 = expmin;
485 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
486 // this is <, =, or > 1/2 ulp
487 // compare the ind-digit value in the significand of res with
488 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
489 // less than, equal to, or greater than 1/2 ulp (significand of res)
490 R128.w[1] = res.w[1] & MASK_COEFF;
491 R128.w[0] = res.w[0];
492 if (ind <= 19) {
b2a00c89 493 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
200359e8
L
494 lt_half_ulp = 1;
495 is_inexact_lt_midpoint = 1;
b2a00c89 496 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
200359e8
L
497 eq_half_ulp = 1;
498 is_midpoint_gt_even = 1;
499 } else { // > 1/2 ulp
500 // gt_half_ulp = 1;
501 is_inexact_gt_midpoint = 1;
502 }
503 } else { // if (ind <= 38) {
b2a00c89
L
504 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
505 (R128.w[1] == midpoint128[ind - 20].w[1] &&
506 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
200359e8
L
507 lt_half_ulp = 1;
508 is_inexact_lt_midpoint = 1;
b2a00c89
L
509 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
510 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
200359e8
L
511 eq_half_ulp = 1;
512 is_midpoint_gt_even = 1;
513 } else { // > 1/2 ulp
514 // gt_half_ulp = 1;
515 is_inexact_gt_midpoint = 1;
516 }
517 }
518 if (lt_half_ulp || eq_half_ulp) {
519 // res = +0.0 * 10^expmin
520 res.w[1] = 0x0000000000000000ull;
521 res.w[0] = 0x0000000000000000ull;
522 } else { // if (gt_half_ulp)
523 // res = +1 * 10^expmin
524 res.w[1] = 0x0000000000000000ull;
525 res.w[0] = 0x0000000000000001ull;
526 }
527 res.w[1] = p_sign | res.w[1];
528 e4 = expmin;
529 } else { // if (1 <= x0 <= ind - 1 <= 33)
530 // round the ind-digit result to ind - x0 digits
531
532 if (ind <= 18) { // 2 <= ind <= 18
b2a00c89
L
533 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
534 &is_midpoint_lt_even, &is_midpoint_gt_even,
535 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
536 res.w[1] = 0x0;
537 res.w[0] = R64;
538 } else if (ind <= 38) {
539 P128.w[1] = res.w[1] & MASK_COEFF;
540 P128.w[0] = res.w[0];
b2a00c89
L
541 round128_19_38 (ind, x0, P128, &res, &incr_exp,
542 &is_midpoint_lt_even, &is_midpoint_gt_even,
543 &is_inexact_lt_midpoint,
544 &is_inexact_gt_midpoint);
200359e8
L
545 }
546 e4 = e4 + x0; // expmin
547 // we want the exponent to be expmin, so if incr_exp = 1 then
548 // multiply the rounded result by 10 - it will still fit in 113 bits
549 if (incr_exp) {
550 // 64 x 128 -> 128
551 P128.w[1] = res.w[1] & MASK_COEFF;
552 P128.w[0] = res.w[0];
b2a00c89 553 __mul_64x128_to_128 (res, ten2k64[1], P128);
200359e8
L
554 }
555 res.w[1] =
556 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
557 // avoid a double rounding error
558 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
559 is_midpoint_lt_even) { // double rounding error upward
560 // res = res - 1
561 res.w[0]--;
562 if (res.w[0] == 0xffffffffffffffffull)
563 res.w[1]--;
564 // Note: a double rounding error upward is not possible; for this
565 // the result after the first rounding would have to be 99...95
566 // (35 digits in all), possibly followed by a number of zeros; this
567 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
568 is_midpoint_lt_even = 0;
569 is_inexact_lt_midpoint = 1;
570 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
571 is_midpoint_gt_even) { // double rounding error downward
572 // res = res + 1
573 res.w[0]++;
574 if (res.w[0] == 0)
575 res.w[1]++;
576 is_midpoint_gt_even = 0;
577 is_inexact_gt_midpoint = 1;
578 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89 579 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
200359e8
L
580 // if this second rounding was exact the result may still be
581 // inexact because of the first rounding
582 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
583 is_inexact_gt_midpoint = 1;
584 }
585 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
586 is_inexact_lt_midpoint = 1;
587 }
588 } else if (is_midpoint_gt_even &&
b2a00c89 589 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
200359e8
L
590 // pulled up to a midpoint
591 is_inexact_lt_midpoint = 1;
592 is_inexact_gt_midpoint = 0;
593 is_midpoint_lt_even = 0;
594 is_midpoint_gt_even = 0;
595 } else if (is_midpoint_lt_even &&
b2a00c89 596 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
200359e8
L
597 // pulled down to a midpoint
598 is_inexact_lt_midpoint = 0;
599 is_inexact_gt_midpoint = 1;
600 is_midpoint_lt_even = 0;
601 is_midpoint_gt_even = 0;
602 } else {
603 ;
604 }
605 }
606 }
607 // res contains the correct result
608 // apply correction if not rounding to nearest
609 if (rnd_mode != ROUNDING_TO_NEAREST) {
610 rounding_correction (rnd_mode,
b2a00c89
L
611 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
612 is_midpoint_lt_even, is_midpoint_gt_even,
613 e4, &res, ptrfpsf);
200359e8
L
614 }
615 if (is_midpoint_lt_even || is_midpoint_gt_even ||
616 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
617 // set the inexact flag
618 *ptrfpsf |= INEXACT_EXCEPTION;
619 if (is_tiny)
620 *ptrfpsf |= UNDERFLOW_EXCEPTION;
621 }
622
623 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
624 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
625 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
626 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
627 *ptrres = res;
628 return;
629}
630
631
632#if DECIMAL_CALL_BY_REFERENCE
b2a00c89
L
633static void
634bid128_ext_fma (int *ptr_is_midpoint_lt_even,
635 int *ptr_is_midpoint_gt_even,
636 int *ptr_is_inexact_lt_midpoint,
637 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
638 UINT128 * px, UINT128 * py,
639 UINT128 *
640 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
641 _EXC_INFO_PARAM) {
200359e8
L
642 UINT128 x = *px, y = *py, z = *pz;
643#if !DECIMAL_GLOBAL_ROUNDING
644 unsigned int rnd_mode = *prnd_mode;
645#endif
646#else
b2a00c89
L
647static UINT128
648bid128_ext_fma (int *ptr_is_midpoint_lt_even,
649 int *ptr_is_midpoint_gt_even,
650 int *ptr_is_inexact_lt_midpoint,
651 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
652 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
653 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
200359e8
L
654#endif
655
b2a00c89
L
656 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
657 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
200359e8
L
658 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
659 int true_p_exp;
660 UINT128 C1, C2, C3;
661 UINT256 C4;
662 int q1 = 0, q2 = 0, q3 = 0, q4;
663 int e1, e2, e3, e4;
664 int scale, ind, delta, x0;
665 int p34 = P34; // used to modify the limit on the number of digits
666 BID_UI64DOUBLE tmp;
667 int x_nr_bits, y_nr_bits, z_nr_bits;
668 unsigned int save_fpsf;
669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
670 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
671 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
672 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
673 int incr_exp = 0;
674 int lsb;
675 int lt_half_ulp = 0;
676 int eq_half_ulp = 0;
677 int gt_half_ulp = 0;
678 int is_tiny = 0;
679 UINT64 R64, tmp64;
680 UINT128 P128, R128;
681 UINT192 P192, R192;
682 UINT256 R256;
683
684 // the following are based on the table of special cases for fma; the NaN
b2a00c89 685 // behavior is similar to that of the IA-64 Architecture fma
200359e8
L
686
687 // identify cases where at least one operand is NaN
688
b2a00c89
L
689 BID_SWAP128 (x);
690 BID_SWAP128 (y);
691 BID_SWAP128 (z);
200359e8
L
692 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
693 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
694 // check first for non-canonical NaN payload
695 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
696 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
697 (y.w[0] > 0x38c15b09ffffffffull))) {
698 y.w[1] = y.w[1] & 0xffffc00000000000ull;
699 y.w[0] = 0x0ull;
700 }
701 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
702 // set invalid flag
703 *pfpsf |= INVALID_EXCEPTION;
704 // return quiet (y)
705 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
706 res.w[0] = y.w[0];
707 } else { // y is QNaN
708 // return y
709 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
710 res.w[0] = y.w[0];
711 // if z = SNaN or x = SNaN signal invalid exception
712 if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
713 (x.w[1] & MASK_SNAN) == MASK_SNAN) {
714 // set invalid flag
715 *pfpsf |= INVALID_EXCEPTION;
716 }
717 }
b2a00c89
L
718 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
719 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
720 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
721 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
722 BID_SWAP128 (res);
200359e8
L
723 BID_RETURN (res)
724 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
725 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
726 // check first for non-canonical NaN payload
727 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
728 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
729 (z.w[0] > 0x38c15b09ffffffffull))) {
730 z.w[1] = z.w[1] & 0xffffc00000000000ull;
731 z.w[0] = 0x0ull;
732 }
733 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
734 // set invalid flag
735 *pfpsf |= INVALID_EXCEPTION;
736 // return quiet (z)
737 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
738 res.w[0] = z.w[0];
739 } else { // z is QNaN
740 // return z
741 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
742 res.w[0] = z.w[0];
743 // if x = SNaN signal invalid exception
744 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
745 // set invalid flag
746 *pfpsf |= INVALID_EXCEPTION;
747 }
748 }
b2a00c89
L
749 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
750 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
751 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
752 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
753 BID_SWAP128 (res);
200359e8
L
754 BID_RETURN (res)
755 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
756 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
757 // check first for non-canonical NaN payload
758 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
759 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
760 (x.w[0] > 0x38c15b09ffffffffull))) {
761 x.w[1] = x.w[1] & 0xffffc00000000000ull;
762 x.w[0] = 0x0ull;
763 }
764 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
765 // set invalid flag
766 *pfpsf |= INVALID_EXCEPTION;
767 // return quiet (x)
768 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
769 res.w[0] = x.w[0];
770 } else { // x is QNaN
771 // return x
772 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
773 res.w[0] = x.w[0];
774 }
b2a00c89
L
775 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
776 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
777 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
778 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
779 BID_SWAP128 (res);
200359e8
L
780 BID_RETURN (res)
781 }
782 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
783 // for non-canonical values
784
785 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
786 C1.w[1] = x.w[1] & MASK_COEFF;
787 C1.w[0] = x.w[0];
788 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
789 // if x is not infinity check for non-canonical values - treated as zero
790 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
791 // non-canonical
792 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
793 C1.w[1] = 0; // significand high
794 C1.w[0] = 0; // significand low
795 } else { // G0_G1 != 11
796 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
797 if (C1.w[1] > 0x0001ed09bead87c0ull ||
b2a00c89
L
798 (C1.w[1] == 0x0001ed09bead87c0ull &&
799 C1.w[0] > 0x378d8e63ffffffffull)) {
200359e8
L
800 // x is non-canonical if coefficient is larger than 10^34 -1
801 C1.w[1] = 0;
802 C1.w[0] = 0;
803 } else { // canonical
804 ;
805 }
806 }
807 }
808 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
809 C2.w[1] = y.w[1] & MASK_COEFF;
810 C2.w[0] = y.w[0];
811 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
812 // if y is not infinity check for non-canonical values - treated as zero
813 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
814 // non-canonical
815 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
816 C2.w[1] = 0; // significand high
817 C2.w[0] = 0; // significand low
818 } else { // G0_G1 != 11
819 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
820 if (C2.w[1] > 0x0001ed09bead87c0ull ||
b2a00c89
L
821 (C2.w[1] == 0x0001ed09bead87c0ull &&
822 C2.w[0] > 0x378d8e63ffffffffull)) {
200359e8
L
823 // y is non-canonical if coefficient is larger than 10^34 -1
824 C2.w[1] = 0;
825 C2.w[0] = 0;
826 } else { // canonical
827 ;
828 }
829 }
830 }
831 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
832 C3.w[1] = z.w[1] & MASK_COEFF;
833 C3.w[0] = z.w[0];
834 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
835 // if z is not infinity check for non-canonical values - treated as zero
836 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
837 // non-canonical
838 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
839 C3.w[1] = 0; // significand high
840 C3.w[0] = 0; // significand low
841 } else { // G0_G1 != 11
842 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
843 if (C3.w[1] > 0x0001ed09bead87c0ull ||
b2a00c89
L
844 (C3.w[1] == 0x0001ed09bead87c0ull &&
845 C3.w[0] > 0x378d8e63ffffffffull)) {
200359e8
L
846 // z is non-canonical if coefficient is larger than 10^34 -1
847 C3.w[1] = 0;
848 C3.w[0] = 0;
849 } else { // canonical
850 ;
851 }
852 }
853 }
854
855 p_sign = x_sign ^ y_sign; // sign of the product
856
857 // identify cases where at least one operand is infinity
858
859 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
860 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
861 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
862 if (p_sign == z_sign) {
863 res.w[1] = z_sign | MASK_INF;
864 res.w[0] = 0x0;
865 } else {
866 // return QNaN Indefinite
867 res.w[1] = 0x7c00000000000000ull;
868 res.w[0] = 0x0000000000000000ull;
869 // set invalid flag
870 *pfpsf |= INVALID_EXCEPTION;
871 }
872 } else { // z = 0 or z = f
873 res.w[1] = p_sign | MASK_INF;
874 res.w[0] = 0x0;
875 }
876 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
877 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
878 if (p_sign == z_sign) {
879 res.w[1] = z_sign | MASK_INF;
880 res.w[0] = 0x0;
881 } else {
882 // return QNaN Indefinite
883 res.w[1] = 0x7c00000000000000ull;
884 res.w[0] = 0x0000000000000000ull;
885 // set invalid flag
886 *pfpsf |= INVALID_EXCEPTION;
887 }
888 } else { // z = 0 or z = f
889 res.w[1] = p_sign | MASK_INF;
890 res.w[0] = 0x0;
891 }
892 } else { // y = 0
893 // return QNaN Indefinite
894 res.w[1] = 0x7c00000000000000ull;
895 res.w[0] = 0x0000000000000000ull;
896 // set invalid flag
897 *pfpsf |= INVALID_EXCEPTION;
898 }
b2a00c89
L
899 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
900 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
901 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
902 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
903 BID_SWAP128 (res);
200359e8
L
904 BID_RETURN (res)
905 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
906 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
907 // x = f, necessarily
908 if ((p_sign != z_sign)
909 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
910 // return QNaN Indefinite
911 res.w[1] = 0x7c00000000000000ull;
912 res.w[0] = 0x0000000000000000ull;
913 // set invalid flag
914 *pfpsf |= INVALID_EXCEPTION;
915 } else {
b2a00c89
L
916 res.w[1] = z_sign | MASK_INF;
917 res.w[0] = 0x0;
200359e8
L
918 }
919 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
920 // z = 0, f, inf
921 // return QNaN Indefinite
922 res.w[1] = 0x7c00000000000000ull;
923 res.w[0] = 0x0000000000000000ull;
924 // set invalid flag
925 *pfpsf |= INVALID_EXCEPTION;
926 } else {
927 // x = f and z = 0, f, necessarily
928 res.w[1] = p_sign | MASK_INF;
929 res.w[0] = 0x0;
930 }
b2a00c89
L
931 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
932 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
933 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
934 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
935 BID_SWAP128 (res);
200359e8
L
936 BID_RETURN (res)
937 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
938 // x = 0, f and y = 0, f, necessarily
939 res.w[1] = z_sign | MASK_INF;
940 res.w[0] = 0x0;
b2a00c89
L
941 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
942 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
943 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
944 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
945 BID_SWAP128 (res);
200359e8
L
946 BID_RETURN (res)
947 }
948
949 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
950 if (true_p_exp < -6176)
951 p_exp = 0; // cannot be less than EXP_MIN
200359e8
L
952 else
953 p_exp = (UINT64) (true_p_exp + 6176) << 49;
954
b2a00c89 955 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
200359e8
L
956 // the result is 0
957 if (p_exp < z_exp)
958 res.w[1] = p_exp; // preferred exponent
959 else
960 res.w[1] = z_exp; // preferred exponent
961 if (p_sign == z_sign) {
962 res.w[1] |= z_sign;
963 res.w[0] = 0x0;
964 } else { // x * y and z have opposite signs
965 if (rnd_mode == ROUNDING_DOWN) {
966 // res = -0.0
967 res.w[1] |= MASK_SIGN;
968 res.w[0] = 0x0;
969 } else {
970 // res = +0.0
971 // res.w[1] |= 0x0;
972 res.w[0] = 0x0;
973 }
974 }
b2a00c89
L
975 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
976 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
977 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
978 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
979 BID_SWAP128 (res);
200359e8
L
980 BID_RETURN (res)
981 }
982 // from this point on, we may need to know the number of decimal digits
983 // in the significands of x, y, z when x, y, z != 0
984
985 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
986 // q1 = nr. of decimal digits in x
987 // determine first the nr. of bits in x
988 if (C1.w[1] == 0) {
989 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
990 // split the 64-bit value in two 32-bit halves to avoid rounding errors
991 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
992 tmp.d = (double) (C1.w[0] >> 32); // exact conversion
993 x_nr_bits =
994 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
995 } else { // x < 2^32
996 tmp.d = (double) (C1.w[0]); // exact conversion
997 x_nr_bits =
998 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
999 }
1000 } else { // if x < 2^53
1001 tmp.d = (double) C1.w[0]; // exact conversion
1002 x_nr_bits =
1003 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1004 }
1005 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1006 tmp.d = (double) C1.w[1]; // exact conversion
1007 x_nr_bits =
1008 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1009 }
b2a00c89 1010 q1 = nr_digits[x_nr_bits - 1].digits;
200359e8 1011 if (q1 == 0) {
b2a00c89
L
1012 q1 = nr_digits[x_nr_bits - 1].digits1;
1013 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1014 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1015 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
200359e8
L
1016 q1++;
1017 }
1018 }
1019
1020 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1021 if (C2.w[1] == 0) {
1022 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1023 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1024 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1025 tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1026 y_nr_bits =
1027 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1028 } else { // y < 2^32
1029 tmp.d = (double) C2.w[0]; // exact conversion
1030 y_nr_bits =
1031 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1032 }
1033 } else { // if y < 2^53
1034 tmp.d = (double) C2.w[0]; // exact conversion
1035 y_nr_bits =
1036 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1037 }
1038 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1039 tmp.d = (double) C2.w[1]; // exact conversion
1040 y_nr_bits =
1041 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1042 }
1043
b2a00c89 1044 q2 = nr_digits[y_nr_bits].digits;
200359e8 1045 if (q2 == 0) {
b2a00c89
L
1046 q2 = nr_digits[y_nr_bits].digits1;
1047 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1048 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1049 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
200359e8
L
1050 q2++;
1051 }
1052 }
1053
1054 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1055 if (C3.w[1] == 0) {
1056 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1057 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1058 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1059 tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1060 z_nr_bits =
1061 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1062 } else { // z < 2^32
1063 tmp.d = (double) C3.w[0]; // exact conversion
1064 z_nr_bits =
1065 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1066 }
1067 } else { // if z < 2^53
1068 tmp.d = (double) C3.w[0]; // exact conversion
1069 z_nr_bits =
1070 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1071 }
1072 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1073 tmp.d = (double) C3.w[1]; // exact conversion
1074 z_nr_bits =
1075 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1076 }
1077
b2a00c89 1078 q3 = nr_digits[z_nr_bits].digits;
200359e8 1079 if (q3 == 0) {
b2a00c89
L
1080 q3 = nr_digits[z_nr_bits].digits1;
1081 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1082 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1083 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
200359e8
L
1084 q3++;
1085 }
1086 }
1087
b2a00c89 1088 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
200359e8 1089 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
b2a00c89 1090 // x = 0 or y = 0
200359e8
L
1091 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1092 // the result is z, but need to get the preferred exponent
1093 if (z_exp <= p_exp) { // the preferred exponent is z_exp
1094 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1095 res.w[0] = C3.w[0];
1096 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1097 // return (C3 * 10^scale) * 10^(z_exp - scale)
1098 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1099 scale = p34 - q3;
1100 ind = (z_exp - p_exp) >> 49;
1101 if (ind < scale)
1102 scale = ind;
1103 if (scale == 0) {
1104 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1105 res.w[0] = z.w[0];
1106 } else if (q3 <= 19) { // z fits in 64 bits
1107 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1108 // 64 x 64 C3.w[0] * ten2k64[scale]
1109 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 1110 } else { // 10^scale fits in 128 bits
b2a00c89
L
1111 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1112 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
200359e8
L
1113 }
1114 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1115 // 64 x 128 ten2k64[scale] * C3
1116 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
1117 }
1118 // subtract scale from the exponent
1119 z_exp = z_exp - ((UINT64) scale << 49);
1120 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1121 }
b2a00c89
L
1122 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1123 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1124 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1125 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1126 BID_SWAP128 (res);
200359e8
L
1127 BID_RETURN (res)
1128 } else {
1129 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1130 }
1131
1132 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1133 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1134 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1135 e4 = e1 + e2; // unbiased exponent of the exact x * y
1136
1137 // calculate C1 * C2 and its number of decimal digits, q4
1138
1139 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1140 // where 2 <= q1 + q2 <= 68
1141 // calculate C4 = C1 * C2 and determine q
1142 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1143 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1144 C4.w[0] = C1.w[0] * C2.w[0];
1145 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
b2a00c89 1146 if (C4.w[0] < ten2k64[q1 + q2 - 1])
200359e8
L
1147 q4 = q1 + q2 - 1; // q4 in [1, 18]
1148 else
1149 q4 = q1 + q2; // q4 in [2, 19]
1150 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1151 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1152 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1153 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1154 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
b2a00c89 1155 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
200359e8
L
1156 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1157 q4 = 19; // 19 = q1 + q2 - 1
1158 } else {
1159 // if (C4.w[1] == 0)
1160 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1161 // else
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1163 q4 = 20; // 20 = q1 + q2
1164 }
1165 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1166 // C4 = C1 * C2 fits in 64 or 128 bits
1167 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1168 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1169 if (q1 <= 19) {
1170 __mul_128x64_to_128 (C4, C1.w[0], C2);
1171 } else { // q2 <= 19
1172 __mul_128x64_to_128 (C4, C2.w[0], C1);
1173 }
1174 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
b2a00c89
L
1175 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1176 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1177 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
200359e8
L
1178 // if (C4.w[1] == 0) // q4 = 20, necessarily
1179 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1180 // else
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1182 q4 = q1 + q2 - 1; // q4 in [20, 37]
1183 } else {
1184 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1185 q4 = q1 + q2; // q4 in [21, 38]
1186 }
1187 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1188 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1189 // may replace this by 128x128_to192
1190 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1191 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
b2a00c89
L
1192 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1193 (C4.w[1] == ten2k128[18].w[1]
1194 && C4.w[0] < ten2k128[18].w[0]))) {
200359e8
L
1195 // 18 = 38 - 20 = q1+q2-1 - 20
1196 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1197 q4 = 38; // 38 = q1 + q2 - 1
1198 } else {
1199 // if (C4.w[2] == 0)
1200 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1201 // else
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1203 q4 = 39; // 39 = q1 + q2
1204 }
1205 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1206 // C4 = C1 * C2 fits in 128 or 192 bits
1207 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1208 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1209 // may fit in 64 bits
1210 if (C1.w[1] == 0) { // C1 fits in 64 bits
1211 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1212 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1213 } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1214 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1215 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1216 } else { // both C1 and C2 require 128 bits
1217 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1218 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1219 }
1220 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
b2a00c89
L
1221 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1222 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1223 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1224 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1225 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
200359e8
L
1226 // if (C4.w[2] == 0) // q4 = 39, necessarily
1227 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1228 // else
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1230 q4 = q1 + q2 - 1; // q4 in [39, 56]
1231 } else {
1232 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1233 q4 = q1 + q2; // q4 in [40, 57]
1234 }
1235 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1236 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1237 // may fit in 64 bits
1238 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1239 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1240 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1242 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1243 __mul_128x128_to_256 (C4, C1, C2);
1244 }
1245 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
b2a00c89
L
1246 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1247 (C4.w[2] == ten2k256[18].w[2]
1248 && (C4.w[1] < ten2k256[18].w[1]
1249 || (C4.w[1] == ten2k256[18].w[1]
1250 && C4.w[0] < ten2k256[18].w[0]))))) {
1251 // 18 = 57 - 39 = q1+q2-1 - 39
200359e8
L
1252 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1253 q4 = 57; // 57 = q1 + q2 - 1
1254 } else {
1255 // if (C4.w[3] == 0)
1256 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1257 // else
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1259 q4 = 58; // 58 = q1 + q2
1260 }
1261 } else { // if 59 <= q1 + q2 <= 68
1262 // C4 = C1 * C2 fits in 192 or 256 bits
1263 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1264 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1265 // 64 bits
1266 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1267 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1268 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
b2a00c89
L
1269 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1270 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1271 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1272 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1273 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1274 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1275 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
200359e8
L
1276 // if (C4.w[3] == 0) // q4 = 58, necessarily
1277 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1278 // else
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1280 q4 = q1 + q2 - 1; // q4 in [58, 67]
1281 } else {
1282 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1283 q4 = q1 + q2; // q4 in [59, 68]
1284 }
1285 }
1286
1287 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1288 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1289 *pfpsf = 0;
1290
1291 if (q4 > p34) {
1292
1293 // truncate C4 to p34 digits into res
1294 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1295 x0 = q4 - p34;
1296 if (q4 <= 38) {
1297 P128.w[1] = C4.w[1];
1298 P128.w[0] = C4.w[0];
b2a00c89
L
1299 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1300 &is_midpoint_lt_even, &is_midpoint_gt_even,
1301 &is_inexact_lt_midpoint,
1302 &is_inexact_gt_midpoint);
200359e8
L
1303 } else if (q4 <= 57) { // 35 <= q4 <= 57
1304 P192.w[2] = C4.w[2];
1305 P192.w[1] = C4.w[1];
1306 P192.w[0] = C4.w[0];
b2a00c89
L
1307 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1308 &is_midpoint_lt_even, &is_midpoint_gt_even,
1309 &is_inexact_lt_midpoint,
1310 &is_inexact_gt_midpoint);
200359e8
L
1311 res.w[0] = R192.w[0];
1312 res.w[1] = R192.w[1];
1313 } else { // if (q4 <= 68)
b2a00c89
L
1314 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1315 &is_midpoint_lt_even, &is_midpoint_gt_even,
1316 &is_inexact_lt_midpoint,
1317 &is_inexact_gt_midpoint);
200359e8
L
1318 res.w[0] = R256.w[0];
1319 res.w[1] = R256.w[1];
1320 }
1321 e4 = e4 + x0;
1322 if (incr_exp) {
1323 e4 = e4 + 1;
1324 }
1325 q4 = p34;
1326 // res is now the coefficient of the result rounded to the destination
1327 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1328 } else { // if (q4 <= p34)
1329 // C4 * 10^e4 is the result rounded to the destination precision, with
1330 // unbounded exponent (which is exact)
1331
1332 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1333 // e4 is too large, but can be brought within range by scaling up C4
1334 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1335 // res = (C4 * 10^scale) * 10^expmax
1336 if (q4 <= 19) { // C4 fits in 64 bits
1337 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1338 // 64 x 64 C4.w[0] * ten2k64[scale]
1339 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
200359e8 1340 } else { // 10^scale fits in 128 bits
b2a00c89
L
1341 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1342 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
200359e8
L
1343 }
1344 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1345 // 64 x 128 ten2k64[scale] * CC43
1346 __mul_128x64_to_128 (res, ten2k64[scale], C4);
200359e8
L
1347 }
1348 e4 = e4 - scale; // expmax
1349 q4 = q4 + scale;
1350 } else {
1351 res.w[1] = C4.w[1];
1352 res.w[0] = C4.w[0];
1353 }
1354 // res is the coefficient of the result rounded to the destination
1355 // precision, with unbounded exponent (it has q4 digits); the exponent
1356 // is e4 (exact result)
1357 }
1358
1359 // check for overflow
1360 if (q4 + e4 > p34 + expmax) {
1361 if (rnd_mode == ROUNDING_TO_NEAREST) {
1362 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1363 res.w[0] = 0x0000000000000000ull;
1364 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1365 } else {
1366 res.w[1] = p_sign | res.w[1];
1367 rounding_correction (rnd_mode,
b2a00c89
L
1368 is_inexact_lt_midpoint,
1369 is_inexact_gt_midpoint,
1370 is_midpoint_lt_even, is_midpoint_gt_even,
1371 e4, &res, pfpsf);
200359e8
L
1372 }
1373 *pfpsf |= save_fpsf;
b2a00c89
L
1374 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1375 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1376 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1377 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1378 BID_SWAP128 (res);
200359e8
L
1379 BID_RETURN (res)
1380 }
1381 // check for underflow
1382 if (q4 + e4 < expmin + P34) {
1383 is_tiny = 1; // the result is tiny
1384 if (e4 < expmin) {
1385 // if e4 < expmin, we must truncate more of res
1386 x0 = expmin - e4; // x0 >= 1
1387 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1388 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1389 is_midpoint_lt_even0 = is_midpoint_lt_even;
1390 is_midpoint_gt_even0 = is_midpoint_gt_even;
1391 is_inexact_lt_midpoint = 0;
1392 is_inexact_gt_midpoint = 0;
1393 is_midpoint_lt_even = 0;
1394 is_midpoint_gt_even = 0;
1395 // the number of decimal digits in res is q4
1396 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1397 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
b2a00c89
L
1398 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1399 &is_midpoint_lt_even, &is_midpoint_gt_even,
1400 &is_inexact_lt_midpoint,
1401 &is_inexact_gt_midpoint);
200359e8
L
1402 if (incr_exp) {
1403 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
b2a00c89 1404 R64 = ten2k64[q4 - x0];
200359e8
L
1405 }
1406 // res.w[1] = 0; (from above)
1407 res.w[0] = R64;
1408 } else { // if (q4 <= 34)
1409 // 19 <= q4 <= 38
1410 P128.w[1] = res.w[1];
1411 P128.w[0] = res.w[0];
b2a00c89
L
1412 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1413 &is_midpoint_lt_even, &is_midpoint_gt_even,
1414 &is_inexact_lt_midpoint,
1415 &is_inexact_gt_midpoint);
200359e8
L
1416 if (incr_exp) {
1417 // increase coefficient by a factor of 10; this will be <= 10^33
1418 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1419 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
b2a00c89
L
1420 // res.w[1] = 0;
1421 res.w[0] = ten2k64[q4 - x0];
200359e8 1422 } else { // 20 <= q4 - x0 <= 37
b2a00c89
L
1423 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1424 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
200359e8
L
1425 }
1426 }
1427 }
1428 e4 = e4 + x0; // expmin
1429 } else if (x0 == q4) {
1430 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1431 // determine relationship with 1/2 ulp
1432 if (q4 <= 19) {
b2a00c89 1433 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
200359e8
L
1434 lt_half_ulp = 1;
1435 is_inexact_lt_midpoint = 1;
b2a00c89 1436 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
200359e8
L
1437 eq_half_ulp = 1;
1438 is_midpoint_gt_even = 1;
1439 } else { // > 1/2 ulp
1440 // gt_half_ulp = 1;
1441 is_inexact_gt_midpoint = 1;
1442 }
1443 } else { // if (q4 <= 34)
b2a00c89
L
1444 if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1445 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1446 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
200359e8
L
1447 lt_half_ulp = 1;
1448 is_inexact_lt_midpoint = 1;
b2a00c89
L
1449 } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1450 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
200359e8
L
1451 eq_half_ulp = 1;
1452 is_midpoint_gt_even = 1;
1453 } else { // > 1/2 ulp
1454 // gt_half_ulp = 1;
1455 is_inexact_gt_midpoint = 1;
1456 }
1457 }
1458 if (lt_half_ulp || eq_half_ulp) {
1459 // res = +0.0 * 10^expmin
1460 res.w[1] = 0x0000000000000000ull;
1461 res.w[0] = 0x0000000000000000ull;
1462 } else { // if (gt_half_ulp)
1463 // res = +1 * 10^expmin
1464 res.w[1] = 0x0000000000000000ull;
1465 res.w[0] = 0x0000000000000001ull;
1466 }
1467 e4 = expmin;
1468 } else { // if (x0 > q4)
1469 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1470 res.w[1] = 0;
1471 res.w[0] = 0;
1472 e4 = expmin;
1473 is_inexact_lt_midpoint = 1;
1474 }
1475 // avoid a double rounding error
1476 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
b2a00c89 1477 is_midpoint_lt_even) { // double rounding error upward
200359e8
L
1478 // res = res - 1
1479 res.w[0]--;
1480 if (res.w[0] == 0xffffffffffffffffull)
1481 res.w[1]--;
1482 // Note: a double rounding error upward is not possible; for this
1483 // the result after the first rounding would have to be 99...95
1484 // (35 digits in all), possibly followed by a number of zeros; this
1485 // not possible for f * f + 0
1486 is_midpoint_lt_even = 0;
1487 is_inexact_lt_midpoint = 1;
1488 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1489 is_midpoint_gt_even) { // double rounding error downward
1490 // res = res + 1
1491 res.w[0]++;
1492 if (res.w[0] == 0)
1493 res.w[1]++;
1494 is_midpoint_gt_even = 0;
1495 is_inexact_gt_midpoint = 1;
1496 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89 1497 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
200359e8
L
1498 // if this second rounding was exact the result may still be
1499 // inexact because of the first rounding
1500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1501 is_inexact_gt_midpoint = 1;
1502 }
1503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1504 is_inexact_lt_midpoint = 1;
1505 }
1506 } else if (is_midpoint_gt_even &&
b2a00c89 1507 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
200359e8
L
1508 // pulled up to a midpoint
1509 is_inexact_lt_midpoint = 1;
1510 is_inexact_gt_midpoint = 0;
1511 is_midpoint_lt_even = 0;
1512 is_midpoint_gt_even = 0;
1513 } else if (is_midpoint_lt_even &&
b2a00c89 1514 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
200359e8
L
1515 // pulled down to a midpoint
1516 is_inexact_lt_midpoint = 0;
1517 is_inexact_gt_midpoint = 1;
1518 is_midpoint_lt_even = 0;
1519 is_midpoint_gt_even = 0;
1520 } else {
1521 ;
1522 }
1523 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1524 if (e3 < e4) {
1525 // if (e3 < e4) the preferred exponent is e3
1526 // return (C4 * 10^scale) * 10^(e4 - scale)
1527 // where scale = min (p34-q4, (e4 - e3))
1528 scale = p34 - q4;
1529 ind = e4 - e3;
1530 if (ind < scale)
1531 scale = ind;
1532 if (scale == 0) {
1533 ; // res and e4 are unchanged
1534 } else if (q4 <= 19) { // C4 fits in 64 bits
1535 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1536 // 64 x 64 res.w[0] * ten2k64[scale]
1537 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
200359e8 1538 } else { // 10^scale fits in 128 bits
b2a00c89
L
1539 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1540 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
200359e8
L
1541 }
1542 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1543 // 64 x 128 ten2k64[scale] * C3
1544 __mul_128x64_to_128 (res, ten2k64[scale], res);
200359e8
L
1545 }
1546 // subtract scale from the exponent
1547 e4 = e4 - scale;
1548 }
1549 }
b2a00c89 1550
200359e8
L
1551 // check for inexact result
1552 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1553 is_midpoint_lt_even || is_midpoint_gt_even) {
1554 // set the inexact flag and the underflow flag
1555 *pfpsf |= INEXACT_EXCEPTION;
1556 *pfpsf |= UNDERFLOW_EXCEPTION;
1557 }
1558 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1559 if (rnd_mode != ROUNDING_TO_NEAREST) {
1560 rounding_correction (rnd_mode,
b2a00c89
L
1561 is_inexact_lt_midpoint,
1562 is_inexact_gt_midpoint,
1563 is_midpoint_lt_even, is_midpoint_gt_even,
1564 e4, &res, pfpsf);
200359e8
L
1565 }
1566 *pfpsf |= save_fpsf;
b2a00c89
L
1567 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1568 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1569 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1570 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1571 BID_SWAP128 (res);
200359e8
L
1572 BID_RETURN (res)
1573 }
b2a00c89
L
1574 // no overflow, and no underflow for rounding to nearest
1575 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1576
1577 if (rnd_mode != ROUNDING_TO_NEAREST) {
1578 rounding_correction (rnd_mode,
1579 is_inexact_lt_midpoint,
1580 is_inexact_gt_midpoint,
1581 is_midpoint_lt_even, is_midpoint_gt_even,
1582 e4, &res, pfpsf);
1583 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1584 if (e4 == expmin) {
1585 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1586 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1587 res.w[0] < 0x38c15b0a00000000ull)) {
1588 is_tiny = 1;
1589 }
1590 }
1591 }
200359e8 1592
200359e8
L
1593 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1594 is_midpoint_lt_even || is_midpoint_gt_even) {
1595 // set the inexact flag
1596 *pfpsf |= INEXACT_EXCEPTION;
b2a00c89
L
1597 if (is_tiny)
1598 *pfpsf |= UNDERFLOW_EXCEPTION;
200359e8
L
1599 }
1600
1601 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1602 // need to ensure that the result has the preferred exponent
1603 p_exp = res.w[1] & MASK_EXP;
1604 if (z_exp < p_exp) { // the preferred exponent is z_exp
1605 // signficand of res in C3
1606 C3.w[1] = res.w[1] & MASK_COEFF;
1607 C3.w[0] = res.w[0];
1608 // the number of decimal digits of x * y is q4 <= 34
1609 // Note: the coefficient fits in 128 bits
1610
1611 // return (C3 * 10^scale) * 10^(p_exp - scale)
1612 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1613 scale = p34 - q4;
1614 ind = (p_exp - z_exp) >> 49;
1615 if (ind < scale)
1616 scale = ind;
1617 // subtract scale from the exponent
1618 p_exp = p_exp - ((UINT64) scale << 49);
1619 if (scale == 0) {
1620 ; // leave res unchanged
1621 } else if (q4 <= 19) { // x * y fits in 64 bits
1622 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1623 // 64 x 64 C3.w[0] * ten2k64[scale]
1624 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 1625 } else { // 10^scale fits in 128 bits
b2a00c89
L
1626 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1627 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
200359e8
L
1628 }
1629 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1630 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1631 // 64 x 128 ten2k64[scale] * C3
1632 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
1633 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1634 }
1635 } // else leave the result as it is, because p_exp <= z_exp
1636 }
1637 *pfpsf |= save_fpsf;
b2a00c89
L
1638 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1639 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1640 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1641 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1642 BID_SWAP128 (res);
200359e8
L
1643 BID_RETURN (res)
1644 } // else we have f * f + f
1645
1646 // continue with x = f, y = f, z = f
1647
1648 delta = q3 + e3 - q4 - e4;
1649delta_ge_zero:
1650 if (delta >= 0) {
1651
b2a00c89 1652 if (p34 <= delta - 1 || // Case (1')
200359e8
L
1653 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1654 // check for overflow, which can occur only in Case (1')
1655 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1656 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1657 // condition for (q3 + e3) > (p34 + expmax)
1658 if (rnd_mode == ROUNDING_TO_NEAREST) {
1659 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1660 res.w[0] = 0x0000000000000000ull;
1661 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1662 } else {
1663 if (p_sign == z_sign) {
1664 is_inexact_lt_midpoint = 1;
1665 } else {
1666 is_inexact_gt_midpoint = 1;
1667 }
1668 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1669 scale = p34 - q3;
1670 if (scale == 0) {
1671 res.w[1] = z_sign | C3.w[1];
1672 res.w[0] = C3.w[0];
1673 } else {
1674 if (q3 <= 19) { // C3 fits in 64 bits
1675 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1676 // 64 x 64 C3.w[0] * ten2k64[scale]
1677 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 1678 } else { // 10^scale fits in 128 bits
b2a00c89
L
1679 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1680 __mul_128x64_to_128 (res, C3.w[0],
1681 ten2k128[scale - 20]);
200359e8
L
1682 }
1683 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1684 // 64 x 128 ten2k64[scale] * C3
1685 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
1686 }
1687 // the coefficient in res has q3 + scale = p34 digits
1688 }
1689 e3 = e3 - scale;
1690 res.w[1] = z_sign | res.w[1];
1691 rounding_correction (rnd_mode,
b2a00c89
L
1692 is_inexact_lt_midpoint,
1693 is_inexact_gt_midpoint,
1694 is_midpoint_lt_even, is_midpoint_gt_even,
1695 e3, &res, pfpsf);
200359e8 1696 }
b2a00c89
L
1697 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1698 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1699 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1700 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1701 BID_SWAP128 (res);
200359e8
L
1702 BID_RETURN (res)
1703 }
1704 // res = z
1705 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1706 // return (C3 * 10^scale) * 10^(z_exp - scale)
1707 // where scale = min (p34-q3, z_exp-EMIN)
1708 scale = p34 - q3;
1709 ind = e3 + 6176;
1710 if (ind < scale)
1711 scale = ind;
1712 if (scale == 0) {
1713 res.w[1] = C3.w[1];
1714 res.w[0] = C3.w[0];
1715 } else if (q3 <= 19) { // z fits in 64 bits
1716 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1717 // 64 x 64 C3.w[0] * ten2k64[scale]
1718 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 1719 } else { // 10^scale fits in 128 bits
b2a00c89
L
1720 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1721 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
200359e8
L
1722 }
1723 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1724 // 64 x 128 ten2k64[scale] * C3
1725 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
1726 }
1727 // the coefficient in res has q3 + scale digits
1728 // subtract scale from the exponent
1729 z_exp = z_exp - ((UINT64) scale << 49);
1730 e3 = e3 - scale;
1731 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1732 if (scale + q3 < p34)
1733 *pfpsf |= UNDERFLOW_EXCEPTION;
1734 } else {
1735 scale = 0;
1736 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1737 res.w[0] = C3.w[0];
1738 }
1739
1740 // use the following to avoid double rounding errors when operating on
1741 // mixed formats in rounding to nearest, and for correcting the result
1742 // if not rounding to nearest
1743 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1744 // there is a gap of exactly one digit between the scaled C3 and C4
1745 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
b2a00c89
L
1746 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1747 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1748 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1749 C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1750 // C3 * 10^ scale != 10^(q3-1)
200359e8
L
1751 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1752 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1753 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1754 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1755 // ok from above e3 = (z_exp >> 49) - 6176;
1756 // the result is always inexact
1757 if (q4 == 1) {
1758 R64 = C4.w[0];
1759 } else {
1760 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1761 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1762 if (q4 <= 18) { // 2 <= q4 <= 18
b2a00c89
L
1763 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1764 &is_midpoint_lt_even, &is_midpoint_gt_even,
1765 &is_inexact_lt_midpoint,
1766 &is_inexact_gt_midpoint);
200359e8
L
1767 } else if (q4 <= 38) {
1768 P128.w[1] = C4.w[1];
1769 P128.w[0] = C4.w[0];
b2a00c89
L
1770 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1771 &is_midpoint_lt_even,
1772 &is_midpoint_gt_even,
1773 &is_inexact_lt_midpoint,
1774 &is_inexact_gt_midpoint);
200359e8
L
1775 R64 = R128.w[0]; // one decimal digit
1776 } else if (q4 <= 57) {
1777 P192.w[2] = C4.w[2];
1778 P192.w[1] = C4.w[1];
1779 P192.w[0] = C4.w[0];
b2a00c89
L
1780 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1781 &is_midpoint_lt_even,
1782 &is_midpoint_gt_even,
1783 &is_inexact_lt_midpoint,
1784 &is_inexact_gt_midpoint);
200359e8
L
1785 R64 = R192.w[0]; // one decimal digit
1786 } else { // if (q4 <= 68)
b2a00c89
L
1787 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1788 &is_midpoint_lt_even,
1789 &is_midpoint_gt_even,
1790 &is_inexact_lt_midpoint,
1791 &is_inexact_gt_midpoint);
200359e8
L
1792 R64 = R256.w[0]; // one decimal digit
1793 }
1794 if (incr_exp) {
1795 R64 = 10;
1796 }
1797 }
1798 if (q4 == 1 && C4.w[0] == 5) {
1799 is_inexact_lt_midpoint = 0;
1800 is_inexact_gt_midpoint = 0;
1801 is_midpoint_lt_even = 1;
1802 is_midpoint_gt_even = 0;
1803 } else if ((e3 == expmin) ||
b2a00c89 1804 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
200359e8
L
1805 // result does not change
1806 is_inexact_lt_midpoint = 0;
1807 is_inexact_gt_midpoint = 1;
1808 is_midpoint_lt_even = 0;
1809 is_midpoint_gt_even = 0;
1810 } else {
1811 is_inexact_lt_midpoint = 1;
1812 is_inexact_gt_midpoint = 0;
1813 is_midpoint_lt_even = 0;
1814 is_midpoint_gt_even = 0;
1815 // result decremented is 10^(q3+scale) - 1
1816 if ((q3 + scale) <= 19) {
1817 res.w[1] = 0;
b2a00c89 1818 res.w[0] = ten2k64[q3 + scale];
200359e8 1819 } else { // if ((q3 + scale + 1) <= 35)
b2a00c89
L
1820 res.w[1] = ten2k128[q3 + scale - 20].w[1];
1821 res.w[0] = ten2k128[q3 + scale - 20].w[0];
200359e8
L
1822 }
1823 res.w[0] = res.w[0] - 1; // borrow never occurs
1824 z_exp = z_exp - EXP_P1;
1825 e3 = e3 - 1;
1826 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1827 }
1828 if (e3 == expmin) {
1829 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1830 ; // result not tiny (in round-to-nearest mode)
1831 } else {
1832 *pfpsf |= UNDERFLOW_EXCEPTION;
1833 }
1834 }
b2a00c89 1835 } // end 10^(q3+scale-1)
200359e8
L
1836 // set the inexact flag
1837 *pfpsf |= INEXACT_EXCEPTION;
1838 } else {
1839 if (p_sign == z_sign) {
1840 // if (z_sign), set as if for absolute value
1841 is_inexact_lt_midpoint = 1;
1842 } else { // if (p_sign != z_sign)
1843 // if (z_sign), set as if for absolute value
1844 is_inexact_gt_midpoint = 1;
1845 }
1846 *pfpsf |= INEXACT_EXCEPTION;
1847 }
1848 // the result is always inexact => set the inexact flag
1849 // Determine tininess:
1850 // if (exp > expmin)
1851 // the result is not tiny
1852 // else // if exp = emin
1853 // if (q3 + scale < p34)
1854 // the result is tiny
1855 // else // if (q3 + scale = p34)
1856 // if (C3 * 10^scale > 10^33)
1857 // the result is not tiny
1858 // else // if C3 * 10^scale = 10^33
1859 // if (xy * z > 0)
1860 // the result is not tiny
1861 // else // if (xy * z < 0)
1862 // if (z > 0)
1863 // if rnd_mode != RP
1864 // the result is tiny
1865 // else // if RP
1866 // the result is not tiny
1867 // else // if (z < 0)
1868 // if rnd_mode != RM
1869 // the result is tiny
1870 // else // if RM
1871 // the result is not tiny
1872 // endif
1873 // endif
1874 // endif
1875 // endif
1876 // endif
1877 // endif
b2a00c89
L
1878 if ((e3 == expmin && (q3 + scale) < p34) ||
1879 (e3 == expmin && (q3 + scale) == p34 &&
1880 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high
1881 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
1882 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
200359e8
L
1883 (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1884 *pfpsf |= UNDERFLOW_EXCEPTION;
1885 }
1886 if (rnd_mode != ROUNDING_TO_NEAREST) {
1887 rounding_correction (rnd_mode,
b2a00c89
L
1888 is_inexact_lt_midpoint,
1889 is_inexact_gt_midpoint,
1890 is_midpoint_lt_even, is_midpoint_gt_even,
1891 e3, &res, pfpsf);
200359e8 1892 }
b2a00c89
L
1893 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1894 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1895 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1896 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1897 BID_SWAP128 (res);
200359e8
L
1898 BID_RETURN (res)
1899
1900 } else if (p34 == delta) { // Case (1''B)
1901
1902 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1903 // and C3 can be scaled up to p34 digits if needed
1904
1905 // scale C3 to p34 digits if needed
1906 scale = p34 - q3; // 0 <= scale <= p34 - 1
1907 if (scale == 0) {
1908 res.w[1] = C3.w[1];
1909 res.w[0] = C3.w[0];
1910 } else if (q3 <= 19) { // z fits in 64 bits
1911 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
1912 // 64 x 64 C3.w[0] * ten2k64[scale]
1913 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 1914 } else { // 10^scale fits in 128 bits
b2a00c89
L
1915 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1916 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
200359e8
L
1917 }
1918 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
1919 // 64 x 128 ten2k64[scale] * C3
1920 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
1921 }
1922 // subtract scale from the exponent
1923 z_exp = z_exp - ((UINT64) scale << 49);
1924 e3 = e3 - scale;
1925 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1926
1927 // determine whether x * y is less than, equal to, or greater than
1928 // 1/2 ulp (z)
1929 if (q4 <= 19) {
b2a00c89 1930 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
200359e8 1931 lt_half_ulp = 1;
b2a00c89 1932 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
200359e8
L
1933 eq_half_ulp = 1;
1934 } else { // > 1/2 ulp
1935 gt_half_ulp = 1;
1936 }
1937 } else if (q4 <= 38) {
b2a00c89
L
1938 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1939 (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1940 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
200359e8 1941 lt_half_ulp = 1;
b2a00c89
L
1942 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1943 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
200359e8
L
1944 eq_half_ulp = 1;
1945 } else { // > 1/2 ulp
1946 gt_half_ulp = 1;
1947 }
1948 } else if (q4 <= 58) {
b2a00c89
L
1949 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1950 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1951 C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1952 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1954 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
200359e8 1955 lt_half_ulp = 1;
b2a00c89
L
1956 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1957 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1958 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
200359e8
L
1959 eq_half_ulp = 1;
1960 } else { // > 1/2 ulp
1961 gt_half_ulp = 1;
1962 }
1963 } else {
b2a00c89
L
1964 if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1965 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1966 C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1967 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1969 C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1970 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1971 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1972 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1973 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
200359e8 1974 lt_half_ulp = 1;
b2a00c89
L
1975 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1976 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1977 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1978 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
200359e8
L
1979 eq_half_ulp = 1;
1980 } else { // > 1/2 ulp
1981 gt_half_ulp = 1;
1982 }
1983 }
1984
1985 if (p_sign == z_sign) {
1986 if (lt_half_ulp) {
1987 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1988 // use the following to avoid double rounding errors when operating on
1989 // mixed formats in rounding to nearest
1990 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1991 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1992 // add 1 ulp to the significand
1993 res.w[0]++;
1994 if (res.w[0] == 0x0ull)
1995 res.w[1]++;
1996 // check for rounding overflow, when coeff == 10^34
1997 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
1998 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
1999 e3 = e3 + 1;
2000 // coeff = 10^33
2001 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2002 res.w[1] = 0x0000314dc6448d93ull;
2003 res.w[0] = 0x38c15b0a00000000ull;
2004 }
2005 // end add 1 ulp
2006 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2007 if (eq_half_ulp) {
2008 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2009 } else {
2010 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2011 }
2012 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2013 // leave unchanged
2014 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2015 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2016 }
2017 // the result is always inexact, and never tiny
2018 // set the inexact flag
2019 *pfpsf |= INEXACT_EXCEPTION;
2020 // check for overflow
2021 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2022 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2023 res.w[0] = 0x0000000000000000ull;
2024 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
b2a00c89
L
2025 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2026 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2027 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2028 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2029 BID_SWAP128 (res);
200359e8
L
2030 BID_RETURN (res)
2031 }
2032 if (rnd_mode != ROUNDING_TO_NEAREST) {
2033 rounding_correction (rnd_mode,
b2a00c89
L
2034 is_inexact_lt_midpoint,
2035 is_inexact_gt_midpoint,
2036 is_midpoint_lt_even, is_midpoint_gt_even,
2037 e3, &res, pfpsf);
2038 z_exp = res.w[1] & MASK_EXP;
200359e8
L
2039 }
2040 } else { // if (p_sign != z_sign)
2041 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2042 if (res.w[1] != 0x0000314dc6448d93ull ||
2043 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2044 if (lt_half_ulp) {
2045 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2046 // use the following to avoid double rounding errors when operating
2047 // on mixed formats in rounding to nearest
2048 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2049 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2050 // subtract 1 ulp from the significand
2051 res.w[0]--;
2052 if (res.w[0] == 0xffffffffffffffffull)
2053 res.w[1]--;
2054 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2055 if (eq_half_ulp) {
2056 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2057 } else {
2058 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2059 }
2060 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2061 // leave unchanged
2062 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2063 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2064 }
2065 // the result is always inexact, and never tiny
2066 // check for overflow for RN
2067 if (e3 > expmax) {
2068 if (rnd_mode == ROUNDING_TO_NEAREST) {
2069 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2070 res.w[0] = 0x0000000000000000ull;
2071 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2072 } else {
2073 rounding_correction (rnd_mode,
b2a00c89
L
2074 is_inexact_lt_midpoint,
2075 is_inexact_gt_midpoint,
2076 is_midpoint_lt_even,
2077 is_midpoint_gt_even, e3, &res,
2078 pfpsf);
200359e8 2079 }
b2a00c89
L
2080 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2081 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2082 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2083 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2084 BID_SWAP128 (res);
200359e8
L
2085 BID_RETURN (res)
2086 }
2087 // set the inexact flag
2088 *pfpsf |= INEXACT_EXCEPTION;
2089 if (rnd_mode != ROUNDING_TO_NEAREST) {
2090 rounding_correction (rnd_mode,
b2a00c89
L
2091 is_inexact_lt_midpoint,
2092 is_inexact_gt_midpoint,
2093 is_midpoint_lt_even,
2094 is_midpoint_gt_even, e3, &res, pfpsf);
200359e8 2095 }
b2a00c89 2096 z_exp = res.w[1] & MASK_EXP;
200359e8
L
2097 } else { // if C3 * 10^scale = 10^33
2098 e3 = (z_exp >> 49) - 6176;
2099 if (e3 > expmin) {
2100 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2101 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2102 if (q4 == 1) {
2103 // if q4 = 1 the result is exact
2104 // result coefficient = 10^34 - C4
2105 res.w[1] = 0x0001ed09bead87c0ull;
2106 res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2107 z_exp = z_exp - EXP_P1;
2108 e3 = e3 - 1;
2109 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2110 } else {
2111 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2112 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2113 if (q4 <= 18) { // 2 <= q4 <= 18
b2a00c89
L
2114 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2115 &is_midpoint_lt_even,
2116 &is_midpoint_gt_even,
2117 &is_inexact_lt_midpoint,
2118 &is_inexact_gt_midpoint);
200359e8 2119 } else if (q4 <= 38) {
b2a00c89
L
2120 P128.w[1] = C4.w[1];
2121 P128.w[0] = C4.w[0];
2122 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2123 &is_midpoint_lt_even,
2124 &is_midpoint_gt_even,
2125 &is_inexact_lt_midpoint,
2126 &is_inexact_gt_midpoint);
2127 R64 = R128.w[0]; // one decimal digit
200359e8 2128 } else if (q4 <= 57) {
b2a00c89
L
2129 P192.w[2] = C4.w[2];
2130 P192.w[1] = C4.w[1];
2131 P192.w[0] = C4.w[0];
2132 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2133 &is_midpoint_lt_even,
2134 &is_midpoint_gt_even,
2135 &is_inexact_lt_midpoint,
2136 &is_inexact_gt_midpoint);
2137 R64 = R192.w[0]; // one decimal digit
200359e8 2138 } else { // if (q4 <= 68)
b2a00c89
L
2139 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2140 &is_midpoint_lt_even,
2141 &is_midpoint_gt_even,
2142 &is_inexact_lt_midpoint,
2143 &is_inexact_gt_midpoint);
2144 R64 = R256.w[0]; // one decimal digit
200359e8
L
2145 }
2146 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89
L
2147 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2148 // the result is exact: 10^34 - R64
2149 // incr_exp = 0 with certainty
2150 z_exp = z_exp - EXP_P1;
2151 e3 = e3 - 1;
2152 res.w[1] =
2153 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2154 res.w[0] = 0x378d8e6400000000ull - R64;
200359e8 2155 } else {
b2a00c89
L
2156 // We want R64 to be the top digit of C4, but we actually
2157 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2158 // because the top digit is (C4 * 10^(-q4+1))RZ
2159 // however, if incr_exp = 1 then R64 = 10 with certainty
2160 if (incr_exp) {
2161 R64 = 10;
2162 }
2163 // the result is inexact as C4 has more than 1 significant digit
2164 // and C3 * 10^scale = 10^33
2165 // example of case that is treated here:
2166 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2167 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2168 // note that (e3 > expmin}
2169 // in order to round, subtract R64 from 10^34 and then compare
2170 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2171 // calculate 10^34 - R64
2172 res.w[1] = 0x0001ed09bead87c0ull;
2173 res.w[0] = 0x378d8e6400000000ull - R64;
2174 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2175 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2176 // R64 is small, 1 <= R64 <= 9
2177 e3 = e3 - 1;
2178 if (is_inexact_lt_midpoint) {
2179 is_inexact_lt_midpoint = 0;
2180 is_inexact_gt_midpoint = 1;
2181 } else if (is_inexact_gt_midpoint) {
2182 is_inexact_gt_midpoint = 0;
2183 is_inexact_lt_midpoint = 1;
2184 } else if (is_midpoint_lt_even) {
2185 is_midpoint_lt_even = 0;
2186 is_midpoint_gt_even = 1;
2187 } else if (is_midpoint_gt_even) {
2188 is_midpoint_gt_even = 0;
2189 is_midpoint_lt_even = 1;
2190 } else {
2191 ;
2192 }
2193 // the result is always inexact, and never tiny
2194 // check for overflow for RN
2195 if (e3 > expmax) {
2196 if (rnd_mode == ROUNDING_TO_NEAREST) {
2197 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2198 res.w[0] = 0x0000000000000000ull;
2199 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2200 } else {
2201 rounding_correction (rnd_mode,
2202 is_inexact_lt_midpoint,
2203 is_inexact_gt_midpoint,
2204 is_midpoint_lt_even,
2205 is_midpoint_gt_even, e3, &res,
2206 pfpsf);
2207 }
2208 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2209 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2210 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2211 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2212 BID_SWAP128 (res);
2213 BID_RETURN (res)
2214 }
2215 // set the inexact flag
2216 *pfpsf |= INEXACT_EXCEPTION;
2217 res.w[1] =
2218 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2219 if (rnd_mode != ROUNDING_TO_NEAREST) {
2220 rounding_correction (rnd_mode,
2221 is_inexact_lt_midpoint,
2222 is_inexact_gt_midpoint,
2223 is_midpoint_lt_even,
2224 is_midpoint_gt_even, e3, &res,
2225 pfpsf);
2226 }
2227 z_exp = res.w[1] & MASK_EXP;
2228 } // end result is inexact
2229 } // end q4 > 1
200359e8
L
2230 } else { // if (e3 = emin)
2231 // if e3 = expmin the result is also tiny (the condition for
2232 // tininess is C4 > 050...0 [q4 digits] which is met because
2233 // the msd of C4 is not zero)
2234 // the result is tiny and inexact in all rounding modes;
2235 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2236 // gt_half_ulp to calculate)
2237 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2238
2239 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2240 if (gt_half_ulp) { // res = 10^33 - 1
2241 res.w[1] = 0x0000314dc6448d93ull;
2242 res.w[0] = 0x38c15b09ffffffffull;
2243 } else {
2244 res.w[1] = 0x0000314dc6448d93ull;
2245 res.w[0] = 0x38c15b0a00000000ull;
2246 }
2247 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2248 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2249
2250 if (eq_half_ulp) {
2251 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2252 } else if (lt_half_ulp) {
2253 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2254 } else { // if (gt_half_ulp)
2255 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2256 }
2257
2258 if (rnd_mode != ROUNDING_TO_NEAREST) {
2259 rounding_correction (rnd_mode,
b2a00c89
L
2260 is_inexact_lt_midpoint,
2261 is_inexact_gt_midpoint,
2262 is_midpoint_lt_even,
2263 is_midpoint_gt_even, e3, &res,
2264 pfpsf);
2265 z_exp = res.w[1] & MASK_EXP;
200359e8 2266 }
b2a00c89 2267 } // end e3 = emin
200359e8
L
2268 // set the inexact flag (if the result was not exact)
2269 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2270 is_midpoint_lt_even || is_midpoint_gt_even)
2271 *pfpsf |= INEXACT_EXCEPTION;
b2a00c89
L
2272 } // end 10^33
2273 } // end if (p_sign != z_sign)
200359e8 2274 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
b2a00c89
L
2275 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2276 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2277 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2278 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2279 BID_SWAP128 (res);
200359e8
L
2280 BID_RETURN (res)
2281
2282 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2283 (q3 <= delta && delta + q4 <= p34) || // Case (3)
2284 (delta < q3 && p34 < delta + q4) || // Case (4)
2285 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
b2a00c89 2286 (delta + q4 < q3)) && // Case (6)
200359e8
L
2287 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2288
2289 // the result has the sign of z
2290
2291 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2292 (delta < q3 && p34 < delta + q4)) { // Case (4)
2293 // round first the sum x * y + z with unbounded exponent
2294 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2295 // 1 <= scale <= 33
2296 // calculate res = C3 * 10^scale
2297 scale = p34 - q3;
2298 x0 = delta + q4 - p34;
2299 } else if (delta + q4 < q3) { // Case (6)
2300 // make Case (6) look like Case (3) or Case (5) with scale = 0
2301 // by scaling up C4 by 10^(q3 - delta - q4)
2302 scale = q3 - delta - q4; // 1 <= scale <= 33
2303 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2304 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
2305 // 64 x 64 C4.w[0] * ten2k64[scale]
2306 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
200359e8 2307 } else { // 10^scale fits in 128 bits
b2a00c89
L
2308 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2309 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
200359e8
L
2310 }
2311 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
2312 // 64 x 128 ten2k64[scale] * C4
2313 __mul_128x64_to_128 (P128, ten2k64[scale], C4);
200359e8
L
2314 }
2315 C4.w[0] = P128.w[0];
2316 C4.w[1] = P128.w[1];
2317 // e4 does not need adjustment, as it is not used from this point on
2318 scale = 0;
2319 x0 = 0;
2320 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2321 } else { // if Case (3) or Case (5)
2322 // Note: Case (3) is similar to Case (2), but scale differs and the
2323 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2324 // result with unbounded exponent)
2325
2326 // calculate first the sum x * y + z with unbounded exponent (exact)
2327 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2328 // 1 <= scale <= 33
2329 // calculate res = C3 * 10^scale
2330 scale = delta + q4 - q3;
2331 x0 = 0;
2332 // Note: the comments which follow refer [mainly] to Case (2)]
2333 }
2334
2335 case2_repeat:
2336 if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2337 // or in Case (4)
2338 res.w[1] = C3.w[1];
2339 res.w[0] = C3.w[0];
2340 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2341 if (scale <= 19) { // 10^scale fits in 64 bits
b2a00c89
L
2342 // 64 x 64 C3.w[0] * ten2k64[scale]
2343 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
200359e8 2344 } else { // 10^scale fits in 128 bits
b2a00c89
L
2345 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2346 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
200359e8
L
2347 }
2348 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
b2a00c89
L
2349 // 64 x 128 ten2k64[scale] * C3
2350 __mul_128x64_to_128 (res, ten2k64[scale], C3);
200359e8
L
2351 }
2352 // e3 is already calculated
2353 e3 = e3 - scale;
2354 // now res = C3 * 10^scale and e3 = e3 - scale
2355 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2356 // because the result was too small
2357
2358 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2359 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2360 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2361 // the rounding fits in 128 bits!)
2362 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2363 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2364 if (x0 == 0) { // this could happen only if we return to case2_repeat, or
b2a00c89 2365 // for Case (3) or Case (6)
200359e8
L
2366 R128.w[1] = C4.w[1];
2367 R128.w[0] = C4.w[0];
2368 } else if (q4 <= 18) {
2369 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
b2a00c89
L
2370 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2371 &is_midpoint_lt_even, &is_midpoint_gt_even,
2372 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
2373 if (incr_exp) {
2374 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
b2a00c89 2375 R64 = ten2k64[q4 - x0];
200359e8
L
2376 }
2377 R128.w[1] = 0;
2378 R128.w[0] = R64;
2379 } else if (q4 <= 38) {
2380 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2381 P128.w[1] = C4.w[1];
2382 P128.w[0] = C4.w[0];
b2a00c89
L
2383 round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2384 &is_midpoint_lt_even, &is_midpoint_gt_even,
2385 &is_inexact_lt_midpoint,
2386 &is_inexact_gt_midpoint);
200359e8
L
2387 if (incr_exp) {
2388 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2389 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
b2a00c89 2390 R128.w[0] = ten2k64[q4 - x0];
200359e8
L
2391 // R128.w[1] stays 0
2392 } else { // 20 <= q4 - x0 <= 37
b2a00c89
L
2393 R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2394 R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
200359e8
L
2395 }
2396 }
2397 } else if (q4 <= 57) {
2398 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2399 P192.w[2] = C4.w[2];
2400 P192.w[1] = C4.w[1];
2401 P192.w[0] = C4.w[0];
b2a00c89
L
2402 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2403 &is_midpoint_lt_even, &is_midpoint_gt_even,
2404 &is_inexact_lt_midpoint,
2405 &is_inexact_gt_midpoint);
200359e8
L
2406 // R192.w[2] is always 0
2407 if (incr_exp) {
2408 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2409 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
b2a00c89 2410 R192.w[0] = ten2k64[q4 - x0];
200359e8
L
2411 // R192.w[1] stays 0
2412 // R192.w[2] stays 0
2413 } else { // 20 <= q4 - x0 <= 33
b2a00c89
L
2414 R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2415 R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
200359e8
L
2416 // R192.w[2] stays 0
2417 }
2418 }
2419 R128.w[1] = R192.w[1];
2420 R128.w[0] = R192.w[0];
2421 } else {
2422 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
b2a00c89
L
2423 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2424 &is_midpoint_lt_even, &is_midpoint_gt_even,
2425 &is_inexact_lt_midpoint,
2426 &is_inexact_gt_midpoint);
200359e8
L
2427 // R256.w[3] and R256.w[2] are always 0
2428 if (incr_exp) {
2429 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2430 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
b2a00c89 2431 R256.w[0] = ten2k64[q4 - x0];
200359e8
L
2432 // R256.w[1] stays 0
2433 // R256.w[2] stays 0
2434 // R256.w[3] stays 0
2435 } else { // 20 <= q4 - x0 <= 33
b2a00c89
L
2436 R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2437 R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
200359e8
L
2438 // R256.w[2] stays 0
2439 // R256.w[3] stays 0
2440 }
2441 }
2442 R128.w[1] = R256.w[1];
2443 R128.w[0] = R256.w[0];
2444 }
2445 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2446 // rounded to nearest, which were copied into R128
2447 if (z_sign == p_sign) {
2448 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2449 // the sum can result in [up to] p34 or p34 + 1 digits
2450 res.w[0] = res.w[0] + R128.w[0];
2451 res.w[1] = res.w[1] + R128.w[1];
2452 if (res.w[0] < R128.w[0])
2453 res.w[1]++; // carry
2454 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2455 if (res.w[1] > 0x0001ed09bead87c0ull ||
2456 (res.w[1] == 0x0001ed09bead87c0ull &&
b2a00c89 2457 res.w[0] > 0x378d8e63ffffffffull)) {
200359e8
L
2458 // avoid double rounding error
2459 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2460 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2461 is_midpoint_lt_even0 = is_midpoint_lt_even;
2462 is_midpoint_gt_even0 = is_midpoint_gt_even;
2463 is_inexact_lt_midpoint = 0;
2464 is_inexact_gt_midpoint = 0;
2465 is_midpoint_lt_even = 0;
2466 is_midpoint_gt_even = 0;
2467 P128.w[1] = res.w[1];
2468 P128.w[0] = res.w[0];
b2a00c89
L
2469 round128_19_38 (35, 1, P128, &res, &incr_exp,
2470 &is_midpoint_lt_even, &is_midpoint_gt_even,
2471 &is_inexact_lt_midpoint,
2472 &is_inexact_gt_midpoint);
200359e8
L
2473 // incr_exp is 0 with certainty in this case
2474 // avoid a double rounding error
2475 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2476 is_midpoint_lt_even) { // double rounding error upward
2477 // res = res - 1
2478 res.w[0]--;
2479 if (res.w[0] == 0xffffffffffffffffull)
2480 res.w[1]--;
2481 // Note: a double rounding error upward is not possible; for this
2482 // the result after the first rounding would have to be 99...95
2483 // (35 digits in all), possibly followed by a number of zeros; this
2484 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2485 is_midpoint_lt_even = 0;
2486 is_inexact_lt_midpoint = 1;
2487 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2488 is_midpoint_gt_even) { // double rounding error downward
2489 // res = res + 1
2490 res.w[0]++;
2491 if (res.w[0] == 0)
2492 res.w[1]++;
2493 is_midpoint_gt_even = 0;
2494 is_inexact_gt_midpoint = 1;
2495 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89
L
2496 !is_inexact_lt_midpoint
2497 && !is_inexact_gt_midpoint) {
200359e8
L
2498 // if this second rounding was exact the result may still be
2499 // inexact because of the first rounding
2500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2501 is_inexact_gt_midpoint = 1;
2502 }
2503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2504 is_inexact_lt_midpoint = 1;
2505 }
2506 } else if (is_midpoint_gt_even &&
b2a00c89
L
2507 (is_inexact_gt_midpoint0
2508 || is_midpoint_lt_even0)) {
200359e8
L
2509 // pulled up to a midpoint
2510 is_inexact_lt_midpoint = 1;
2511 is_inexact_gt_midpoint = 0;
2512 is_midpoint_lt_even = 0;
2513 is_midpoint_gt_even = 0;
2514 } else if (is_midpoint_lt_even &&
b2a00c89
L
2515 (is_inexact_lt_midpoint0
2516 || is_midpoint_gt_even0)) {
200359e8
L
2517 // pulled down to a midpoint
2518 is_inexact_lt_midpoint = 0;
2519 is_inexact_gt_midpoint = 1;
2520 is_midpoint_lt_even = 0;
2521 is_midpoint_gt_even = 0;
2522 } else {
2523 ;
2524 }
2525 // adjust exponent
2526 e3 = e3 + 1;
2527 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2528 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2529 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
b2a00c89 2530 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
200359e8
L
2531 is_inexact_lt_midpoint = 1;
2532 }
2533 }
2534 } else {
2535 // this is the result rounded with unbounded exponent, unless a
2536 // correction is needed
2537 res.w[1] = res.w[1] & MASK_COEFF;
2538 if (lsb == 1) {
2539 if (is_midpoint_gt_even) {
2540 // res = res + 1
2541 is_midpoint_gt_even = 0;
2542 is_midpoint_lt_even = 1;
2543 res.w[0]++;
2544 if (res.w[0] == 0x0)
b2a00c89 2545 res.w[1]++;
200359e8
L
2546 // check for rounding overflow
2547 if (res.w[1] == 0x0001ed09bead87c0ull &&
b2a00c89
L
2548 res.w[0] == 0x378d8e6400000000ull) {
2549 // res = 10^34 => rounding overflow
2550 res.w[1] = 0x0000314dc6448d93ull;
2551 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2552 e3++;
200359e8
L
2553 }
2554 } else if (is_midpoint_lt_even) {
2555 // res = res - 1
2556 is_midpoint_lt_even = 0;
2557 is_midpoint_gt_even = 1;
2558 res.w[0]--;
2559 if (res.w[0] == 0xffffffffffffffffull)
b2a00c89 2560 res.w[1]--;
200359e8
L
2561 // if the result is pure zero, the sign depends on the rounding
2562 // mode (x*y and z had opposite signs)
2563 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
b2a00c89
L
2564 if (rnd_mode != ROUNDING_DOWN)
2565 z_sign = 0x0000000000000000ull;
2566 else
2567 z_sign = 0x8000000000000000ull;
2568 // the exponent is max (e3, expmin)
2569 res.w[1] = 0x0;
2570 res.w[0] = 0x0;
2571 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2572 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2573 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2574 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2575 BID_SWAP128 (res);
2576 BID_RETURN (res)
200359e8
L
2577 }
2578 } else {
2579 ;
2580 }
2581 }
2582 }
2583 } else { // if (z_sign != p_sign)
2584 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2585 // used to swap rounding indicators if p_sign != z_sign
2586 // the sum can result in [up to] p34 or p34 - 1 digits
2587 tmp64 = res.w[0];
2588 res.w[0] = res.w[0] - R128.w[0];
2589 res.w[1] = res.w[1] - R128.w[1];
2590 if (res.w[0] > tmp64)
2591 res.w[1]--; // borrow
2592 // if res < 10^33 and exp > expmin need to decrease x0 and
2593 // increase scale by 1
b2a00c89
L
2594 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2595 (res.w[1] == 0x0000314dc6448d93ull &&
2596 res.w[0] < 0x38c15b0a00000000ull)) ||
2597 (is_inexact_lt_midpoint
2598 && res.w[1] == 0x0000314dc6448d93ull
2599 && res.w[0] == 0x38c15b0a00000000ull))
2600 && x0 >= 1) {
200359e8
L
2601 x0 = x0 - 1;
2602 // first restore e3, otherwise it will be too small
2603 e3 = e3 + scale;
2604 scale = scale + 1;
2605 is_inexact_lt_midpoint = 0;
2606 is_inexact_gt_midpoint = 0;
2607 is_midpoint_lt_even = 0;
2608 is_midpoint_gt_even = 0;
2609 incr_exp = 0;
2610 goto case2_repeat;
2611 }
b2a00c89 2612 // else this is the result rounded with unbounded exponent;
200359e8
L
2613 // because the result has opposite sign to that of C4 which was
2614 // rounded, need to change the rounding indicators
2615 if (is_inexact_lt_midpoint) {
2616 is_inexact_lt_midpoint = 0;
2617 is_inexact_gt_midpoint = 1;
2618 } else if (is_inexact_gt_midpoint) {
2619 is_inexact_gt_midpoint = 0;
2620 is_inexact_lt_midpoint = 1;
2621 } else if (lsb == 0) {
2622 if (is_midpoint_lt_even) {
2623 is_midpoint_lt_even = 0;
2624 is_midpoint_gt_even = 1;
2625 } else if (is_midpoint_gt_even) {
2626 is_midpoint_gt_even = 0;
2627 is_midpoint_lt_even = 1;
2628 } else {
2629 ;
2630 }
2631 } else if (lsb == 1) {
2632 if (is_midpoint_lt_even) {
2633 // res = res + 1
2634 res.w[0]++;
2635 if (res.w[0] == 0x0)
2636 res.w[1]++;
2637 // check for rounding overflow
2638 if (res.w[1] == 0x0001ed09bead87c0ull &&
b2a00c89 2639 res.w[0] == 0x378d8e6400000000ull) {
200359e8
L
2640 // res = 10^34 => rounding overflow
2641 res.w[1] = 0x0000314dc6448d93ull;
2642 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2643 e3++;
2644 }
2645 } else if (is_midpoint_gt_even) {
2646 // res = res - 1
2647 res.w[0]--;
2648 if (res.w[0] == 0xffffffffffffffffull)
2649 res.w[1]--;
2650 // if the result is pure zero, the sign depends on the rounding
2651 // mode (x*y and z had opposite signs)
2652 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2653 if (rnd_mode != ROUNDING_DOWN)
b2a00c89 2654 z_sign = 0x0000000000000000ull;
200359e8 2655 else
b2a00c89 2656 z_sign = 0x8000000000000000ull;
200359e8
L
2657 // the exponent is max (e3, expmin)
2658 res.w[1] = 0x0;
2659 res.w[0] = 0x0;
b2a00c89
L
2660 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2661 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2662 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2663 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2664 BID_SWAP128 (res);
200359e8
L
2665 BID_RETURN (res)
2666 }
2667 } else {
2668 ;
2669 }
2670 } else {
2671 ;
2672 }
2673 }
2674 // check for underflow
b2a00c89
L
2675 if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2676 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2677 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2678 res.w[0] < 0x38c15b0a00000000ull)) {
2679 is_tiny = 1;
2680 }
2681 } else if (e3 < expmin) {
200359e8
L
2682 // the result is tiny, so we must truncate more of res
2683 is_tiny = 1;
2684 x0 = expmin - e3;
2685 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2686 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2687 is_midpoint_lt_even0 = is_midpoint_lt_even;
2688 is_midpoint_gt_even0 = is_midpoint_gt_even;
2689 is_inexact_lt_midpoint = 0;
2690 is_inexact_gt_midpoint = 0;
2691 is_midpoint_lt_even = 0;
2692 is_midpoint_gt_even = 0;
2693 // determine the number of decimal digits in res
2694 if (res.w[1] == 0x0) {
2695 // between 1 and 19 digits
2696 for (ind = 1; ind <= 19; ind++) {
b2a00c89 2697 if (res.w[0] < ten2k64[ind]) {
200359e8
L
2698 break;
2699 }
2700 }
2701 // ind digits
b2a00c89
L
2702 } else if (res.w[1] < ten2k128[0].w[1] ||
2703 (res.w[1] == ten2k128[0].w[1]
2704 && res.w[0] < ten2k128[0].w[0])) {
200359e8
L
2705 // 20 digits
2706 ind = 20;
2707 } else { // between 21 and 38 digits
2708 for (ind = 1; ind <= 18; ind++) {
b2a00c89
L
2709 if (res.w[1] < ten2k128[ind].w[1] ||
2710 (res.w[1] == ten2k128[ind].w[1] &&
2711 res.w[0] < ten2k128[ind].w[0])) {
200359e8
L
2712 break;
2713 }
2714 }
2715 // ind + 20 digits
2716 ind = ind + 20;
2717 }
2718
2719 // at this point ind >= x0; because delta >= 2 on this path, the case
b2a00c89 2720 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
200359e8
L
2721 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2722 // the signs of x * y and z are opposite, and through cancellation
2723 // the most significant decimal digit in res has the weight
2724 // 10^(emin-1); however, it is clear that in this case the most
2725 // significant digit is 9, so the result before rounding is
2726 // 0.9... * 10^emin
2727 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2728 // result with weight of at least 10^emin, and correction for underflow
2729 // can be carried out using the round*_*_2_* () routines
2730 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2731 res.w[1] = 0x0;
2732 res.w[0] = 0x1;
2733 is_inexact_gt_midpoint = 1;
2734 } else if (ind <= 18) { // check that 2 <= ind
2735 // 2 <= ind <= 18, 1 <= x0 <= 17
b2a00c89
L
2736 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2737 &is_midpoint_lt_even, &is_midpoint_gt_even,
2738 &is_inexact_lt_midpoint,
2739 &is_inexact_gt_midpoint);
200359e8 2740 if (incr_exp) {
b2a00c89
L
2741 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2742 R64 = ten2k64[ind - x0];
200359e8
L
2743 }
2744 res.w[1] = 0;
2745 res.w[0] = R64;
2746 } else if (ind <= 38) {
2747 // 19 <= ind <= 38
2748 P128.w[1] = res.w[1];
2749 P128.w[0] = res.w[0];
b2a00c89
L
2750 round128_19_38 (ind, x0, P128, &res, &incr_exp,
2751 &is_midpoint_lt_even, &is_midpoint_gt_even,
2752 &is_inexact_lt_midpoint,
2753 &is_inexact_gt_midpoint);
200359e8 2754 if (incr_exp) {
b2a00c89 2755 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
200359e8 2756 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
b2a00c89 2757 res.w[0] = ten2k64[ind - x0];
200359e8
L
2758 // res.w[1] stays 0
2759 } else { // 20 <= ind - x0 <= 37
b2a00c89
L
2760 res.w[0] = ten2k128[ind - x0 - 20].w[0];
2761 res.w[1] = ten2k128[ind - x0 - 20].w[1];
200359e8
L
2762 }
2763 }
2764 }
2765 // avoid a double rounding error
2766 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2767 is_midpoint_lt_even) { // double rounding error upward
2768 // res = res - 1
2769 res.w[0]--;
2770 if (res.w[0] == 0xffffffffffffffffull)
2771 res.w[1]--;
2772 // Note: a double rounding error upward is not possible; for this
2773 // the result after the first rounding would have to be 99...95
2774 // (35 digits in all), possibly followed by a number of zeros; this
2775 // not possible in Cases (2)-(6) which may get here
2776 is_midpoint_lt_even = 0;
2777 is_inexact_lt_midpoint = 1;
2778 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2779 is_midpoint_gt_even) { // double rounding error downward
2780 // res = res + 1
2781 res.w[0]++;
2782 if (res.w[0] == 0)
2783 res.w[1]++;
2784 is_midpoint_gt_even = 0;
2785 is_inexact_gt_midpoint = 1;
2786 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89 2787 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
200359e8
L
2788 // if this second rounding was exact the result may still be
2789 // inexact because of the first rounding
2790 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2791 is_inexact_gt_midpoint = 1;
2792 }
2793 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2794 is_inexact_lt_midpoint = 1;
2795 }
2796 } else if (is_midpoint_gt_even &&
b2a00c89 2797 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
200359e8
L
2798 // pulled up to a midpoint
2799 is_inexact_lt_midpoint = 1;
2800 is_inexact_gt_midpoint = 0;
2801 is_midpoint_lt_even = 0;
2802 is_midpoint_gt_even = 0;
2803 } else if (is_midpoint_lt_even &&
b2a00c89 2804 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
200359e8
L
2805 // pulled down to a midpoint
2806 is_inexact_lt_midpoint = 0;
2807 is_inexact_gt_midpoint = 1;
2808 is_midpoint_lt_even = 0;
2809 is_midpoint_gt_even = 0;
2810 } else {
2811 ;
2812 }
2813 // adjust exponent
2814 e3 = e3 + x0;
2815 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2816 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2817 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2818 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2819 is_inexact_lt_midpoint = 1;
2820 }
2821 }
b2a00c89
L
2822 } else {
2823 ; // not underflow
200359e8
L
2824 }
2825 // check for inexact result
2826 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2827 is_midpoint_lt_even || is_midpoint_gt_even) {
2828 // set the inexact flag
2829 *pfpsf |= INEXACT_EXCEPTION;
2830 if (is_tiny)
2831 *pfpsf |= UNDERFLOW_EXCEPTION;
2832 }
2833 // now check for significand = 10^34 (may have resulted from going
2834 // back to case2_repeat)
2835 if (res.w[1] == 0x0001ed09bead87c0ull &&
2836 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34
2837 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2838 res.w[0] = 0x38c15b0a00000000ull;
2839 e3 = e3 + 1;
2840 }
2841 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2842 // check for overflow
2843 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2844 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2845 res.w[0] = 0x0000000000000000ull;
2846 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2847 }
2848 if (rnd_mode != ROUNDING_TO_NEAREST) {
2849 rounding_correction (rnd_mode,
b2a00c89
L
2850 is_inexact_lt_midpoint,
2851 is_inexact_gt_midpoint,
2852 is_midpoint_lt_even, is_midpoint_gt_even,
2853 e3, &res, pfpsf);
200359e8 2854 }
b2a00c89
L
2855 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2856 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2857 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2858 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2859 BID_SWAP128 (res);
200359e8
L
2860 BID_RETURN (res)
2861
2862 } else {
2863
2864 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2865 // the signs of x*y and z are opposite; in these cases massive
2866 // cancellation can occur, so it is better to scale either C3 or C4 and
2867 // to perform the subtraction before rounding; rounding is performed
2868 // next, depending on the number of decimal digits in the result and on
2869 // the exponent value
2870 // Note: overlow is not possible in this case
2871 // this is similar to Cases (15), (16), and (17)
2872
2873 if (delta + q4 < q3) { // from Case (6)
2874 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2875 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2876 // and call add_and_round; delta stays positive
2877 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2878 P128.w[1] = C3.w[1];
2879 P128.w[0] = C3.w[0];
2880 C3.w[1] = C4.w[1];
2881 C3.w[0] = C4.w[0];
2882 C4.w[1] = P128.w[1];
2883 C4.w[0] = P128.w[0];
2884 ind = q3;
2885 q3 = q4;
2886 q4 = ind;
2887 ind = e3;
2888 e3 = e4;
2889 e4 = ind;
b2a00c89 2890 tmp_sign = z_sign;
200359e8 2891 z_sign = p_sign;
b2a00c89 2892 p_sign = tmp_sign;
200359e8
L
2893 } else { // from Cases (2), (3), (4), (5)
2894 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2895 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2896 // (16), and (17) if we just change the sign of delta
2897 delta = -delta;
2898 }
2899 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
b2a00c89
L
2900 rnd_mode, &is_midpoint_lt_even,
2901 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2902 &is_inexact_gt_midpoint, pfpsf, &res);
2903 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2904 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2905 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2906 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2907 BID_SWAP128 (res);
200359e8
L
2908 BID_RETURN (res)
2909
2910 }
2911
2912 } else { // if delta < 0
2913
2914 delta = -delta;
2915
2916 if (p34 < q4 && q4 <= delta) { // Case (7)
2917
2918 // truncate C4 to p34 digits into res
2919 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2920 x0 = q4 - p34;
2921 if (q4 <= 38) {
2922 P128.w[1] = C4.w[1];
2923 P128.w[0] = C4.w[0];
b2a00c89
L
2924 round128_19_38 (q4, x0, P128, &res, &incr_exp,
2925 &is_midpoint_lt_even, &is_midpoint_gt_even,
2926 &is_inexact_lt_midpoint,
2927 &is_inexact_gt_midpoint);
200359e8
L
2928 } else if (q4 <= 57) { // 35 <= q4 <= 57
2929 P192.w[2] = C4.w[2];
2930 P192.w[1] = C4.w[1];
2931 P192.w[0] = C4.w[0];
b2a00c89
L
2932 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2933 &is_midpoint_lt_even, &is_midpoint_gt_even,
2934 &is_inexact_lt_midpoint,
2935 &is_inexact_gt_midpoint);
200359e8
L
2936 res.w[0] = R192.w[0];
2937 res.w[1] = R192.w[1];
2938 } else { // if (q4 <= 68)
b2a00c89
L
2939 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2940 &is_midpoint_lt_even, &is_midpoint_gt_even,
2941 &is_inexact_lt_midpoint,
2942 &is_inexact_gt_midpoint);
200359e8
L
2943 res.w[0] = R256.w[0];
2944 res.w[1] = R256.w[1];
2945 }
2946 e4 = e4 + x0;
2947 if (incr_exp) {
2948 e4 = e4 + 1;
2949 }
2950 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2951 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2952 // if C4 rounded to p34 digits is exact then the result is inexact,
2953 // in a way that depends on the signs of x * y and z
2954 if (p_sign == z_sign) {
2955 is_inexact_lt_midpoint = 1;
2956 } else { // if (p_sign != z_sign)
2957 if (res.w[1] != 0x0000314dc6448d93ull ||
2958 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2959 is_inexact_gt_midpoint = 1;
2960 } else { // res = 10^33 and exact is a special case
2961 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2962 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2963 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2964 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2965 if (delta > p34 + 1) { // C3 < 1/2
2966 // res = 10^33, unchanged
2967 is_inexact_gt_midpoint = 1;
2968 } else { // if (delta == p34 + 1)
2969 if (q3 <= 19) {
b2a00c89
L
2970 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2971 // res = 10^33, unchanged
2972 is_inexact_gt_midpoint = 1;
2973 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2974 // res = 10^33, unchanged
2975 is_midpoint_lt_even = 1;
2976 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2977 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2978 res.w[0] = 0x378d8e63ffffffffull;
2979 e4 = e4 - 1;
2980 is_inexact_lt_midpoint = 1;
2981 }
200359e8 2982 } else { // if (20 <= q3 <=34)
b2a00c89
L
2983 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2984 (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2985 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2986 // res = 10^33, unchanged
2987 is_inexact_gt_midpoint = 1;
2988 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2989 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2990 // res = 10^33, unchanged
2991 is_midpoint_lt_even = 1;
2992 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2993 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2994 res.w[0] = 0x378d8e63ffffffffull;
2995 e4 = e4 - 1;
2996 is_inexact_lt_midpoint = 1;
2997 }
200359e8
L
2998 }
2999 }
3000 }
3001 }
3002 } else if (is_midpoint_lt_even) {
3003 if (z_sign != p_sign) {
3004 // needs correction: res = res - 1
3005 res.w[0] = res.w[0] - 1;
3006 if (res.w[0] == 0xffffffffffffffffull)
3007 res.w[1]--;
3008 // if it is (10^33-1)*10^e4 then the corect result is
3009 // (10^34-1)*10(e4-1)
3010 if (res.w[1] == 0x0000314dc6448d93ull &&
3011 res.w[0] == 0x38c15b09ffffffffull) {
3012 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3013 res.w[0] = 0x378d8e63ffffffffull;
3014 e4 = e4 - 1;
3015 }
3016 is_midpoint_lt_even = 0;
3017 is_inexact_lt_midpoint = 1;
3018 } else { // if (z_sign == p_sign)
3019 is_midpoint_lt_even = 0;
3020 is_inexact_gt_midpoint = 1;
3021 }
3022 } else if (is_midpoint_gt_even) {
3023 if (z_sign == p_sign) {
3024 // needs correction: res = res + 1 (cannot cross in the next binade)
3025 res.w[0] = res.w[0] + 1;
3026 if (res.w[0] == 0x0000000000000000ull)
3027 res.w[1]++;
3028 is_midpoint_gt_even = 0;
3029 is_inexact_gt_midpoint = 1;
3030 } else { // if (z_sign != p_sign)
3031 is_midpoint_gt_even = 0;
3032 is_inexact_lt_midpoint = 1;
3033 }
3034 } else {
3035 ; // the rounded result is already correct
3036 }
3037 // check for overflow
3038 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3039 res.w[1] = p_sign | 0x7800000000000000ull;
3040 res.w[0] = 0x0000000000000000ull;
3041 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3042 } else { // no overflow or not RN
3043 p_exp = ((UINT64) (e4 + 6176) << 49);
3044 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3045 }
3046 if (rnd_mode != ROUNDING_TO_NEAREST) {
3047 rounding_correction (rnd_mode,
b2a00c89
L
3048 is_inexact_lt_midpoint,
3049 is_inexact_gt_midpoint,
3050 is_midpoint_lt_even, is_midpoint_gt_even,
3051 e4, &res, pfpsf);
200359e8
L
3052 }
3053 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3054 is_midpoint_lt_even || is_midpoint_gt_even) {
3055 // set the inexact flag
3056 *pfpsf |= INEXACT_EXCEPTION;
3057 }
b2a00c89
L
3058 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3059 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3060 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3061 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3062 BID_SWAP128 (res);
200359e8
L
3063 BID_RETURN (res)
3064
3065 } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3066 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3067 (q4 <= delta && delta + q3 <= p34) || // Case (10)
3068 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3069 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3070 (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3071
3072 // Case (8) is similar to Case (1), with C3 and C4 swapped
3073 // Case (9) is similar to Case (2), with C3 and C4 swapped
3074 // Case (10) is similar to Case (3), with C3 and C4 swapped
3075 // Case (13) is similar to Case (4), with C3 and C4 swapped
3076 // Case (14) is similar to Case (5), with C3 and C4 swapped
3077 // Case (18) is similar to Case (6), with C3 and C4 swapped
3078
3079 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3080 // and go back to delta_ge_zero
3081 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3082 P128.w[1] = C3.w[1];
3083 P128.w[0] = C3.w[0];
3084 C3.w[1] = C4.w[1];
3085 C3.w[0] = C4.w[0];
3086 C4.w[1] = P128.w[1];
3087 C4.w[0] = P128.w[0];
3088 ind = q3;
3089 q3 = q4;
3090 q4 = ind;
3091 ind = e3;
3092 e3 = e4;
3093 e4 = ind;
b2a00c89 3094 tmp_sign = z_sign;
200359e8 3095 z_sign = p_sign;
b2a00c89
L
3096 p_sign = tmp_sign;
3097 tmp.ui64 = z_exp;
200359e8 3098 z_exp = p_exp;
b2a00c89 3099 p_exp = tmp.ui64;
200359e8
L
3100 goto delta_ge_zero;
3101
3102 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3103 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3104
3105 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3106 // 1 <= x0 <= q3 - 1 <= p34 - 1
3107 x0 = e4 - e3; // or x0 = delta + q3 - q4
3108 if (q3 <= 18) { // 2 <= q3 <= 18
b2a00c89
L
3109 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3110 &is_midpoint_lt_even, &is_midpoint_gt_even,
3111 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
200359e8
L
3112 // C3.w[1] = 0;
3113 C3.w[0] = R64;
3114 } else if (q3 <= 38) {
b2a00c89
L
3115 round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3116 &is_midpoint_lt_even, &is_midpoint_gt_even,
3117 &is_inexact_lt_midpoint,
3118 &is_inexact_gt_midpoint);
200359e8
L
3119 C3.w[1] = R128.w[1];
3120 C3.w[0] = R128.w[0];
3121 }
3122 // the rounded result has q3 - x0 digits
3123 // we want the exponent to be e4, so if incr_exp = 1 then
3124 // multiply the rounded result by 10 - it will still fit in 113 bits
3125 if (incr_exp) {
3126 // 64 x 128 -> 128
3127 P128.w[1] = C3.w[1];
3128 P128.w[0] = C3.w[0];
b2a00c89 3129 __mul_64x128_to_128 (C3, ten2k64[1], P128);
200359e8
L
3130 }
3131 e3 = e3 + x0; // this is e4
3132 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3133 // the result will have the sign of x * y; the exponent is e4
3134 R256.w[3] = 0;
3135 R256.w[2] = 0;
3136 R256.w[1] = C3.w[1];
3137 R256.w[0] = C3.w[0];
3138 if (p_sign == z_sign) { // R256 = C4 + R256
3139 add256 (C4, R256, &R256);
3140 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3141 sub256 (C4, R256, &R256); // the result cannot be pure zero
3142 // because the result has opposite sign to that of R256 which was
3143 // rounded, need to change the rounding indicators
3144 lsb = C4.w[0] & 0x01;
3145 if (is_inexact_lt_midpoint) {
3146 is_inexact_lt_midpoint = 0;
3147 is_inexact_gt_midpoint = 1;
3148 } else if (is_inexact_gt_midpoint) {
3149 is_inexact_gt_midpoint = 0;
3150 is_inexact_lt_midpoint = 1;
3151 } else if (lsb == 0) {
3152 if (is_midpoint_lt_even) {
3153 is_midpoint_lt_even = 0;
3154 is_midpoint_gt_even = 1;
3155 } else if (is_midpoint_gt_even) {
3156 is_midpoint_gt_even = 0;
3157 is_midpoint_lt_even = 1;
3158 } else {
3159 ;
3160 }
3161 } else if (lsb == 1) {
3162 if (is_midpoint_lt_even) {
3163 // res = res + 1
3164 R256.w[0]++;
3165 if (R256.w[0] == 0x0) {
3166 R256.w[1]++;
3167 if (R256.w[1] == 0x0) {
b2a00c89
L
3168 R256.w[2]++;
3169 if (R256.w[2] == 0x0) {
3170 R256.w[3]++;
3171 }
200359e8
L
3172 }
3173 }
3174 // no check for rounding overflow - R256 was a difference
3175 } else if (is_midpoint_gt_even) {
3176 // res = res - 1
3177 R256.w[0]--;
3178 if (R256.w[0] == 0xffffffffffffffffull) {
3179 R256.w[1]--;
3180 if (R256.w[1] == 0xffffffffffffffffull) {
b2a00c89
L
3181 R256.w[2]--;
3182 if (R256.w[2] == 0xffffffffffffffffull) {
3183 R256.w[3]--;
3184 }
200359e8
L
3185 }
3186 }
3187 } else {
3188 ;
3189 }
3190 } else {
3191 ;
3192 }
3193 }
3194 // determine the number of decimal digits in R256
b2a00c89 3195 ind = nr_digits256 (R256); // ind >= p34
200359e8
L
3196 // if R256 is sum, then ind > p34; if R256 is a difference, then
3197 // ind >= p34; this means that we can calculate the result rounded to
3198 // the destination precision, with unbounded exponent, starting from R256
3199 // and using the indicators from the rounding of C3 to avoid a double
3200 // rounding error
3201
3202 if (ind < p34) {
3203 ;
3204 } else if (ind == p34) {
3205 // the result rounded to the destination precision with
3206 // unbounded exponent
3207 // is (-1)^p_sign * R256 * 10^e4
3208 res.w[1] = R256.w[1];
3209 res.w[0] = R256.w[0];
3210 } else { // if (ind > p34)
3211 // if more than P digits, round to nearest to P digits
3212 // round R256 to p34 digits
3213 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3214 // save C3 rounding indicators to help avoid double rounding error
3215 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3216 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3217 is_midpoint_lt_even0 = is_midpoint_lt_even;
3218 is_midpoint_gt_even0 = is_midpoint_gt_even;
3219 // initialize rounding indicators
3220 is_inexact_lt_midpoint = 0;
3221 is_inexact_gt_midpoint = 0;
3222 is_midpoint_lt_even = 0;
3223 is_midpoint_gt_even = 0;
3224 // round to p34 digits; the result fits in 113 bits
3225 if (ind <= 38) {
3226 P128.w[1] = R256.w[1];
3227 P128.w[0] = R256.w[0];
b2a00c89
L
3228 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3229 &is_midpoint_lt_even, &is_midpoint_gt_even,
3230 &is_inexact_lt_midpoint,
3231 &is_inexact_gt_midpoint);
200359e8
L
3232 } else if (ind <= 57) {
3233 P192.w[2] = R256.w[2];
3234 P192.w[1] = R256.w[1];
3235 P192.w[0] = R256.w[0];
b2a00c89
L
3236 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3237 &is_midpoint_lt_even, &is_midpoint_gt_even,
3238 &is_inexact_lt_midpoint,
3239 &is_inexact_gt_midpoint);
200359e8
L
3240 R128.w[1] = R192.w[1];
3241 R128.w[0] = R192.w[0];
3242 } else { // if (ind <= 68)
b2a00c89
L
3243 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3244 &is_midpoint_lt_even, &is_midpoint_gt_even,
3245 &is_inexact_lt_midpoint,
3246 &is_inexact_gt_midpoint);
200359e8
L
3247 R128.w[1] = R256.w[1];
3248 R128.w[0] = R256.w[0];
3249 }
3250 // the rounded result has p34 = 34 digits
3251 e4 = e4 + x0 + incr_exp;
3252
3253 res.w[1] = R128.w[1];
3254 res.w[0] = R128.w[0];
3255
3256 // avoid a double rounding error
3257 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3258 is_midpoint_lt_even) { // double rounding error upward
3259 // res = res - 1
3260 res.w[0]--;
3261 if (res.w[0] == 0xffffffffffffffffull)
3262 res.w[1]--;
3263 is_midpoint_lt_even = 0;
3264 is_inexact_lt_midpoint = 1;
3265 // Note: a double rounding error upward is not possible; for this
3266 // the result after the first rounding would have to be 99...95
3267 // (35 digits in all), possibly followed by a number of zeros; this
3268 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3269 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3270 if (res.w[1] == 0x0000314dc6448d93ull &&
b2a00c89 3271 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
200359e8
L
3272 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3273 res.w[0] = 0x378d8e63ffffffffull;
3274 e4--;
3275 }
3276 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3277 is_midpoint_gt_even) { // double rounding error downward
3278 // res = res + 1
3279 res.w[0]++;
3280 if (res.w[0] == 0)
3281 res.w[1]++;
3282 is_midpoint_gt_even = 0;
3283 is_inexact_gt_midpoint = 1;
3284 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89 3285 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
200359e8
L
3286 // if this second rounding was exact the result may still be
3287 // inexact because of the first rounding
3288 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3289 is_inexact_gt_midpoint = 1;
3290 }
3291 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3292 is_inexact_lt_midpoint = 1;
3293 }
3294 } else if (is_midpoint_gt_even &&
b2a00c89 3295 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
200359e8
L
3296 // pulled up to a midpoint
3297 is_inexact_lt_midpoint = 1;
3298 is_inexact_gt_midpoint = 0;
3299 is_midpoint_lt_even = 0;
3300 is_midpoint_gt_even = 0;
3301 } else if (is_midpoint_lt_even &&
b2a00c89 3302 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
200359e8
L
3303 // pulled down to a midpoint
3304 is_inexact_lt_midpoint = 0;
3305 is_inexact_gt_midpoint = 1;
3306 is_midpoint_lt_even = 0;
3307 is_midpoint_gt_even = 0;
3308 } else {
3309 ;
3310 }
3311 }
3312
3313 // determine tininess
3314 if (rnd_mode == ROUNDING_TO_NEAREST) {
3315 if (e4 < expmin) {
3316 is_tiny = 1; // for other rounding modes apply correction
3317 }
3318 } else {
3319 // for RM, RP, RZ, RA apply correction in order to determine tininess
3320 // but do not save the result; apply the correction to
3321 // (-1)^p_sign * res * 10^0
3322 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3323 P128.w[0] = res.w[0];
3324 rounding_correction (rnd_mode,
b2a00c89
L
3325 is_inexact_lt_midpoint,
3326 is_inexact_gt_midpoint,
3327 is_midpoint_lt_even, is_midpoint_gt_even,
3328 0, &P128, pfpsf);
200359e8
L
3329 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3330 // the number of digits in the significand is p34 = 34
3331 if (e4 + scale < expmin) {
3332 is_tiny = 1;
3333 }
3334 }
3335
3336 // the result rounded to the destination precision with unbounded exponent
3337 // is (-1)^p_sign * res * 10^e4
3338 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3339 // res.w[0] unchanged;
3340 // Note: res is correct only if expmin <= e4 <= expmax
3341 ind = p34; // the number of decimal digits in the signifcand of res
3342
3343 // at this point we have the result rounded with unbounded exponent in
3344 // res and we know its tininess:
3345 // res = (-1)^p_sign * significand * 10^e4,
3346 // where q (significand) = ind = p34
3347 // Note: res is correct only if expmin <= e4 <= expmax
3348
3349 // check for overflow if RN
3350 if (rnd_mode == ROUNDING_TO_NEAREST
3351 && (ind + e4) > (p34 + expmax)) {
3352 res.w[1] = p_sign | 0x7800000000000000ull;
3353 res.w[0] = 0x0000000000000000ull;
3354 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
b2a00c89
L
3355 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3356 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3357 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3358 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3359 BID_SWAP128 (res);
200359e8 3360 BID_RETURN (res)
b2a00c89 3361 } // else not overflow or not RN, so continue
200359e8
L
3362
3363 // from this point on this is similar to the last part of the computation
3364 // for Cases (15), (16), (17)
3365
3366 // if (e4 >= expmin) we have the result rounded with bounded exponent
3367 if (e4 < expmin) {
3368 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3369 // where the result rounded [at most] once is
3370 // (-1)^p_sign * significand_res * 10^e4
3371
3372 // avoid double rounding error
3373 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3374 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3375 is_midpoint_lt_even0 = is_midpoint_lt_even;
3376 is_midpoint_gt_even0 = is_midpoint_gt_even;
3377 is_inexact_lt_midpoint = 0;
3378 is_inexact_gt_midpoint = 0;
3379 is_midpoint_lt_even = 0;
3380 is_midpoint_gt_even = 0;
3381
3382 if (x0 > ind) {
3383 // nothing is left of res when moving the decimal point left x0 digits
3384 is_inexact_lt_midpoint = 1;
3385 res.w[1] = p_sign | 0x0000000000000000ull;
3386 res.w[0] = 0x0000000000000000ull;
3387 e4 = expmin;
3388 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3389 // this is <, =, or > 1/2 ulp
3390 // compare the ind-digit value in the significand of res with
3391 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3392 // less than, equal to, or greater than 1/2 ulp (significand of res)
3393 R128.w[1] = res.w[1] & MASK_COEFF;
3394 R128.w[0] = res.w[0];
3395 if (ind <= 19) {
b2a00c89 3396 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
200359e8
L
3397 lt_half_ulp = 1;
3398 is_inexact_lt_midpoint = 1;
b2a00c89 3399 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
200359e8
L
3400 eq_half_ulp = 1;
3401 is_midpoint_gt_even = 1;
3402 } else { // > 1/2 ulp
3403 gt_half_ulp = 1;
3404 is_inexact_gt_midpoint = 1;
3405 }
3406 } else { // if (ind <= 38)
b2a00c89
L
3407 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3408 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3409 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
200359e8
L
3410 lt_half_ulp = 1;
3411 is_inexact_lt_midpoint = 1;
b2a00c89
L
3412 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3413 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
200359e8
L
3414 eq_half_ulp = 1;
3415 is_midpoint_gt_even = 1;
3416 } else { // > 1/2 ulp
3417 gt_half_ulp = 1;
3418 is_inexact_gt_midpoint = 1;
3419 }
3420 }
3421 if (lt_half_ulp || eq_half_ulp) {
3422 // res = +0.0 * 10^expmin
3423 res.w[1] = 0x0000000000000000ull;
3424 res.w[0] = 0x0000000000000000ull;
3425 } else { // if (gt_half_ulp)
3426 // res = +1 * 10^expmin
3427 res.w[1] = 0x0000000000000000ull;
3428 res.w[0] = 0x0000000000000001ull;
3429 }
3430 res.w[1] = p_sign | res.w[1];
3431 e4 = expmin;
3432 } else { // if (1 <= x0 <= ind - 1 <= 33)
3433 // round the ind-digit result to ind - x0 digits
3434
3435 if (ind <= 18) { // 2 <= ind <= 18
b2a00c89
L
3436 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3437 &is_midpoint_lt_even, &is_midpoint_gt_even,
3438 &is_inexact_lt_midpoint,
3439 &is_inexact_gt_midpoint);
200359e8
L
3440 res.w[1] = 0x0;
3441 res.w[0] = R64;
3442 } else if (ind <= 38) {
3443 P128.w[1] = res.w[1] & MASK_COEFF;
3444 P128.w[0] = res.w[0];
b2a00c89
L
3445 round128_19_38 (ind, x0, P128, &res, &incr_exp,
3446 &is_midpoint_lt_even, &is_midpoint_gt_even,
3447 &is_inexact_lt_midpoint,
3448 &is_inexact_gt_midpoint);
200359e8
L
3449 }
3450 e4 = e4 + x0; // expmin
3451 // we want the exponent to be expmin, so if incr_exp = 1 then
3452 // multiply the rounded result by 10 - it will still fit in 113 bits
3453 if (incr_exp) {
3454 // 64 x 128 -> 128
3455 P128.w[1] = res.w[1] & MASK_COEFF;
3456 P128.w[0] = res.w[0];
b2a00c89 3457 __mul_64x128_to_128 (res, ten2k64[1], P128);
200359e8
L
3458 }
3459 res.w[1] =
b2a00c89
L
3460 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3461 w[1] & MASK_COEFF);
200359e8
L
3462 // avoid a double rounding error
3463 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
b2a00c89 3464 is_midpoint_lt_even) { // double rounding error upward
200359e8
L
3465 // res = res - 1
3466 res.w[0]--;
3467 if (res.w[0] == 0xffffffffffffffffull)
3468 res.w[1]--;
3469 // Note: a double rounding error upward is not possible; for this
3470 // the result after the first rounding would have to be 99...95
3471 // (35 digits in all), possibly followed by a number of zeros; this
3472 // not possible in this underflow case
3473 is_midpoint_lt_even = 0;
3474 is_inexact_lt_midpoint = 1;
3475 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
b2a00c89 3476 is_midpoint_gt_even) { // double rounding error downward
200359e8
L
3477 // res = res + 1
3478 res.w[0]++;
3479 if (res.w[0] == 0)
3480 res.w[1]++;
3481 is_midpoint_gt_even = 0;
3482 is_inexact_gt_midpoint = 1;
3483 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
b2a00c89
L
3484 !is_inexact_lt_midpoint
3485 && !is_inexact_gt_midpoint) {
200359e8
L
3486 // if this second rounding was exact the result may still be
3487 // inexact because of the first rounding
3488 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3489 is_inexact_gt_midpoint = 1;
3490 }
3491 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3492 is_inexact_lt_midpoint = 1;
3493 }
3494 } else if (is_midpoint_gt_even &&
b2a00c89
L
3495 (is_inexact_gt_midpoint0
3496 || is_midpoint_lt_even0)) {
200359e8
L
3497 // pulled up to a midpoint
3498 is_inexact_lt_midpoint = 1;
3499 is_inexact_gt_midpoint = 0;
3500 is_midpoint_lt_even = 0;
3501 is_midpoint_gt_even = 0;
3502 } else if (is_midpoint_lt_even &&
b2a00c89
L
3503 (is_inexact_lt_midpoint0
3504 || is_midpoint_gt_even0)) {
200359e8
L
3505 // pulled down to a midpoint
3506 is_inexact_lt_midpoint = 0;
3507 is_inexact_gt_midpoint = 1;
3508 is_midpoint_lt_even = 0;
3509 is_midpoint_gt_even = 0;
3510 } else {
3511 ;
3512 }
3513 }
3514 }
3515 // res contains the correct result
3516 // apply correction if not rounding to nearest
3517 if (rnd_mode != ROUNDING_TO_NEAREST) {
3518 rounding_correction (rnd_mode,
b2a00c89
L
3519 is_inexact_lt_midpoint,
3520 is_inexact_gt_midpoint,
3521 is_midpoint_lt_even, is_midpoint_gt_even,
3522 e4, &res, pfpsf);
200359e8
L
3523 }
3524 if (is_midpoint_lt_even || is_midpoint_gt_even ||
3525 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3526 // set the inexact flag
3527 *pfpsf |= INEXACT_EXCEPTION;
3528 if (is_tiny)
3529 *pfpsf |= UNDERFLOW_EXCEPTION;
3530 }
b2a00c89
L
3531 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3532 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3533 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3534 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3535 BID_SWAP128 (res);
200359e8
L
3536 BID_RETURN (res)
3537
3538 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3539 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3540 (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3541
3542 // calculate first the result rounded to the destination precision, with
3543 // unbounded exponent
3544
3545 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
b2a00c89
L
3546 rnd_mode, &is_midpoint_lt_even,
3547 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3548 &is_inexact_gt_midpoint, pfpsf, &res);
3549 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3550 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3551 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3552 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3553 BID_SWAP128 (res);
200359e8
L
3554 BID_RETURN (res)
3555
3556 } else {
3557 ;
3558 }
3559
b2a00c89 3560 } // end if delta < 0
200359e8 3561
b2a00c89
L
3562 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3563 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3564 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3565 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3566 BID_SWAP128 (res);
200359e8
L
3567 BID_RETURN (res)
3568
3569}
b2a00c89
L
3570
3571
3572#if DECIMAL_CALL_BY_REFERENCE
3573void
3574bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3575 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3576 _EXC_INFO_PARAM) {
3577 UINT128 x = *px, y = *py, z = *pz;
3578#if !DECIMAL_GLOBAL_ROUNDING
3579 unsigned int rnd_mode = *prnd_mode;
3580#endif
3581#else
3582UINT128
3583bid128_fma (UINT128 x, UINT128 y, UINT128 z
3584 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3585 _EXC_INFO_PARAM) {
3586#endif
3587 int is_midpoint_lt_even, is_midpoint_gt_even,
3588 is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3589 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3590
3591#if DECIMAL_CALL_BY_REFERENCE
3592 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3593 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3594 &res, &x, &y, &z
3595 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3596 _EXC_INFO_ARG);
3597#else
3598 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3599 &is_inexact_lt_midpoint,
3600 &is_inexact_gt_midpoint, x, y,
3601 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3602 _EXC_INFO_ARG);
3603#endif
3604 BID_RETURN (res);
3605}
3606
3607
3608#if DECIMAL_CALL_BY_REFERENCE
3609void
3610bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3611 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3612 _EXC_INFO_PARAM) {
3613 UINT64 x = *px, y = *py, z = *pz;
3614#if !DECIMAL_GLOBAL_ROUNDING
3615 unsigned int rnd_mode = *prnd_mode;
3616#endif
3617#else
3618UINT128
3619bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3620 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3621 _EXC_INFO_PARAM) {
3622#endif
3623 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3624 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3625 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3626 UINT128 x1, y1, z1;
3627
3628#if DECIMAL_CALL_BY_REFERENCE
3629 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3630 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3631 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3633 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3634 &res, &x1, &y1, &z1
3635 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3636 _EXC_INFO_ARG);
3637#else
3638 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3639 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3640 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3642 &is_inexact_lt_midpoint,
3643 &is_inexact_gt_midpoint, x1, y1,
3644 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3645 _EXC_INFO_ARG);
3646#endif
3647 BID_RETURN (res);
3648}
3649
3650
3651#if DECIMAL_CALL_BY_REFERENCE
3652void
3653bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3654 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3655 _EXC_INFO_PARAM) {
3656 UINT64 x = *px, y = *py;
3657 UINT128 z = *pz;
3658#if !DECIMAL_GLOBAL_ROUNDING
3659 unsigned int rnd_mode = *prnd_mode;
3660#endif
3661#else
3662UINT128
3663bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3664 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3665 _EXC_INFO_PARAM) {
3666#endif
3667 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3668 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3669 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3670 UINT128 x1, y1;
3671
3672#if DECIMAL_CALL_BY_REFERENCE
3673 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3674 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3675 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3676 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3677 &res, &x1, &y1, &z
3678 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3679 _EXC_INFO_ARG);
3680#else
3681 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3682 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3683 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3684 &is_inexact_lt_midpoint,
3685 &is_inexact_gt_midpoint, x1, y1,
3686 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3687 _EXC_INFO_ARG);
3688#endif
3689 BID_RETURN (res);
3690}
3691
3692
3693#if DECIMAL_CALL_BY_REFERENCE
3694void
3695bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3696 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3697 _EXC_INFO_PARAM) {
3698 UINT64 x = *px, z = *pz;
3699#if !DECIMAL_GLOBAL_ROUNDING
3700 unsigned int rnd_mode = *prnd_mode;
3701#endif
3702#else
3703UINT128
3704bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3705 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3706 _EXC_INFO_PARAM) {
3707#endif
3708 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3709 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3710 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3711 UINT128 x1, z1;
3712
3713#if DECIMAL_CALL_BY_REFERENCE
3714 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3715 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3716 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3717 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3718 &res, &x1, py, &z1
3719 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3720 _EXC_INFO_ARG);
3721#else
3722 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3723 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3724 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3725 &is_inexact_lt_midpoint,
3726 &is_inexact_gt_midpoint, x1, y,
3727 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3728 _EXC_INFO_ARG);
3729#endif
3730 BID_RETURN (res);
3731}
3732
3733
3734#if DECIMAL_CALL_BY_REFERENCE
3735void
3736bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3737 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3738 _EXC_INFO_PARAM) {
3739 UINT64 x = *px;
3740#if !DECIMAL_GLOBAL_ROUNDING
3741 unsigned int rnd_mode = *prnd_mode;
3742#endif
3743#else
3744UINT128
3745bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3746 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3747 _EXC_INFO_PARAM) {
3748#endif
3749 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3750 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3751 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3752 UINT128 x1;
3753
3754#if DECIMAL_CALL_BY_REFERENCE
3755 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3756 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3757 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3758 &res, &x1, py, pz
3759 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3760 _EXC_INFO_ARG);
3761#else
3762 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3763 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3764 &is_inexact_lt_midpoint,
3765 &is_inexact_gt_midpoint, x1, y,
3766 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3767 _EXC_INFO_ARG);
3768#endif
3769 BID_RETURN (res);
3770}
3771
3772
3773#if DECIMAL_CALL_BY_REFERENCE
3774void
3775bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3776 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3777 _EXC_INFO_PARAM) {
3778 UINT64 y = *py, z = *pz;
3779#if !DECIMAL_GLOBAL_ROUNDING
3780 unsigned int rnd_mode = *prnd_mode;
3781#endif
3782#else
3783UINT128
3784bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3785 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3786 _EXC_INFO_PARAM) {
3787#endif
3788 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3789 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3790 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3791 UINT128 y1, z1;
3792
3793#if DECIMAL_CALL_BY_REFERENCE
3794 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3795 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3796 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3797 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3798 &res, px, &y1, &z1
3799 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3800 _EXC_INFO_ARG);
3801#else
3802 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3803 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3804 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3805 &is_inexact_lt_midpoint,
3806 &is_inexact_gt_midpoint, x, y1,
3807 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3808 _EXC_INFO_ARG);
3809#endif
3810 BID_RETURN (res);
3811}
3812
3813
3814#if DECIMAL_CALL_BY_REFERENCE
3815void
3816bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3817 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3818 _EXC_INFO_PARAM) {
3819 UINT64 y = *py;
3820#if !DECIMAL_GLOBAL_ROUNDING
3821 unsigned int rnd_mode = *prnd_mode;
3822#endif
3823#else
3824UINT128
3825bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3826 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3827 _EXC_INFO_PARAM) {
3828#endif
3829 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3830 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3831 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3832 UINT128 y1;
3833
3834#if DECIMAL_CALL_BY_REFERENCE
3835 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3836 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3837 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3838 &res, px, &y1, pz
3839 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3840 _EXC_INFO_ARG);
3841#else
3842 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3843 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3844 &is_inexact_lt_midpoint,
3845 &is_inexact_gt_midpoint, x, y1,
3846 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3847 _EXC_INFO_ARG);
3848#endif
3849 BID_RETURN (res);
3850}
3851
3852
3853#if DECIMAL_CALL_BY_REFERENCE
3854void
3855bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3856 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3857 _EXC_INFO_PARAM) {
3858 UINT64 z = *pz;
3859#if !DECIMAL_GLOBAL_ROUNDING
3860 unsigned int rnd_mode = *prnd_mode;
3861#endif
3862#else
3863UINT128
3864bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3865 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3866 _EXC_INFO_PARAM) {
3867#endif
3868 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3869 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3870 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3871 UINT128 z1;
3872
3873#if DECIMAL_CALL_BY_REFERENCE
3874 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3875 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3876 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3877 &res, px, py, &z1
3878 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3879 _EXC_INFO_ARG);
3880#else
3881 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3882 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3883 &is_inexact_lt_midpoint,
3884 &is_inexact_gt_midpoint, x, y,
3885 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3886 _EXC_INFO_ARG);
3887#endif
3888 BID_RETURN (res);
3889}
3890
3891// Note: bid128qqq_fma is represented by bid128_fma
3892
3893// Note: bid64ddd_fma is represented by bid64_fma
3894
3895#if DECIMAL_CALL_BY_REFERENCE
3896void
3897bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3898 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3899 _EXC_INFO_PARAM) {
3900 UINT64 x = *px, y = *py;
3901#if !DECIMAL_GLOBAL_ROUNDING
3902 unsigned int rnd_mode = *prnd_mode;
3903#endif
3904#else
3905UINT64
3906bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3907 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3908 _EXC_INFO_PARAM) {
3909#endif
3910 UINT64 res1 = 0xbaddbaddbaddbaddull;
3911 UINT128 x1, y1;
3912
3913#if DECIMAL_CALL_BY_REFERENCE
3914 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3915 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3916 bid64qqq_fma (&res1, &x1, &y1, pz
3917 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3918 _EXC_INFO_ARG);
3919#else
3920 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3921 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3922 res1 = bid64qqq_fma (x1, y1, z
3923 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3924 _EXC_INFO_ARG);
3925#endif
3926 BID_RETURN (res1);
3927}
3928
3929
3930#if DECIMAL_CALL_BY_REFERENCE
3931void
3932bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3933 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3934 _EXC_INFO_PARAM) {
3935 UINT64 x = *px, z = *pz;
3936#if !DECIMAL_GLOBAL_ROUNDING
3937 unsigned int rnd_mode = *prnd_mode;
3938#endif
3939#else
3940UINT64
3941bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3942 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3943 _EXC_INFO_PARAM) {
3944#endif
3945 UINT64 res1 = 0xbaddbaddbaddbaddull;
3946 UINT128 x1, z1;
3947
3948#if DECIMAL_CALL_BY_REFERENCE
3949 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3950 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3951 bid64qqq_fma (&res1, &x1, py, &z1
3952 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3953 _EXC_INFO_ARG);
3954#else
3955 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3956 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3957 res1 = bid64qqq_fma (x1, y, z1
3958 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3959 _EXC_INFO_ARG);
3960#endif
3961 BID_RETURN (res1);
3962}
3963
3964
3965#if DECIMAL_CALL_BY_REFERENCE
3966void
3967bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3968 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3969 _EXC_INFO_PARAM) {
3970 UINT64 x = *px;
3971#if !DECIMAL_GLOBAL_ROUNDING
3972 unsigned int rnd_mode = *prnd_mode;
3973#endif
3974#else
3975UINT64
3976bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3977 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3978 _EXC_INFO_PARAM) {
3979#endif
3980 UINT64 res1 = 0xbaddbaddbaddbaddull;
3981 UINT128 x1;
3982
3983#if DECIMAL_CALL_BY_REFERENCE
3984 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3985 bid64qqq_fma (&res1, &x1, py, pz
3986 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3987 _EXC_INFO_ARG);
3988#else
3989 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3990 res1 = bid64qqq_fma (x1, y, z
3991 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3992 _EXC_INFO_ARG);
3993#endif
3994 BID_RETURN (res1);
3995}
3996
3997
3998#if DECIMAL_CALL_BY_REFERENCE
3999void
4000bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4001 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4002 _EXC_INFO_PARAM) {
4003 UINT64 y = *py, z = *pz;
4004#if !DECIMAL_GLOBAL_ROUNDING
4005 unsigned int rnd_mode = *prnd_mode;
4006#endif
4007#else
4008UINT64
4009bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4010 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4011 _EXC_INFO_PARAM) {
4012#endif
4013 UINT64 res1 = 0xbaddbaddbaddbaddull;
4014 UINT128 y1, z1;
4015
4016#if DECIMAL_CALL_BY_REFERENCE
4017 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4018 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4019 bid64qqq_fma (&res1, px, &y1, &z1
4020 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4021 _EXC_INFO_ARG);
4022#else
4023 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4024 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4025 res1 = bid64qqq_fma (x, y1, z1
4026 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4027 _EXC_INFO_ARG);
4028#endif
4029 BID_RETURN (res1);
4030}
4031
4032
4033#if DECIMAL_CALL_BY_REFERENCE
4034void
4035bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4036 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4037 _EXC_INFO_PARAM) {
4038 UINT64 y = *py;
4039#if !DECIMAL_GLOBAL_ROUNDING
4040 unsigned int rnd_mode = *prnd_mode;
4041#endif
4042#else
4043UINT64
4044bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4045 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4046 _EXC_INFO_PARAM) {
4047#endif
4048 UINT64 res1 = 0xbaddbaddbaddbaddull;
4049 UINT128 y1;
4050
4051#if DECIMAL_CALL_BY_REFERENCE
4052 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4053 bid64qqq_fma (&res1, px, &y1, pz
4054 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4055 _EXC_INFO_ARG);
4056#else
4057 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4058 res1 = bid64qqq_fma (x, y1, z
4059 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4060 _EXC_INFO_ARG);
4061#endif
4062 BID_RETURN (res1);
4063}
4064
4065
4066#if DECIMAL_CALL_BY_REFERENCE
4067void
4068bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4069 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4070 _EXC_INFO_PARAM) {
4071 UINT64 z = *pz;
4072#if !DECIMAL_GLOBAL_ROUNDING
4073 unsigned int rnd_mode = *prnd_mode;
4074#endif
4075#else
4076UINT64
4077bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4078 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4079 _EXC_INFO_PARAM) {
4080#endif
4081 UINT64 res1 = 0xbaddbaddbaddbaddull;
4082 UINT128 z1;
4083
4084#if DECIMAL_CALL_BY_REFERENCE
4085 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4086 bid64qqq_fma (&res1, px, py, &z1
4087 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4088 _EXC_INFO_ARG);
4089#else
4090 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4091 res1 = bid64qqq_fma (x, y, z1
4092 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4093 _EXC_INFO_ARG);
4094#endif
4095 BID_RETURN (res1);
4096}
4097
4098
4099#if DECIMAL_CALL_BY_REFERENCE
4100void
4101bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4102 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4103 _EXC_INFO_PARAM) {
4104 UINT128 x = *px, y = *py, z = *pz;
4105#if !DECIMAL_GLOBAL_ROUNDING
4106 unsigned int rnd_mode = *prnd_mode;
4107#endif
4108#else
4109UINT64
4110bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4111 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4112 _EXC_INFO_PARAM) {
4113#endif
4114 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4115 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4116 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4117 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4118 int incr_exp;
4119 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4120 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4121 UINT64 res1 = 0xbaddbaddbaddbaddull;
4122 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4123 UINT64 sign;
4124 UINT64 exp;
4125 int unbexp;
4126 UINT128 C;
4127 BID_UI64DOUBLE tmp;
4128 int nr_bits;
4129 int q, x0;
4130 int scale;
4131 int lt_half_ulp = 0, eq_half_ulp = 0;
4132
4133 // Note: for rounding modes other than RN or RA, the result can be obtained
4134 // by rounding first to BID128 and then to BID64
4135
4136 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4137 *pfpsf = 0;
4138
4139#if DECIMAL_CALL_BY_REFERENCE
4140 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4141 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4142 &res, &x, &y, &z
4143 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4144 _EXC_INFO_ARG);
4145#else
4146 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4147 &is_inexact_lt_midpoint0,
4148 &is_inexact_gt_midpoint0, x, y,
4149 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4150 _EXC_INFO_ARG);
4151#endif
4152
4153 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4154 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4155 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4156 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4157#if DECIMAL_CALL_BY_REFERENCE
4158 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4159#else
4160 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161#endif
4162 // determine the unbiased exponent of the result
4163 unbexp = ((res1 >> 53) & 0x3ff) - 398;
4164
4165 // if subnormal, res1 must have exp = -398
4166 // if tiny and inexact set underflow and inexact status flags
4167 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
4168 (unbexp == -398)
4169 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4170 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4171 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4172 // set the inexact flag and the underflow flag
4173 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4174 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4175 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4176 // set the inexact flag and the underflow flag
4177 *pfpsf |= INEXACT_EXCEPTION;
4178 }
4179
4180 *pfpsf |= save_fpsf;
4181 BID_RETURN (res1);
4182 } // else continue, and use rounding to nearest to round to 16 digits
4183
4184 // at this point the result is rounded to nearest (even or away) to 34 digits
4185 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4186 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4187 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4188 unbexp = (exp >> 49) - 6176;
4189 C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4190 C.w[0] = res.w[LOW_128W];
4191
4192 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
4193 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4194 // clear under/overflow
4195#if DECIMAL_CALL_BY_REFERENCE
4196 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4197#else
4198 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199#endif
4200 *pfpsf |= save_fpsf;
4201 BID_RETURN (res1);
4202 } // else continue
4203
4204 // -398 - 34 <= unbexp <= 369 + 15
4205 if (rnd_mode == ROUNDING_TIES_AWAY) {
4206 // apply correction, if needed, to make the result rounded to nearest-even
4207 if (is_midpoint_gt_even) {
4208 // res = res - 1
4209 res1--; // res1 is now even
4210 } // else the result is already correctly rounded to nearest-even
4211 }
4212 // at this point the result is finite, non-zero canonical normal or subnormal,
4213 // and in most cases overflow or underflow will not occur
4214
4215 // determine the number of digits q in the result
4216 // q = nr. of decimal digits in x
4217 // determine first the nr. of bits in x
4218 if (C.w[1] == 0) {
4219 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4220 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4221 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4222 tmp.d = (double) (C.w[0] >> 32); // exact conversion
4223 nr_bits =
4224 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4225 } else { // x < 2^32
4226 tmp.d = (double) (C.w[0]); // exact conversion
4227 nr_bits =
4228 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4229 }
4230 } else { // if x < 2^53
4231 tmp.d = (double) C.w[0]; // exact conversion
4232 nr_bits =
4233 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4234 }
4235 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4236 tmp.d = (double) C.w[1]; // exact conversion
4237 nr_bits =
4238 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4239 }
4240 q = nr_digits[nr_bits - 1].digits;
4241 if (q == 0) {
4242 q = nr_digits[nr_bits - 1].digits1;
4243 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4244 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4245 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4246 q++;
4247 }
4248 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4249 // have to be truncated even more)
4250 if (q > 16) {
4251 x0 = q - 16;
4252 if (q <= 18) {
4253 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4254 &is_midpoint_lt_even, &is_midpoint_gt_even,
4255 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4256 } else { // 19 <= q <= 34
4257 round128_19_38 (q, x0, C, &res128, &incr_exp,
4258 &is_midpoint_lt_even, &is_midpoint_gt_even,
4259 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4260 res1 = res128.w[0]; // the result fits in 64 bits
4261 }
4262 unbexp = unbexp + x0;
4263 if (incr_exp)
4264 unbexp++;
4265 q = 16; // need to set in case denormalization is necessary
4266 } else {
4267 // the result does not require a second rounding (and it must have
4268 // been exact in the first rounding, since q <= 16)
4269 res1 = C.w[0];
4270 }
4271
4272 // avoid a double rounding error
4273 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4274 is_midpoint_lt_even) { // double rounding error upward
4275 // res = res - 1
4276 res1--; // res1 becomes odd
4277 is_midpoint_lt_even = 0;
4278 is_inexact_lt_midpoint = 1;
4279 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4280 res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4281 unbexp--;
4282 }
4283 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4284 is_midpoint_gt_even) { // double rounding error downward
4285 // res = res + 1
4286 res1++; // res1 becomes odd (so it cannot be 10^16)
4287 is_midpoint_gt_even = 0;
4288 is_inexact_gt_midpoint = 1;
4289 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4290 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4291 // if this second rounding was exact the result may still be
4292 // inexact because of the first rounding
4293 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4294 is_inexact_gt_midpoint = 1;
4295 }
4296 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4297 is_inexact_lt_midpoint = 1;
4298 }
4299 } else if (is_midpoint_gt_even &&
4300 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4301 // pulled up to a midpoint
4302 is_inexact_lt_midpoint = 1;
4303 is_inexact_gt_midpoint = 0;
4304 is_midpoint_lt_even = 0;
4305 is_midpoint_gt_even = 0;
4306 } else if (is_midpoint_lt_even &&
4307 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4308 // pulled down to a midpoint
4309 is_inexact_lt_midpoint = 0;
4310 is_inexact_gt_midpoint = 1;
4311 is_midpoint_lt_even = 0;
4312 is_midpoint_gt_even = 0;
4313 } else {
4314 ;
4315 }
4316 // this is the result rounded correctly to nearest even, with unbounded exp.
4317
4318 // check for overflow
4319 if (q + unbexp > P16 + expmax16) {
4320 res1 = sign | 0x7800000000000000ull;
4321 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4322 *pfpsf |= save_fpsf;
4323 BID_RETURN (res1)
4324 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4325 // not overflow; the result must be exact, and we can multiply res1 by
4326 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4327 scale = unbexp - expmax16;
4328 res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4329 unbexp = expmax16; // unbexp - scale
4330 } else {
4331 ; // continue
4332 }
4333
4334 // check for underflow
4335 if (q + unbexp < P16 + expmin16) {
4336 if (unbexp < expmin16) {
4337 // we must truncate more of res
4338 x0 = expmin16 - unbexp; // x0 >= 1
4339 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4340 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4341 is_midpoint_lt_even0 = is_midpoint_lt_even;
4342 is_midpoint_gt_even0 = is_midpoint_gt_even;
4343 is_inexact_lt_midpoint = 0;
4344 is_inexact_gt_midpoint = 0;
4345 is_midpoint_lt_even = 0;
4346 is_midpoint_gt_even = 0;
4347 // the number of decimal digits in res1 is q
4348 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4349 // 2 <= q <= 16, 1 <= x0 <= 15
4350 round64_2_18 (q, x0, res1, &res1, &incr_exp,
4351 &is_midpoint_lt_even, &is_midpoint_gt_even,
4352 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4353 if (incr_exp) {
4354 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4355 res1 = ten2k64[q - x0];
4356 }
4357 unbexp = unbexp + x0; // expmin16
4358 } else if (x0 == q) {
4359 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4360 // determine relationship with 1/2 ulp
4361 // q <= 16
4362 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4363 lt_half_ulp = 1;
4364 is_inexact_lt_midpoint = 1;
4365 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4366 eq_half_ulp = 1;
4367 is_midpoint_gt_even = 1;
4368 } else { // > 1/2 ulp
4369 // gt_half_ulp = 1;
4370 is_inexact_gt_midpoint = 1;
4371 }
4372 if (lt_half_ulp || eq_half_ulp) {
4373 // res = +0.0 * 10^expmin16
4374 res1 = 0x0000000000000000ull;
4375 } else { // if (gt_half_ulp)
4376 // res = +1 * 10^expmin16
4377 res1 = 0x0000000000000001ull;
4378 }
4379 unbexp = expmin16;
4380 } else { // if (x0 > q)
4381 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4382 res1 = 0x0000000000000000ull;
4383 unbexp = expmin16;
4384 is_inexact_lt_midpoint = 1;
4385 }
4386 // avoid a double rounding error
4387 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4388 is_midpoint_lt_even) { // double rounding error upward
4389 // res = res - 1
4390 res1--; // res1 becomes odd
4391 is_midpoint_lt_even = 0;
4392 is_inexact_lt_midpoint = 1;
4393 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4394 is_midpoint_gt_even) { // double rounding error downward
4395 // res = res + 1
4396 res1++; // res1 becomes odd
4397 is_midpoint_gt_even = 0;
4398 is_inexact_gt_midpoint = 1;
4399 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4400 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4401 // if this rounding was exact the result may still be
4402 // inexact because of the previous roundings
4403 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4404 is_inexact_gt_midpoint = 1;
4405 }
4406 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4407 is_inexact_lt_midpoint = 1;
4408 }
4409 } else if (is_midpoint_gt_even &&
4410 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4411 // pulled up to a midpoint
4412 is_inexact_lt_midpoint = 1;
4413 is_inexact_gt_midpoint = 0;
4414 is_midpoint_lt_even = 0;
4415 is_midpoint_gt_even = 0;
4416 } else if (is_midpoint_lt_even &&
4417 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4418 // pulled down to a midpoint
4419 is_inexact_lt_midpoint = 0;
4420 is_inexact_gt_midpoint = 1;
4421 is_midpoint_lt_even = 0;
4422 is_midpoint_gt_even = 0;
4423 } else {
4424 ;
4425 }
4426 }
4427 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4428 // and the result is tiny and exact
4429
4430 // check for inexact result
4431 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4432 is_midpoint_lt_even || is_midpoint_gt_even ||
4433 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4434 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4435 // set the inexact flag and the underflow flag
4436 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4437 }
4438 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4439 is_midpoint_lt_even || is_midpoint_gt_even) {
4440 *pfpsf |= INEXACT_EXCEPTION;
4441 }
4442 // this is the result rounded correctly to nearest, with bounded exponent
4443
4444 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4445 // res = res + 1
4446 res1++; // res1 is now odd
4447 } // else the result is already correct
4448
4449 // assemble the result
4450 if (res1 < 0x0020000000000000ull) { // res < 2^53
4451 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4452 } else { // res1 >= 2^53
4453 res1 = sign | MASK_STEERING_BITS |
4454 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4455 }
4456 *pfpsf |= save_fpsf;
4457 BID_RETURN (res1);
4458}