libgcc/config/libbid/bid128_fma.c

   1 /* Copyright (C) 2007-2019 Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 3, or (at your option) any later
   8 version.
   9
  10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 for more details.
  14
  15 Under Section 7 of GPL version 3, you are granted additional
  16 permissions described in the GCC Runtime Library Exception, version
  17 3.1, as published by the Free Software Foundation.
  18
  19 You should have received a copy of the GNU General Public License and
  20 a copy of the GCC Runtime Library Exception along with this program;
  21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 /*****************************************************************************
  25  *
  26  *  BID128 fma   x * y + z
  27  *
  28  ****************************************************************************/
  29
  30 #include "bid_internal.h"
  31
  32 static void
  33 rounding_correction (unsigned int rnd_mode,
  34                      unsigned int is_inexact_lt_midpoint,
  35                      unsigned int is_inexact_gt_midpoint,
  36                      unsigned int is_midpoint_lt_even,
  37                      unsigned int is_midpoint_gt_even,
  38                      int unbexp,
  39                      UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
  40   // unbiased true exponent unbexp may be larger than emax
  41
  42   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
  43   // (the exponent field is ignored, as unbexp is used instead)
  44   UINT64 sign, exp;
  45   UINT64 C_hi, C_lo;
  46
  47   // general correction from RN to RA, RM, RP, RZ
  48   // Note: if the result is negative, then is_inexact_lt_midpoint,
  49   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
  50   // have to be considered as if determined for the absolute value of the
  51   // result (so they seem to be reversed)
  52
  53   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
  54       is_midpoint_lt_even || is_midpoint_gt_even) {
  55     *ptrfpsf |= INEXACT_EXCEPTION;
  56   }
  57   // apply correction to result calculated with unbounded exponent
  58   sign = res.w[1] & MASK_SIGN;
  59   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
  60   C_hi = res.w[1] & MASK_COEFF;
  61   C_lo = res.w[0];
  62   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
  63       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
  64       is_midpoint_gt_even))) ||
  65       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
  66       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
  67       is_midpoint_gt_even)))) {
  68     // C = C + 1
  69     C_lo = C_lo + 1;
  70     if (C_lo == 0)
  71       C_hi = C_hi + 1;
  72     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
  73       // C = 10^34 => rounding overflow
  74       C_hi = 0x0000314dc6448d93ull;
  75       C_lo = 0x38c15b0a00000000ull; // 10^33
  76       // exp = exp + EXP_P1;
  77       unbexp = unbexp + 1;
  78       exp = (UINT64) (unbexp + 6176) << 49;
  79     }
  80   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
  81       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
  82       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
  83     // C = C - 1
  84     C_lo = C_lo - 1;
  85     if (C_lo == 0xffffffffffffffffull)
  86       C_hi--;
  87     // check if we crossed into the lower decade
  88     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
  89       // C = 10^33 - 1
  90       if (exp > 0) {
  91         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
  92         C_lo = 0x378d8e63ffffffffull;
  93         // exp = exp - EXP_P1;
  94         unbexp = unbexp - 1;
  95         exp = (UINT64) (unbexp + 6176) << 49;
  96       } else { // if exp = 0 the result is tiny & inexact
  97         *ptrfpsf |= UNDERFLOW_EXCEPTION;
  98       }
  99     }
 100   } else {
 101     ; // the result is already correct
 102   }
 103   if (unbexp > expmax) { // 6111
 104     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
 105     exp = 0;
 106     if (!sign) { // result is positive
 107       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
 108         C_hi = 0x7800000000000000ull;
 109         C_lo = 0x0000000000000000ull;
 110       } else { // res = +MAXFP = (10^34-1) * 10^emax
 111         C_hi = 0x5fffed09bead87c0ull;
 112         C_lo = 0x378d8e63ffffffffull;
 113       }
 114     } else { // result is negative
 115       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
 116         C_hi = 0xf800000000000000ull;
 117         C_lo = 0x0000000000000000ull;
 118       } else { // res = -MAXFP = -(10^34-1) * 10^emax
 119         C_hi = 0xdfffed09bead87c0ull;
 120         C_lo = 0x378d8e63ffffffffull;
 121       }
 122     }
 123   }
 124   // assemble the result
 125   res.w[1] = sign | exp | C_hi;
 126   res.w[0] = C_lo;
 127   *ptrres = res;
 128 }
 129
 130 static void
 131 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
 132   // *z = x + yl assume the sum fits in 256 bits
 133   UINT256 z;
 134   z.w[0] = x.w[0] + y.w[0];
 135   if (z.w[0] < x.w[0]) {
 136     x.w[1]++;
 137     if (x.w[1] == 0x0000000000000000ull) {
 138       x.w[2]++;
 139       if (x.w[2] == 0x0000000000000000ull) {
 140         x.w[3]++;
 141       }
 142     }
 143   }
 144   z.w[1] = x.w[1] + y.w[1];
 145   if (z.w[1] < x.w[1]) {
 146     x.w[2]++;
 147     if (x.w[2] == 0x0000000000000000ull) {
 148       x.w[3]++;
 149     }
 150   }
 151   z.w[2] = x.w[2] + y.w[2];
 152   if (z.w[2] < x.w[2]) {
 153     x.w[3]++;
 154   }
 155   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
 156   *pz = z;
 157 }
 158
 159 static void
 160 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
 161   // *z = x - y; assume x >= y
 162   UINT256 z;
 163   z.w[0] = x.w[0] - y.w[0];
 164   if (z.w[0] > x.w[0]) {
 165     x.w[1]--;
 166     if (x.w[1] == 0xffffffffffffffffull) {
 167       x.w[2]--;
 168       if (x.w[2] == 0xffffffffffffffffull) {
 169         x.w[3]--;
 170       }
 171     }
 172   }
 173   z.w[1] = x.w[1] - y.w[1];
 174   if (z.w[1] > x.w[1]) {
 175     x.w[2]--;
 176     if (x.w[2] == 0xffffffffffffffffull) {
 177       x.w[3]--;
 178     }
 179   }
 180   z.w[2] = x.w[2] - y.w[2];
 181   if (z.w[2] > x.w[2]) {
 182     x.w[3]--;
 183   }
 184   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
 185   *pz = z;
 186 }
 187
 188
 189 static int
 190 nr_digits256 (UINT256 R256) {
 191   int ind;
 192   // determine the number of decimal digits in R256
 193   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
 194     // between 1 and 19 digits
 195     for (ind = 1; ind <= 19; ind++) {
 196       if (R256.w[0] < ten2k64[ind]) {
 197         break;
 198       }
 199     }
 200     // ind digits
 201   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
 202              (R256.w[1] < ten2k128[0].w[1] ||
 203               (R256.w[1] == ten2k128[0].w[1]
 204                && R256.w[0] < ten2k128[0].w[0]))) {
 205     // 20 digits
 206     ind = 20;
 207   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
 208     // between 21 and 38 digits
 209     for (ind = 1; ind <= 18; ind++) {
 210       if (R256.w[1] < ten2k128[ind].w[1] ||
 211           (R256.w[1] == ten2k128[ind].w[1] &&
 212            R256.w[0] < ten2k128[ind].w[0])) {
 213         break;
 214       }
 215     }
 216     // ind + 20 digits
 217     ind = ind + 20;
 218   } else if (R256.w[3] == 0x0 &&
 219              (R256.w[2] < ten2k256[0].w[2] ||
 220               (R256.w[2] == ten2k256[0].w[2] &&
 221                R256.w[1] < ten2k256[0].w[1]) ||
 222               (R256.w[2] == ten2k256[0].w[2] &&
 223                R256.w[1] == ten2k256[0].w[1] &&
 224                R256.w[0] < ten2k256[0].w[0]))) {
 225     // 39 digits
 226     ind = 39;
 227   } else {
 228     // between 40 and 68 digits
 229     for (ind = 1; ind <= 29; ind++) {
 230       if (R256.w[3] < ten2k256[ind].w[3] ||
 231           (R256.w[3] == ten2k256[ind].w[3] &&
 232            R256.w[2] < ten2k256[ind].w[2]) ||
 233           (R256.w[3] == ten2k256[ind].w[3] &&
 234            R256.w[2] == ten2k256[ind].w[2] &&
 235            R256.w[1] < ten2k256[ind].w[1]) ||
 236           (R256.w[3] == ten2k256[ind].w[3] &&
 237            R256.w[2] == ten2k256[ind].w[2] &&
 238            R256.w[1] == ten2k256[ind].w[1] &&
 239            R256.w[0] < ten2k256[ind].w[0])) {
 240         break;
 241       }
 242     }
 243     // ind + 39 digits
 244     ind = ind + 39;
 245   }
 246   return (ind);
 247 }
 248
 249 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
 250 // use the rounding information from ptr_is_* to avoid a double rounding error
 251 static void
 252 add_and_round (int q3,
 253                int q4,
 254                int e4,
 255                int delta,
 256                int p34,
 257                UINT64 z_sign,
 258                UINT64 p_sign,
 259                UINT128 C3,
 260                UINT256 C4,
 261                int rnd_mode,
 262                int *ptr_is_midpoint_lt_even,
 263                int *ptr_is_midpoint_gt_even,
 264                int *ptr_is_inexact_lt_midpoint,
 265                int *ptr_is_inexact_gt_midpoint,
 266                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
 267
 268   int scale;
 269   int x0;
 270   int ind;
 271   UINT64 R64;
 272   UINT128 P128, R128;
 273   UINT192 P192, R192;
 274   UINT256 R256;
 275   int is_midpoint_lt_even = 0;
 276   int is_midpoint_gt_even = 0;
 277   int is_inexact_lt_midpoint = 0;
 278   int is_inexact_gt_midpoint = 0;
 279   int is_midpoint_lt_even0 = 0;
 280   int is_midpoint_gt_even0 = 0;
 281   int is_inexact_lt_midpoint0 = 0;
 282   int is_inexact_gt_midpoint0 = 0;
 283   int incr_exp = 0;
 284   int is_tiny = 0;
 285   int lt_half_ulp = 0;
 286   int eq_half_ulp = 0;
 287   // int gt_half_ulp = 0;
 288   UINT128 res = *ptrres;
 289
 290   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
 291   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
 292   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
 293
 294   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
 295   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
 296   if (scale == 0) {
 297     R256.w[3] = 0x0ull;
 298     R256.w[2] = 0x0ull;
 299     R256.w[1] = C3.w[1];
 300     R256.w[0] = C3.w[0];
 301   } else if (scale <= 19) { // 10^scale fits in 64 bits
 302     P128.w[1] = 0;
 303     P128.w[0] = ten2k64[scale];
 304     __mul_128x128_to_256 (R256, P128, C3);
 305   } else if (scale <= 38) { // 10^scale fits in 128 bits
 306     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
 307   } else if (scale <= 57) { // 39 <= scale <= 57
 308     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
 309     // (10^67 has 223 bits; 10^69 has 230 bits);
 310     // must split the computation:
 311     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
 312     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
 313     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
 314     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
 315     // now multiply R128 by 10^38
 316     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
 317   } else { // 58 <= scale <= 66
 318     // 10^scale takes between 193 and 220 bits,
 319     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
 320     // must split the computation:
 321     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
 322     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
 323     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
 324     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
 325     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
 326     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
 327     // now calculate 10*38 * 10^(scale-38) * C3
 328     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
 329   }
 330   // C3 * 10^scale is now in R256
 331
 332   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
 333   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
 334   // possible
 335   // add/subtract C4 and C3 * 10^scale; the exponent is e4
 336   if (p_sign == z_sign) { // R256 = C4 + R256
 337     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
 338     // but may require rounding
 339     add256 (C4, R256, &R256);
 340   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
 341     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
 342     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
 343     // but may require rounding
 344
 345     // compare first R256 = C3 * 10^scale and C4
 346     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
 347         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
 348         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
 349         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
 350       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
 351       // but may require rounding
 352       sub256 (R256, C4, &R256);
 353       // flip p_sign too, because the result has the sign of z
 354       p_sign = z_sign;
 355     } else { // if C4 > C3 * 10^scale
 356       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
 357       // but may require rounding
 358       sub256 (C4, R256, &R256);
 359     }
 360     // if the result is pure zero, the sign depends on the rounding mode
 361     // (x*y and z had opposite signs)
 362     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
 363         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
 364       if (rnd_mode != ROUNDING_DOWN)
 365         p_sign = 0x0000000000000000ull;
 366       else
 367         p_sign = 0x8000000000000000ull;
 368       // the exponent is max (e4, expmin)
 369       if (e4 < -6176)
 370         e4 = expmin;
 371       // assemble result
 372       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
 373       res.w[0] = 0x0;
 374       *ptrres = res;
 375       return;
 376     }
 377   }
 378
 379   // determine the number of decimal digits in R256
 380   ind = nr_digits256 (R256);
 381
 382   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
 383   // round to the destination precision, with unbounded exponent
 384
 385   if (ind <= p34) {
 386     // result rounded to the destination precision with unbounded exponent
 387     // is exact
 388     if (ind + e4 < p34 + expmin) {
 389       is_tiny = 1; // applies to all rounding modes
 390     }
 391     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
 392     res.w[0] = R256.w[0];
 393     // Note: res is correct only if expmin <= e4 <= expmax
 394   } else { // if (ind > p34)
 395     // if more than P digits, round to nearest to P digits
 396     // round R256 to p34 digits
 397     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
 398     if (ind <= 38) {
 399       P128.w[1] = R256.w[1];
 400       P128.w[0] = R256.w[0];
 401       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
 402                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 403                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 404     } else if (ind <= 57) {
 405       P192.w[2] = R256.w[2];
 406       P192.w[1] = R256.w[1];
 407       P192.w[0] = R256.w[0];
 408       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
 409                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 410                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 411       R128.w[1] = R192.w[1];
 412       R128.w[0] = R192.w[0];
 413     } else { // if (ind <= 68)
 414       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
 415                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 416                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 417       R128.w[1] = R256.w[1];
 418       R128.w[0] = R256.w[0];
 419     }
 420     // the rounded result has p34 = 34 digits
 421     e4 = e4 + x0 + incr_exp;
 422     if (rnd_mode == ROUNDING_TO_NEAREST) {
 423       if (e4 < expmin) {
 424         is_tiny = 1; // for other rounding modes apply correction
 425       }
 426     } else {
 427       // for RM, RP, RZ, RA apply correction in order to determine tininess
 428       // but do not save the result; apply the correction to
 429       // (-1)^p_sign * significand * 10^0
 430       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
 431       P128.w[0] = R128.w[0];
 432       rounding_correction (rnd_mode,
 433                            is_inexact_lt_midpoint,
 434                            is_inexact_gt_midpoint, is_midpoint_lt_even,
 435                            is_midpoint_gt_even, 0, &P128, ptrfpsf);
 436       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
 437       // the number of digits in the significand is p34 = 34
 438       if (e4 + scale < expmin) {
 439         is_tiny = 1;
 440       }
 441     }
 442     ind = p34; // the number of decimal digits in the signifcand of res
 443     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
 444     res.w[0] = R128.w[0];
 445     // Note: res is correct only if expmin <= e4 <= expmax
 446     // set the inexact flag after rounding with bounded exponent, if any
 447   }
 448   // at this point we have the result rounded with unbounded exponent in
 449   // res and we know its tininess:
 450   // res = (-1)^p_sign * significand * 10^e4,
 451   // where q (significand) = ind <= p34
 452   // Note: res is correct only if expmin <= e4 <= expmax
 453
 454   // check for overflow if RN
 455   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
 456     res.w[1] = p_sign | 0x7800000000000000ull;
 457     res.w[0] = 0x0000000000000000ull;
 458     *ptrres = res;
 459     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
 460     return; // BID_RETURN (res)
 461   } // else not overflow or not RN, so continue
 462
 463   // if (e4 >= expmin) we have the result rounded with bounded exponent
 464   if (e4 < expmin) {
 465     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
 466     // where the result rounded [at most] once is
 467     //   (-1)^p_sign * significand_res * 10^e4
 468
 469     // avoid double rounding error
 470     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
 471     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
 472     is_midpoint_lt_even0 = is_midpoint_lt_even;
 473     is_midpoint_gt_even0 = is_midpoint_gt_even;
 474     is_inexact_lt_midpoint = 0;
 475     is_inexact_gt_midpoint = 0;
 476     is_midpoint_lt_even = 0;
 477     is_midpoint_gt_even = 0;
 478
 479     if (x0 > ind) {
 480       // nothing is left of res when moving the decimal point left x0 digits
 481       is_inexact_lt_midpoint = 1;
 482       res.w[1] = p_sign | 0x0000000000000000ull;
 483       res.w[0] = 0x0000000000000000ull;
 484       e4 = expmin;
 485     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
 486       // this is <, =, or > 1/2 ulp
 487       // compare the ind-digit value in the significand of res with
 488       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
 489       // less than, equal to, or greater than 1/2 ulp (significand of res)
 490       R128.w[1] = res.w[1] & MASK_COEFF;
 491       R128.w[0] = res.w[0];
 492       if (ind <= 19) {
 493         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
 494           lt_half_ulp = 1;
 495           is_inexact_lt_midpoint = 1;
 496         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
 497           eq_half_ulp = 1;
 498           is_midpoint_gt_even = 1;
 499         } else { // > 1/2 ulp
 500           // gt_half_ulp = 1;
 501           is_inexact_gt_midpoint = 1;
 502         }
 503       } else { // if (ind <= 38) {
 504         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
 505             (R128.w[1] == midpoint128[ind - 20].w[1] &&
 506             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
 507           lt_half_ulp = 1;
 508           is_inexact_lt_midpoint = 1;
 509         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
 510             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
 511           eq_half_ulp = 1;
 512           is_midpoint_gt_even = 1;
 513         } else { // > 1/2 ulp
 514           // gt_half_ulp = 1;
 515           is_inexact_gt_midpoint = 1;
 516         }
 517       }
 518       if (lt_half_ulp || eq_half_ulp) {
 519         // res = +0.0 * 10^expmin
 520         res.w[1] = 0x0000000000000000ull;
 521         res.w[0] = 0x0000000000000000ull;
 522       } else { // if (gt_half_ulp)
 523         // res = +1 * 10^expmin
 524         res.w[1] = 0x0000000000000000ull;
 525         res.w[0] = 0x0000000000000001ull;
 526       }
 527       res.w[1] = p_sign | res.w[1];
 528       e4 = expmin;
 529     } else { // if (1 <= x0 <= ind - 1 <= 33)
 530       // round the ind-digit result to ind - x0 digits
 531
 532       if (ind <= 18) { // 2 <= ind <= 18
 533         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
 534                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 535                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 536         res.w[1] = 0x0;
 537         res.w[0] = R64;
 538       } else if (ind <= 38) {
 539         P128.w[1] = res.w[1] & MASK_COEFF;
 540         P128.w[0] = res.w[0];
 541         round128_19_38 (ind, x0, P128, &res, &incr_exp,
 542                         &is_midpoint_lt_even, &is_midpoint_gt_even,
 543                         &is_inexact_lt_midpoint,
 544                         &is_inexact_gt_midpoint);
 545       }
 546       e4 = e4 + x0; // expmin
 547       // we want the exponent to be expmin, so if incr_exp = 1 then
 548       // multiply the rounded result by 10 - it will still fit in 113 bits
 549       if (incr_exp) {
 550         // 64 x 128 -> 128
 551         P128.w[1] = res.w[1] & MASK_COEFF;
 552         P128.w[0] = res.w[0];
 553         __mul_64x128_to_128 (res, ten2k64[1], P128);
 554       }
 555       res.w[1] =
 556         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
 557       // avoid a double rounding error
 558       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
 559           is_midpoint_lt_even) { // double rounding error upward
 560         // res = res - 1
 561         res.w[0]--;
 562         if (res.w[0] == 0xffffffffffffffffull)
 563           res.w[1]--;
 564         // Note: a double rounding error upward is not possible; for this
 565         // the result after the first rounding would have to be 99...95
 566         // (35 digits in all), possibly followed by a number of zeros; this
 567         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
 568         is_midpoint_lt_even = 0;
 569         is_inexact_lt_midpoint = 1;
 570       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
 571           is_midpoint_gt_even) { // double rounding error downward
 572         // res = res + 1
 573         res.w[0]++;
 574         if (res.w[0] == 0)
 575           res.w[1]++;
 576         is_midpoint_gt_even = 0;
 577         is_inexact_gt_midpoint = 1;
 578       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
 579                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
 580         // if this second rounding was exact the result may still be
 581         // inexact because of the first rounding
 582         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
 583           is_inexact_gt_midpoint = 1;
 584         }
 585         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
 586           is_inexact_lt_midpoint = 1;
 587         }
 588       } else if (is_midpoint_gt_even &&
 589                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
 590         // pulled up to a midpoint
 591         is_inexact_lt_midpoint = 1;
 592         is_inexact_gt_midpoint = 0;
 593         is_midpoint_lt_even = 0;
 594         is_midpoint_gt_even = 0;
 595       } else if (is_midpoint_lt_even &&
 596                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
 597         // pulled down to a midpoint
 598         is_inexact_lt_midpoint = 0;
 599         is_inexact_gt_midpoint = 1;
 600         is_midpoint_lt_even = 0;
 601         is_midpoint_gt_even = 0;
 602       } else {
 603         ;
 604       }
 605     }
 606   }
 607   // res contains the correct result
 608   // apply correction if not rounding to nearest
 609   if (rnd_mode != ROUNDING_TO_NEAREST) {
 610     rounding_correction (rnd_mode,
 611                          is_inexact_lt_midpoint, is_inexact_gt_midpoint,
 612                          is_midpoint_lt_even, is_midpoint_gt_even,
 613                          e4, &res, ptrfpsf);
 614   }
 615   if (is_midpoint_lt_even || is_midpoint_gt_even ||
 616       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
 617     // set the inexact flag
 618     *ptrfpsf |= INEXACT_EXCEPTION;
 619     if (is_tiny)
 620       *ptrfpsf |= UNDERFLOW_EXCEPTION;
 621   }
 622
 623   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 624   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 625   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 626   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 627   *ptrres = res;
 628   return;
 629 }
 630
 631
 632 #if DECIMAL_CALL_BY_REFERENCE
 633 static void
 634 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
 635                 int *ptr_is_midpoint_gt_even,
 636                 int *ptr_is_inexact_lt_midpoint,
 637                 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
 638                 UINT128 * px, UINT128 * py,
 639                 UINT128 *
 640                 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
 641                 _EXC_INFO_PARAM) {
 642   UINT128 x = *px, y = *py, z = *pz;
 643 #if !DECIMAL_GLOBAL_ROUNDING
 644   unsigned int rnd_mode = *prnd_mode;
 645 #endif
 646 #else
 647 static UINT128
 648 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
 649                 int *ptr_is_midpoint_gt_even,
 650                 int *ptr_is_inexact_lt_midpoint,
 651                 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
 652                 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
 653                 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
 654 #endif
 655
 656   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
 657   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
 658   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
 659   int true_p_exp;
 660   UINT128 C1, C2, C3;
 661   UINT256 C4;
 662   int q1 = 0, q2 = 0, q3 = 0, q4;
 663   int e1, e2, e3, e4;
 664   int scale, ind, delta, x0;
 665   int p34 = P34; // used to modify the limit on the number of digits
 666   BID_UI64DOUBLE tmp;
 667   int x_nr_bits, y_nr_bits, z_nr_bits;
 668   unsigned int save_fpsf;
 669   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
 670   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
 671   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
 672   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
 673   int incr_exp = 0;
 674   int lsb;
 675   int lt_half_ulp = 0;
 676   int eq_half_ulp = 0;
 677   int gt_half_ulp = 0;
 678   int is_tiny = 0;
 679   UINT64 R64, tmp64;
 680   UINT128 P128, R128;
 681   UINT192 P192, R192;
 682   UINT256 R256;
 683
 684   // the following are based on the table of special cases for fma; the NaN
 685   // behavior is similar to that of the IA-64 Architecture fma
 686
 687   // identify cases where at least one operand is NaN
 688
 689   BID_SWAP128 (x);
 690   BID_SWAP128 (y);
 691   BID_SWAP128 (z);
 692   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
 693     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
 694     // check first for non-canonical NaN payload
 695     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 696         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 697          (y.w[0] > 0x38c15b09ffffffffull))) {
 698       y.w[1] = y.w[1] & 0xffffc00000000000ull;
 699       y.w[0] = 0x0ull;
 700     }
 701     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
 702       // set invalid flag
 703       *pfpsf |= INVALID_EXCEPTION;
 704       // return quiet (y)
 705       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 706       res.w[0] = y.w[0];
 707     } else { // y is QNaN
 708       // return y
 709       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 710       res.w[0] = y.w[0];
 711       // if z = SNaN or x = SNaN signal invalid exception
 712       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
 713           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
 714         // set invalid flag
 715         *pfpsf |= INVALID_EXCEPTION;
 716       }
 717     }
 718     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 719     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 720     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 721     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 722     BID_SWAP128 (res);
 723     BID_RETURN (res)
 724   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
 725     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
 726     // check first for non-canonical NaN payload
 727     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 728         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 729          (z.w[0] > 0x38c15b09ffffffffull))) {
 730       z.w[1] = z.w[1] & 0xffffc00000000000ull;
 731       z.w[0] = 0x0ull;
 732     }
 733     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
 734       // set invalid flag
 735       *pfpsf |= INVALID_EXCEPTION;
 736       // return quiet (z)
 737       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 738       res.w[0] = z.w[0];
 739     } else { // z is QNaN
 740       // return z
 741       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 742       res.w[0] = z.w[0];
 743       // if x = SNaN signal invalid exception
 744       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
 745         // set invalid flag
 746         *pfpsf |= INVALID_EXCEPTION;
 747       }
 748     }
 749     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 750     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 751     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 752     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 753     BID_SWAP128 (res);
 754     BID_RETURN (res)
 755   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
 756     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
 757     // check first for non-canonical NaN payload
 758     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 759         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 760          (x.w[0] > 0x38c15b09ffffffffull))) {
 761       x.w[1] = x.w[1] & 0xffffc00000000000ull;
 762       x.w[0] = 0x0ull;
 763     }
 764     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
 765       // set invalid flag
 766       *pfpsf |= INVALID_EXCEPTION;
 767       // return quiet (x)
 768       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 769       res.w[0] = x.w[0];
 770     } else { // x is QNaN
 771       // return x
 772       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 773       res.w[0] = x.w[0];
 774     }
 775     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 776     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 777     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 778     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 779     BID_SWAP128 (res);
 780     BID_RETURN (res)
 781   }
 782   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
 783   // for non-canonical values
 784
 785   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 786   C1.w[1] = x.w[1] & MASK_COEFF;
 787   C1.w[0] = x.w[0];
 788   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
 789     // if x is not infinity check for non-canonical values - treated as zero
 790     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 791       // non-canonical
 792       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 793       C1.w[1] = 0; // significand high
 794       C1.w[0] = 0; // significand low
 795     } else { // G0_G1 != 11
 796       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
 797       if (C1.w[1] > 0x0001ed09bead87c0ull ||
 798           (C1.w[1] == 0x0001ed09bead87c0ull &&
 799            C1.w[0] > 0x378d8e63ffffffffull)) {
 800         // x is non-canonical if coefficient is larger than 10^34 -1
 801         C1.w[1] = 0;
 802         C1.w[0] = 0;
 803       } else { // canonical
 804         ;
 805       }
 806     }
 807   }
 808   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 809   C2.w[1] = y.w[1] & MASK_COEFF;
 810   C2.w[0] = y.w[0];
 811   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
 812     // if y is not infinity check for non-canonical values - treated as zero
 813     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 814       // non-canonical
 815       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 816       C2.w[1] = 0; // significand high
 817       C2.w[0] = 0; // significand low
 818     } else { // G0_G1 != 11
 819       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
 820       if (C2.w[1] > 0x0001ed09bead87c0ull ||
 821           (C2.w[1] == 0x0001ed09bead87c0ull &&
 822            C2.w[0] > 0x378d8e63ffffffffull)) {
 823         // y is non-canonical if coefficient is larger than 10^34 -1
 824         C2.w[1] = 0;
 825         C2.w[0] = 0;
 826       } else { // canonical
 827         ;
 828       }
 829     }
 830   }
 831   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 832   C3.w[1] = z.w[1] & MASK_COEFF;
 833   C3.w[0] = z.w[0];
 834   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
 835     // if z is not infinity check for non-canonical values - treated as zero
 836     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 837       // non-canonical
 838       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 839       C3.w[1] = 0; // significand high
 840       C3.w[0] = 0; // significand low
 841     } else { // G0_G1 != 11
 842       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
 843       if (C3.w[1] > 0x0001ed09bead87c0ull ||
 844           (C3.w[1] == 0x0001ed09bead87c0ull &&
 845            C3.w[0] > 0x378d8e63ffffffffull)) {
 846         // z is non-canonical if coefficient is larger than 10^34 -1
 847         C3.w[1] = 0;
 848         C3.w[0] = 0;
 849       } else { // canonical
 850         ;
 851       }
 852     }
 853   }
 854
 855   p_sign = x_sign ^ y_sign; // sign of the product
 856
 857   // identify cases where at least one operand is infinity
 858
 859   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
 860     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
 861       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 862         if (p_sign == z_sign) {
 863           res.w[1] = z_sign | MASK_INF;
 864           res.w[0] = 0x0;
 865         } else {
 866           // return QNaN Indefinite
 867           res.w[1] = 0x7c00000000000000ull;
 868           res.w[0] = 0x0000000000000000ull;
 869           // set invalid flag
 870           *pfpsf |= INVALID_EXCEPTION;
 871         }
 872       } else { // z = 0 or z = f
 873         res.w[1] = p_sign | MASK_INF;
 874         res.w[0] = 0x0;
 875       }
 876     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
 877       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 878         if (p_sign == z_sign) {
 879           res.w[1] = z_sign | MASK_INF;
 880           res.w[0] = 0x0;
 881         } else {
 882           // return QNaN Indefinite
 883           res.w[1] = 0x7c00000000000000ull;
 884           res.w[0] = 0x0000000000000000ull;
 885           // set invalid flag
 886           *pfpsf |= INVALID_EXCEPTION;
 887         }
 888       } else { // z = 0 or z = f
 889         res.w[1] = p_sign | MASK_INF;
 890         res.w[0] = 0x0;
 891       }
 892     } else { // y = 0
 893       // return QNaN Indefinite
 894       res.w[1] = 0x7c00000000000000ull;
 895       res.w[0] = 0x0000000000000000ull;
 896       // set invalid flag
 897       *pfpsf |= INVALID_EXCEPTION;
 898     }
 899     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 900     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 901     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 902     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 903     BID_SWAP128 (res);
 904     BID_RETURN (res)
 905   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
 906     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 907       // x = f, necessarily
 908       if ((p_sign != z_sign)
 909           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
 910         // return QNaN Indefinite
 911         res.w[1] = 0x7c00000000000000ull;
 912         res.w[0] = 0x0000000000000000ull;
 913         // set invalid flag
 914         *pfpsf |= INVALID_EXCEPTION;
 915       } else {
 916         res.w[1] = z_sign | MASK_INF;
 917         res.w[0] = 0x0;
 918       }
 919     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
 920       // z = 0, f, inf
 921       // return QNaN Indefinite
 922       res.w[1] = 0x7c00000000000000ull;
 923       res.w[0] = 0x0000000000000000ull;
 924       // set invalid flag
 925       *pfpsf |= INVALID_EXCEPTION;
 926     } else {
 927       // x = f and z = 0, f, necessarily
 928       res.w[1] = p_sign | MASK_INF;
 929       res.w[0] = 0x0;
 930     }
 931     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 932     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 933     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 934     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 935     BID_SWAP128 (res);
 936     BID_RETURN (res)
 937   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 938     // x = 0, f and y = 0, f, necessarily
 939     res.w[1] = z_sign | MASK_INF;
 940     res.w[0] = 0x0;
 941     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 942     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 943     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 944     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 945     BID_SWAP128 (res);
 946     BID_RETURN (res)
 947   }
 948
 949   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
 950   if (true_p_exp < -6176)
 951     p_exp = 0; // cannot be less than EXP_MIN
 952   else
 953     p_exp = (UINT64) (true_p_exp + 6176) << 49;
 954
 955   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
 956     // the result is 0
 957     if (p_exp < z_exp)
 958       res.w[1] = p_exp; // preferred exponent
 959     else
 960       res.w[1] = z_exp; // preferred exponent
 961     if (p_sign == z_sign) {
 962       res.w[1] |= z_sign;
 963       res.w[0] = 0x0;
 964     } else { // x * y and z have opposite signs
 965       if (rnd_mode == ROUNDING_DOWN) {
 966         // res = -0.0
 967         res.w[1] |= MASK_SIGN;
 968         res.w[0] = 0x0;
 969       } else {
 970         // res = +0.0
 971         // res.w[1] |= 0x0;
 972         res.w[0] = 0x0;
 973       }
 974     }
 975     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 976     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 977     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 978     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 979     BID_SWAP128 (res);
 980     BID_RETURN (res)
 981   }
 982   // from this point on, we may need to know the number of decimal digits
 983   // in the significands of x, y, z when x, y, z != 0
 984
 985   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
 986     // q1 = nr. of decimal digits in x
 987     // determine first the nr. of bits in x
 988     if (C1.w[1] == 0) {
 989       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
 990         // split the 64-bit value in two 32-bit halves to avoid rounding errors
 991         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
 992           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
 993           x_nr_bits =
 994             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
 995         } else { // x < 2^32
 996           tmp.d = (double) (C1.w[0]); // exact conversion
 997           x_nr_bits =
 998             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
 999         }
1000       } else { // if x < 2^53
1001         tmp.d = (double) C1.w[0]; // exact conversion
1002         x_nr_bits =
1003           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1004       }
1005     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1006       tmp.d = (double) C1.w[1]; // exact conversion
1007       x_nr_bits =
1008         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1009     }
1010     q1 = nr_digits[x_nr_bits - 1].digits;
1011     if (q1 == 0) {
1012       q1 = nr_digits[x_nr_bits - 1].digits1;
1013       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1014           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1015            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1016         q1++;
1017     }
1018   }
1019
1020   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1021     if (C2.w[1] == 0) {
1022       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1023         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1024         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1025           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1026           y_nr_bits =
1027             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1028         } else { // y < 2^32
1029           tmp.d = (double) C2.w[0]; // exact conversion
1030           y_nr_bits =
1031             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1032         }
1033       } else { // if y < 2^53
1034         tmp.d = (double) C2.w[0]; // exact conversion
1035         y_nr_bits =
1036           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1037       }
1038     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1039       tmp.d = (double) C2.w[1]; // exact conversion
1040       y_nr_bits =
1041         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1042     }
1043
1044     q2 = nr_digits[y_nr_bits].digits;
1045     if (q2 == 0) {
1046       q2 = nr_digits[y_nr_bits].digits1;
1047       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1048           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1049            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1050         q2++;
1051     }
1052   }
1053
1054   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1055     if (C3.w[1] == 0) {
1056       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1057         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1058         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1059           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1060           z_nr_bits =
1061             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1062         } else { // z < 2^32
1063           tmp.d = (double) C3.w[0]; // exact conversion
1064           z_nr_bits =
1065             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1066         }
1067       } else { // if z < 2^53
1068         tmp.d = (double) C3.w[0]; // exact conversion
1069         z_nr_bits =
1070           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1071       }
1072     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1073       tmp.d = (double) C3.w[1]; // exact conversion
1074       z_nr_bits =
1075         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1076     }
1077
1078     q3 = nr_digits[z_nr_bits].digits;
1079     if (q3 == 0) {
1080       q3 = nr_digits[z_nr_bits].digits1;
1081       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1082           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1083            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1084         q3++;
1085     }
1086   }
1087
1088   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1089       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1090     // x = 0 or y = 0
1091     // z = f, necessarily; for 0 + z return z, with the preferred exponent
1092     // the result is z, but need to get the preferred exponent
1093     if (z_exp <= p_exp) { // the preferred exponent is z_exp
1094       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1095       res.w[0] = C3.w[0];
1096     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1097       // return (C3 * 10^scale) * 10^(z_exp - scale)
1098       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1099       scale = p34 - q3;
1100       ind = (z_exp - p_exp) >> 49;
1101       if (ind < scale)
1102         scale = ind;
1103       if (scale == 0) {
1104         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1105         res.w[0] = z.w[0];
1106       } else if (q3 <= 19) { // z fits in 64 bits
1107         if (scale <= 19) { // 10^scale fits in 64 bits
1108           // 64 x 64 C3.w[0] * ten2k64[scale]
1109           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1110         } else { // 10^scale fits in 128 bits
1111           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1112           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1113         }
1114       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1115         // 64 x 128 ten2k64[scale] * C3
1116         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1117       }
1118       // subtract scale from the exponent
1119       z_exp = z_exp - ((UINT64) scale << 49);
1120       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1121     }
1122     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1123     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1124     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1125     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1126     BID_SWAP128 (res);
1127     BID_RETURN (res)
1128   } else {
1129     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1130   }
1131
1132   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1133   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1134   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1135   e4 = e1 + e2; // unbiased exponent of the exact x * y
1136
1137   // calculate C1 * C2 and its number of decimal digits, q4
1138
1139   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1140   // where 2 <= q1 + q2 <= 68
1141   // calculate C4 = C1 * C2 and determine q
1142   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1143   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1144     C4.w[0] = C1.w[0] * C2.w[0];
1145     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1146     if (C4.w[0] < ten2k64[q1 + q2 - 1])
1147       q4 = q1 + q2 - 1; // q4 in [1, 18]
1148     else
1149       q4 = q1 + q2; // q4 in [2, 19]
1150     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1151   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1152     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1153     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1154     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1155     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1156       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1157       q4 = 19; // 19 = q1 + q2 - 1
1158     } else {
1159       // if (C4.w[1] == 0)
1160       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1161       // else
1162       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1163       q4 = 20; // 20 = q1 + q2
1164     }
1165   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1166     // C4 = C1 * C2 fits in 64 or 128 bits
1167     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1168     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1169     if (q1 <= 19) {
1170       __mul_128x64_to_128 (C4, C1.w[0], C2);
1171     } else { // q2 <= 19
1172       __mul_128x64_to_128 (C4, C2.w[0], C1);
1173     }
1174     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1175     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1176         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1177          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1178       // if (C4.w[1] == 0) // q4 = 20, necessarily
1179       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1180       // else
1181       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1182       q4 = q1 + q2 - 1; // q4 in [20, 37]
1183     } else {
1184       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1185       q4 = q1 + q2; // q4 in [21, 38]
1186     }
1187   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1188     // both C1 and C2 fit in 128 bits (actually in 113 bits)
1189     // may replace this by 128x128_to192
1190     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1191     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1192     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1193                          (C4.w[1] == ten2k128[18].w[1]
1194                           && C4.w[0] < ten2k128[18].w[0]))) {
1195       // 18 = 38 - 20 = q1+q2-1 - 20
1196       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1197       q4 = 38; // 38 = q1 + q2 - 1
1198     } else {
1199       // if (C4.w[2] == 0)
1200       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1201       // else
1202       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1203       q4 = 39; // 39 = q1 + q2
1204     }
1205   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1206     // C4 = C1 * C2 fits in 128 or 192 bits
1207     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1208     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1209     // may fit in 64 bits
1210     if (C1.w[1] == 0) { // C1 fits in 64 bits
1211       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1212       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1213     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1214       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1215       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1216     } else { // both C1 and C2 require 128 bits
1217       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1218       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1219     }
1220     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1221     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1222         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1223          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1224           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1225            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1226       // if (C4.w[2] == 0) // q4 = 39, necessarily
1227       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1228       // else
1229       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1230       q4 = q1 + q2 - 1; // q4 in [39, 56]
1231     } else {
1232       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1233       q4 = q1 + q2; // q4 in [40, 57]
1234     }
1235   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1236     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1237     // may fit in 64 bits
1238     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1239       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1240     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1242     } else { // C1 * C2 will fit in 192 bits or in 256 bits
1243       __mul_128x128_to_256 (C4, C1, C2);
1244     }
1245     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1246     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1247                          (C4.w[2] == ten2k256[18].w[2]
1248                           && (C4.w[1] < ten2k256[18].w[1]
1249                               || (C4.w[1] == ten2k256[18].w[1]
1250                                   && C4.w[0] < ten2k256[18].w[0]))))) {
1251       // 18 = 57 - 39 = q1+q2-1 - 39
1252       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1253       q4 = 57; // 57 = q1 + q2 - 1
1254     } else {
1255       // if (C4.w[3] == 0)
1256       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1257       // else
1258       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1259       q4 = 58; // 58 = q1 + q2
1260     }
1261   } else { // if 59 <= q1 + q2 <= 68
1262     // C4 = C1 * C2 fits in 192 or 256 bits
1263     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1264     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1265     // 64 bits
1266     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1267     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1268     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1269     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1270         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1271          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1272           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1273            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1274             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1275              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1276       // if (C4.w[3] == 0) // q4 = 58, necessarily
1277       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1278       // else
1279       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1280       q4 = q1 + q2 - 1; // q4 in [58, 67]
1281     } else {
1282       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1283       q4 = q1 + q2; // q4 in [59, 68]
1284     }
1285   }
1286
1287   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1288     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1289     *pfpsf = 0;
1290
1291     if (q4 > p34) {
1292
1293       // truncate C4 to p34 digits into res
1294       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1295       x0 = q4 - p34;
1296       if (q4 <= 38) {
1297         P128.w[1] = C4.w[1];
1298         P128.w[0] = C4.w[0];
1299         round128_19_38 (q4, x0, P128, &res, &incr_exp,
1300                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1301                         &is_inexact_lt_midpoint,
1302                         &is_inexact_gt_midpoint);
1303       } else if (q4 <= 57) { // 35 <= q4 <= 57
1304         P192.w[2] = C4.w[2];
1305         P192.w[1] = C4.w[1];
1306         P192.w[0] = C4.w[0];
1307         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1308                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1309                         &is_inexact_lt_midpoint,
1310                         &is_inexact_gt_midpoint);
1311         res.w[0] = R192.w[0];
1312         res.w[1] = R192.w[1];
1313       } else { // if (q4 <= 68)
1314         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1315                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1316                         &is_inexact_lt_midpoint,
1317                         &is_inexact_gt_midpoint);
1318         res.w[0] = R256.w[0];
1319         res.w[1] = R256.w[1];
1320       }
1321       e4 = e4 + x0;
1322       if (incr_exp) {
1323         e4 = e4 + 1;
1324       }
1325       q4 = p34;
1326       // res is now the coefficient of the result rounded to the destination
1327       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1328     } else { // if (q4 <= p34)
1329       // C4 * 10^e4 is the result rounded to the destination precision, with
1330       // unbounded exponent (which is exact)
1331
1332       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1333         // e4 is too large, but can be brought within range by scaling up C4
1334         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1335         // res = (C4 * 10^scale) * 10^expmax
1336         if (q4 <= 19) { // C4 fits in 64 bits
1337           if (scale <= 19) { // 10^scale fits in 64 bits
1338             // 64 x 64 C4.w[0] * ten2k64[scale]
1339             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1340           } else { // 10^scale fits in 128 bits
1341             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1342             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1343           }
1344         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1345           // 64 x 128 ten2k64[scale] * CC43
1346           __mul_128x64_to_128 (res, ten2k64[scale], C4);
1347         }
1348         e4 = e4 - scale; // expmax
1349         q4 = q4 + scale;
1350       } else {
1351         res.w[1] = C4.w[1];
1352         res.w[0] = C4.w[0];
1353       }
1354       // res is the coefficient of the result rounded to the destination
1355       // precision, with unbounded exponent (it has q4 digits); the exponent
1356       // is e4 (exact result)
1357     }
1358
1359     // check for overflow
1360     if (q4 + e4 > p34 + expmax) {
1361       if (rnd_mode == ROUNDING_TO_NEAREST) {
1362         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1363         res.w[0] = 0x0000000000000000ull;
1364         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1365       } else {
1366         res.w[1] = p_sign | res.w[1];
1367         rounding_correction (rnd_mode,
1368                              is_inexact_lt_midpoint,
1369                              is_inexact_gt_midpoint,
1370                              is_midpoint_lt_even, is_midpoint_gt_even,
1371                              e4, &res, pfpsf);
1372       }
1373       *pfpsf |= save_fpsf;
1374       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1375       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1376       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1377       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1378       BID_SWAP128 (res);
1379       BID_RETURN (res)
1380     }
1381     // check for underflow
1382     if (q4 + e4 < expmin + P34) {
1383       is_tiny = 1; // the result is tiny
1384       if (e4 < expmin) {
1385         // if e4 < expmin, we must truncate more of res
1386         x0 = expmin - e4; // x0 >= 1
1387         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1388         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1389         is_midpoint_lt_even0 = is_midpoint_lt_even;
1390         is_midpoint_gt_even0 = is_midpoint_gt_even;
1391         is_inexact_lt_midpoint = 0;
1392         is_inexact_gt_midpoint = 0;
1393         is_midpoint_lt_even = 0;
1394         is_midpoint_gt_even = 0;
1395         // the number of decimal digits in res is q4
1396         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1397           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1398             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1399                           &is_midpoint_lt_even, &is_midpoint_gt_even,
1400                           &is_inexact_lt_midpoint,
1401                           &is_inexact_gt_midpoint);
1402             if (incr_exp) {
1403               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1404               R64 = ten2k64[q4 - x0];
1405             }
1406             // res.w[1] = 0; (from above)
1407             res.w[0] = R64;
1408           } else { // if (q4 <= 34)
1409             // 19 <= q4 <= 38
1410             P128.w[1] = res.w[1];
1411             P128.w[0] = res.w[0];
1412             round128_19_38 (q4, x0, P128, &res, &incr_exp,
1413                             &is_midpoint_lt_even, &is_midpoint_gt_even,
1414                             &is_inexact_lt_midpoint,
1415                             &is_inexact_gt_midpoint);
1416             if (incr_exp) {
1417               // increase coefficient by a factor of 10; this will be <= 10^33
1418               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1419               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1420                 // res.w[1] = 0;
1421                 res.w[0] = ten2k64[q4 - x0];
1422               } else { // 20 <= q4 - x0 <= 37
1423                 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1424                 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1425               }
1426             }
1427           }
1428           e4 = e4 + x0; // expmin
1429         } else if (x0 == q4) {
1430           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1431           // determine relationship with 1/2 ulp
1432           if (q4 <= 19) {
1433             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1434               lt_half_ulp = 1;
1435               is_inexact_lt_midpoint = 1;
1436             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1437               eq_half_ulp = 1;
1438               is_midpoint_gt_even = 1;
1439             } else { // > 1/2 ulp
1440               // gt_half_ulp = 1;
1441               is_inexact_gt_midpoint = 1;
1442             }
1443           } else { // if (q4 <= 34)
1444             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1445                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1446                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1447               lt_half_ulp = 1;
1448               is_inexact_lt_midpoint = 1;
1449             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1450                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1451               eq_half_ulp = 1;
1452               is_midpoint_gt_even = 1;
1453             } else { // > 1/2 ulp
1454               // gt_half_ulp = 1;
1455               is_inexact_gt_midpoint = 1;
1456             }
1457           }
1458           if (lt_half_ulp || eq_half_ulp) {
1459             // res = +0.0 * 10^expmin
1460             res.w[1] = 0x0000000000000000ull;
1461             res.w[0] = 0x0000000000000000ull;
1462           } else { // if (gt_half_ulp)
1463             // res = +1 * 10^expmin
1464             res.w[1] = 0x0000000000000000ull;
1465             res.w[0] = 0x0000000000000001ull;
1466           }
1467           e4 = expmin;
1468         } else { // if (x0 > q4)
1469           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1470           res.w[1] = 0;
1471           res.w[0] = 0;
1472           e4 = expmin;
1473           is_inexact_lt_midpoint = 1;
1474         }
1475         // avoid a double rounding error
1476         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1477             is_midpoint_lt_even) { // double rounding error upward
1478           // res = res - 1
1479           res.w[0]--;
1480           if (res.w[0] == 0xffffffffffffffffull)
1481             res.w[1]--;
1482           // Note: a double rounding error upward is not possible; for this
1483           // the result after the first rounding would have to be 99...95
1484           // (35 digits in all), possibly followed by a number of zeros; this
1485           // not possible for f * f + 0
1486           is_midpoint_lt_even = 0;
1487           is_inexact_lt_midpoint = 1;
1488         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1489             is_midpoint_gt_even) { // double rounding error downward
1490           // res = res + 1
1491           res.w[0]++;
1492           if (res.w[0] == 0)
1493             res.w[1]++;
1494           is_midpoint_gt_even = 0;
1495           is_inexact_gt_midpoint = 1;
1496         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1497                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1498           // if this second rounding was exact the result may still be
1499           // inexact because of the first rounding
1500           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1501             is_inexact_gt_midpoint = 1;
1502           }
1503           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1504             is_inexact_lt_midpoint = 1;
1505           }
1506         } else if (is_midpoint_gt_even &&
1507                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1508           // pulled up to a midpoint
1509           is_inexact_lt_midpoint = 1;
1510           is_inexact_gt_midpoint = 0;
1511           is_midpoint_lt_even = 0;
1512           is_midpoint_gt_even = 0;
1513         } else if (is_midpoint_lt_even &&
1514                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1515           // pulled down to a midpoint
1516           is_inexact_lt_midpoint = 0;
1517           is_inexact_gt_midpoint = 1;
1518           is_midpoint_lt_even = 0;
1519           is_midpoint_gt_even = 0;
1520         } else {
1521           ;
1522         }
1523       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1524         if (e3 < e4) {
1525           // if (e3 < e4) the preferred exponent is e3
1526           // return (C4 * 10^scale) * 10^(e4 - scale)
1527           // where scale = min (p34-q4, (e4 - e3))
1528           scale = p34 - q4;
1529           ind = e4 - e3;
1530           if (ind < scale)
1531             scale = ind;
1532           if (scale == 0) {
1533             ; // res and e4 are unchanged
1534           } else if (q4 <= 19) { // C4 fits in 64 bits
1535             if (scale <= 19) { // 10^scale fits in 64 bits
1536               // 64 x 64 res.w[0] * ten2k64[scale]
1537               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1538             } else { // 10^scale fits in 128 bits
1539               // 64 x 128 res.w[0] * ten2k128[scale - 20]
1540               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1541             }
1542           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1543             // 64 x 128 ten2k64[scale] * C3
1544             __mul_128x64_to_128 (res, ten2k64[scale], res);
1545           }
1546           // subtract scale from the exponent
1547           e4 = e4 - scale;
1548         }
1549       }
1550
1551       // check for inexact result
1552       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1553           is_midpoint_lt_even || is_midpoint_gt_even) {
1554         // set the inexact flag and the underflow flag
1555         *pfpsf |= INEXACT_EXCEPTION;
1556         *pfpsf |= UNDERFLOW_EXCEPTION;
1557       }
1558       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1559       if (rnd_mode != ROUNDING_TO_NEAREST) {
1560         rounding_correction (rnd_mode,
1561                              is_inexact_lt_midpoint,
1562                              is_inexact_gt_midpoint,
1563                              is_midpoint_lt_even, is_midpoint_gt_even,
1564                              e4, &res, pfpsf);
1565       }
1566       *pfpsf |= save_fpsf;
1567       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1568       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1569       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1570       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1571       BID_SWAP128 (res);
1572       BID_RETURN (res)
1573     }
1574     // no overflow, and no underflow for rounding to nearest
1575     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1576
1577     if (rnd_mode != ROUNDING_TO_NEAREST) {
1578       rounding_correction (rnd_mode,
1579                            is_inexact_lt_midpoint,
1580                            is_inexact_gt_midpoint,
1581                            is_midpoint_lt_even, is_midpoint_gt_even,
1582                            e4, &res, pfpsf);
1583       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1584       if (e4 == expmin) {
1585         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1586             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1587              res.w[0] < 0x38c15b0a00000000ull)) {
1588           is_tiny = 1;
1589         }
1590       }
1591     }
1592
1593     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1594         is_midpoint_lt_even || is_midpoint_gt_even) {
1595       // set the inexact flag
1596       *pfpsf |= INEXACT_EXCEPTION;
1597       if (is_tiny)
1598         *pfpsf |= UNDERFLOW_EXCEPTION;
1599     }
1600
1601     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1602       // need to ensure that the result has the preferred exponent
1603       p_exp = res.w[1] & MASK_EXP;
1604       if (z_exp < p_exp) { // the preferred exponent is z_exp
1605         // signficand of res in C3
1606         C3.w[1] = res.w[1] & MASK_COEFF;
1607         C3.w[0] = res.w[0];
1608         // the number of decimal digits of x * y is q4 <= 34
1609         // Note: the coefficient fits in 128 bits
1610
1611         // return (C3 * 10^scale) * 10^(p_exp - scale)
1612         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1613         scale = p34 - q4;
1614         ind = (p_exp - z_exp) >> 49;
1615         if (ind < scale)
1616           scale = ind;
1617         // subtract scale from the exponent
1618         p_exp = p_exp - ((UINT64) scale << 49);
1619         if (scale == 0) {
1620           ; // leave res unchanged
1621         } else if (q4 <= 19) { // x * y fits in 64 bits
1622           if (scale <= 19) { // 10^scale fits in 64 bits
1623             // 64 x 64 C3.w[0] * ten2k64[scale]
1624             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1625           } else { // 10^scale fits in 128 bits
1626             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1627             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1628           }
1629           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1630         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1631           // 64 x 128 ten2k64[scale] * C3
1632           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1633           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1634         }
1635       } // else leave the result as it is, because p_exp <= z_exp
1636     }
1637     *pfpsf |= save_fpsf;
1638     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1639     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1640     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1641     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1642     BID_SWAP128 (res);
1643     BID_RETURN (res)
1644   } // else we have f * f + f
1645
1646   // continue with x = f, y = f, z = f
1647
1648   delta = q3 + e3 - q4 - e4;
1649 delta_ge_zero:
1650   if (delta >= 0) {
1651
1652     if (p34 <= delta - 1 ||     // Case (1')
1653         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1654       // check for overflow, which can occur only in Case (1')
1655       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1656         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1657         // condition for (q3 + e3) > (p34 + expmax)
1658         if (rnd_mode == ROUNDING_TO_NEAREST) {
1659           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1660           res.w[0] = 0x0000000000000000ull;
1661           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1662         } else {
1663           if (p_sign == z_sign) {
1664             is_inexact_lt_midpoint = 1;
1665           } else {
1666             is_inexact_gt_midpoint = 1;
1667           }
1668           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1669           scale = p34 - q3;
1670           if (scale == 0) {
1671             res.w[1] = z_sign | C3.w[1];
1672             res.w[0] = C3.w[0];
1673           } else {
1674             if (q3 <= 19) { // C3 fits in 64 bits
1675               if (scale <= 19) { // 10^scale fits in 64 bits
1676                 // 64 x 64 C3.w[0] * ten2k64[scale]
1677                 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1678               } else { // 10^scale fits in 128 bits
1679                 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1680                 __mul_128x64_to_128 (res, C3.w[0],
1681                                      ten2k128[scale - 20]);
1682               }
1683             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1684               // 64 x 128 ten2k64[scale] * C3
1685               __mul_128x64_to_128 (res, ten2k64[scale], C3);
1686             }
1687             // the coefficient in res has q3 + scale = p34 digits
1688           }
1689           e3 = e3 - scale;
1690           res.w[1] = z_sign | res.w[1];
1691           rounding_correction (rnd_mode,
1692                                is_inexact_lt_midpoint,
1693                                is_inexact_gt_midpoint,
1694                                is_midpoint_lt_even, is_midpoint_gt_even,
1695                                e3, &res, pfpsf);
1696         }
1697         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1698         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1699         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1700         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1701         BID_SWAP128 (res);
1702         BID_RETURN (res)
1703       }
1704       // res = z
1705       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1706         // return (C3 * 10^scale) * 10^(z_exp - scale)
1707         // where scale = min (p34-q3, z_exp-EMIN)
1708         scale = p34 - q3;
1709         ind = e3 + 6176;
1710         if (ind < scale)
1711           scale = ind;
1712         if (scale == 0) {
1713           res.w[1] = C3.w[1];
1714           res.w[0] = C3.w[0];
1715         } else if (q3 <= 19) { // z fits in 64 bits
1716           if (scale <= 19) { // 10^scale fits in 64 bits
1717             // 64 x 64 C3.w[0] * ten2k64[scale]
1718             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1719           } else { // 10^scale fits in 128 bits
1720             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1721             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1722           }
1723         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1724           // 64 x 128 ten2k64[scale] * C3
1725           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1726         }
1727         // the coefficient in res has q3 + scale digits
1728         // subtract scale from the exponent
1729         z_exp = z_exp - ((UINT64) scale << 49);
1730         e3 = e3 - scale;
1731         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1732         if (scale + q3 < p34)
1733           *pfpsf |= UNDERFLOW_EXCEPTION;
1734       } else {
1735         scale = 0;
1736         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1737         res.w[0] = C3.w[0];
1738       }
1739
1740       // use the following to avoid double rounding errors when operating on
1741       // mixed formats in rounding to nearest, and for correcting the result
1742       // if not rounding to nearest
1743       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1744         // there is a gap of exactly one digit between the scaled C3 and C4
1745         // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1746         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1747             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1748             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1749                           C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1750           // C3 * 10^ scale != 10^(q3-1)
1751           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1752           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1753           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1754         } else { // if C3 * 10^scale = 10^(q3+scale-1)
1755           // ok from above e3 = (z_exp >> 49) - 6176;
1756           // the result is always inexact
1757           if (q4 == 1) {
1758             R64 = C4.w[0];
1759           } else {
1760             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1761             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1762             if (q4 <= 18) { // 2 <= q4 <= 18
1763               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1764                             &is_midpoint_lt_even, &is_midpoint_gt_even,
1765                             &is_inexact_lt_midpoint,
1766                             &is_inexact_gt_midpoint);
1767             } else if (q4 <= 38) {
1768               P128.w[1] = C4.w[1];
1769               P128.w[0] = C4.w[0];
1770               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1771                               &is_midpoint_lt_even,
1772                               &is_midpoint_gt_even,
1773                               &is_inexact_lt_midpoint,
1774                               &is_inexact_gt_midpoint);
1775               R64 = R128.w[0]; // one decimal digit
1776             } else if (q4 <= 57) {
1777               P192.w[2] = C4.w[2];
1778               P192.w[1] = C4.w[1];
1779               P192.w[0] = C4.w[0];
1780               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1781                               &is_midpoint_lt_even,
1782                               &is_midpoint_gt_even,
1783                               &is_inexact_lt_midpoint,
1784                               &is_inexact_gt_midpoint);
1785               R64 = R192.w[0]; // one decimal digit
1786             } else { // if (q4 <= 68)
1787               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1788                               &is_midpoint_lt_even,
1789                               &is_midpoint_gt_even,
1790                               &is_inexact_lt_midpoint,
1791                               &is_inexact_gt_midpoint);
1792               R64 = R256.w[0]; // one decimal digit
1793             }
1794             if (incr_exp) {
1795               R64 = 10;
1796             }
1797           }
1798           if (q4 == 1 && C4.w[0] == 5) {
1799             is_inexact_lt_midpoint = 0;
1800             is_inexact_gt_midpoint = 0;
1801             is_midpoint_lt_even = 1;
1802             is_midpoint_gt_even = 0;
1803           } else if ((e3 == expmin) ||
1804                      R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1805             // result does not change
1806             is_inexact_lt_midpoint = 0;
1807             is_inexact_gt_midpoint = 1;
1808             is_midpoint_lt_even = 0;
1809             is_midpoint_gt_even = 0;
1810           } else {
1811             is_inexact_lt_midpoint = 1;
1812             is_inexact_gt_midpoint = 0;
1813             is_midpoint_lt_even = 0;
1814             is_midpoint_gt_even = 0;
1815             // result decremented is 10^(q3+scale) - 1
1816             if ((q3 + scale) <= 19) {
1817               res.w[1] = 0;
1818               res.w[0] = ten2k64[q3 + scale];
1819             } else { // if ((q3 + scale + 1) <= 35)
1820               res.w[1] = ten2k128[q3 + scale - 20].w[1];
1821               res.w[0] = ten2k128[q3 + scale - 20].w[0];
1822             }
1823             res.w[0] = res.w[0] - 1; // borrow never occurs
1824             z_exp = z_exp - EXP_P1;
1825             e3 = e3 - 1;
1826             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1827           }
1828           if (e3 == expmin) {
1829             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1830               ; // result not tiny (in round-to-nearest mode)
1831             } else {
1832               *pfpsf |= UNDERFLOW_EXCEPTION;
1833             }
1834           }
1835         } // end 10^(q3+scale-1)
1836         // set the inexact flag
1837         *pfpsf |= INEXACT_EXCEPTION;
1838       } else {
1839         if (p_sign == z_sign) {
1840           // if (z_sign), set as if for absolute value
1841           is_inexact_lt_midpoint = 1;
1842         } else { // if (p_sign != z_sign)
1843           // if (z_sign), set as if for absolute value
1844           is_inexact_gt_midpoint = 1;
1845         }
1846         *pfpsf |= INEXACT_EXCEPTION;
1847       }
1848       // the result is always inexact => set the inexact flag
1849       // Determine tininess:
1850       //    if (exp > expmin)
1851       //      the result is not tiny
1852       //    else // if exp = emin
1853       //      if (q3 + scale < p34)
1854       //        the result is tiny
1855       //      else // if (q3 + scale = p34)
1856       //        if (C3 * 10^scale > 10^33)
1857       //          the result is not tiny
1858       //        else // if C3 * 10^scale = 10^33
1859       //          if (xy * z > 0)
1860       //            the result is not tiny
1861       //          else // if (xy * z < 0)
1862       //            if (z > 0)
1863       //              if rnd_mode != RP
1864       //                the result is tiny
1865       //              else // if RP
1866       //                the result is not tiny
1867       //            else // if (z < 0)
1868       //              if rnd_mode != RM
1869       //                the result is tiny
1870       //              else // if RM
1871       //                the result is not tiny
1872       //              endif
1873       //            endif
1874       //          endif
1875       //        endif
1876       //      endif
1877       //    endif
1878       if ((e3 == expmin && (q3 + scale) < p34) ||
1879           (e3 == expmin && (q3 + scale) == p34 &&
1880           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&   // 10^33_high
1881           res.w[0] == 0x38c15b0a00000000ull &&  // 10^33_low
1882           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1883           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1884         *pfpsf |= UNDERFLOW_EXCEPTION;
1885       }
1886       if (rnd_mode != ROUNDING_TO_NEAREST) {
1887         rounding_correction (rnd_mode,
1888                              is_inexact_lt_midpoint,
1889                              is_inexact_gt_midpoint,
1890                              is_midpoint_lt_even, is_midpoint_gt_even,
1891                              e3, &res, pfpsf);
1892       }
1893       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1894       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1895       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1896       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1897       BID_SWAP128 (res);
1898       BID_RETURN (res)
1899
1900     } else if (p34 == delta) { // Case (1''B)
1901
1902       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1903       // and C3 can be scaled up to p34 digits if needed
1904
1905       // scale C3 to p34 digits if needed
1906       scale = p34 - q3; // 0 <= scale <= p34 - 1
1907       if (scale == 0) {
1908         res.w[1] = C3.w[1];
1909         res.w[0] = C3.w[0];
1910       } else if (q3 <= 19) { // z fits in 64 bits
1911         if (scale <= 19) { // 10^scale fits in 64 bits
1912           // 64 x 64 C3.w[0] * ten2k64[scale]
1913           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1914         } else { // 10^scale fits in 128 bits
1915           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1916           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1917         }
1918       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1919         // 64 x 128 ten2k64[scale] * C3
1920         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1921       }
1922       // subtract scale from the exponent
1923       z_exp = z_exp - ((UINT64) scale << 49);
1924       e3 = e3 - scale;
1925       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1926
1927       // determine whether x * y is less than, equal to, or greater than
1928       // 1/2 ulp (z)
1929       if (q4 <= 19) {
1930         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1931           lt_half_ulp = 1;
1932         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1933           eq_half_ulp = 1;
1934         } else { // > 1/2 ulp
1935           gt_half_ulp = 1;
1936         }
1937       } else if (q4 <= 38) {
1938         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1939             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1940             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1941           lt_half_ulp = 1;
1942         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1943             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1944           eq_half_ulp = 1;
1945         } else { // > 1/2 ulp
1946           gt_half_ulp = 1;
1947         }
1948       } else if (q4 <= 58) {
1949         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1950             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1951             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1952             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1954             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1955           lt_half_ulp = 1;
1956         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1957             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1958             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1959           eq_half_ulp = 1;
1960         } else { // > 1/2 ulp
1961           gt_half_ulp = 1;
1962         }
1963       } else {
1964         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1965             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1966             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1967             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1969             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1970             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1971             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1972             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1973             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1974           lt_half_ulp = 1;
1975         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1976             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1977             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1978             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1979           eq_half_ulp = 1;
1980         } else { // > 1/2 ulp
1981           gt_half_ulp = 1;
1982         }
1983       }
1984
1985       if (p_sign == z_sign) {
1986         if (lt_half_ulp) {
1987           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1988           // use the following to avoid double rounding errors when operating on
1989           // mixed formats in rounding to nearest
1990           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1991         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1992           // add 1 ulp to the significand
1993           res.w[0]++;
1994           if (res.w[0] == 0x0ull)
1995             res.w[1]++;
1996           // check for rounding overflow, when coeff == 10^34
1997           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
1998               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
1999             e3 = e3 + 1;
2000             // coeff = 10^33
2001             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2002             res.w[1] = 0x0000314dc6448d93ull;
2003             res.w[0] = 0x38c15b0a00000000ull;
2004           }
2005           // end add 1 ulp
2006           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2007           if (eq_half_ulp) {
2008             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2009           } else {
2010             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2011           }
2012         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2013           // leave unchanged
2014           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2015           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2016         }
2017         // the result is always inexact, and never tiny
2018         // set the inexact flag
2019         *pfpsf |= INEXACT_EXCEPTION;
2020         // check for overflow
2021         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2022           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2023           res.w[0] = 0x0000000000000000ull;
2024           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2025           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2026           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2027           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2028           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2029           BID_SWAP128 (res);
2030           BID_RETURN (res)
2031         }
2032         if (rnd_mode != ROUNDING_TO_NEAREST) {
2033           rounding_correction (rnd_mode,
2034                                is_inexact_lt_midpoint,
2035                                is_inexact_gt_midpoint,
2036                                is_midpoint_lt_even, is_midpoint_gt_even,
2037                                e3, &res, pfpsf);
2038           z_exp = res.w[1] & MASK_EXP;
2039         }
2040       } else { // if (p_sign != z_sign)
2041         // consider two cases, because C3 * 10^scale = 10^33 is a special case
2042         if (res.w[1] != 0x0000314dc6448d93ull ||
2043             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2044           if (lt_half_ulp) {
2045             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2046             // use the following to avoid double rounding errors when operating
2047             // on mixed formats in rounding to nearest
2048             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2049           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2050             // subtract 1 ulp from the significand
2051             res.w[0]--;
2052             if (res.w[0] == 0xffffffffffffffffull)
2053               res.w[1]--;
2054             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2055             if (eq_half_ulp) {
2056               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2057             } else {
2058               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2059             }
2060           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2061             // leave unchanged
2062             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2063             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2064           }
2065           // the result is always inexact, and never tiny
2066           // check for overflow for RN
2067           if (e3 > expmax) {
2068             if (rnd_mode == ROUNDING_TO_NEAREST) {
2069               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2070               res.w[0] = 0x0000000000000000ull;
2071               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2072             } else {
2073               rounding_correction (rnd_mode,
2074                                    is_inexact_lt_midpoint,
2075                                    is_inexact_gt_midpoint,
2076                                    is_midpoint_lt_even,
2077                                    is_midpoint_gt_even, e3, &res,
2078                                    pfpsf);
2079             }
2080             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2081             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2082             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2083             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2084             BID_SWAP128 (res);
2085             BID_RETURN (res)
2086           }
2087           // set the inexact flag
2088           *pfpsf |= INEXACT_EXCEPTION;
2089           if (rnd_mode != ROUNDING_TO_NEAREST) {
2090             rounding_correction (rnd_mode,
2091                                  is_inexact_lt_midpoint,
2092                                  is_inexact_gt_midpoint,
2093                                  is_midpoint_lt_even,
2094                                  is_midpoint_gt_even, e3, &res, pfpsf);
2095           }
2096           z_exp = res.w[1] & MASK_EXP;
2097         } else { // if C3 * 10^scale = 10^33
2098           e3 = (z_exp >> 49) - 6176;
2099           if (e3 > expmin) {
2100             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2101             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2102             if (q4 == 1) {
2103               // if q4 = 1 the result is exact
2104               // result coefficient = 10^34 - C4
2105               res.w[1] = 0x0001ed09bead87c0ull;
2106               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2107               z_exp = z_exp - EXP_P1;
2108               e3 = e3 - 1;
2109               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2110             } else {
2111               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2112               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2113               if (q4 <= 18) { // 2 <= q4 <= 18
2114                 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2115                               &is_midpoint_lt_even,
2116                               &is_midpoint_gt_even,
2117                               &is_inexact_lt_midpoint,
2118                               &is_inexact_gt_midpoint);
2119               } else if (q4 <= 38) {
2120                 P128.w[1] = C4.w[1];
2121                 P128.w[0] = C4.w[0];
2122                 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2123                                 &is_midpoint_lt_even,
2124                                 &is_midpoint_gt_even,
2125                                 &is_inexact_lt_midpoint,
2126                                 &is_inexact_gt_midpoint);
2127                 R64 = R128.w[0]; // one decimal digit
2128               } else if (q4 <= 57) {
2129                 P192.w[2] = C4.w[2];
2130                 P192.w[1] = C4.w[1];
2131                 P192.w[0] = C4.w[0];
2132                 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2133                                 &is_midpoint_lt_even,
2134                                 &is_midpoint_gt_even,
2135                                 &is_inexact_lt_midpoint,
2136                                 &is_inexact_gt_midpoint);
2137                 R64 = R192.w[0]; // one decimal digit
2138               } else { // if (q4 <= 68)
2139                 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2140                                 &is_midpoint_lt_even,
2141                                 &is_midpoint_gt_even,
2142                                 &is_inexact_lt_midpoint,
2143                                 &is_inexact_gt_midpoint);
2144                 R64 = R256.w[0]; // one decimal digit
2145               }
2146               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2147                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2148                 // the result is exact: 10^34 - R64
2149                 // incr_exp = 0 with certainty
2150                 z_exp = z_exp - EXP_P1;
2151                 e3 = e3 - 1;
2152                 res.w[1] =
2153                   z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2154                 res.w[0] = 0x378d8e6400000000ull - R64;
2155               } else {
2156                 // We want R64 to be the top digit of C4, but we actually
2157                 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2158                 // because the top digit is (C4 * 10^(-q4+1))RZ
2159                 // however, if incr_exp = 1 then R64 = 10 with certainty
2160                 if (incr_exp) {
2161                   R64 = 10;
2162                 }
2163                 // the result is inexact as C4 has more than 1 significant digit
2164                 // and C3 * 10^scale = 10^33
2165                 // example of case that is treated here:
2166                 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2167                 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2168                 // note that (e3 > expmin}
2169                 // in order to round, subtract R64 from 10^34 and then compare
2170                 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2171                 // calculate 10^34 - R64
2172                 res.w[1] = 0x0001ed09bead87c0ull;
2173                 res.w[0] = 0x378d8e6400000000ull - R64;
2174                 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2175                 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2176                 // R64 is small, 1 <= R64 <= 9
2177                 e3 = e3 - 1;
2178                 if (is_inexact_lt_midpoint) {
2179                   is_inexact_lt_midpoint = 0;
2180                   is_inexact_gt_midpoint = 1;
2181                 } else if (is_inexact_gt_midpoint) {
2182                   is_inexact_gt_midpoint = 0;
2183                   is_inexact_lt_midpoint = 1;
2184                 } else if (is_midpoint_lt_even) {
2185                   is_midpoint_lt_even = 0;
2186                   is_midpoint_gt_even = 1;
2187                 } else if (is_midpoint_gt_even) {
2188                   is_midpoint_gt_even = 0;
2189                   is_midpoint_lt_even = 1;
2190                 } else {
2191                   ;
2192                 }
2193                 // the result is always inexact, and never tiny
2194                 // check for overflow for RN
2195                 if (e3 > expmax) {
2196                   if (rnd_mode == ROUNDING_TO_NEAREST) {
2197                     res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2198                     res.w[0] = 0x0000000000000000ull;
2199                     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2200                   } else {
2201                     rounding_correction (rnd_mode,
2202                                          is_inexact_lt_midpoint,
2203                                          is_inexact_gt_midpoint,
2204                                          is_midpoint_lt_even,
2205                                          is_midpoint_gt_even, e3, &res,
2206                                          pfpsf);
2207                   }
2208                   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2209                   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2210                   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2211                   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2212                   BID_SWAP128 (res);
2213                   BID_RETURN (res)
2214                 }
2215                 // set the inexact flag
2216                 *pfpsf |= INEXACT_EXCEPTION;
2217                 res.w[1] =
2218                   z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2219                 if (rnd_mode != ROUNDING_TO_NEAREST) {
2220                   rounding_correction (rnd_mode,
2221                                        is_inexact_lt_midpoint,
2222                                        is_inexact_gt_midpoint,
2223                                        is_midpoint_lt_even,
2224                                        is_midpoint_gt_even, e3, &res,
2225                                        pfpsf);
2226                 }
2227                 z_exp = res.w[1] & MASK_EXP;
2228               } // end result is inexact
2229             } // end q4 > 1
2230           } else { // if (e3 = emin)
2231             // if e3 = expmin the result is also tiny (the condition for
2232             // tininess is C4 > 050...0 [q4 digits] which is met because
2233             // the msd of C4 is not zero)
2234             // the result is tiny and inexact in all rounding modes;
2235             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2236             // gt_half_ulp to calculate)
2237             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2238
2239             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2240             if (gt_half_ulp) { // res = 10^33 - 1
2241               res.w[1] = 0x0000314dc6448d93ull;
2242               res.w[0] = 0x38c15b09ffffffffull;
2243             } else {
2244               res.w[1] = 0x0000314dc6448d93ull;
2245               res.w[0] = 0x38c15b0a00000000ull;
2246             }
2247             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2248             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2249
2250             if (eq_half_ulp) {
2251               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2252             } else if (lt_half_ulp) {
2253               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2254             } else { // if (gt_half_ulp)
2255               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2256             }
2257
2258             if (rnd_mode != ROUNDING_TO_NEAREST) {
2259               rounding_correction (rnd_mode,
2260                   is_inexact_lt_midpoint,
2261                   is_inexact_gt_midpoint,
2262                   is_midpoint_lt_even,
2263                   is_midpoint_gt_even, e3, &res,
2264                   pfpsf);
2265               z_exp = res.w[1] & MASK_EXP;
2266             }
2267           } // end e3 = emin
2268           // set the inexact flag (if the result was not exact)
2269           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2270               is_midpoint_lt_even || is_midpoint_gt_even)
2271             *pfpsf |= INEXACT_EXCEPTION;
2272         } // end 10^33
2273       } // end if (p_sign != z_sign)
2274       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2275       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2276       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2277       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2278       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2279       BID_SWAP128 (res);
2280       BID_RETURN (res)
2281
2282     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2283         (q3 <= delta && delta + q4 <= p34) || // Case (3)
2284         (delta < q3 && p34 < delta + q4) || // Case (4)
2285         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2286         (delta + q4 < q3)) && // Case (6)
2287         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2288
2289       // the result has the sign of z
2290
2291       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2292           (delta < q3 && p34 < delta + q4)) { // Case (4)
2293         // round first the sum x * y + z with unbounded exponent
2294         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2295         // 1 <= scale <= 33
2296         // calculate res = C3 * 10^scale
2297         scale = p34 - q3;
2298         x0 = delta + q4 - p34;
2299       } else if (delta + q4 < q3) { // Case (6)
2300         // make Case (6) look like Case (3) or Case (5) with scale = 0
2301         // by scaling up C4 by 10^(q3 - delta - q4)
2302         scale = q3 - delta - q4; // 1 <= scale <= 33
2303         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2304           if (scale <= 19) { // 10^scale fits in 64 bits
2305             // 64 x 64 C4.w[0] * ten2k64[scale]
2306             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2307           } else { // 10^scale fits in 128 bits
2308             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2309             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2310           }
2311         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2312           // 64 x 128 ten2k64[scale] * C4
2313           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2314         }
2315         C4.w[0] = P128.w[0];
2316         C4.w[1] = P128.w[1];
2317         // e4 does not need adjustment, as it is not used from this point on
2318         scale = 0;
2319         x0 = 0;
2320         // now Case (6) looks like Case (3) or Case (5) with scale = 0
2321       } else { // if Case (3) or Case (5)
2322         // Note: Case (3) is similar to Case (2), but scale differs and the
2323         // result is exact, unless it is tiny (so x0 = 0 when calculating the
2324         // result with unbounded exponent)
2325
2326         // calculate first the sum x * y + z with unbounded exponent (exact)
2327         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2328         // 1 <= scale <= 33
2329         // calculate res = C3 * 10^scale
2330         scale = delta + q4 - q3;
2331         x0 = 0;
2332         // Note: the comments which follow refer [mainly] to Case (2)]
2333       }
2334
2335     case2_repeat:
2336       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2337         // or in Case (4)
2338         res.w[1] = C3.w[1];
2339         res.w[0] = C3.w[0];
2340       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2341         if (scale <= 19) { // 10^scale fits in 64 bits
2342           // 64 x 64 C3.w[0] * ten2k64[scale]
2343           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2344         } else { // 10^scale fits in 128 bits
2345           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2346           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2347         }
2348       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2349         // 64 x 128 ten2k64[scale] * C3
2350         __mul_128x64_to_128 (res, ten2k64[scale], C3);
2351       }
2352       // e3 is already calculated
2353       e3 = e3 - scale;
2354       // now res = C3 * 10^scale and e3 = e3 - scale
2355       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2356       // because the result was too small
2357
2358       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2359       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2360       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2361       // the rounding fits in 128 bits!)
2362       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2363       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2364       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2365         // for Case (3) or Case (6)
2366         R128.w[1] = C4.w[1];
2367         R128.w[0] = C4.w[0];
2368       } else if (q4 <= 18) {
2369         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2370         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2371             &is_midpoint_lt_even, &is_midpoint_gt_even,
2372             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2373         if (incr_exp) {
2374           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2375           R64 = ten2k64[q4 - x0];
2376         }
2377         R128.w[1] = 0;
2378         R128.w[0] = R64;
2379       } else if (q4 <= 38) {
2380         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2381         P128.w[1] = C4.w[1];
2382         P128.w[0] = C4.w[0];
2383         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2384             &is_midpoint_lt_even, &is_midpoint_gt_even,
2385             &is_inexact_lt_midpoint,
2386             &is_inexact_gt_midpoint);
2387         if (incr_exp) {
2388           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2389           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2390             R128.w[0] = ten2k64[q4 - x0];
2391             // R128.w[1] stays 0
2392           } else { // 20 <= q4 - x0 <= 37
2393             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2394             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2395           }
2396         }
2397       } else if (q4 <= 57) {
2398         // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2399         P192.w[2] = C4.w[2];
2400         P192.w[1] = C4.w[1];
2401         P192.w[0] = C4.w[0];
2402         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2403             &is_midpoint_lt_even, &is_midpoint_gt_even,
2404             &is_inexact_lt_midpoint,
2405             &is_inexact_gt_midpoint);
2406         // R192.w[2] is always 0
2407         if (incr_exp) {
2408           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2409           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2410             R192.w[0] = ten2k64[q4 - x0];
2411             // R192.w[1] stays 0
2412             // R192.w[2] stays 0
2413           } else { // 20 <= q4 - x0 <= 33
2414             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2415             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2416             // R192.w[2] stays 0
2417           }
2418         }
2419         R128.w[1] = R192.w[1];
2420         R128.w[0] = R192.w[0];
2421       } else {
2422         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2423         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2424             &is_midpoint_lt_even, &is_midpoint_gt_even,
2425             &is_inexact_lt_midpoint,
2426             &is_inexact_gt_midpoint);
2427         // R256.w[3] and R256.w[2] are always 0
2428         if (incr_exp) {
2429           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2430           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2431             R256.w[0] = ten2k64[q4 - x0];
2432             // R256.w[1] stays 0
2433             // R256.w[2] stays 0
2434             // R256.w[3] stays 0
2435           } else { // 20 <= q4 - x0 <= 33
2436             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2437             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2438             // R256.w[2] stays 0
2439             // R256.w[3] stays 0
2440           }
2441         }
2442         R128.w[1] = R256.w[1];
2443         R128.w[0] = R256.w[0];
2444       }
2445       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2446       // rounded to nearest, which were copied into R128
2447       if (z_sign == p_sign) {
2448         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2449         // the sum can result in [up to] p34 or p34 + 1 digits
2450         res.w[0] = res.w[0] + R128.w[0];
2451         res.w[1] = res.w[1] + R128.w[1];
2452         if (res.w[0] < R128.w[0])
2453           res.w[1]++; // carry
2454         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2455         if (res.w[1] > 0x0001ed09bead87c0ull ||
2456             (res.w[1] == 0x0001ed09bead87c0ull &&
2457              res.w[0] > 0x378d8e63ffffffffull)) {
2458           // avoid double rounding error
2459           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2460           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2461           is_midpoint_lt_even0 = is_midpoint_lt_even;
2462           is_midpoint_gt_even0 = is_midpoint_gt_even;
2463           is_inexact_lt_midpoint = 0;
2464           is_inexact_gt_midpoint = 0;
2465           is_midpoint_lt_even = 0;
2466           is_midpoint_gt_even = 0;
2467           P128.w[1] = res.w[1];
2468           P128.w[0] = res.w[0];
2469           round128_19_38 (35, 1, P128, &res, &incr_exp,
2470               &is_midpoint_lt_even, &is_midpoint_gt_even,
2471               &is_inexact_lt_midpoint,
2472               &is_inexact_gt_midpoint);
2473           // incr_exp is 0 with certainty in this case
2474           // avoid a double rounding error
2475           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2476               is_midpoint_lt_even) { // double rounding error upward
2477             // res = res - 1
2478             res.w[0]--;
2479             if (res.w[0] == 0xffffffffffffffffull)
2480               res.w[1]--;
2481             // Note: a double rounding error upward is not possible; for this
2482             // the result after the first rounding would have to be 99...95
2483             // (35 digits in all), possibly followed by a number of zeros; this
2484             // not possible in Cases (2)-(6) or (15)-(17) which may get here
2485             is_midpoint_lt_even = 0;
2486             is_inexact_lt_midpoint = 1;
2487           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2488               is_midpoint_gt_even) { // double rounding error downward
2489             // res = res + 1
2490             res.w[0]++;
2491             if (res.w[0] == 0)
2492               res.w[1]++;
2493             is_midpoint_gt_even = 0;
2494             is_inexact_gt_midpoint = 1;
2495           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2496                      !is_inexact_lt_midpoint
2497                      && !is_inexact_gt_midpoint) {
2498             // if this second rounding was exact the result may still be
2499             // inexact because of the first rounding
2500             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2501               is_inexact_gt_midpoint = 1;
2502             }
2503             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2504               is_inexact_lt_midpoint = 1;
2505             }
2506           } else if (is_midpoint_gt_even &&
2507                      (is_inexact_gt_midpoint0
2508                       || is_midpoint_lt_even0)) {
2509             // pulled up to a midpoint
2510             is_inexact_lt_midpoint = 1;
2511             is_inexact_gt_midpoint = 0;
2512             is_midpoint_lt_even = 0;
2513             is_midpoint_gt_even = 0;
2514           } else if (is_midpoint_lt_even &&
2515                      (is_inexact_lt_midpoint0
2516                       || is_midpoint_gt_even0)) {
2517             // pulled down to a midpoint
2518             is_inexact_lt_midpoint = 0;
2519             is_inexact_gt_midpoint = 1;
2520             is_midpoint_lt_even = 0;
2521             is_midpoint_gt_even = 0;
2522           } else {
2523             ;
2524           }
2525           // adjust exponent
2526           e3 = e3 + 1;
2527           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2528               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2529             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2530                 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2531               is_inexact_lt_midpoint = 1;
2532             }
2533           }
2534         } else {
2535           // this is the result rounded with unbounded exponent, unless a
2536           // correction is needed
2537           res.w[1] = res.w[1] & MASK_COEFF;
2538           if (lsb == 1) {
2539             if (is_midpoint_gt_even) {
2540               // res = res + 1
2541               is_midpoint_gt_even = 0;
2542               is_midpoint_lt_even = 1;
2543               res.w[0]++;
2544               if (res.w[0] == 0x0)
2545                 res.w[1]++;
2546               // check for rounding overflow
2547               if (res.w[1] == 0x0001ed09bead87c0ull &&
2548                   res.w[0] == 0x378d8e6400000000ull) {
2549                 // res = 10^34 => rounding overflow
2550                 res.w[1] = 0x0000314dc6448d93ull;
2551                 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2552                 e3++;
2553               }
2554             } else if (is_midpoint_lt_even) {
2555               // res = res - 1
2556               is_midpoint_lt_even = 0;
2557               is_midpoint_gt_even = 1;
2558               res.w[0]--;
2559               if (res.w[0] == 0xffffffffffffffffull)
2560                 res.w[1]--;
2561               // if the result is pure zero, the sign depends on the rounding
2562               // mode (x*y and z had opposite signs)
2563               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2564                 if (rnd_mode != ROUNDING_DOWN)
2565                   z_sign = 0x0000000000000000ull;
2566                 else
2567                   z_sign = 0x8000000000000000ull;
2568                 // the exponent is max (e3, expmin)
2569                 res.w[1] = 0x0;
2570                 res.w[0] = 0x0;
2571                 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2572                 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2573                 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2574                 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2575                 BID_SWAP128 (res);
2576                 BID_RETURN (res)
2577               }
2578             } else {
2579               ;
2580             }
2581           }
2582         }
2583       } else { // if (z_sign != p_sign)
2584         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2585         // used to swap rounding indicators if p_sign != z_sign
2586         // the sum can result in [up to] p34 or p34 - 1 digits
2587         tmp64 = res.w[0];
2588         res.w[0] = res.w[0] - R128.w[0];
2589         res.w[1] = res.w[1] - R128.w[1];
2590         if (res.w[0] > tmp64)
2591           res.w[1]--; // borrow
2592         // if res < 10^33 and exp > expmin need to decrease x0 and
2593         // increase scale by 1
2594         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2595                              (res.w[1] == 0x0000314dc6448d93ull &&
2596                               res.w[0] < 0x38c15b0a00000000ull)) ||
2597                             (is_inexact_lt_midpoint
2598                              && res.w[1] == 0x0000314dc6448d93ull
2599                              && res.w[0] == 0x38c15b0a00000000ull))
2600             && x0 >= 1) {
2601           x0 = x0 - 1;
2602           // first restore e3, otherwise it will be too small
2603           e3 = e3 + scale;
2604           scale = scale + 1;
2605           is_inexact_lt_midpoint = 0;
2606           is_inexact_gt_midpoint = 0;
2607           is_midpoint_lt_even = 0;
2608           is_midpoint_gt_even = 0;
2609           incr_exp = 0;
2610           goto case2_repeat;
2611         }
2612         // else this is the result rounded with unbounded exponent;
2613         // because the result has opposite sign to that of C4 which was
2614         // rounded, need to change the rounding indicators
2615         if (is_inexact_lt_midpoint) {
2616           is_inexact_lt_midpoint = 0;
2617           is_inexact_gt_midpoint = 1;
2618         } else if (is_inexact_gt_midpoint) {
2619           is_inexact_gt_midpoint = 0;
2620           is_inexact_lt_midpoint = 1;
2621         } else if (lsb == 0) {
2622           if (is_midpoint_lt_even) {
2623             is_midpoint_lt_even = 0;
2624             is_midpoint_gt_even = 1;
2625           } else if (is_midpoint_gt_even) {
2626             is_midpoint_gt_even = 0;
2627             is_midpoint_lt_even = 1;
2628           } else {
2629             ;
2630           }
2631         } else if (lsb == 1) {
2632           if (is_midpoint_lt_even) {
2633             // res = res + 1
2634             res.w[0]++;
2635             if (res.w[0] == 0x0)
2636               res.w[1]++;
2637             // check for rounding overflow
2638             if (res.w[1] == 0x0001ed09bead87c0ull &&
2639                 res.w[0] == 0x378d8e6400000000ull) {
2640               // res = 10^34 => rounding overflow
2641               res.w[1] = 0x0000314dc6448d93ull;
2642               res.w[0] = 0x38c15b0a00000000ull; // 10^33
2643               e3++;
2644             }
2645           } else if (is_midpoint_gt_even) {
2646             // res = res - 1
2647             res.w[0]--;
2648             if (res.w[0] == 0xffffffffffffffffull)
2649               res.w[1]--;
2650             // if the result is pure zero, the sign depends on the rounding
2651             // mode (x*y and z had opposite signs)
2652             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2653               if (rnd_mode != ROUNDING_DOWN)
2654                 z_sign = 0x0000000000000000ull;
2655               else
2656                 z_sign = 0x8000000000000000ull;
2657               // the exponent is max (e3, expmin)
2658               res.w[1] = 0x0;
2659               res.w[0] = 0x0;
2660               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2661               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2662               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2663               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2664               BID_SWAP128 (res);
2665               BID_RETURN (res)
2666             }
2667           } else {
2668             ;
2669           }
2670         } else {
2671           ;
2672         }
2673       }
2674       // check for underflow
2675       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2676         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2677             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2678              res.w[0] < 0x38c15b0a00000000ull)) {
2679           is_tiny = 1;
2680         }
2681       } else if (e3 < expmin) {
2682         // the result is tiny, so we must truncate more of res
2683         is_tiny = 1;
2684         x0 = expmin - e3;
2685         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2686         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2687         is_midpoint_lt_even0 = is_midpoint_lt_even;
2688         is_midpoint_gt_even0 = is_midpoint_gt_even;
2689         is_inexact_lt_midpoint = 0;
2690         is_inexact_gt_midpoint = 0;
2691         is_midpoint_lt_even = 0;
2692         is_midpoint_gt_even = 0;
2693         // determine the number of decimal digits in res
2694         if (res.w[1] == 0x0) {
2695           // between 1 and 19 digits
2696           for (ind = 1; ind <= 19; ind++) {
2697             if (res.w[0] < ten2k64[ind]) {
2698               break;
2699             }
2700           }
2701           // ind digits
2702         } else if (res.w[1] < ten2k128[0].w[1] ||
2703                    (res.w[1] == ten2k128[0].w[1]
2704                     && res.w[0] < ten2k128[0].w[0])) {
2705           // 20 digits
2706           ind = 20;
2707         } else { // between 21 and 38 digits
2708           for (ind = 1; ind <= 18; ind++) {
2709             if (res.w[1] < ten2k128[ind].w[1] ||
2710                 (res.w[1] == ten2k128[ind].w[1] &&
2711                  res.w[0] < ten2k128[ind].w[0])) {
2712               break;
2713             }
2714           }
2715           // ind + 20 digits
2716           ind = ind + 20;
2717         }
2718
2719         // at this point ind >= x0; because delta >= 2 on this path, the case
2720         // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2721         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2722         // the signs of x * y and z are opposite, and through cancellation
2723         // the most significant decimal digit in res has the weight
2724         // 10^(emin-1); however, it is clear that in this case the most
2725         // significant digit is 9, so the result before rounding is
2726         // 0.9... * 10^emin
2727         // Otherwise, ind > x0 because there are non-zero decimal digits in the
2728         // result with weight of at least 10^emin, and correction for underflow
2729         //  can be carried out using the round*_*_2_* () routines
2730         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2731           res.w[1] = 0x0;
2732           res.w[0] = 0x1;
2733           is_inexact_gt_midpoint = 1;
2734         } else if (ind <= 18) { // check that 2 <= ind
2735           // 2 <= ind <= 18, 1 <= x0 <= 17
2736           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2737                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2738                         &is_inexact_lt_midpoint,
2739                         &is_inexact_gt_midpoint);
2740           if (incr_exp) {
2741             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2742             R64 = ten2k64[ind - x0];
2743           }
2744           res.w[1] = 0;
2745           res.w[0] = R64;
2746         } else if (ind <= 38) {
2747           // 19 <= ind <= 38
2748           P128.w[1] = res.w[1];
2749           P128.w[0] = res.w[0];
2750           round128_19_38 (ind, x0, P128, &res, &incr_exp,
2751                           &is_midpoint_lt_even, &is_midpoint_gt_even,
2752                           &is_inexact_lt_midpoint,
2753                           &is_inexact_gt_midpoint);
2754           if (incr_exp) {
2755             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2756             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2757               res.w[0] = ten2k64[ind - x0];
2758               // res.w[1] stays 0
2759             } else { // 20 <= ind - x0 <= 37
2760               res.w[0] = ten2k128[ind - x0 - 20].w[0];
2761               res.w[1] = ten2k128[ind - x0 - 20].w[1];
2762             }
2763           }
2764         }
2765         // avoid a double rounding error
2766         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2767             is_midpoint_lt_even) { // double rounding error upward
2768           // res = res - 1
2769           res.w[0]--;
2770           if (res.w[0] == 0xffffffffffffffffull)
2771             res.w[1]--;
2772           // Note: a double rounding error upward is not possible; for this
2773           // the result after the first rounding would have to be 99...95
2774           // (35 digits in all), possibly followed by a number of zeros; this
2775           // not possible in Cases (2)-(6) which may get here
2776           is_midpoint_lt_even = 0;
2777           is_inexact_lt_midpoint = 1;
2778         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2779             is_midpoint_gt_even) { // double rounding error downward
2780           // res = res + 1
2781           res.w[0]++;
2782           if (res.w[0] == 0)
2783             res.w[1]++;
2784           is_midpoint_gt_even = 0;
2785           is_inexact_gt_midpoint = 1;
2786         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2787                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2788           // if this second rounding was exact the result may still be
2789           // inexact because of the first rounding
2790           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2791             is_inexact_gt_midpoint = 1;
2792           }
2793           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2794             is_inexact_lt_midpoint = 1;
2795           }
2796         } else if (is_midpoint_gt_even &&
2797                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2798           // pulled up to a midpoint
2799           is_inexact_lt_midpoint = 1;
2800           is_inexact_gt_midpoint = 0;
2801           is_midpoint_lt_even = 0;
2802           is_midpoint_gt_even = 0;
2803         } else if (is_midpoint_lt_even &&
2804                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2805           // pulled down to a midpoint
2806           is_inexact_lt_midpoint = 0;
2807           is_inexact_gt_midpoint = 1;
2808           is_midpoint_lt_even = 0;
2809           is_midpoint_gt_even = 0;
2810         } else {
2811           ;
2812         }
2813         // adjust exponent
2814         e3 = e3 + x0;
2815         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2816             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2817           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2818               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2819             is_inexact_lt_midpoint = 1;
2820           }
2821         }
2822       } else {
2823         ; // not underflow
2824       }
2825       // check for inexact result
2826       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2827           is_midpoint_lt_even || is_midpoint_gt_even) {
2828         // set the inexact flag
2829         *pfpsf |= INEXACT_EXCEPTION;
2830         if (is_tiny)
2831           *pfpsf |= UNDERFLOW_EXCEPTION;
2832       }
2833       // now check for significand = 10^34 (may have resulted from going
2834       // back to case2_repeat)
2835       if (res.w[1] == 0x0001ed09bead87c0ull &&
2836           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
2837         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2838         res.w[0] = 0x38c15b0a00000000ull;
2839         e3 = e3 + 1;
2840       }
2841       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2842       // check for overflow
2843       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2844         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2845         res.w[0] = 0x0000000000000000ull;
2846         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2847       }
2848       if (rnd_mode != ROUNDING_TO_NEAREST) {
2849         rounding_correction (rnd_mode,
2850                              is_inexact_lt_midpoint,
2851                              is_inexact_gt_midpoint,
2852                              is_midpoint_lt_even, is_midpoint_gt_even,
2853                              e3, &res, pfpsf);
2854       }
2855       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2856       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2857       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2858       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2859       BID_SWAP128 (res);
2860       BID_RETURN (res)
2861
2862     } else {
2863
2864       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2865       // the signs of x*y and z are opposite; in these cases massive
2866       // cancellation can occur, so it is better to scale either C3 or C4 and
2867       // to perform the subtraction before rounding; rounding is performed
2868       // next, depending on the number of decimal digits in the result and on
2869       // the exponent value
2870       // Note: overlow is not possible in this case
2871       // this is similar to Cases (15), (16), and (17)
2872
2873       if (delta + q4 < q3) { // from Case (6)
2874         // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2875         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2876         // and call add_and_round; delta stays positive
2877         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2878         P128.w[1] = C3.w[1];
2879         P128.w[0] = C3.w[0];
2880         C3.w[1] = C4.w[1];
2881         C3.w[0] = C4.w[0];
2882         C4.w[1] = P128.w[1];
2883         C4.w[0] = P128.w[0];
2884         ind = q3;
2885         q3 = q4;
2886         q4 = ind;
2887         ind = e3;
2888         e3 = e4;
2889         e4 = ind;
2890         tmp_sign = z_sign;
2891         z_sign = p_sign;
2892         p_sign = tmp_sign;
2893       } else { // from Cases (2), (3), (4), (5)
2894         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2895         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2896         // (16), and (17) if we just change the sign of delta
2897         delta = -delta;
2898       }
2899       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2900                      rnd_mode, &is_midpoint_lt_even,
2901                      &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2902                      &is_inexact_gt_midpoint, pfpsf, &res);
2903       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2904       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2905       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2906       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2907       BID_SWAP128 (res);
2908       BID_RETURN (res)
2909
2910     }
2911
2912   } else { // if delta < 0
2913
2914     delta = -delta;
2915
2916     if (p34 < q4 && q4 <= delta) { // Case (7)
2917
2918       // truncate C4 to p34 digits into res
2919       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2920       x0 = q4 - p34;
2921       if (q4 <= 38) {
2922         P128.w[1] = C4.w[1];
2923         P128.w[0] = C4.w[0];
2924         round128_19_38 (q4, x0, P128, &res, &incr_exp,
2925                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2926                         &is_inexact_lt_midpoint,
2927                         &is_inexact_gt_midpoint);
2928       } else if (q4 <= 57) { // 35 <= q4 <= 57
2929         P192.w[2] = C4.w[2];
2930         P192.w[1] = C4.w[1];
2931         P192.w[0] = C4.w[0];
2932         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2933                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2934                         &is_inexact_lt_midpoint,
2935                         &is_inexact_gt_midpoint);
2936         res.w[0] = R192.w[0];
2937         res.w[1] = R192.w[1];
2938       } else { // if (q4 <= 68)
2939         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2940                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2941                         &is_inexact_lt_midpoint,
2942                         &is_inexact_gt_midpoint);
2943         res.w[0] = R256.w[0];
2944         res.w[1] = R256.w[1];
2945       }
2946       e4 = e4 + x0;
2947       if (incr_exp) {
2948         e4 = e4 + 1;
2949       }
2950       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2951           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2952         // if C4 rounded to p34 digits is exact then the result is inexact,
2953         // in a way that depends on the signs of x * y and z
2954         if (p_sign == z_sign) {
2955           is_inexact_lt_midpoint = 1;
2956         } else { // if (p_sign != z_sign)
2957           if (res.w[1] != 0x0000314dc6448d93ull ||
2958               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2959             is_inexact_gt_midpoint = 1;
2960           } else { // res = 10^33 and exact is a special case
2961             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2962             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2963             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2964             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2965             if (delta > p34 + 1) { // C3 < 1/2
2966               // res = 10^33, unchanged
2967               is_inexact_gt_midpoint = 1;
2968             } else { // if (delta == p34 + 1)
2969               if (q3 <= 19) {
2970                 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2971                   // res = 10^33, unchanged
2972                   is_inexact_gt_midpoint = 1;
2973                 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2974                   // res = 10^33, unchanged
2975                   is_midpoint_lt_even = 1;
2976                 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2977                   res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2978                   res.w[0] = 0x378d8e63ffffffffull;
2979                   e4 = e4 - 1;
2980                   is_inexact_lt_midpoint = 1;
2981                 }
2982               } else { // if (20 <= q3 <=34)
2983                 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2984                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2985                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2986                   // res = 10^33, unchanged
2987                   is_inexact_gt_midpoint = 1;
2988                 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2989                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2990                   // res = 10^33, unchanged
2991                   is_midpoint_lt_even = 1;
2992                 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2993                   res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2994                   res.w[0] = 0x378d8e63ffffffffull;
2995                   e4 = e4 - 1;
2996                   is_inexact_lt_midpoint = 1;
2997                 }
2998               }
2999             }
3000           }
3001         }
3002       } else if (is_midpoint_lt_even) {
3003         if (z_sign != p_sign) {
3004           // needs correction: res = res - 1
3005           res.w[0] = res.w[0] - 1;
3006           if (res.w[0] == 0xffffffffffffffffull)
3007             res.w[1]--;
3008           // if it is (10^33-1)*10^e4 then the corect result is
3009           // (10^34-1)*10(e4-1)
3010           if (res.w[1] == 0x0000314dc6448d93ull &&
3011               res.w[0] == 0x38c15b09ffffffffull) {
3012             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3013             res.w[0] = 0x378d8e63ffffffffull;
3014             e4 = e4 - 1;
3015           }
3016           is_midpoint_lt_even = 0;
3017           is_inexact_lt_midpoint = 1;
3018         } else { // if (z_sign == p_sign)
3019           is_midpoint_lt_even = 0;
3020           is_inexact_gt_midpoint = 1;
3021         }
3022       } else if (is_midpoint_gt_even) {
3023         if (z_sign == p_sign) {
3024           // needs correction: res = res + 1 (cannot cross in the next binade)
3025           res.w[0] = res.w[0] + 1;
3026           if (res.w[0] == 0x0000000000000000ull)
3027             res.w[1]++;
3028           is_midpoint_gt_even = 0;
3029           is_inexact_gt_midpoint = 1;
3030         } else { // if (z_sign != p_sign)
3031           is_midpoint_gt_even = 0;
3032           is_inexact_lt_midpoint = 1;
3033         }
3034       } else {
3035         ; // the rounded result is already correct
3036       }
3037       // check for overflow
3038       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3039         res.w[1] = p_sign | 0x7800000000000000ull;
3040         res.w[0] = 0x0000000000000000ull;
3041         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3042       } else { // no overflow or not RN
3043         p_exp = ((UINT64) (e4 + 6176) << 49);
3044         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3045       }
3046       if (rnd_mode != ROUNDING_TO_NEAREST) {
3047         rounding_correction (rnd_mode,
3048                              is_inexact_lt_midpoint,
3049                              is_inexact_gt_midpoint,
3050                              is_midpoint_lt_even, is_midpoint_gt_even,
3051                              e4, &res, pfpsf);
3052       }
3053       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3054           is_midpoint_lt_even || is_midpoint_gt_even) {
3055         // set the inexact flag
3056         *pfpsf |= INEXACT_EXCEPTION;
3057       }
3058       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3059       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3060       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3061       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3062       BID_SWAP128 (res);
3063       BID_RETURN (res)
3064
3065     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3066         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3067         (q4 <= delta && delta + q3 <= p34) || // Case (10)
3068         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3069         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3070         (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3071
3072       // Case (8) is similar to Case (1), with C3 and C4 swapped
3073       // Case (9) is similar to Case (2), with C3 and C4 swapped
3074       // Case (10) is similar to Case (3), with C3 and C4 swapped
3075       // Case (13) is similar to Case (4), with C3 and C4 swapped
3076       // Case (14) is similar to Case (5), with C3 and C4 swapped
3077       // Case (18) is similar to Case (6), with C3 and C4 swapped
3078
3079       // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3080       // and go back to delta_ge_zero
3081       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3082       P128.w[1] = C3.w[1];
3083       P128.w[0] = C3.w[0];
3084       C3.w[1] = C4.w[1];
3085       C3.w[0] = C4.w[0];
3086       C4.w[1] = P128.w[1];
3087       C4.w[0] = P128.w[0];
3088       ind = q3;
3089       q3 = q4;
3090       q4 = ind;
3091       ind = e3;
3092       e3 = e4;
3093       e4 = ind;
3094       tmp_sign = z_sign;
3095       z_sign = p_sign;
3096       p_sign = tmp_sign;
3097       tmp.ui64 = z_exp;
3098       z_exp = p_exp;
3099       p_exp = tmp.ui64;
3100       goto delta_ge_zero;
3101
3102     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3103                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3104
3105       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3106       // 1 <= x0 <= q3 - 1 <= p34 - 1
3107       x0 = e4 - e3; // or x0 = delta + q3 - q4
3108       if (q3 <= 18) { // 2 <= q3 <= 18
3109         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3110                       &is_midpoint_lt_even, &is_midpoint_gt_even,
3111                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3112         // C3.w[1] = 0;
3113         C3.w[0] = R64;
3114       } else if (q3 <= 38) {
3115         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3116                         &is_midpoint_lt_even, &is_midpoint_gt_even,
3117                         &is_inexact_lt_midpoint,
3118                         &is_inexact_gt_midpoint);
3119         C3.w[1] = R128.w[1];
3120         C3.w[0] = R128.w[0];
3121       }
3122       // the rounded result has q3 - x0 digits
3123       // we want the exponent to be e4, so if incr_exp = 1 then
3124       // multiply the rounded result by 10 - it will still fit in 113 bits
3125       if (incr_exp) {
3126         // 64 x 128 -> 128
3127         P128.w[1] = C3.w[1];
3128         P128.w[0] = C3.w[0];
3129         __mul_64x128_to_128 (C3, ten2k64[1], P128);
3130       }
3131       e3 = e3 + x0; // this is e4
3132       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3133       // the result will have the sign of x * y; the exponent is e4
3134       R256.w[3] = 0;
3135       R256.w[2] = 0;
3136       R256.w[1] = C3.w[1];
3137       R256.w[0] = C3.w[0];
3138       if (p_sign == z_sign) { // R256 = C4 + R256
3139         add256 (C4, R256, &R256);
3140       } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3141         sub256 (C4, R256, &R256); // the result cannot be pure zero
3142         // because the result has opposite sign to that of R256 which was
3143         // rounded, need to change the rounding indicators
3144         lsb = C4.w[0] & 0x01;
3145         if (is_inexact_lt_midpoint) {
3146           is_inexact_lt_midpoint = 0;
3147           is_inexact_gt_midpoint = 1;
3148         } else if (is_inexact_gt_midpoint) {
3149           is_inexact_gt_midpoint = 0;
3150           is_inexact_lt_midpoint = 1;
3151         } else if (lsb == 0) {
3152           if (is_midpoint_lt_even) {
3153             is_midpoint_lt_even = 0;
3154             is_midpoint_gt_even = 1;
3155           } else if (is_midpoint_gt_even) {
3156             is_midpoint_gt_even = 0;
3157             is_midpoint_lt_even = 1;
3158           } else {
3159             ;
3160           }
3161         } else if (lsb == 1) {
3162           if (is_midpoint_lt_even) {
3163             // res = res + 1
3164             R256.w[0]++;
3165             if (R256.w[0] == 0x0) {
3166               R256.w[1]++;
3167               if (R256.w[1] == 0x0) {
3168                 R256.w[2]++;
3169                 if (R256.w[2] == 0x0) {
3170                   R256.w[3]++;
3171                 }
3172               }
3173             }
3174             // no check for rounding overflow - R256 was a difference
3175           } else if (is_midpoint_gt_even) {
3176             // res = res - 1
3177             R256.w[0]--;
3178             if (R256.w[0] == 0xffffffffffffffffull) {
3179               R256.w[1]--;
3180               if (R256.w[1] == 0xffffffffffffffffull) {
3181                 R256.w[2]--;
3182                 if (R256.w[2] == 0xffffffffffffffffull) {
3183                   R256.w[3]--;
3184                 }
3185               }
3186             }
3187           } else {
3188             ;
3189           }
3190         } else {
3191           ;
3192         }
3193       }
3194       // determine the number of decimal digits in R256
3195       ind = nr_digits256 (R256); // ind >= p34
3196       // if R256 is sum, then ind > p34; if R256 is a difference, then
3197       // ind >= p34; this means that we can calculate the result rounded to
3198       // the destination precision, with unbounded exponent, starting from R256
3199       // and using the indicators from the rounding of C3 to avoid a double
3200       // rounding error
3201
3202       if (ind < p34) {
3203         ;
3204       } else if (ind == p34) {
3205         // the result rounded to the destination precision with
3206         // unbounded exponent
3207         // is (-1)^p_sign * R256 * 10^e4
3208         res.w[1] = R256.w[1];
3209         res.w[0] = R256.w[0];
3210       } else { // if (ind > p34)
3211         // if more than P digits, round to nearest to P digits
3212         // round R256 to p34 digits
3213         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3214         // save C3 rounding indicators to help avoid double rounding error
3215         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3216         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3217         is_midpoint_lt_even0 = is_midpoint_lt_even;
3218         is_midpoint_gt_even0 = is_midpoint_gt_even;
3219         // initialize rounding indicators
3220         is_inexact_lt_midpoint = 0;
3221         is_inexact_gt_midpoint = 0;
3222         is_midpoint_lt_even = 0;
3223         is_midpoint_gt_even = 0;
3224         // round to p34 digits; the result fits in 113 bits
3225         if (ind <= 38) {
3226           P128.w[1] = R256.w[1];
3227           P128.w[0] = R256.w[0];
3228           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3229                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3230                           &is_inexact_lt_midpoint,
3231                           &is_inexact_gt_midpoint);
3232         } else if (ind <= 57) {
3233           P192.w[2] = R256.w[2];
3234           P192.w[1] = R256.w[1];
3235           P192.w[0] = R256.w[0];
3236           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3237                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3238                           &is_inexact_lt_midpoint,
3239                           &is_inexact_gt_midpoint);
3240           R128.w[1] = R192.w[1];
3241           R128.w[0] = R192.w[0];
3242         } else { // if (ind <= 68)
3243           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3244                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3245                           &is_inexact_lt_midpoint,
3246                           &is_inexact_gt_midpoint);
3247           R128.w[1] = R256.w[1];
3248           R128.w[0] = R256.w[0];
3249         }
3250         // the rounded result has p34 = 34 digits
3251         e4 = e4 + x0 + incr_exp;
3252
3253         res.w[1] = R128.w[1];
3254         res.w[0] = R128.w[0];
3255
3256         // avoid a double rounding error
3257         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3258             is_midpoint_lt_even) { // double rounding error upward
3259           // res = res - 1
3260           res.w[0]--;
3261           if (res.w[0] == 0xffffffffffffffffull)
3262             res.w[1]--;
3263           is_midpoint_lt_even = 0;
3264           is_inexact_lt_midpoint = 1;
3265           // Note: a double rounding error upward is not possible; for this
3266           // the result after the first rounding would have to be 99...95
3267           // (35 digits in all), possibly followed by a number of zeros; this
3268           // not possible in Cases (2)-(6) or (15)-(17) which may get here
3269           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3270           if (res.w[1] == 0x0000314dc6448d93ull &&
3271             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3272             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3273             res.w[0] = 0x378d8e63ffffffffull;
3274             e4--;
3275           }
3276         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3277             is_midpoint_gt_even) { // double rounding error downward
3278           // res = res + 1
3279           res.w[0]++;
3280           if (res.w[0] == 0)
3281             res.w[1]++;
3282           is_midpoint_gt_even = 0;
3283           is_inexact_gt_midpoint = 1;
3284         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3285                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3286           // if this second rounding was exact the result may still be
3287           // inexact because of the first rounding
3288           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3289             is_inexact_gt_midpoint = 1;
3290           }
3291           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3292             is_inexact_lt_midpoint = 1;
3293           }
3294         } else if (is_midpoint_gt_even &&
3295                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3296           // pulled up to a midpoint
3297           is_inexact_lt_midpoint = 1;
3298           is_inexact_gt_midpoint = 0;
3299           is_midpoint_lt_even = 0;
3300           is_midpoint_gt_even = 0;
3301         } else if (is_midpoint_lt_even &&
3302                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3303           // pulled down to a midpoint
3304           is_inexact_lt_midpoint = 0;
3305           is_inexact_gt_midpoint = 1;
3306           is_midpoint_lt_even = 0;
3307           is_midpoint_gt_even = 0;
3308         } else {
3309           ;
3310         }
3311       }
3312
3313       // determine tininess
3314       if (rnd_mode == ROUNDING_TO_NEAREST) {
3315         if (e4 < expmin) {
3316           is_tiny = 1; // for other rounding modes apply correction
3317         }
3318       } else {
3319         // for RM, RP, RZ, RA apply correction in order to determine tininess
3320         // but do not save the result; apply the correction to
3321         // (-1)^p_sign * res * 10^0
3322         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3323         P128.w[0] = res.w[0];
3324         rounding_correction (rnd_mode,
3325                              is_inexact_lt_midpoint,
3326                              is_inexact_gt_midpoint,
3327                              is_midpoint_lt_even, is_midpoint_gt_even,
3328                              0, &P128, pfpsf);
3329         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3330         // the number of digits in the significand is p34 = 34
3331         if (e4 + scale < expmin) {
3332           is_tiny = 1;
3333         }
3334       }
3335
3336       // the result rounded to the destination precision with unbounded exponent
3337       // is (-1)^p_sign * res * 10^e4
3338       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3339       // res.w[0] unchanged;
3340       // Note: res is correct only if expmin <= e4 <= expmax
3341       ind = p34; // the number of decimal digits in the signifcand of res
3342
3343       // at this point we have the result rounded with unbounded exponent in
3344       // res and we know its tininess:
3345       // res = (-1)^p_sign * significand * 10^e4,
3346       // where q (significand) = ind = p34
3347       // Note: res is correct only if expmin <= e4 <= expmax
3348
3349       // check for overflow if RN
3350       if (rnd_mode == ROUNDING_TO_NEAREST
3351           && (ind + e4) > (p34 + expmax)) {
3352         res.w[1] = p_sign | 0x7800000000000000ull;
3353         res.w[0] = 0x0000000000000000ull;
3354         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3355         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3356         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3357         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3358         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3359         BID_SWAP128 (res);
3360         BID_RETURN (res)
3361       } // else not overflow or not RN, so continue
3362
3363       // from this point on this is similar to the last part of the computation
3364       // for Cases (15), (16), (17)
3365
3366       // if (e4 >= expmin) we have the result rounded with bounded exponent
3367       if (e4 < expmin) {
3368         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3369         // where the result rounded [at most] once is
3370         //   (-1)^p_sign * significand_res * 10^e4
3371
3372         // avoid double rounding error
3373         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3374         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3375         is_midpoint_lt_even0 = is_midpoint_lt_even;
3376         is_midpoint_gt_even0 = is_midpoint_gt_even;
3377         is_inexact_lt_midpoint = 0;
3378         is_inexact_gt_midpoint = 0;
3379         is_midpoint_lt_even = 0;
3380         is_midpoint_gt_even = 0;
3381
3382         if (x0 > ind) {
3383           // nothing is left of res when moving the decimal point left x0 digits
3384           is_inexact_lt_midpoint = 1;
3385           res.w[1] = p_sign | 0x0000000000000000ull;
3386           res.w[0] = 0x0000000000000000ull;
3387           e4 = expmin;
3388         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3389           // this is <, =, or > 1/2 ulp
3390           // compare the ind-digit value in the significand of res with
3391           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3392           // less than, equal to, or greater than 1/2 ulp (significand of res)
3393           R128.w[1] = res.w[1] & MASK_COEFF;
3394           R128.w[0] = res.w[0];
3395           if (ind <= 19) {
3396             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3397               lt_half_ulp = 1;
3398               is_inexact_lt_midpoint = 1;
3399             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3400               eq_half_ulp = 1;
3401               is_midpoint_gt_even = 1;
3402             } else { // > 1/2 ulp
3403               gt_half_ulp = 1;
3404               is_inexact_gt_midpoint = 1;
3405             }
3406           } else { // if (ind <= 38)
3407             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3408                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3409                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3410               lt_half_ulp = 1;
3411               is_inexact_lt_midpoint = 1;
3412             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3413                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3414               eq_half_ulp = 1;
3415               is_midpoint_gt_even = 1;
3416             } else { // > 1/2 ulp
3417               gt_half_ulp = 1;
3418               is_inexact_gt_midpoint = 1;
3419             }
3420           }
3421           if (lt_half_ulp || eq_half_ulp) {
3422             // res = +0.0 * 10^expmin
3423             res.w[1] = 0x0000000000000000ull;
3424             res.w[0] = 0x0000000000000000ull;
3425           } else { // if (gt_half_ulp)
3426             // res = +1 * 10^expmin
3427             res.w[1] = 0x0000000000000000ull;
3428             res.w[0] = 0x0000000000000001ull;
3429           }
3430           res.w[1] = p_sign | res.w[1];
3431           e4 = expmin;
3432         } else { // if (1 <= x0 <= ind - 1 <= 33)
3433           // round the ind-digit result to ind - x0 digits
3434
3435           if (ind <= 18) { // 2 <= ind <= 18
3436             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3437                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3438                           &is_inexact_lt_midpoint,
3439                           &is_inexact_gt_midpoint);
3440             res.w[1] = 0x0;
3441             res.w[0] = R64;
3442           } else if (ind <= 38) {
3443             P128.w[1] = res.w[1] & MASK_COEFF;
3444             P128.w[0] = res.w[0];
3445             round128_19_38 (ind, x0, P128, &res, &incr_exp,
3446                             &is_midpoint_lt_even, &is_midpoint_gt_even,
3447                             &is_inexact_lt_midpoint,
3448                             &is_inexact_gt_midpoint);
3449           }
3450           e4 = e4 + x0; // expmin
3451           // we want the exponent to be expmin, so if incr_exp = 1 then
3452           // multiply the rounded result by 10 - it will still fit in 113 bits
3453           if (incr_exp) {
3454             // 64 x 128 -> 128
3455             P128.w[1] = res.w[1] & MASK_COEFF;
3456             P128.w[0] = res.w[0];
3457             __mul_64x128_to_128 (res, ten2k64[1], P128);
3458           }
3459           res.w[1] =
3460             p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3461                                                      w[1] & MASK_COEFF);
3462           // avoid a double rounding error
3463           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3464                 is_midpoint_lt_even) { // double rounding error upward
3465             // res = res - 1
3466             res.w[0]--;
3467             if (res.w[0] == 0xffffffffffffffffull)
3468               res.w[1]--;
3469             // Note: a double rounding error upward is not possible; for this
3470             // the result after the first rounding would have to be 99...95
3471             // (35 digits in all), possibly followed by a number of zeros; this
3472             // not possible in this underflow case
3473             is_midpoint_lt_even = 0;
3474             is_inexact_lt_midpoint = 1;
3475           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3476                 is_midpoint_gt_even) { // double rounding error downward
3477             // res = res + 1
3478             res.w[0]++;
3479             if (res.w[0] == 0)
3480               res.w[1]++;
3481             is_midpoint_gt_even = 0;
3482             is_inexact_gt_midpoint = 1;
3483           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3484                      !is_inexact_lt_midpoint
3485                      && !is_inexact_gt_midpoint) {
3486             // if this second rounding was exact the result may still be
3487             // inexact because of the first rounding
3488             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3489               is_inexact_gt_midpoint = 1;
3490             }
3491             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3492               is_inexact_lt_midpoint = 1;
3493             }
3494           } else if (is_midpoint_gt_even &&
3495                      (is_inexact_gt_midpoint0
3496                       || is_midpoint_lt_even0)) {
3497             // pulled up to a midpoint
3498             is_inexact_lt_midpoint = 1;
3499             is_inexact_gt_midpoint = 0;
3500             is_midpoint_lt_even = 0;
3501             is_midpoint_gt_even = 0;
3502           } else if (is_midpoint_lt_even &&
3503                      (is_inexact_lt_midpoint0
3504                       || is_midpoint_gt_even0)) {
3505             // pulled down to a midpoint
3506             is_inexact_lt_midpoint = 0;
3507             is_inexact_gt_midpoint = 1;
3508             is_midpoint_lt_even = 0;
3509             is_midpoint_gt_even = 0;
3510           } else {
3511             ;
3512           }
3513         }
3514       }
3515       // res contains the correct result
3516       // apply correction if not rounding to nearest
3517       if (rnd_mode != ROUNDING_TO_NEAREST) {
3518         rounding_correction (rnd_mode,
3519                              is_inexact_lt_midpoint,
3520                              is_inexact_gt_midpoint,
3521                              is_midpoint_lt_even, is_midpoint_gt_even,
3522                              e4, &res, pfpsf);
3523       }
3524       if (is_midpoint_lt_even || is_midpoint_gt_even ||
3525           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3526         // set the inexact flag
3527         *pfpsf |= INEXACT_EXCEPTION;
3528         if (is_tiny)
3529           *pfpsf |= UNDERFLOW_EXCEPTION;
3530       }
3531       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3532       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3533       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3534       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3535       BID_SWAP128 (res);
3536       BID_RETURN (res)
3537
3538     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3539         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3540         (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3541
3542       // calculate first the result rounded to the destination precision, with
3543       // unbounded exponent
3544
3545       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3546               rnd_mode, &is_midpoint_lt_even,
3547               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3548               &is_inexact_gt_midpoint, pfpsf, &res);
3549       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3550       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3551       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3552       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3553       BID_SWAP128 (res);
3554       BID_RETURN (res)
3555
3556     } else {
3557       ;
3558     }
3559
3560   } // end if delta < 0
3561
3562   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3563   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3564   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3565   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3566   BID_SWAP128 (res);
3567   BID_RETURN (res)
3568
3569 }
3570
3571
3572 #if DECIMAL_CALL_BY_REFERENCE
3573 void
3574 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3575             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3576             _EXC_INFO_PARAM) {
3577   UINT128 x = *px, y = *py, z = *pz;
3578 #if !DECIMAL_GLOBAL_ROUNDING
3579   unsigned int rnd_mode = *prnd_mode;
3580 #endif
3581 #else
3582 UINT128
3583 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3584             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3585             _EXC_INFO_PARAM) {
3586 #endif
3587   int is_midpoint_lt_even, is_midpoint_gt_even,
3588     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3589   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3590
3591 #if DECIMAL_CALL_BY_REFERENCE
3592   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3593                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3594                   &res, &x, &y, &z
3595                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3596                   _EXC_INFO_ARG);
3597 #else
3598   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3599                         &is_inexact_lt_midpoint,
3600                         &is_inexact_gt_midpoint, x, y,
3601                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3602                         _EXC_INFO_ARG);
3603 #endif
3604   BID_RETURN (res);
3605 }
3606
3607
3608 #if DECIMAL_CALL_BY_REFERENCE
3609 void
3610 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3611                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3612                _EXC_INFO_PARAM) {
3613   UINT64 x = *px, y = *py, z = *pz;
3614 #if !DECIMAL_GLOBAL_ROUNDING
3615   unsigned int rnd_mode = *prnd_mode;
3616 #endif
3617 #else
3618 UINT128
3619 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3620                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3621                _EXC_INFO_PARAM) {
3622 #endif
3623   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3624     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3625   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3626   UINT128 x1, y1, z1;
3627
3628 #if DECIMAL_CALL_BY_REFERENCE
3629   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3630   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3631   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3633                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3634                   &res, &x1, &y1, &z1
3635                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3636                   _EXC_INFO_ARG);
3637 #else
3638   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3639   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3640   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3642                         &is_inexact_lt_midpoint,
3643                         &is_inexact_gt_midpoint, x1, y1,
3644                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3645                         _EXC_INFO_ARG);
3646 #endif
3647   BID_RETURN (res);
3648 }
3649
3650
3651 #if DECIMAL_CALL_BY_REFERENCE
3652 void
3653 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3654                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3655                _EXC_INFO_PARAM) {
3656   UINT64 x = *px, y = *py;
3657   UINT128 z = *pz;
3658 #if !DECIMAL_GLOBAL_ROUNDING
3659   unsigned int rnd_mode = *prnd_mode;
3660 #endif
3661 #else
3662 UINT128
3663 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3664                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3665                _EXC_INFO_PARAM) {
3666 #endif
3667   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3668     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3669   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3670   UINT128 x1, y1;
3671
3672 #if DECIMAL_CALL_BY_REFERENCE
3673   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3674   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3675   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3676                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3677                   &res, &x1, &y1, &z
3678                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3679                   _EXC_INFO_ARG);
3680 #else
3681   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3682   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3683   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3684                         &is_inexact_lt_midpoint,
3685                         &is_inexact_gt_midpoint, x1, y1,
3686                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3687                         _EXC_INFO_ARG);
3688 #endif
3689   BID_RETURN (res);
3690 }
3691
3692
3693 #if DECIMAL_CALL_BY_REFERENCE
3694 void
3695 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3696                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3697                _EXC_INFO_PARAM) {
3698   UINT64 x = *px, z = *pz;
3699 #if !DECIMAL_GLOBAL_ROUNDING
3700   unsigned int rnd_mode = *prnd_mode;
3701 #endif
3702 #else
3703 UINT128
3704 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3705                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3706                _EXC_INFO_PARAM) {
3707 #endif
3708   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3709     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3710   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3711   UINT128 x1, z1;
3712
3713 #if DECIMAL_CALL_BY_REFERENCE
3714   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3715   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3716   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3717                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3718                   &res, &x1, py, &z1
3719                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3720                   _EXC_INFO_ARG);
3721 #else
3722   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3723   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3724   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3725                         &is_inexact_lt_midpoint,
3726                         &is_inexact_gt_midpoint, x1, y,
3727                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3728                         _EXC_INFO_ARG);
3729 #endif
3730   BID_RETURN (res);
3731 }
3732
3733
3734 #if DECIMAL_CALL_BY_REFERENCE
3735 void
3736 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3737                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3738                _EXC_INFO_PARAM) {
3739   UINT64 x = *px;
3740 #if !DECIMAL_GLOBAL_ROUNDING
3741   unsigned int rnd_mode = *prnd_mode;
3742 #endif
3743 #else
3744 UINT128
3745 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3746                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3747                _EXC_INFO_PARAM) {
3748 #endif
3749   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3750     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3751   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3752   UINT128 x1;
3753
3754 #if DECIMAL_CALL_BY_REFERENCE
3755   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3756   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3757                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3758                   &res, &x1, py, pz
3759                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3760                   _EXC_INFO_ARG);
3761 #else
3762   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3763   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3764                         &is_inexact_lt_midpoint,
3765                         &is_inexact_gt_midpoint, x1, y,
3766                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3767                         _EXC_INFO_ARG);
3768 #endif
3769   BID_RETURN (res);
3770 }
3771
3772
3773 #if DECIMAL_CALL_BY_REFERENCE
3774 void
3775 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3776                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3777                _EXC_INFO_PARAM) {
3778   UINT64 y = *py, z = *pz;
3779 #if !DECIMAL_GLOBAL_ROUNDING
3780   unsigned int rnd_mode = *prnd_mode;
3781 #endif
3782 #else
3783 UINT128
3784 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3785                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3786                _EXC_INFO_PARAM) {
3787 #endif
3788   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3789     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3790   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3791   UINT128 y1, z1;
3792
3793 #if DECIMAL_CALL_BY_REFERENCE
3794   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3795   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3796   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3797                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3798                   &res, px, &y1, &z1
3799                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3800                   _EXC_INFO_ARG);
3801 #else
3802   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3803   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3804   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3805                         &is_inexact_lt_midpoint,
3806                         &is_inexact_gt_midpoint, x, y1,
3807                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3808                         _EXC_INFO_ARG);
3809 #endif
3810   BID_RETURN (res);
3811 }
3812
3813
3814 #if DECIMAL_CALL_BY_REFERENCE
3815 void
3816 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3817                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3818                _EXC_INFO_PARAM) {
3819   UINT64 y = *py;
3820 #if !DECIMAL_GLOBAL_ROUNDING
3821   unsigned int rnd_mode = *prnd_mode;
3822 #endif
3823 #else
3824 UINT128
3825 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3826                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3827                _EXC_INFO_PARAM) {
3828 #endif
3829   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3830     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3831   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3832   UINT128 y1;
3833
3834 #if DECIMAL_CALL_BY_REFERENCE
3835   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3836   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3837                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3838                   &res, px, &y1, pz
3839                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3840                   _EXC_INFO_ARG);
3841 #else
3842   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3843   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3844                         &is_inexact_lt_midpoint,
3845                         &is_inexact_gt_midpoint, x, y1,
3846                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3847                         _EXC_INFO_ARG);
3848 #endif
3849   BID_RETURN (res);
3850 }
3851
3852
3853 #if DECIMAL_CALL_BY_REFERENCE
3854 void
3855 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3856                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3857                _EXC_INFO_PARAM) {
3858   UINT64 z = *pz;
3859 #if !DECIMAL_GLOBAL_ROUNDING
3860   unsigned int rnd_mode = *prnd_mode;
3861 #endif
3862 #else
3863 UINT128
3864 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3865                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3866                _EXC_INFO_PARAM) {
3867 #endif
3868   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3869     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3870   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3871   UINT128 z1;
3872
3873 #if DECIMAL_CALL_BY_REFERENCE
3874   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3875   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3876                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3877                   &res, px, py, &z1
3878                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3879                   _EXC_INFO_ARG);
3880 #else
3881   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3882   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3883                         &is_inexact_lt_midpoint,
3884                         &is_inexact_gt_midpoint, x, y,
3885                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3886                         _EXC_INFO_ARG);
3887 #endif
3888   BID_RETURN (res);
3889 }
3890
3891 // Note: bid128qqq_fma is represented by bid128_fma
3892
3893 // Note: bid64ddd_fma is represented by bid64_fma
3894
3895 #if DECIMAL_CALL_BY_REFERENCE
3896 void
3897 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3898               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3899               _EXC_INFO_PARAM) {
3900   UINT64 x = *px, y = *py;
3901 #if !DECIMAL_GLOBAL_ROUNDING
3902   unsigned int rnd_mode = *prnd_mode;
3903 #endif
3904 #else
3905 UINT64
3906 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3907               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3908               _EXC_INFO_PARAM) {
3909 #endif
3910   UINT64 res1 = 0xbaddbaddbaddbaddull;
3911   UINT128 x1, y1;
3912
3913 #if DECIMAL_CALL_BY_REFERENCE
3914   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3915   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3916   bid64qqq_fma (&res1, &x1, &y1, pz
3917                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3918                 _EXC_INFO_ARG);
3919 #else
3920   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3921   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3922   res1 = bid64qqq_fma (x1, y1, z
3923                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3924                        _EXC_INFO_ARG);
3925 #endif
3926   BID_RETURN (res1);
3927 }
3928
3929
3930 #if DECIMAL_CALL_BY_REFERENCE
3931 void
3932 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3933               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3934               _EXC_INFO_PARAM) {
3935   UINT64 x = *px, z = *pz;
3936 #if !DECIMAL_GLOBAL_ROUNDING
3937   unsigned int rnd_mode = *prnd_mode;
3938 #endif
3939 #else
3940 UINT64
3941 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3942               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3943               _EXC_INFO_PARAM) {
3944 #endif
3945   UINT64 res1 = 0xbaddbaddbaddbaddull;
3946   UINT128 x1, z1;
3947
3948 #if DECIMAL_CALL_BY_REFERENCE
3949   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3950   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3951   bid64qqq_fma (&res1, &x1, py, &z1
3952                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3953                 _EXC_INFO_ARG);
3954 #else
3955   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3956   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3957   res1 = bid64qqq_fma (x1, y, z1
3958                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3959                        _EXC_INFO_ARG);
3960 #endif
3961   BID_RETURN (res1);
3962 }
3963
3964
3965 #if DECIMAL_CALL_BY_REFERENCE
3966 void
3967 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3968               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3969               _EXC_INFO_PARAM) {
3970   UINT64 x = *px;
3971 #if !DECIMAL_GLOBAL_ROUNDING
3972   unsigned int rnd_mode = *prnd_mode;
3973 #endif
3974 #else
3975 UINT64
3976 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3977               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3978               _EXC_INFO_PARAM) {
3979 #endif
3980   UINT64 res1 = 0xbaddbaddbaddbaddull;
3981   UINT128 x1;
3982
3983 #if DECIMAL_CALL_BY_REFERENCE
3984   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3985   bid64qqq_fma (&res1, &x1, py, pz
3986                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3987                 _EXC_INFO_ARG);
3988 #else
3989   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3990   res1 = bid64qqq_fma (x1, y, z
3991                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3992                        _EXC_INFO_ARG);
3993 #endif
3994   BID_RETURN (res1);
3995 }
3996
3997
3998 #if DECIMAL_CALL_BY_REFERENCE
3999 void
4000 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4001               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4002               _EXC_INFO_PARAM) {
4003   UINT64 y = *py, z = *pz;
4004 #if !DECIMAL_GLOBAL_ROUNDING
4005   unsigned int rnd_mode = *prnd_mode;
4006 #endif
4007 #else
4008 UINT64
4009 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4010               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4011               _EXC_INFO_PARAM) {
4012 #endif
4013   UINT64 res1 = 0xbaddbaddbaddbaddull;
4014   UINT128 y1, z1;
4015
4016 #if DECIMAL_CALL_BY_REFERENCE
4017   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4018   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4019   bid64qqq_fma (&res1, px, &y1, &z1
4020                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4021                 _EXC_INFO_ARG);
4022 #else
4023   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4024   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4025   res1 = bid64qqq_fma (x, y1, z1
4026                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4027                        _EXC_INFO_ARG);
4028 #endif
4029   BID_RETURN (res1);
4030 }
4031
4032
4033 #if DECIMAL_CALL_BY_REFERENCE
4034 void
4035 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4036               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4037               _EXC_INFO_PARAM) {
4038   UINT64 y = *py;
4039 #if !DECIMAL_GLOBAL_ROUNDING
4040   unsigned int rnd_mode = *prnd_mode;
4041 #endif
4042 #else
4043 UINT64
4044 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4045               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4046               _EXC_INFO_PARAM) {
4047 #endif
4048   UINT64 res1 = 0xbaddbaddbaddbaddull;
4049   UINT128 y1;
4050
4051 #if DECIMAL_CALL_BY_REFERENCE
4052   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4053   bid64qqq_fma (&res1, px, &y1, pz
4054                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4055                 _EXC_INFO_ARG);
4056 #else
4057   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4058   res1 = bid64qqq_fma (x, y1, z
4059                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4060                        _EXC_INFO_ARG);
4061 #endif
4062   BID_RETURN (res1);
4063 }
4064
4065
4066 #if DECIMAL_CALL_BY_REFERENCE
4067 void
4068 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4069               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4070               _EXC_INFO_PARAM) {
4071   UINT64 z = *pz;
4072 #if !DECIMAL_GLOBAL_ROUNDING
4073   unsigned int rnd_mode = *prnd_mode;
4074 #endif
4075 #else
4076 UINT64
4077 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4078               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4079               _EXC_INFO_PARAM) {
4080 #endif
4081   UINT64 res1 = 0xbaddbaddbaddbaddull;
4082   UINT128 z1;
4083
4084 #if DECIMAL_CALL_BY_REFERENCE
4085   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4086   bid64qqq_fma (&res1, px, py, &z1
4087                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4088                 _EXC_INFO_ARG);
4089 #else
4090   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4091   res1 = bid64qqq_fma (x, y, z1
4092                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4093                        _EXC_INFO_ARG);
4094 #endif
4095   BID_RETURN (res1);
4096 }
4097
4098
4099 #if DECIMAL_CALL_BY_REFERENCE
4100 void
4101 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4102               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4103               _EXC_INFO_PARAM) {
4104   UINT128 x = *px, y = *py, z = *pz;
4105 #if !DECIMAL_GLOBAL_ROUNDING
4106   unsigned int rnd_mode = *prnd_mode;
4107 #endif
4108 #else
4109 UINT64
4110 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4111               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4112               _EXC_INFO_PARAM) {
4113 #endif
4114   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4115     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4116   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4117     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4118   int incr_exp;
4119   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4120   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4121   UINT64 res1 = 0xbaddbaddbaddbaddull;
4122   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4123   UINT64 sign;
4124   UINT64 exp;
4125   int unbexp;
4126   UINT128 C;
4127   BID_UI64DOUBLE tmp;
4128   int nr_bits;
4129   int q, x0;
4130   int scale;
4131   int lt_half_ulp = 0, eq_half_ulp = 0;
4132
4133   // Note: for rounding modes other than RN or RA, the result can be obtained
4134   // by rounding first to BID128 and then to BID64
4135
4136   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4137   *pfpsf = 0;
4138
4139 #if DECIMAL_CALL_BY_REFERENCE
4140   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4141                   &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4142                   &res, &x, &y, &z
4143                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4144                   _EXC_INFO_ARG);
4145 #else
4146   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4147                         &is_inexact_lt_midpoint0,
4148                         &is_inexact_gt_midpoint0, x, y,
4149                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4150                         _EXC_INFO_ARG);
4151 #endif
4152
4153   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4154       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4155       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4156       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4157 #if DECIMAL_CALL_BY_REFERENCE
4158     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4159 #else
4160     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161 #endif
4162     // determine the unbiased exponent of the result
4163     unbexp = ((res1 >> 53) & 0x3ff) - 398;
4164
4165     // if subnormal, res1  must have exp = -398
4166     // if tiny and inexact set underflow and inexact status flags
4167     if (!((res1 & MASK_NAN) == MASK_NAN) &&     // res1 not NaN
4168         (unbexp == -398)
4169         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4170         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4171             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4172       // set the inexact flag and the underflow flag
4173       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4174     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4175                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4176       // set the inexact flag and the underflow flag
4177       *pfpsf |= INEXACT_EXCEPTION;
4178     }
4179
4180     *pfpsf |= save_fpsf;
4181     BID_RETURN (res1);
4182   } // else continue, and use rounding to nearest to round to 16 digits
4183
4184   // at this point the result is rounded to nearest (even or away) to 34 digits
4185   // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4186   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4187   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4188   unbexp = (exp >> 49) - 6176;
4189   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4190   C.w[0] = res.w[LOW_128W];
4191
4192   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||       // result is zero
4193       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4194       // clear under/overflow
4195 #if DECIMAL_CALL_BY_REFERENCE
4196     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4197 #else
4198     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199 #endif
4200     *pfpsf |= save_fpsf;
4201     BID_RETURN (res1);
4202   } // else continue
4203
4204   // -398 - 34 <= unbexp <= 369 + 15
4205   if (rnd_mode == ROUNDING_TIES_AWAY) {
4206     // apply correction, if needed, to make the result rounded to nearest-even
4207     if (is_midpoint_gt_even) {
4208       // res = res - 1
4209       res1--; // res1 is now even
4210     } // else the result is already correctly rounded to nearest-even
4211   }
4212   // at this point the result is finite, non-zero canonical normal or subnormal,
4213   // and in most cases overflow or underflow will not occur
4214
4215   // determine the number of digits q in the result
4216   // q = nr. of decimal digits in x
4217   // determine first the nr. of bits in x
4218   if (C.w[1] == 0) {
4219     if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4220       // split the 64-bit value in two 32-bit halves to avoid rounding errors
4221       if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4222         tmp.d = (double) (C.w[0] >> 32); // exact conversion
4223         nr_bits =
4224           33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4225       } else { // x < 2^32
4226         tmp.d = (double) (C.w[0]); // exact conversion
4227         nr_bits =
4228           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4229       }
4230     } else { // if x < 2^53
4231       tmp.d = (double) C.w[0]; // exact conversion
4232       nr_bits =
4233         1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4234     }
4235   } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4236     tmp.d = (double) C.w[1]; // exact conversion
4237     nr_bits =
4238       65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4239   }
4240   q = nr_digits[nr_bits - 1].digits;
4241   if (q == 0) {
4242     q = nr_digits[nr_bits - 1].digits1;
4243     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4244         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4245          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4246       q++;
4247   }
4248   // if q > 16, round to nearest even to 16 digits (but for underflow it may
4249   // have to be truncated even more)
4250   if (q > 16) {
4251     x0 = q - 16;
4252     if (q <= 18) {
4253       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4254                     &is_midpoint_lt_even, &is_midpoint_gt_even,
4255                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4256     } else { // 19 <= q <= 34
4257       round128_19_38 (q, x0, C, &res128, &incr_exp,
4258                       &is_midpoint_lt_even, &is_midpoint_gt_even,
4259                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4260       res1 = res128.w[0]; // the result fits in 64 bits
4261     }
4262     unbexp = unbexp + x0;
4263     if (incr_exp)
4264       unbexp++;
4265     q = 16; // need to set in case denormalization is necessary
4266   } else {
4267     // the result does not require a second rounding (and it must have
4268     // been exact in the first rounding, since q <= 16)
4269     res1 = C.w[0];
4270   }
4271
4272   // avoid a double rounding error
4273   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4274       is_midpoint_lt_even) { // double rounding error upward
4275     // res = res - 1
4276     res1--; // res1 becomes odd
4277     is_midpoint_lt_even = 0;
4278     is_inexact_lt_midpoint = 1;
4279     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4280       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4281       unbexp--;
4282     }
4283   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4284       is_midpoint_gt_even) { // double rounding error downward
4285     // res = res + 1
4286     res1++; // res1 becomes odd (so it cannot be 10^16)
4287     is_midpoint_gt_even = 0;
4288     is_inexact_gt_midpoint = 1;
4289   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4290              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4291     // if this second rounding was exact the result may still be
4292     // inexact because of the first rounding
4293     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4294       is_inexact_gt_midpoint = 1;
4295     }
4296     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4297       is_inexact_lt_midpoint = 1;
4298     }
4299   } else if (is_midpoint_gt_even &&
4300              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4301     // pulled up to a midpoint
4302     is_inexact_lt_midpoint = 1;
4303     is_inexact_gt_midpoint = 0;
4304     is_midpoint_lt_even = 0;
4305     is_midpoint_gt_even = 0;
4306   } else if (is_midpoint_lt_even &&
4307              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4308     // pulled down to a midpoint
4309     is_inexact_lt_midpoint = 0;
4310     is_inexact_gt_midpoint = 1;
4311     is_midpoint_lt_even = 0;
4312     is_midpoint_gt_even = 0;
4313   } else {
4314     ;
4315   }
4316   // this is the result rounded correctly to nearest even, with unbounded exp.
4317
4318   // check for overflow
4319   if (q + unbexp > P16 + expmax16) {
4320     res1 = sign | 0x7800000000000000ull;
4321     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4322     *pfpsf |= save_fpsf;
4323     BID_RETURN (res1)
4324   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4325     // not overflow; the result must be exact, and we can multiply res1 by
4326     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4327     scale = unbexp - expmax16;
4328     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4329     unbexp = expmax16; // unbexp - scale
4330   } else {
4331     ; // continue
4332   }
4333
4334   // check for underflow
4335   if (q + unbexp < P16 + expmin16) {
4336     if (unbexp < expmin16) {
4337       // we must truncate more of res
4338       x0 = expmin16 - unbexp; // x0 >= 1
4339       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4340       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4341       is_midpoint_lt_even0 = is_midpoint_lt_even;
4342       is_midpoint_gt_even0 = is_midpoint_gt_even;
4343       is_inexact_lt_midpoint = 0;
4344       is_inexact_gt_midpoint = 0;
4345       is_midpoint_lt_even = 0;
4346       is_midpoint_gt_even = 0;
4347       // the number of decimal digits in res1 is q
4348       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4349         // 2 <= q <= 16, 1 <= x0 <= 15
4350         round64_2_18 (q, x0, res1, &res1, &incr_exp,
4351                       &is_midpoint_lt_even, &is_midpoint_gt_even,
4352                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4353         if (incr_exp) {
4354           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4355           res1 = ten2k64[q - x0];
4356         }
4357         unbexp = unbexp + x0; // expmin16
4358       } else if (x0 == q) {
4359         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4360         // determine relationship with 1/2 ulp
4361         // q <= 16
4362         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4363           lt_half_ulp = 1;
4364           is_inexact_lt_midpoint = 1;
4365         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4366           eq_half_ulp = 1;
4367           is_midpoint_gt_even = 1;
4368         } else { // > 1/2 ulp
4369           // gt_half_ulp = 1;
4370           is_inexact_gt_midpoint = 1;
4371         }
4372         if (lt_half_ulp || eq_half_ulp) {
4373           // res = +0.0 * 10^expmin16
4374           res1 = 0x0000000000000000ull;
4375         } else { // if (gt_half_ulp)
4376           // res = +1 * 10^expmin16
4377           res1 = 0x0000000000000001ull;
4378         }
4379         unbexp = expmin16;
4380       } else { // if (x0 > q)
4381         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4382         res1 = 0x0000000000000000ull;
4383         unbexp = expmin16;
4384         is_inexact_lt_midpoint = 1;
4385       }
4386       // avoid a double rounding error
4387       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4388           is_midpoint_lt_even) { // double rounding error upward
4389         // res = res - 1
4390         res1--; // res1 becomes odd
4391         is_midpoint_lt_even = 0;
4392         is_inexact_lt_midpoint = 1;
4393       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4394           is_midpoint_gt_even) { // double rounding error downward
4395         // res = res + 1
4396         res1++; // res1 becomes odd
4397         is_midpoint_gt_even = 0;
4398         is_inexact_gt_midpoint = 1;
4399       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4400                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4401         // if this rounding was exact the result may still be
4402         // inexact because of the previous roundings
4403         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4404           is_inexact_gt_midpoint = 1;
4405         }
4406         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4407           is_inexact_lt_midpoint = 1;
4408         }
4409       } else if (is_midpoint_gt_even &&
4410                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4411         // pulled up to a midpoint
4412         is_inexact_lt_midpoint = 1;
4413         is_inexact_gt_midpoint = 0;
4414         is_midpoint_lt_even = 0;
4415         is_midpoint_gt_even = 0;
4416       } else if (is_midpoint_lt_even &&
4417                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4418         // pulled down to a midpoint
4419         is_inexact_lt_midpoint = 0;
4420         is_inexact_gt_midpoint = 1;
4421         is_midpoint_lt_even = 0;
4422         is_midpoint_gt_even = 0;
4423       } else {
4424         ;
4425       }
4426     }
4427     // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4428     // and the result is tiny and exact
4429
4430     // check for inexact result
4431     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4432         is_midpoint_lt_even || is_midpoint_gt_even ||
4433         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4434         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4435       // set the inexact flag and the underflow flag
4436       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4437     }
4438   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4439              is_midpoint_lt_even || is_midpoint_gt_even) {
4440     *pfpsf |= INEXACT_EXCEPTION;
4441   }
4442   // this is the result rounded correctly to nearest, with bounded exponent
4443
4444   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4445     // res = res + 1
4446     res1++; // res1 is now odd
4447   } // else the result is already correct
4448
4449   // assemble the result
4450   if (res1 < 0x0020000000000000ull) { // res < 2^53
4451     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4452   } else { // res1 >= 2^53
4453     res1 = sign | MASK_STEERING_BITS |
4454       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4455   }
4456   *pfpsf |= save_fpsf;
4457   BID_RETURN (res1);
4458 }