soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997-2019 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #ifndef SOFT_FP_OP_2_H
  34 #define SOFT_FP_OP_2_H  1
  35
  36 #define _FP_FRAC_DECL_2(X)                              \
  37   _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
  38 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
  39 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
  40 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  41 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  42 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
  43
  44 #define _FP_FRAC_SLL_2(X, N)                                            \
  45   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  46           ? ({                                                          \
  47               if (__builtin_constant_p (N) && (N) == 1)                 \
  48                 {                                                       \
  49                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
  50                   X##_f0 += X##_f0;                                     \
  51                 }                                                       \
  52               else                                                      \
  53                 {                                                       \
  54                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  55                   X##_f0 <<= (N);                                       \
  56                 }                                                       \
  57               0;                                                        \
  58             })                                                          \
  59           : ({                                                          \
  60               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
  61               X##_f0 = 0;                                               \
  62             }))
  63
  64
  65 #define _FP_FRAC_SRL_2(X, N)                                            \
  66   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  67           ? ({                                                          \
  68               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  69               X##_f1 >>= (N);                                           \
  70             })                                                          \
  71           : ({                                                          \
  72               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
  73               X##_f1 = 0;                                               \
  74             }))
  75
  76 /* Right shift with sticky-lsb.  */
  77 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
  78   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  79           ? ({                                                          \
  80               S = (__builtin_constant_p (N) && (N) == 1                 \
  81                    ? X##_f0 & 1                                         \
  82                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
  83               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  84               X##_f1 >>= (N);                                           \
  85             })                                                          \
  86           : ({                                                          \
  87               S = ((((N) == _FP_W_TYPE_SIZE                             \
  88                      ? 0                                                \
  89                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
  90                     | X##_f0) != 0);                                    \
  91               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
  92               X##_f1 = 0;                                               \
  93             }))
  94
  95 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
  96   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  97           ? ({                                                          \
  98               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
  99                         | (__builtin_constant_p (N) && (N) == 1         \
 100                            ? X##_f0 & 1                                 \
 101                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
 102               X##_f1 >>= (N);                                           \
 103             })                                                          \
 104           : ({                                                          \
 105               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
 106                         | ((((N) == _FP_W_TYPE_SIZE                     \
 107                              ? 0                                        \
 108                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
 109                             | X##_f0) != 0));                           \
 110               X##_f1 = 0;                                               \
 111             }))
 112
 113 #define _FP_FRAC_ADDI_2(X, I)   \
 114   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
 115
 116 #define _FP_FRAC_ADD_2(R, X, Y) \
 117   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 118
 119 #define _FP_FRAC_SUB_2(R, X, Y) \
 120   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 121
 122 #define _FP_FRAC_DEC_2(X, Y)    \
 123   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
 124
 125 #define _FP_FRAC_CLZ_2(R, X)                    \
 126   do                                            \
 127     {                                           \
 128       if (X##_f1)                               \
 129         __FP_CLZ ((R), X##_f1);                 \
 130       else                                      \
 131         {                                       \
 132           __FP_CLZ ((R), X##_f0);               \
 133           (R) += _FP_W_TYPE_SIZE;               \
 134         }                                       \
 135     }                                           \
 136   while (0)
 137
 138 /* Predicates.  */
 139 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
 140 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 141 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
 142 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
 143 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
 144   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
 145 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 146 #define _FP_FRAC_GT_2(X, Y)     \
 147   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 148 #define _FP_FRAC_GE_2(X, Y)     \
 149   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 150
 151 #define _FP_ZEROFRAC_2          0, 0
 152 #define _FP_MINFRAC_2           0, 1
 153 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
 154
 155 /* Internals.  */
 156
 157 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
 158
 159 #define __FP_CLZ_2(R, xh, xl)                   \
 160   do                                            \
 161     {                                           \
 162       if (xh)                                   \
 163         __FP_CLZ ((R), xh);                     \
 164       else                                      \
 165         {                                       \
 166           __FP_CLZ ((R), xl);                   \
 167           (R) += _FP_W_TYPE_SIZE;               \
 168         }                                       \
 169     }                                           \
 170   while (0)
 171
 172 #if 0
 173
 174 # ifndef __FP_FRAC_ADDI_2
 175 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
 176   (xh += ((xl += i) < i))
 177 # endif
 178 # ifndef __FP_FRAC_ADD_2
 179 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
 180   (rh = xh + yh + ((rl = xl + yl) < xl))
 181 # endif
 182 # ifndef __FP_FRAC_SUB_2
 183 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
 184   (rh = xh - yh - ((rl = xl - yl) > xl))
 185 # endif
 186 # ifndef __FP_FRAC_DEC_2
 187 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)               \
 188   do                                                    \
 189     {                                                   \
 190       UWtype __FP_FRAC_DEC_2_t = xl;                    \
 191       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);      \
 192     }                                                   \
 193   while (0)
 194 # endif
 195
 196 #else
 197
 198 # undef __FP_FRAC_ADDI_2
 199 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
 200 # undef __FP_FRAC_ADD_2
 201 # define __FP_FRAC_ADD_2                add_ssaaaa
 202 # undef __FP_FRAC_SUB_2
 203 # define __FP_FRAC_SUB_2                sub_ddmmss
 204 # undef __FP_FRAC_DEC_2
 205 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
 206   sub_ddmmss (xh, xl, xh, xl, yh, yl)
 207
 208 #endif
 209
 210 /* Unpack the raw bits of a native fp value.  Do not classify or
 211    normalize the data.  */
 212
 213 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 214   do                                                    \
 215     {                                                   \
 216       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;        \
 217       _FP_UNPACK_RAW_2_flo.flt = (val);                 \
 218                                                         \
 219       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;         \
 220       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;         \
 221       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;           \
 222       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;          \
 223     }                                                   \
 224   while (0)
 225
 226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 227   do                                                    \
 228     {                                                   \
 229       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo      \
 230         = (union _FP_UNION_##fs *) (val);               \
 231                                                         \
 232       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;      \
 233       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;      \
 234       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;        \
 235       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;       \
 236     }                                                   \
 237   while (0)
 238
 239
 240 /* Repack the raw bits of a native fp value.  */
 241
 242 #define _FP_PACK_RAW_2(fs, val, X)              \
 243   do                                            \
 244     {                                           \
 245       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;  \
 246                                                 \
 247       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;   \
 248       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;   \
 249       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;    \
 250       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;    \
 251                                                 \
 252       (val) = _FP_PACK_RAW_2_flo.flt;           \
 253     }                                           \
 254   while (0)
 255
 256 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 257   do                                                    \
 258     {                                                   \
 259       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo        \
 260         = (union _FP_UNION_##fs *) (val);               \
 261                                                         \
 262       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;        \
 263       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;        \
 264       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;         \
 265       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;         \
 266     }                                                   \
 267   while (0)
 268
 269
 270 /* Multiplication algorithms: */
 271
 272 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 273
 274 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
 275   do                                                                    \
 276     {                                                                   \
 277       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);                       \
 278       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);                       \
 279                                                                         \
 280       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),             \
 281             X##_f0, Y##_f0);                                            \
 282       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,   \
 283             X##_f0, Y##_f1);                                            \
 284       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,   \
 285             X##_f1, Y##_f0);                                            \
 286       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),             \
 287             X##_f1, Y##_f1);                                            \
 288                                                                         \
 289       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 290                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 291                        _FP_MUL_MEAT_DW_2_wide_b_f1,                     \
 292                        _FP_MUL_MEAT_DW_2_wide_b_f0,                     \
 293                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 294                        _FP_FRAC_WORD_4 (R, 1));                         \
 295       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 296                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 297                        _FP_MUL_MEAT_DW_2_wide_c_f1,                     \
 298                        _FP_MUL_MEAT_DW_2_wide_c_f0,                     \
 299                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 300                        _FP_FRAC_WORD_4 (R, 1));                         \
 301     }                                                                   \
 302   while (0)
 303
 304 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 305   do                                                                    \
 306     {                                                                   \
 307       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);                          \
 308                                                                         \
 309       _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z,       \
 310                               X, Y, doit);                              \
 311                                                                         \
 312       /* Normalize since we know where the msb of the multiplicands     \
 313          were (bit B), we know that the msb of the of the product is    \
 314          at either 2B or 2B-1.  */                                      \
 315       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1,             \
 316                       2*(wfracbits));                                   \
 317       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);              \
 318       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);              \
 319     }                                                                   \
 320   while (0)
 321
 322 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 323    Do only 3 multiplications instead of four. This one is for machines
 324    where multiplication is much more expensive than subtraction.  */
 325
 326 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
 327   do                                                                    \
 328     {                                                                   \
 329       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);                  \
 330       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);                  \
 331       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;                         \
 332       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;                               \
 333       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;                               \
 334                                                                         \
 335       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;               \
 336       _FP_MUL_MEAT_DW_2_wide_3mul_c1                                    \
 337         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;                    \
 338       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;               \
 339       _FP_MUL_MEAT_DW_2_wide_3mul_c2                                    \
 340         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;                    \
 341       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),      \
 342             X##_f0, Y##_f0);                                            \
 343       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),             \
 344             _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,                           \
 345             _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                          \
 346       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                           \
 347             _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);          \
 348                                                                         \
 349       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0                                  \
 350         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;                             \
 351       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1                                  \
 352         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;                             \
 353       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 354                        _FP_FRAC_WORD_4 (R, 1),                          \
 355                        (_FP_MUL_MEAT_DW_2_wide_3mul_c1                  \
 356                         & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,           \
 357                        _FP_MUL_MEAT_DW_2_wide_3mul_d,                   \
 358                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
 359       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 360                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f0);              \
 361       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 362                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);              \
 363       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 364                        _FP_FRAC_WORD_4 (R, 1),                          \
 365                        0, _FP_MUL_MEAT_DW_2_wide_3mul_d,                \
 366                        _FP_FRAC_WORD_4 (R, 0));                         \
 367       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 368                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 369                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 370                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);               \
 371       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 372                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 373                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,                \
 374                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
 375     }                                                                   \
 376   while (0)
 377
 378 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 379   do                                                                    \
 380     {                                                                   \
 381       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);                     \
 382                                                                         \
 383       _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),                         \
 384                                    _FP_MUL_MEAT_2_wide_3mul_z,          \
 385                                    X, Y, doit);                         \
 386                                                                         \
 387       /* Normalize since we know where the msb of the multiplicands     \
 388          were (bit B), we know that the msb of the of the product is    \
 389          at either 2B or 2B-1.  */                                      \
 390       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,                       \
 391                       (wfracbits)-1, 2*(wfracbits));                    \
 392       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);         \
 393       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);         \
 394     }                                                                   \
 395   while (0)
 396
 397 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
 398   do                                                    \
 399     {                                                   \
 400       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];            \
 401       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];            \
 402       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;              \
 403       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;              \
 404       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;              \
 405       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;              \
 406                                                         \
 407       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,        \
 408                  _FP_MUL_MEAT_DW_2_gmp_y, 2);           \
 409     }                                                   \
 410   while (0)
 411
 412 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 413   do                                                                    \
 414     {                                                                   \
 415       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);                           \
 416                                                                         \
 417       _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y);  \
 418                                                                         \
 419       /* Normalize since we know where the msb of the multiplicands     \
 420          were (bit B), we know that the msb of the of the product is    \
 421          at either 2B or 2B-1.  */                                      \
 422       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1,              \
 423                       2*(wfracbits));                                   \
 424       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];                               \
 425       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];                               \
 426     }                                                                   \
 427   while (0)
 428
 429 /* Do at most 120x120=240 bits multiplication using double floating
 430    point multiplication.  This is useful if floating point
 431    multiplication has much bigger throughput than integer multiply.
 432    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 433    between 106 and 120 only.
 434    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 435    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 436    towards zero,  RESETFE should optionally reset it back.  */
 437
 438 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
 439   do                                                                    \
 440     {                                                                   \
 441       static const double _const[] =                                    \
 442         {                                                               \
 443           /* 2^-24 */ 5.9604644775390625e-08,                           \
 444           /* 2^-48 */ 3.5527136788005009e-15,                           \
 445           /* 2^-72 */ 2.1175823681357508e-22,                           \
 446           /* 2^-96 */ 1.2621774483536189e-29,                           \
 447           /* 2^28 */ 2.68435456e+08,                                    \
 448           /* 2^4 */ 1.600000e+01,                                       \
 449           /* 2^-20 */ 9.5367431640625e-07,                              \
 450           /* 2^-44 */ 5.6843418860808015e-14,                           \
 451           /* 2^-68 */ 3.3881317890172014e-21,                           \
 452           /* 2^-92 */ 2.0194839173657902e-28,                           \
 453           /* 2^-116 */ 1.2037062152420224e-35                           \
 454         };                                                              \
 455       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
 456         _g240, _h240, _i240, _j240, _k240;                              \
 457       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
 458                                        _p240, _q240, _r240, _s240;      \
 459       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
 460                                                                         \
 461       _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,      \
 462                          "wfracbits out of range");                     \
 463                                                                         \
 464       setfetz;                                                          \
 465                                                                         \
 466       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
 467       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
 468       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
 469       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
 470       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
 471       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
 472       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
 473       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
 474       _a240 = (double) (long) (X##_f1 >> 32);                           \
 475       _f240 = (double) (long) (Y##_f1 >> 32);                           \
 476       _e240 *= _const[3];                                               \
 477       _j240 *= _const[3];                                               \
 478       _d240 *= _const[2];                                               \
 479       _i240 *= _const[2];                                               \
 480       _c240 *= _const[1];                                               \
 481       _h240 *= _const[1];                                               \
 482       _b240 *= _const[0];                                               \
 483       _g240 *= _const[0];                                               \
 484       _s240.d =                                                       _e240*_j240; \
 485       _r240.d =                                         _d240*_j240 + _e240*_i240; \
 486       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
 487       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
 488       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
 489       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
 490       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
 491       _l240.d = _a240*_g240 + _b240*_f240;                              \
 492       _k240 =   _a240*_f240;                                            \
 493       _r240.d += _s240.d;                                               \
 494       _q240.d += _r240.d;                                               \
 495       _p240.d += _q240.d;                                               \
 496       _o240.d += _p240.d;                                               \
 497       _n240.d += _o240.d;                                               \
 498       _m240.d += _n240.d;                                               \
 499       _l240.d += _m240.d;                                               \
 500       _k240 += _l240.d;                                                 \
 501       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
 502       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
 503       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
 504       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
 505       _o240.d += _const[7];                                             \
 506       _n240.d += _const[6];                                             \
 507       _m240.d += _const[5];                                             \
 508       _l240.d += _const[4];                                             \
 509       if (_s240.d != 0.0)                                               \
 510         _y240 = 1;                                                      \
 511       if (_r240.d != 0.0)                                               \
 512         _y240 = 1;                                                      \
 513       if (_q240.d != 0.0)                                               \
 514         _y240 = 1;                                                      \
 515       if (_p240.d != 0.0)                                               \
 516         _y240 = 1;                                                      \
 517       _t240 = (DItype) _k240;                                           \
 518       _u240 = _l240.i;                                                  \
 519       _v240 = _m240.i;                                                  \
 520       _w240 = _n240.i;                                                  \
 521       _x240 = _o240.i;                                                  \
 522       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
 523                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
 524       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
 525                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
 526                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
 527                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
 528                 | _y240);                                               \
 529       resetfe;                                                          \
 530     }                                                                   \
 531   while (0)
 532
 533 /* Division algorithms: */
 534
 535 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 536   do                                                                    \
 537     {                                                                   \
 538       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;                              \
 539       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;                              \
 540       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;                              \
 541       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;                              \
 542       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;                              \
 543       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;                              \
 544       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;                              \
 545       if (_FP_FRAC_GE_2 (X, Y))                                         \
 546         {                                                               \
 547           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;                       \
 548           _FP_DIV_MEAT_2_udiv_n_f1                                      \
 549             = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;            \
 550           _FP_DIV_MEAT_2_udiv_n_f0                                      \
 551             = X##_f0 << (_FP_W_TYPE_SIZE - 1);                          \
 552         }                                                               \
 553       else                                                              \
 554         {                                                               \
 555           R##_e--;                                                      \
 556           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;                            \
 557           _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;                            \
 558           _FP_DIV_MEAT_2_udiv_n_f0 = 0;                                 \
 559         }                                                               \
 560                                                                         \
 561       /* Normalize, i.e. make the most significant bit of the           \
 562          denominator set.  */                                           \
 563       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
 564                                                                         \
 565       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,                     \
 566                   _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,   \
 567                   Y##_f1);                                              \
 568       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,    \
 569                  R##_f1, Y##_f0);                                       \
 570       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;              \
 571       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
 572         {                                                               \
 573           R##_f1--;                                                     \
 574           _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                     \
 575                           _FP_DIV_MEAT_2_udiv_r);                       \
 576           if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                  \
 577               && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                  \
 578                                 _FP_DIV_MEAT_2_udiv_r))                 \
 579             {                                                           \
 580               R##_f1--;                                                 \
 581               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 582                               _FP_DIV_MEAT_2_udiv_r);                   \
 583             }                                                           \
 584         }                                                               \
 585       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);    \
 586                                                                         \
 587       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)                           \
 588         {                                                               \
 589           /* This is a special case, not an optimization                \
 590              (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).  \
 591              As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,          \
 592              R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we  \
 593              know what kind of bits it is (sticky, guard, round),       \
 594              we don't care.  We also don't care what the reminder is,   \
 595              because the guard bit will be set anyway.  -jj */          \
 596           R##_f0 = -1;                                                  \
 597         }                                                               \
 598       else                                                              \
 599         {                                                               \
 600           udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,                 \
 601                       _FP_DIV_MEAT_2_udiv_r_f1,                         \
 602                       _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);                \
 603           umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,                          \
 604                      _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);         \
 605           _FP_DIV_MEAT_2_udiv_r_f0 = 0;                                 \
 606           if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                     \
 607                              _FP_DIV_MEAT_2_udiv_r))                    \
 608             {                                                           \
 609               R##_f0--;                                                 \
 610               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 611                               _FP_DIV_MEAT_2_udiv_r);                   \
 612               if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)              \
 613                   && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,              \
 614                                     _FP_DIV_MEAT_2_udiv_r))             \
 615                 {                                                       \
 616                   R##_f0--;                                             \
 617                   _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,             \
 618                                   _FP_DIV_MEAT_2_udiv_r);               \
 619                 }                                                       \
 620             }                                                           \
 621           if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,                    \
 622                               _FP_DIV_MEAT_2_udiv_m))                   \
 623             R##_f0 |= _FP_WORK_STICKY;                                  \
 624         }                                                               \
 625     }                                                                   \
 626   while (0)
 627
 628
 629 /* Square root algorithms:
 630    We have just one right now, maybe Newton approximation
 631    should be added for those machines where division is fast.  */
 632
 633 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
 634   do                                                            \
 635     {                                                           \
 636       while (q)                                                 \
 637         {                                                       \
 638           T##_f1 = S##_f1 + (q);                                \
 639           if (T##_f1 <= X##_f1)                                 \
 640             {                                                   \
 641               S##_f1 = T##_f1 + (q);                            \
 642               X##_f1 -= T##_f1;                                 \
 643               R##_f1 += (q);                                    \
 644             }                                                   \
 645           _FP_FRAC_SLL_2 (X, 1);                                \
 646           (q) >>= 1;                                            \
 647         }                                                       \
 648       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);            \
 649       while ((q) != _FP_WORK_ROUND)                             \
 650         {                                                       \
 651           T##_f0 = S##_f0 + (q);                                \
 652           T##_f1 = S##_f1;                                      \
 653           if (T##_f1 < X##_f1                                   \
 654               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
 655             {                                                   \
 656               S##_f0 = T##_f0 + (q);                            \
 657               S##_f1 += (T##_f0 > S##_f0);                      \
 658               _FP_FRAC_DEC_2 (X, T);                            \
 659               R##_f0 += (q);                                    \
 660             }                                                   \
 661           _FP_FRAC_SLL_2 (X, 1);                                \
 662           (q) >>= 1;                                            \
 663         }                                                       \
 664       if (X##_f0 | X##_f1)                                      \
 665         {                                                       \
 666           if (S##_f1 < X##_f1                                   \
 667               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
 668             R##_f0 |= _FP_WORK_ROUND;                           \
 669           R##_f0 |= _FP_WORK_STICKY;                            \
 670         }                                                       \
 671     }                                                           \
 672   while (0)
 673
 674
 675 /* Assembly/disassembly for converting to/from integral types.
 676    No shifting or overflow handled here.  */
 677
 678 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 679   (void) (((rsize) <= _FP_W_TYPE_SIZE)          \
 680           ? ({ (r) = X##_f0; })                 \
 681           : ({                                  \
 682               (r) = X##_f1;                     \
 683               (r) <<= _FP_W_TYPE_SIZE;          \
 684               (r) += X##_f0;                    \
 685             }))
 686
 687 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)     \
 688   do                                            \
 689     {                                           \
 690       X##_f0 = (r);                             \
 691       X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE      \
 692                 ? 0                             \
 693                 : (r) >> _FP_W_TYPE_SIZE);      \
 694     }                                           \
 695   while (0)
 696
 697 /* Convert FP values between word sizes.  */
 698
 699 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 700
 701 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
 702
 703 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)
 704
 705 #endif /* !SOFT_FP_OP_2_H */